3093 lines
111 KiB
Python
3093 lines
111 KiB
Python
from __future__ import annotations
|
|
|
|
import warnings
|
|
from abc import ABC
|
|
from copy import copy, deepcopy
|
|
from datetime import datetime, timedelta
|
|
from textwrap import dedent
|
|
from typing import Generic
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pytest
|
|
import pytz
|
|
|
|
from xarray import DataArray, Dataset, IndexVariable, Variable, set_options
|
|
from xarray.core import dtypes, duck_array_ops, indexing
|
|
from xarray.core.common import full_like, ones_like, zeros_like
|
|
from xarray.core.indexing import (
|
|
BasicIndexer,
|
|
CopyOnWriteArray,
|
|
DaskIndexingAdapter,
|
|
LazilyIndexedArray,
|
|
MemoryCachedArray,
|
|
NumpyIndexingAdapter,
|
|
OuterIndexer,
|
|
PandasIndexingAdapter,
|
|
VectorizedIndexer,
|
|
)
|
|
from xarray.core.types import T_DuckArray
|
|
from xarray.core.utils import NDArrayMixin
|
|
from xarray.core.variable import as_compatible_data, as_variable
|
|
from xarray.namedarray.pycompat import array_type
|
|
from xarray.tests import (
|
|
assert_allclose,
|
|
assert_array_equal,
|
|
assert_equal,
|
|
assert_identical,
|
|
assert_no_warnings,
|
|
has_dask_ge_2024_11_0,
|
|
has_pandas_3,
|
|
raise_if_dask_computes,
|
|
requires_bottleneck,
|
|
requires_cupy,
|
|
requires_dask,
|
|
requires_pint,
|
|
requires_sparse,
|
|
source_ndarray,
|
|
)
|
|
from xarray.tests.test_namedarray import NamedArraySubclassobjects
|
|
|
|
dask_array_type = array_type("dask")
|
|
|
|
_PAD_XR_NP_ARGS = [
|
|
[{"x": (2, 1)}, ((2, 1), (0, 0), (0, 0))],
|
|
[{"x": 1}, ((1, 1), (0, 0), (0, 0))],
|
|
[{"y": (0, 3)}, ((0, 0), (0, 3), (0, 0))],
|
|
[{"x": (3, 1), "z": (2, 0)}, ((3, 1), (0, 0), (2, 0))],
|
|
[{"x": (3, 1), "z": 2}, ((3, 1), (0, 0), (2, 2))],
|
|
]
|
|
|
|
|
|
@pytest.fixture
|
|
def var():
|
|
return Variable(dims=list("xyz"), data=np.random.rand(3, 4, 5))
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"data",
|
|
[
|
|
np.array(["a", "bc", "def"], dtype=object),
|
|
np.array(["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]"),
|
|
],
|
|
)
|
|
def test_as_compatible_data_writeable(data):
|
|
pd.set_option("mode.copy_on_write", True)
|
|
# GH8843, ensure writeable arrays for data_vars even with
|
|
# pandas copy-on-write mode
|
|
assert as_compatible_data(data).flags.writeable
|
|
pd.reset_option("mode.copy_on_write")
|
|
|
|
|
|
class VariableSubclassobjects(NamedArraySubclassobjects, ABC):
|
|
@pytest.fixture
|
|
def target(self, data):
|
|
data = 0.5 * np.arange(10).reshape(2, 5)
|
|
return Variable(["x", "y"], data)
|
|
|
|
def test_getitem_dict(self):
|
|
v = self.cls(["x"], np.random.randn(5))
|
|
actual = v[{"x": 0}]
|
|
expected = v[0]
|
|
assert_identical(expected, actual)
|
|
|
|
def test_getitem_1d(self):
|
|
data = np.array([0, 1, 2])
|
|
v = self.cls(["x"], data)
|
|
|
|
v_new = v[dict(x=[0, 1])]
|
|
assert v_new.dims == ("x",)
|
|
assert_array_equal(v_new, data[[0, 1]])
|
|
|
|
v_new = v[dict(x=slice(None))]
|
|
assert v_new.dims == ("x",)
|
|
assert_array_equal(v_new, data)
|
|
|
|
v_new = v[dict(x=Variable("a", [0, 1]))]
|
|
assert v_new.dims == ("a",)
|
|
assert_array_equal(v_new, data[[0, 1]])
|
|
|
|
v_new = v[dict(x=1)]
|
|
assert v_new.dims == ()
|
|
assert_array_equal(v_new, data[1])
|
|
|
|
# tuple argument
|
|
v_new = v[slice(None)]
|
|
assert v_new.dims == ("x",)
|
|
assert_array_equal(v_new, data)
|
|
|
|
def test_getitem_1d_fancy(self):
|
|
v = self.cls(["x"], [0, 1, 2])
|
|
# 1d-variable should be indexable by multi-dimensional Variable
|
|
ind = Variable(("a", "b"), [[0, 1], [0, 1]])
|
|
v_new = v[ind]
|
|
assert v_new.dims == ("a", "b")
|
|
expected = np.array(v._data)[([0, 1], [0, 1]), ...]
|
|
assert_array_equal(v_new, expected)
|
|
|
|
# boolean indexing
|
|
ind = Variable(("x",), [True, False, True])
|
|
v_new = v[ind]
|
|
assert_identical(v[[0, 2]], v_new)
|
|
v_new = v[[True, False, True]]
|
|
assert_identical(v[[0, 2]], v_new)
|
|
|
|
with pytest.raises(IndexError, match=r"Boolean indexer should"):
|
|
ind = Variable(("a",), [True, False, True])
|
|
v[ind]
|
|
|
|
def test_getitem_with_mask(self):
|
|
v = self.cls(["x"], [0, 1, 2])
|
|
assert_identical(v._getitem_with_mask(-1), Variable((), np.nan))
|
|
assert_identical(
|
|
v._getitem_with_mask([0, -1, 1]), self.cls(["x"], [0, np.nan, 1])
|
|
)
|
|
assert_identical(v._getitem_with_mask(slice(2)), self.cls(["x"], [0, 1]))
|
|
assert_identical(
|
|
v._getitem_with_mask([0, -1, 1], fill_value=-99),
|
|
self.cls(["x"], [0, -99, 1]),
|
|
)
|
|
|
|
def test_getitem_with_mask_size_zero(self):
|
|
v = self.cls(["x"], [])
|
|
assert_identical(v._getitem_with_mask(-1), Variable((), np.nan))
|
|
assert_identical(
|
|
v._getitem_with_mask([-1, -1, -1]),
|
|
self.cls(["x"], [np.nan, np.nan, np.nan]),
|
|
)
|
|
|
|
def test_getitem_with_mask_nd_indexer(self):
|
|
v = self.cls(["x"], [0, 1, 2])
|
|
indexer = Variable(("x", "y"), [[0, -1], [-1, 2]])
|
|
assert_identical(v._getitem_with_mask(indexer, fill_value=-1), indexer)
|
|
|
|
def _assertIndexedLikeNDArray(self, variable, expected_value0, expected_dtype=None):
|
|
"""Given a 1-dimensional variable, verify that the variable is indexed
|
|
like a numpy.ndarray.
|
|
"""
|
|
assert variable[0].shape == ()
|
|
assert variable[0].ndim == 0
|
|
assert variable[0].size == 1
|
|
# test identity
|
|
assert variable.equals(variable.copy())
|
|
assert variable.identical(variable.copy())
|
|
# check value is equal for both ndarray and Variable
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore", "In the future, 'NAT == x'")
|
|
np.testing.assert_equal(variable.values[0], expected_value0)
|
|
np.testing.assert_equal(variable[0].values, expected_value0)
|
|
# check type or dtype is consistent for both ndarray and Variable
|
|
if expected_dtype is None:
|
|
# check output type instead of array dtype
|
|
assert type(variable.values[0]) is type(expected_value0)
|
|
assert type(variable[0].values) is type(expected_value0)
|
|
elif expected_dtype is not False:
|
|
assert variable.values[0].dtype == expected_dtype
|
|
assert variable[0].values.dtype == expected_dtype
|
|
|
|
def test_index_0d_int(self):
|
|
for value, dtype in [(0, np.int_), (np.int32(0), np.int32)]:
|
|
x = self.cls(["x"], [value])
|
|
self._assertIndexedLikeNDArray(x, value, dtype)
|
|
|
|
def test_index_0d_float(self):
|
|
for value, dtype in [(0.5, float), (np.float32(0.5), np.float32)]:
|
|
x = self.cls(["x"], [value])
|
|
self._assertIndexedLikeNDArray(x, value, dtype)
|
|
|
|
def test_index_0d_string(self):
|
|
value = "foo"
|
|
dtype = np.dtype("U3")
|
|
x = self.cls(["x"], [value])
|
|
self._assertIndexedLikeNDArray(x, value, dtype)
|
|
|
|
def test_index_0d_datetime(self):
|
|
d = datetime(2000, 1, 1)
|
|
x = self.cls(["x"], [d])
|
|
self._assertIndexedLikeNDArray(x, np.datetime64(d))
|
|
|
|
x = self.cls(["x"], [np.datetime64(d)])
|
|
self._assertIndexedLikeNDArray(x, np.datetime64(d), "datetime64[us]")
|
|
|
|
expected_unit = "us" if has_pandas_3 else "ns"
|
|
x = self.cls(["x"], pd.DatetimeIndex([d]))
|
|
self._assertIndexedLikeNDArray(
|
|
x, np.datetime64(d), f"datetime64[{expected_unit}]"
|
|
)
|
|
|
|
def test_index_0d_timedelta64(self):
|
|
td = timedelta(hours=1)
|
|
# todo: discussion needed
|
|
x = self.cls(["x"], [np.timedelta64(td)])
|
|
self._assertIndexedLikeNDArray(
|
|
x, np.timedelta64(td), np.dtype("timedelta64[us]")
|
|
)
|
|
|
|
x = self.cls(["x"], pd.to_timedelta([td]))
|
|
self._assertIndexedLikeNDArray(x, np.timedelta64(td), "timedelta64[ns]")
|
|
|
|
def test_index_0d_not_a_time(self):
|
|
d = np.datetime64("NaT", "ns")
|
|
x = self.cls(["x"], [d])
|
|
self._assertIndexedLikeNDArray(x, d)
|
|
|
|
def test_index_0d_object(self):
|
|
class HashableItemWrapper:
|
|
def __init__(self, item):
|
|
self.item = item
|
|
|
|
def __eq__(self, other):
|
|
return self.item == other.item
|
|
|
|
def __hash__(self):
|
|
return hash(self.item)
|
|
|
|
def __repr__(self):
|
|
return f"{type(self).__name__}(item={self.item!r})"
|
|
|
|
item = HashableItemWrapper((1, 2, 3))
|
|
x = self.cls("x", [item])
|
|
self._assertIndexedLikeNDArray(x, item, expected_dtype=False)
|
|
|
|
def test_0d_object_array_with_list(self):
|
|
listarray = np.empty((1,), dtype=object)
|
|
listarray[0] = [1, 2, 3]
|
|
x = self.cls("x", listarray)
|
|
assert_array_equal(x.data, listarray)
|
|
assert_array_equal(x[0].data, listarray.squeeze())
|
|
assert_array_equal(x.squeeze().data, listarray.squeeze())
|
|
|
|
def test_index_and_concat_datetime(self):
|
|
# regression test for #125
|
|
date_range = pd.date_range("2011-09-01", periods=10)
|
|
for dates in [date_range, date_range.values, date_range.to_pydatetime()]:
|
|
expected = self.cls("t", dates)
|
|
for times in [
|
|
[expected[i] for i in range(10)],
|
|
[expected[i : (i + 1)] for i in range(10)],
|
|
[expected[[i]] for i in range(10)],
|
|
]:
|
|
actual = Variable.concat(times, "t")
|
|
assert expected.dtype == actual.dtype
|
|
assert_array_equal(expected, actual)
|
|
|
|
def test_0d_time_data(self):
|
|
# regression test for #105
|
|
x = self.cls("time", pd.date_range("2000-01-01", periods=5))
|
|
expected = np.datetime64("2000-01-01", "ns")
|
|
assert x[0].values == expected
|
|
|
|
dt64_data = pd.date_range("1970-01-01", periods=3)
|
|
|
|
@pytest.mark.parametrize(
|
|
"values, unit",
|
|
[
|
|
(dt64_data, "ns"),
|
|
(dt64_data.values, "ns"),
|
|
(dt64_data.values.astype("datetime64[m]"), "s"),
|
|
(dt64_data.values.astype("datetime64[s]"), "s"),
|
|
(dt64_data.values.astype("datetime64[ps]"), "ns"),
|
|
(
|
|
dt64_data.to_pydatetime(),
|
|
"us" if has_pandas_3 else "ns",
|
|
),
|
|
],
|
|
)
|
|
def test_datetime64_conversion(self, values, unit):
|
|
v = self.cls(["t"], values)
|
|
assert v.dtype == np.dtype(f"datetime64[{unit}]")
|
|
assert_array_equal(v.values, self.dt64_data.values)
|
|
assert v.values.dtype == np.dtype(f"datetime64[{unit}]")
|
|
|
|
td64_data = pd.timedelta_range(start=0, periods=3)
|
|
|
|
@pytest.mark.parametrize(
|
|
"values, unit",
|
|
[
|
|
(td64_data, "ns"),
|
|
(td64_data.values, "ns"),
|
|
(td64_data.values.astype("timedelta64[m]"), "s"),
|
|
(td64_data.values.astype("timedelta64[s]"), "s"),
|
|
(td64_data.values.astype("timedelta64[ps]"), "ns"),
|
|
(td64_data.to_pytimedelta(), "ns"),
|
|
],
|
|
)
|
|
def test_timedelta64_conversion(self, values, unit):
|
|
v = self.cls(["t"], values)
|
|
assert v.dtype == np.dtype(f"timedelta64[{unit}]")
|
|
assert_array_equal(v.values, self.td64_data.values)
|
|
assert v.values.dtype == np.dtype(f"timedelta64[{unit}]")
|
|
|
|
def test_object_conversion(self):
|
|
data = np.arange(5).astype(str).astype(object)
|
|
actual = self.cls("x", data)
|
|
assert actual.dtype == data.dtype
|
|
|
|
def test_pandas_data(self):
|
|
v = self.cls(["x"], pd.Series([0, 1, 2], index=[3, 2, 1]))
|
|
assert_identical(v, v[[0, 1, 2]])
|
|
v = self.cls(["x"], pd.Index([0, 1, 2]))
|
|
assert v[0].values == v.values[0]
|
|
|
|
def test_pandas_period_index(self):
|
|
v = self.cls(["x"], pd.period_range(start="2000", periods=20, freq="D"))
|
|
v = v.load() # for dask-based Variable
|
|
assert v[0] == pd.Period("2000", freq="D")
|
|
assert "Period('2000-01-01', 'D')" in repr(v)
|
|
|
|
@pytest.mark.parametrize("dtype", [float, int])
|
|
def test_1d_math(self, dtype: np.typing.DTypeLike) -> None:
|
|
x = np.arange(5, dtype=dtype)
|
|
y = np.ones(5, dtype=dtype)
|
|
|
|
# should we need `.to_base_variable()`?
|
|
# probably a break that `+v` changes type?
|
|
v = self.cls(["x"], x)
|
|
base_v = v.to_base_variable()
|
|
# unary ops
|
|
assert_identical(base_v, +v)
|
|
assert_identical(base_v, abs(v))
|
|
assert_array_equal((-v).values, -x)
|
|
# binary ops with numbers
|
|
assert_identical(base_v, v + 0)
|
|
assert_identical(base_v, 0 + v)
|
|
assert_identical(base_v, v * 1)
|
|
if dtype is int:
|
|
assert_identical(base_v, v << 0)
|
|
assert_array_equal(v << 3, x << 3)
|
|
assert_array_equal(v >> 2, x >> 2)
|
|
# binary ops with numpy arrays
|
|
assert_array_equal((v * x).values, x**2)
|
|
assert_array_equal((x * v).values, x**2)
|
|
assert_array_equal(v - y, v - 1)
|
|
assert_array_equal(y - v, 1 - v)
|
|
if dtype is int:
|
|
assert_array_equal(v << x, x << x)
|
|
assert_array_equal(v >> x, x >> x)
|
|
# verify attributes are dropped
|
|
v2 = self.cls(["x"], x, {"units": "meters"})
|
|
with set_options(keep_attrs=False):
|
|
assert_identical(base_v, +v2)
|
|
# binary ops with all variables
|
|
assert_array_equal(v + v, 2 * v)
|
|
w = self.cls(["x"], y, {"foo": "bar"})
|
|
assert_identical(v + w, self.cls(["x"], x + y).to_base_variable())
|
|
assert_array_equal((v * w).values, x * y)
|
|
|
|
# something complicated
|
|
assert_array_equal((v**2 * w - 1 + x).values, x**2 * y - 1 + x)
|
|
# make sure dtype is preserved (for Index objects)
|
|
assert dtype == (+v).dtype
|
|
assert dtype == (+v).values.dtype
|
|
assert dtype == (0 + v).dtype
|
|
assert dtype == (0 + v).values.dtype
|
|
# check types of returned data
|
|
assert isinstance(+v, Variable)
|
|
assert not isinstance(+v, IndexVariable)
|
|
assert isinstance(0 + v, Variable)
|
|
assert not isinstance(0 + v, IndexVariable)
|
|
|
|
def test_1d_reduce(self):
|
|
x = np.arange(5)
|
|
v = self.cls(["x"], x)
|
|
actual = v.sum()
|
|
expected = Variable((), 10)
|
|
assert_identical(expected, actual)
|
|
assert type(actual) is Variable
|
|
|
|
def test_array_interface(self):
|
|
x = np.arange(5)
|
|
v = self.cls(["x"], x)
|
|
assert_array_equal(np.asarray(v), x)
|
|
# test patched in methods
|
|
assert_array_equal(v.astype(float), x.astype(float))
|
|
# think this is a break, that argsort changes the type
|
|
assert_identical(v.argsort(), v.to_base_variable())
|
|
assert_identical(v.clip(2, 3), self.cls("x", x.clip(2, 3)).to_base_variable())
|
|
# test ufuncs
|
|
assert_identical(np.sin(v), self.cls(["x"], np.sin(x)).to_base_variable())
|
|
assert isinstance(np.sin(v), Variable)
|
|
assert not isinstance(np.sin(v), IndexVariable)
|
|
|
|
def example_1d_objects(self):
|
|
for data in [
|
|
range(3),
|
|
0.5 * np.arange(3),
|
|
0.5 * np.arange(3, dtype=np.float32),
|
|
pd.date_range("2000-01-01", periods=3),
|
|
np.array(["a", "b", "c"], dtype=object),
|
|
]:
|
|
yield (self.cls("x", data), data)
|
|
|
|
def test___array__(self):
|
|
for v, data in self.example_1d_objects():
|
|
assert_array_equal(v.values, np.asarray(data))
|
|
assert_array_equal(np.asarray(v), np.asarray(data))
|
|
assert v[0].values == np.asarray(data)[0]
|
|
assert np.asarray(v[0]) == np.asarray(data)[0]
|
|
|
|
def test_equals_all_dtypes(self):
|
|
for v, _ in self.example_1d_objects():
|
|
v2 = v.copy()
|
|
assert v.equals(v2)
|
|
assert v.identical(v2)
|
|
assert v.no_conflicts(v2)
|
|
assert v[0].equals(v2[0])
|
|
assert v[0].identical(v2[0])
|
|
assert v[0].no_conflicts(v2[0])
|
|
assert v[:2].equals(v2[:2])
|
|
assert v[:2].identical(v2[:2])
|
|
assert v[:2].no_conflicts(v2[:2])
|
|
|
|
def test_eq_all_dtypes(self):
|
|
# ensure that we don't choke on comparisons for which numpy returns
|
|
# scalars
|
|
expected = Variable("x", 3 * [False])
|
|
for v, _ in self.example_1d_objects():
|
|
actual = "z" == v
|
|
assert_identical(expected, actual)
|
|
actual = ~("z" != v)
|
|
assert_identical(expected, actual)
|
|
|
|
def test_encoding_preserved(self):
|
|
expected = self.cls("x", range(3), {"foo": 1}, {"bar": 2})
|
|
for actual in [
|
|
expected.T,
|
|
expected[...],
|
|
expected.squeeze(),
|
|
expected.isel(x=slice(None)),
|
|
expected.set_dims({"x": 3}),
|
|
expected.copy(deep=True),
|
|
expected.copy(deep=False),
|
|
]:
|
|
assert_identical(expected.to_base_variable(), actual.to_base_variable())
|
|
assert expected.encoding == actual.encoding
|
|
|
|
def test_drop_encoding(self) -> None:
|
|
encoding1 = {"scale_factor": 1}
|
|
# encoding set via cls constructor
|
|
v1 = self.cls(["a"], [0, 1, 2], encoding=encoding1)
|
|
assert v1.encoding == encoding1
|
|
v2 = v1.drop_encoding()
|
|
assert v1.encoding == encoding1
|
|
assert v2.encoding == {}
|
|
|
|
# encoding set via setter
|
|
encoding3 = {"scale_factor": 10}
|
|
v3 = self.cls(["a"], [0, 1, 2], encoding=encoding3)
|
|
assert v3.encoding == encoding3
|
|
v4 = v3.drop_encoding()
|
|
assert v3.encoding == encoding3
|
|
assert v4.encoding == {}
|
|
|
|
def test_concat(self):
|
|
x = np.arange(5)
|
|
y = np.arange(5, 10)
|
|
v = self.cls(["a"], x)
|
|
w = self.cls(["a"], y)
|
|
assert_identical(
|
|
Variable(["b", "a"], np.array([x, y])), Variable.concat([v, w], "b")
|
|
)
|
|
assert_identical(
|
|
Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b")
|
|
)
|
|
assert_identical(
|
|
Variable(["b", "a"], np.array([x, y])), Variable.concat((v, w), "b")
|
|
)
|
|
with pytest.raises(ValueError, match=r"Variable has dimensions"):
|
|
Variable.concat([v, Variable(["c"], y)], "b")
|
|
# test indexers
|
|
actual = Variable.concat(
|
|
[v, w], positions=[np.arange(0, 10, 2), np.arange(1, 10, 2)], dim="a"
|
|
)
|
|
expected = Variable("a", np.array([x, y]).ravel(order="F"))
|
|
assert_identical(expected, actual)
|
|
# test concatenating along a dimension
|
|
v = Variable(["time", "x"], np.random.random((10, 8)))
|
|
assert_identical(v, Variable.concat([v[:5], v[5:]], "time"))
|
|
assert_identical(v, Variable.concat([v[:5], v[5:6], v[6:]], "time"))
|
|
assert_identical(v, Variable.concat([v[:1], v[1:]], "time"))
|
|
# test dimension order
|
|
assert_identical(v, Variable.concat([v[:, :5], v[:, 5:]], "x"))
|
|
with pytest.raises(ValueError, match=r"all input arrays must have"):
|
|
Variable.concat([v[:, 0], v[:, 1:]], "x")
|
|
|
|
def test_concat_attrs(self):
|
|
# always keep attrs from first variable
|
|
v = self.cls("a", np.arange(5), {"foo": "bar"})
|
|
w = self.cls("a", np.ones(5))
|
|
expected = self.cls(
|
|
"a", np.concatenate([np.arange(5), np.ones(5)])
|
|
).to_base_variable()
|
|
expected.attrs["foo"] = "bar"
|
|
assert_identical(expected, Variable.concat([v, w], "a"))
|
|
|
|
def test_concat_fixed_len_str(self):
|
|
# regression test for #217
|
|
for kind in ["S", "U"]:
|
|
x = self.cls("animal", np.array(["horse"], dtype=kind))
|
|
y = self.cls("animal", np.array(["aardvark"], dtype=kind))
|
|
actual = Variable.concat([x, y], "animal")
|
|
expected = Variable("animal", np.array(["horse", "aardvark"], dtype=kind))
|
|
assert_equal(expected, actual)
|
|
|
|
def test_concat_number_strings(self):
|
|
# regression test for #305
|
|
a = self.cls("x", ["0", "1", "2"])
|
|
b = self.cls("x", ["3", "4"])
|
|
actual = Variable.concat([a, b], dim="x")
|
|
expected = Variable("x", np.arange(5).astype(str))
|
|
assert_identical(expected, actual)
|
|
assert actual.dtype.kind == expected.dtype.kind
|
|
|
|
def test_concat_mixed_dtypes(self):
|
|
a = self.cls("x", [0, 1])
|
|
b = self.cls("x", ["two"])
|
|
actual = Variable.concat([a, b], dim="x")
|
|
expected = Variable("x", np.array([0, 1, "two"], dtype=object))
|
|
assert_identical(expected, actual)
|
|
assert actual.dtype == object
|
|
|
|
@pytest.mark.parametrize("deep", [True, False])
|
|
@pytest.mark.parametrize("astype", [float, int, str])
|
|
def test_copy(self, deep: bool, astype: type[object]) -> None:
|
|
v = self.cls("x", (0.5 * np.arange(10)).astype(astype), {"foo": "bar"})
|
|
w = v.copy(deep=deep)
|
|
assert type(v) is type(w)
|
|
assert_identical(v, w)
|
|
assert v.dtype == w.dtype
|
|
if self.cls is Variable:
|
|
if deep:
|
|
assert source_ndarray(v.values) is not source_ndarray(w.values)
|
|
else:
|
|
assert source_ndarray(v.values) is source_ndarray(w.values)
|
|
assert_identical(v, copy(v))
|
|
|
|
def test_copy_deep_recursive(self) -> None:
|
|
# GH:issue:7111
|
|
|
|
# direct recursion
|
|
v = self.cls("x", [0, 1])
|
|
v.attrs["other"] = v
|
|
|
|
# TODO: cannot use assert_identical on recursive Vars yet...
|
|
# lets just ensure that deep copy works without RecursionError
|
|
v.copy(deep=True)
|
|
|
|
# indirect recursion
|
|
v2 = self.cls("y", [2, 3])
|
|
v.attrs["other"] = v2
|
|
v2.attrs["other"] = v
|
|
|
|
# TODO: cannot use assert_identical on recursive Vars yet...
|
|
# lets just ensure that deep copy works without RecursionError
|
|
v.copy(deep=True)
|
|
v2.copy(deep=True)
|
|
|
|
def test_copy_index(self):
|
|
midx = pd.MultiIndex.from_product(
|
|
[["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
|
|
)
|
|
v = self.cls("x", midx)
|
|
for deep in [True, False]:
|
|
w = v.copy(deep=deep)
|
|
assert isinstance(w._data, PandasIndexingAdapter)
|
|
assert isinstance(w.to_index(), pd.MultiIndex)
|
|
assert_array_equal(v._data.array, w._data.array)
|
|
|
|
def test_copy_with_data(self) -> None:
|
|
orig = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"})
|
|
new_data = np.array([[2.5, 5.0], [7.1, 43]])
|
|
actual = orig.copy(data=new_data)
|
|
expected = orig.copy()
|
|
expected.data = new_data
|
|
assert_identical(expected, actual)
|
|
|
|
def test_copy_with_data_errors(self) -> None:
|
|
orig = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"})
|
|
new_data = [2.5, 5.0]
|
|
with pytest.raises(ValueError, match=r"must match shape of object"):
|
|
orig.copy(data=new_data) # type: ignore[arg-type]
|
|
|
|
def test_copy_index_with_data(self) -> None:
|
|
orig = IndexVariable("x", np.arange(5))
|
|
new_data = np.arange(5, 10)
|
|
actual = orig.copy(data=new_data)
|
|
expected = IndexVariable("x", np.arange(5, 10))
|
|
assert_identical(expected, actual)
|
|
|
|
def test_copy_index_with_data_errors(self) -> None:
|
|
orig = IndexVariable("x", np.arange(5))
|
|
new_data = np.arange(5, 20)
|
|
with pytest.raises(ValueError, match=r"must match shape of object"):
|
|
orig.copy(data=new_data)
|
|
with pytest.raises(ValueError, match=r"Cannot assign to the .data"):
|
|
orig.data = new_data
|
|
with pytest.raises(ValueError, match=r"Cannot assign to the .values"):
|
|
orig.values = new_data
|
|
|
|
def test_replace(self):
|
|
var = Variable(("x", "y"), [[1.5, 2.0], [3.1, 4.3]], {"foo": "bar"})
|
|
result = var._replace()
|
|
assert_identical(result, var)
|
|
|
|
new_data = np.arange(4).reshape(2, 2)
|
|
result = var._replace(data=new_data)
|
|
assert_array_equal(result.data, new_data)
|
|
|
|
def test_real_and_imag(self):
|
|
v = self.cls("x", np.arange(3) - 1j * np.arange(3), {"foo": "bar"})
|
|
expected_re = self.cls("x", np.arange(3), {"foo": "bar"})
|
|
assert_identical(v.real, expected_re)
|
|
|
|
expected_im = self.cls("x", -np.arange(3), {"foo": "bar"})
|
|
assert_identical(v.imag, expected_im)
|
|
|
|
expected_abs = self.cls("x", np.sqrt(2 * np.arange(3) ** 2)).to_base_variable()
|
|
assert_allclose(abs(v), expected_abs)
|
|
|
|
def test_aggregate_complex(self):
|
|
# should skip NaNs
|
|
v = self.cls("x", [1, 2j, np.nan])
|
|
expected = Variable((), 0.5 + 1j)
|
|
assert_allclose(v.mean(), expected)
|
|
|
|
def test_pandas_categorical_dtype(self):
|
|
data = pd.Categorical(np.arange(10, dtype="int64"))
|
|
v = self.cls("x", data)
|
|
print(v) # should not error
|
|
assert v.dtype == "int64"
|
|
|
|
def test_pandas_datetime64_with_tz(self):
|
|
data = pd.date_range(
|
|
start="2000-01-01",
|
|
tz=pytz.timezone("America/New_York"),
|
|
periods=10,
|
|
freq="1h",
|
|
)
|
|
v = self.cls("x", data)
|
|
print(v) # should not error
|
|
if "America/New_York" in str(data.dtype):
|
|
# pandas is new enough that it has datetime64 with timezone dtype
|
|
assert v.dtype == "object"
|
|
|
|
def test_multiindex(self):
|
|
idx = pd.MultiIndex.from_product([list("abc"), [0, 1]])
|
|
v = self.cls("x", idx)
|
|
assert_identical(Variable((), ("a", 0)), v[0])
|
|
assert_identical(v, v[:])
|
|
|
|
def test_load(self):
|
|
array = self.cls("x", np.arange(5))
|
|
orig_data = array._data
|
|
copied = array.copy(deep=True)
|
|
if array.chunks is None:
|
|
array.load()
|
|
assert type(array._data) is type(orig_data)
|
|
assert type(copied._data) is type(orig_data)
|
|
assert_identical(array, copied)
|
|
|
|
def test_getitem_advanced(self):
|
|
v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
|
|
v_data = v.compute().data
|
|
|
|
# orthogonal indexing
|
|
v_new = v[([0, 1], [1, 0])]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v_data[[0, 1]][:, [1, 0]])
|
|
|
|
v_new = v[[0, 1]]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v_data[[0, 1]])
|
|
|
|
# with mixed arguments
|
|
ind = Variable(["a"], [0, 1])
|
|
v_new = v[dict(x=[0, 1], y=ind)]
|
|
assert v_new.dims == ("x", "a")
|
|
assert_array_equal(v_new, v_data[[0, 1]][:, [0, 1]])
|
|
|
|
# boolean indexing
|
|
v_new = v[dict(x=[True, False], y=[False, True, False])]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v_data[0][1])
|
|
|
|
# with scalar variable
|
|
ind = Variable((), 2)
|
|
v_new = v[dict(y=ind)]
|
|
expected = v[dict(y=2)]
|
|
assert_array_equal(v_new, expected)
|
|
|
|
# with boolean variable with wrong shape
|
|
ind = np.array([True, False])
|
|
with pytest.raises(IndexError, match=r"Boolean array size 2 is "):
|
|
v[Variable(("a", "b"), [[0, 1]]), ind]
|
|
|
|
# boolean indexing with different dimension
|
|
ind = Variable(["a"], [True, False, False])
|
|
with pytest.raises(IndexError, match=r"Boolean indexer should be"):
|
|
v[dict(y=ind)]
|
|
|
|
def test_getitem_uint_1d(self):
|
|
# regression test for #1405
|
|
v = self.cls(["x"], [0, 1, 2])
|
|
v_data = v.compute().data
|
|
|
|
v_new = v[np.array([0])]
|
|
assert_array_equal(v_new, v_data[0])
|
|
v_new = v[np.array([0], dtype="uint64")]
|
|
assert_array_equal(v_new, v_data[0])
|
|
|
|
def test_getitem_uint(self):
|
|
# regression test for #1405
|
|
v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
|
|
v_data = v.compute().data
|
|
|
|
v_new = v[np.array([0])]
|
|
assert_array_equal(v_new, v_data[[0], :])
|
|
v_new = v[np.array([0], dtype="uint64")]
|
|
assert_array_equal(v_new, v_data[[0], :])
|
|
|
|
v_new = v[np.uint64(0)]
|
|
assert_array_equal(v_new, v_data[0, :])
|
|
|
|
def test_getitem_0d_array(self):
|
|
# make sure 0d-np.array can be used as an indexer
|
|
v = self.cls(["x"], [0, 1, 2])
|
|
v_data = v.compute().data
|
|
|
|
v_new = v[np.array([0])[0]]
|
|
assert_array_equal(v_new, v_data[0])
|
|
|
|
v_new = v[np.array(0)]
|
|
assert_array_equal(v_new, v_data[0])
|
|
|
|
v_new = v[Variable((), np.array(0))]
|
|
assert_array_equal(v_new, v_data[0])
|
|
|
|
def test_getitem_fancy(self):
|
|
v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
|
|
v_data = v.compute().data
|
|
|
|
ind = Variable(["a", "b"], [[0, 1, 1], [1, 1, 0]])
|
|
v_new = v[ind]
|
|
assert v_new.dims == ("a", "b", "y")
|
|
assert_array_equal(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :])
|
|
|
|
# It would be ok if indexed with the multi-dimensional array including
|
|
# the same name
|
|
ind = Variable(["x", "b"], [[0, 1, 1], [1, 1, 0]])
|
|
v_new = v[ind]
|
|
assert v_new.dims == ("x", "b", "y")
|
|
assert_array_equal(v_new, v_data[[[0, 1, 1], [1, 1, 0]], :])
|
|
|
|
ind = Variable(["a", "b"], [[0, 1, 2], [2, 1, 0]])
|
|
v_new = v[dict(y=ind)]
|
|
assert v_new.dims == ("x", "a", "b")
|
|
assert_array_equal(v_new, v_data[:, ([0, 1, 2], [2, 1, 0])])
|
|
|
|
ind = Variable(["a", "b"], [[0, 0], [1, 1]])
|
|
v_new = v[dict(x=[1, 0], y=ind)]
|
|
assert v_new.dims == ("x", "a", "b")
|
|
assert_array_equal(v_new, v_data[[1, 0]][:, ind])
|
|
|
|
# along diagonal
|
|
ind = Variable(["a"], [0, 1])
|
|
v_new = v[ind, ind]
|
|
assert v_new.dims == ("a",)
|
|
assert_array_equal(v_new, v_data[[0, 1], [0, 1]])
|
|
|
|
# with integer
|
|
ind = Variable(["a", "b"], [[0, 0], [1, 1]])
|
|
v_new = v[dict(x=0, y=ind)]
|
|
assert v_new.dims == ("a", "b")
|
|
assert_array_equal(v_new[0], v_data[0][[0, 0]])
|
|
assert_array_equal(v_new[1], v_data[0][[1, 1]])
|
|
|
|
# with slice
|
|
ind = Variable(["a", "b"], [[0, 0], [1, 1]])
|
|
v_new = v[dict(x=slice(None), y=ind)]
|
|
assert v_new.dims == ("x", "a", "b")
|
|
assert_array_equal(v_new, v_data[:, [[0, 0], [1, 1]]])
|
|
|
|
ind = Variable(["a", "b"], [[0, 0], [1, 1]])
|
|
v_new = v[dict(x=ind, y=slice(None))]
|
|
assert v_new.dims == ("a", "b", "y")
|
|
assert_array_equal(v_new, v_data[[[0, 0], [1, 1]], :])
|
|
|
|
ind = Variable(["a", "b"], [[0, 0], [1, 1]])
|
|
v_new = v[dict(x=ind, y=slice(None, 1))]
|
|
assert v_new.dims == ("a", "b", "y")
|
|
assert_array_equal(v_new, v_data[[[0, 0], [1, 1]], slice(None, 1)])
|
|
|
|
# slice matches explicit dimension
|
|
ind = Variable(["y"], [0, 1])
|
|
v_new = v[ind, :2]
|
|
assert v_new.dims == ("y",)
|
|
assert_array_equal(v_new, v_data[[0, 1], [0, 1]])
|
|
|
|
# with multiple slices
|
|
v = self.cls(["x", "y", "z"], [[[1, 2, 3], [4, 5, 6]]])
|
|
ind = Variable(["a", "b"], [[0]])
|
|
v_new = v[ind, :, :]
|
|
expected = Variable(["a", "b", "y", "z"], v.data[np.newaxis, ...])
|
|
assert_identical(v_new, expected)
|
|
|
|
v = Variable(["w", "x", "y", "z"], [[[[1, 2, 3], [4, 5, 6]]]])
|
|
ind = Variable(["y"], [0])
|
|
v_new = v[ind, :, 1:2, 2]
|
|
expected = Variable(["y", "x"], [[6]])
|
|
assert_identical(v_new, expected)
|
|
|
|
# slice and vector mixed indexing resulting in the same dimension
|
|
v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
|
|
ind = Variable(["x"], [0, 1, 2])
|
|
v_new = v[:, ind]
|
|
expected = Variable(("x", "z"), np.zeros((3, 5)))
|
|
expected[0] = v.data[0, 0]
|
|
expected[1] = v.data[1, 1]
|
|
expected[2] = v.data[2, 2]
|
|
assert_identical(v_new, expected)
|
|
|
|
v_new = v[:, ind.data]
|
|
assert v_new.shape == (3, 3, 5)
|
|
|
|
def test_getitem_error(self):
|
|
v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
|
|
|
|
with pytest.raises(IndexError, match=r"labeled multi-"):
|
|
v[[[0, 1], [1, 2]]]
|
|
|
|
ind_x = Variable(["a"], [0, 1, 1])
|
|
ind_y = Variable(["a"], [0, 1])
|
|
with pytest.raises(IndexError, match=r"Dimensions of indexers "):
|
|
v[ind_x, ind_y]
|
|
|
|
ind = Variable(["a", "b"], [[True, False], [False, True]])
|
|
with pytest.raises(IndexError, match=r"2-dimensional boolean"):
|
|
v[dict(x=ind)]
|
|
|
|
v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
|
|
ind = Variable(["x"], [0, 1])
|
|
with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
|
|
v[:, ind]
|
|
|
|
@pytest.mark.parametrize(
|
|
"mode",
|
|
[
|
|
"mean",
|
|
"median",
|
|
"reflect",
|
|
"edge",
|
|
"linear_ramp",
|
|
"maximum",
|
|
"minimum",
|
|
"symmetric",
|
|
"wrap",
|
|
],
|
|
)
|
|
@pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS)
|
|
@pytest.mark.filterwarnings(
|
|
r"ignore:dask.array.pad.+? converts integers to floats."
|
|
)
|
|
def test_pad(self, mode, xr_arg, np_arg):
|
|
data = np.arange(4 * 3 * 2).reshape(4, 3, 2)
|
|
v = self.cls(["x", "y", "z"], data)
|
|
|
|
actual = v.pad(mode=mode, **xr_arg)
|
|
expected = np.pad(data, np_arg, mode=mode)
|
|
|
|
assert_array_equal(actual, expected)
|
|
assert isinstance(actual._data, type(v._data))
|
|
|
|
@pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS)
|
|
def test_pad_constant_values(self, xr_arg, np_arg):
|
|
data = np.arange(4 * 3 * 2).reshape(4, 3, 2)
|
|
v = self.cls(["x", "y", "z"], data)
|
|
|
|
actual = v.pad(**xr_arg)
|
|
expected = np.pad(
|
|
np.array(duck_array_ops.astype(v.data, float)),
|
|
np_arg,
|
|
mode="constant",
|
|
constant_values=np.nan,
|
|
)
|
|
assert_array_equal(actual, expected)
|
|
assert isinstance(actual._data, type(v._data))
|
|
|
|
# for the boolean array, we pad False
|
|
data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2)
|
|
v = self.cls(["x", "y", "z"], data)
|
|
|
|
actual = v.pad(mode="constant", constant_values=False, **xr_arg)
|
|
expected = np.pad(
|
|
np.array(v.data), np_arg, mode="constant", constant_values=False
|
|
)
|
|
assert_array_equal(actual, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
["keep_attrs", "attrs", "expected"],
|
|
[
|
|
pytest.param(None, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="default"),
|
|
pytest.param(False, {"a": 1, "b": 2}, {}, id="False"),
|
|
pytest.param(True, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="True"),
|
|
],
|
|
)
|
|
def test_pad_keep_attrs(self, keep_attrs, attrs, expected):
|
|
data = np.arange(10, dtype=float)
|
|
v = self.cls(["x"], data, attrs)
|
|
|
|
keep_attrs_ = "default" if keep_attrs is None else keep_attrs
|
|
|
|
with set_options(keep_attrs=keep_attrs_):
|
|
actual = v.pad({"x": (1, 1)}, mode="constant", constant_values=np.nan)
|
|
|
|
assert actual.attrs == expected
|
|
|
|
actual = v.pad(
|
|
{"x": (1, 1)},
|
|
mode="constant",
|
|
constant_values=np.nan,
|
|
keep_attrs=keep_attrs,
|
|
)
|
|
assert actual.attrs == expected
|
|
|
|
@pytest.mark.parametrize("d, w", (("x", 3), ("y", 5)))
|
|
def test_rolling_window(self, d, w):
|
|
# Just a working test. See test_nputils for the algorithm validation
|
|
v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2))
|
|
v_rolling = v.rolling_window(d, w, d + "_window")
|
|
assert v_rolling.dims == ("x", "y", "z", d + "_window")
|
|
assert v_rolling.shape == v.shape + (w,)
|
|
|
|
v_rolling = v.rolling_window(d, w, d + "_window", center=True)
|
|
assert v_rolling.dims == ("x", "y", "z", d + "_window")
|
|
assert v_rolling.shape == v.shape + (w,)
|
|
|
|
# dask and numpy result should be the same
|
|
v_loaded = v.load().rolling_window(d, w, d + "_window", center=True)
|
|
assert_array_equal(v_rolling, v_loaded)
|
|
|
|
# numpy backend should not be over-written
|
|
if isinstance(v._data, np.ndarray):
|
|
with pytest.raises(ValueError):
|
|
v_loaded[0] = 1.0
|
|
|
|
def test_rolling_1d(self):
|
|
x = self.cls("x", np.array([1, 2, 3, 4], dtype=float))
|
|
|
|
kwargs = dict(dim="x", window=3, window_dim="xw")
|
|
actual = x.rolling_window(**kwargs, center=True, fill_value=np.nan)
|
|
expected = Variable(
|
|
("x", "xw"),
|
|
np.array(
|
|
[[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float
|
|
),
|
|
)
|
|
assert_equal(actual, expected)
|
|
|
|
actual = x.rolling_window(**kwargs, center=False, fill_value=0.0)
|
|
expected = self.cls(
|
|
("x", "xw"),
|
|
np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float),
|
|
)
|
|
assert_equal(actual, expected)
|
|
|
|
x = self.cls(("y", "x"), np.stack([x, x * 1.1]))
|
|
actual = x.rolling_window(**kwargs, center=False, fill_value=0.0)
|
|
expected = self.cls(
|
|
("y", "x", "xw"), np.stack([expected.data, expected.data * 1.1], axis=0)
|
|
)
|
|
assert_equal(actual, expected)
|
|
|
|
@pytest.mark.parametrize("center", [[True, True], [False, False]])
|
|
@pytest.mark.parametrize("dims", [("x", "y"), ("y", "z"), ("z", "x")])
|
|
def test_nd_rolling(self, center, dims):
|
|
x = self.cls(
|
|
("x", "y", "z"),
|
|
np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float),
|
|
)
|
|
window = [3, 3]
|
|
actual = x.rolling_window(
|
|
dim=dims,
|
|
window=window,
|
|
window_dim=[f"{k}w" for k in dims],
|
|
center=center,
|
|
fill_value=np.nan,
|
|
)
|
|
expected = x
|
|
for dim, win, cent in zip(dims, window, center, strict=True):
|
|
expected = expected.rolling_window(
|
|
dim=dim,
|
|
window=win,
|
|
window_dim=f"{dim}w",
|
|
center=cent,
|
|
fill_value=np.nan,
|
|
)
|
|
assert_equal(actual, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
("dim, window, window_dim, center"),
|
|
[
|
|
("x", [3, 3], "x_w", True),
|
|
("x", 3, ("x_w", "x_w"), True),
|
|
("x", 3, "x_w", [True, True]),
|
|
],
|
|
)
|
|
def test_rolling_window_errors(self, dim, window, window_dim, center):
|
|
x = self.cls(
|
|
("x", "y", "z"),
|
|
np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float),
|
|
)
|
|
with pytest.raises(ValueError):
|
|
x.rolling_window(
|
|
dim=dim,
|
|
window=window,
|
|
window_dim=window_dim,
|
|
center=center,
|
|
)
|
|
|
|
|
|
class TestVariable(VariableSubclassobjects):
|
|
def cls(self, *args, **kwargs) -> Variable:
|
|
return Variable(*args, **kwargs)
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setup(self):
|
|
self.d = np.random.random((10, 3)).astype(np.float64)
|
|
|
|
def test_values(self):
|
|
v = Variable(["time", "x"], self.d)
|
|
assert_array_equal(v.values, self.d)
|
|
assert source_ndarray(v.values) is self.d
|
|
with pytest.raises(ValueError):
|
|
# wrong size
|
|
v.values = np.random.random(5)
|
|
d2 = np.random.random((10, 3))
|
|
v.values = d2
|
|
assert source_ndarray(v.values) is d2
|
|
|
|
def test_numpy_same_methods(self):
|
|
v = Variable([], np.float32(0.0))
|
|
assert v.item() == 0
|
|
assert type(v.item()) is float
|
|
|
|
v = IndexVariable("x", np.arange(5))
|
|
assert 2 == v.searchsorted(2)
|
|
|
|
@pytest.mark.parametrize(
|
|
"values, unit",
|
|
[
|
|
(np.datetime64("2000-01-01"), "s"),
|
|
(
|
|
pd.Timestamp("2000-01-01T00"),
|
|
"s" if has_pandas_3 else "ns",
|
|
),
|
|
(
|
|
datetime(2000, 1, 1),
|
|
"us" if has_pandas_3 else "ns",
|
|
),
|
|
(np.datetime64("2000-01-01T00:00:00.1234567891"), "ns"),
|
|
],
|
|
)
|
|
def test_datetime64_conversion_scalar(self, values, unit):
|
|
v = Variable([], values)
|
|
assert v.dtype == np.dtype(f"datetime64[{unit}]")
|
|
assert np.issubdtype(v.values, "datetime64")
|
|
assert v.values.dtype == np.dtype(f"datetime64[{unit}]")
|
|
|
|
@pytest.mark.parametrize(
|
|
"values, unit",
|
|
[
|
|
(np.timedelta64(1, "m"), "s"),
|
|
(np.timedelta64(1, "D"), "s"),
|
|
(np.timedelta64(1001, "ps"), "ns"),
|
|
(pd.Timedelta("1 day"), "ns"),
|
|
(timedelta(days=1), "ns"),
|
|
],
|
|
)
|
|
def test_timedelta64_conversion_scalar(self, values, unit):
|
|
v = Variable([], values)
|
|
assert v.dtype == np.dtype(f"timedelta64[{unit}]")
|
|
assert np.issubdtype(v.values, "timedelta64")
|
|
assert v.values.dtype == np.dtype(f"timedelta64[{unit}]")
|
|
|
|
def test_0d_str(self):
|
|
v = Variable([], "foo")
|
|
assert v.dtype == np.dtype("U3")
|
|
assert v.values == "foo"
|
|
|
|
v = Variable([], np.bytes_("foo"))
|
|
assert v.dtype == np.dtype("S3")
|
|
assert v.values == "foo".encode("ascii")
|
|
|
|
def test_0d_datetime(self):
|
|
v = Variable([], pd.Timestamp("2000-01-01"))
|
|
expected_unit = "s" if has_pandas_3 else "ns"
|
|
assert v.dtype == np.dtype(f"datetime64[{expected_unit}]")
|
|
assert v.values == np.datetime64("2000-01-01", expected_unit)
|
|
|
|
@pytest.mark.parametrize(
|
|
"values, unit", [(pd.to_timedelta("1s"), "ns"), (np.timedelta64(1, "s"), "s")]
|
|
)
|
|
def test_0d_timedelta(self, values, unit):
|
|
# todo: check, if this test is OK
|
|
v = Variable([], values)
|
|
assert v.dtype == np.dtype(f"timedelta64[{unit}]")
|
|
assert v.values == np.timedelta64(10**9, "ns")
|
|
|
|
def test_equals_and_identical(self):
|
|
d = np.random.rand(10, 3)
|
|
d[0, 0] = np.nan
|
|
v1 = Variable(("dim1", "dim2"), data=d, attrs={"att1": 3, "att2": [1, 2, 3]})
|
|
v2 = Variable(("dim1", "dim2"), data=d, attrs={"att1": 3, "att2": [1, 2, 3]})
|
|
assert v1.equals(v2)
|
|
assert v1.identical(v2)
|
|
|
|
v3 = Variable(("dim1", "dim3"), data=d)
|
|
assert not v1.equals(v3)
|
|
|
|
v4 = Variable(("dim1", "dim2"), data=d)
|
|
assert v1.equals(v4)
|
|
assert not v1.identical(v4)
|
|
|
|
v5 = deepcopy(v1)
|
|
v5.values[:] = np.random.rand(10, 3)
|
|
assert not v1.equals(v5)
|
|
|
|
assert not v1.equals(None)
|
|
assert not v1.equals(d)
|
|
|
|
assert not v1.identical(None)
|
|
assert not v1.identical(d)
|
|
|
|
def test_broadcast_equals(self):
|
|
v1 = Variable((), np.nan)
|
|
v2 = Variable(("x"), [np.nan, np.nan])
|
|
assert v1.broadcast_equals(v2)
|
|
assert not v1.equals(v2)
|
|
assert not v1.identical(v2)
|
|
|
|
v3 = Variable(("x"), [np.nan])
|
|
assert v1.broadcast_equals(v3)
|
|
assert not v1.equals(v3)
|
|
assert not v1.identical(v3)
|
|
|
|
assert not v1.broadcast_equals(None)
|
|
|
|
v4 = Variable(("x"), [np.nan] * 3)
|
|
assert not v2.broadcast_equals(v4)
|
|
|
|
def test_no_conflicts(self):
|
|
v1 = Variable(("x"), [1, 2, np.nan, np.nan])
|
|
v2 = Variable(("x"), [np.nan, 2, 3, np.nan])
|
|
assert v1.no_conflicts(v2)
|
|
assert not v1.equals(v2)
|
|
assert not v1.broadcast_equals(v2)
|
|
assert not v1.identical(v2)
|
|
|
|
assert not v1.no_conflicts(None)
|
|
|
|
v3 = Variable(("y"), [np.nan, 2, 3, np.nan])
|
|
assert not v3.no_conflicts(v1)
|
|
|
|
d = np.array([1, 2, np.nan, np.nan])
|
|
assert not v1.no_conflicts(d)
|
|
assert not v2.no_conflicts(d)
|
|
|
|
v4 = Variable(("w", "x"), [d])
|
|
assert v1.no_conflicts(v4)
|
|
|
|
def test_as_variable(self):
|
|
data = np.arange(10)
|
|
expected = Variable("x", data)
|
|
expected_extra = Variable(
|
|
"x", data, attrs={"myattr": "val"}, encoding={"scale_factor": 1}
|
|
)
|
|
|
|
assert_identical(expected, as_variable(expected))
|
|
|
|
ds = Dataset({"x": expected})
|
|
var = as_variable(ds["x"]).to_base_variable()
|
|
assert_identical(expected, var)
|
|
assert not isinstance(ds["x"], Variable)
|
|
assert isinstance(as_variable(ds["x"]), Variable)
|
|
|
|
xarray_tuple = (
|
|
expected_extra.dims,
|
|
expected_extra.values,
|
|
expected_extra.attrs,
|
|
expected_extra.encoding,
|
|
)
|
|
assert_identical(expected_extra, as_variable(xarray_tuple))
|
|
|
|
with pytest.raises(TypeError, match=r"tuple of form"):
|
|
as_variable(tuple(data))
|
|
with pytest.raises(ValueError, match=r"tuple of form"): # GH1016
|
|
as_variable(("five", "six", "seven"))
|
|
with pytest.raises(TypeError, match=r"without an explicit list of dimensions"):
|
|
as_variable(data)
|
|
|
|
with pytest.warns(FutureWarning, match="IndexVariable"):
|
|
actual = as_variable(data, name="x")
|
|
assert_identical(expected.to_index_variable(), actual)
|
|
|
|
actual = as_variable(0)
|
|
expected = Variable([], 0)
|
|
assert_identical(expected, actual)
|
|
|
|
data = np.arange(9).reshape((3, 3))
|
|
expected = Variable(("x", "y"), data)
|
|
with pytest.raises(ValueError, match=r"without explicit dimension names"):
|
|
as_variable(data, name="x")
|
|
|
|
# name of nD variable matches dimension name
|
|
actual = as_variable(expected, name="x")
|
|
assert_identical(expected, actual)
|
|
|
|
# test datetime, timedelta conversion
|
|
dt = np.array([datetime(1999, 1, 1) + timedelta(days=x) for x in range(10)])
|
|
with pytest.warns(FutureWarning, match="IndexVariable"):
|
|
assert as_variable(dt, "time").dtype.kind == "M"
|
|
td = np.array([timedelta(days=x) for x in range(10)])
|
|
with pytest.warns(FutureWarning, match="IndexVariable"):
|
|
assert as_variable(td, "time").dtype.kind == "m"
|
|
|
|
with pytest.raises(TypeError):
|
|
as_variable(("x", DataArray([])))
|
|
|
|
def test_repr(self):
|
|
v = Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
|
|
v = v.astype(np.uint64)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.Variable (time: 2, x: 3)> Size: 48B
|
|
array([[1, 2, 3],
|
|
[4, 5, 6]], dtype=uint64)
|
|
Attributes:
|
|
foo: bar
|
|
"""
|
|
).strip()
|
|
assert expected == repr(v)
|
|
|
|
def test_repr_lazy_data(self):
|
|
v = Variable("x", LazilyIndexedArray(np.arange(2e5)))
|
|
assert "200000 values with dtype" in repr(v)
|
|
assert isinstance(v._data, LazilyIndexedArray)
|
|
|
|
def test_detect_indexer_type(self):
|
|
"""Tests indexer type was correctly detected."""
|
|
data = np.random.random((10, 11))
|
|
v = Variable(["x", "y"], data)
|
|
|
|
_, ind, _ = v._broadcast_indexes((0, 1))
|
|
assert type(ind) is indexing.BasicIndexer
|
|
|
|
_, ind, _ = v._broadcast_indexes((0, slice(0, 8, 2)))
|
|
assert type(ind) is indexing.BasicIndexer
|
|
|
|
_, ind, _ = v._broadcast_indexes((0, [0, 1]))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
_, ind, _ = v._broadcast_indexes(([0, 1], 1))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
_, ind, _ = v._broadcast_indexes(([0, 1], [1, 2]))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
_, ind, _ = v._broadcast_indexes(([0, 1], slice(0, 8, 2)))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
vind = Variable(("a",), [0, 1])
|
|
_, ind, _ = v._broadcast_indexes((vind, slice(0, 8, 2)))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
vind = Variable(("y",), [0, 1])
|
|
_, ind, _ = v._broadcast_indexes((vind, 3))
|
|
assert type(ind) is indexing.OuterIndexer
|
|
|
|
vind = Variable(("a",), [0, 1])
|
|
_, ind, _ = v._broadcast_indexes((vind, vind))
|
|
assert type(ind) is indexing.VectorizedIndexer
|
|
|
|
vind = Variable(("a", "b"), [[0, 2], [1, 3]])
|
|
_, ind, _ = v._broadcast_indexes((vind, 3))
|
|
assert type(ind) is indexing.VectorizedIndexer
|
|
|
|
def test_indexer_type(self):
|
|
# GH:issue:1688. Wrong indexer type induces NotImplementedError
|
|
data = np.random.random((10, 11))
|
|
v = Variable(["x", "y"], data)
|
|
|
|
def assert_indexer_type(key, object_type):
|
|
dims, index_tuple, new_order = v._broadcast_indexes(key)
|
|
assert isinstance(index_tuple, object_type)
|
|
|
|
# should return BasicIndexer
|
|
assert_indexer_type((0, 1), BasicIndexer)
|
|
assert_indexer_type((0, slice(None, None)), BasicIndexer)
|
|
assert_indexer_type((Variable([], 3), slice(None, None)), BasicIndexer)
|
|
assert_indexer_type((Variable([], 3), (Variable([], 6))), BasicIndexer)
|
|
|
|
# should return OuterIndexer
|
|
assert_indexer_type(([0, 1], 1), OuterIndexer)
|
|
assert_indexer_type(([0, 1], [1, 2]), OuterIndexer)
|
|
assert_indexer_type((Variable(("x"), [0, 1]), 1), OuterIndexer)
|
|
assert_indexer_type((Variable(("x"), [0, 1]), slice(None, None)), OuterIndexer)
|
|
assert_indexer_type(
|
|
(Variable(("x"), [0, 1]), Variable(("y"), [0, 1])), OuterIndexer
|
|
)
|
|
|
|
# should return VectorizedIndexer
|
|
assert_indexer_type((Variable(("y"), [0, 1]), [0, 1]), VectorizedIndexer)
|
|
assert_indexer_type(
|
|
(Variable(("z"), [0, 1]), Variable(("z"), [0, 1])), VectorizedIndexer
|
|
)
|
|
assert_indexer_type(
|
|
(
|
|
Variable(("a", "b"), [[0, 1], [1, 2]]),
|
|
Variable(("a", "b"), [[0, 1], [1, 2]]),
|
|
),
|
|
VectorizedIndexer,
|
|
)
|
|
|
|
def test_items(self):
|
|
data = np.random.random((10, 11))
|
|
v = Variable(["x", "y"], data)
|
|
# test slicing
|
|
assert_identical(v, v[:])
|
|
assert_identical(v, v[...])
|
|
assert_identical(Variable(["y"], data[0]), v[0])
|
|
assert_identical(Variable(["x"], data[:, 0]), v[:, 0])
|
|
assert_identical(Variable(["x", "y"], data[:3, :2]), v[:3, :2])
|
|
# test array indexing
|
|
x = Variable(["x"], np.arange(10))
|
|
y = Variable(["y"], np.arange(11))
|
|
assert_identical(v, v[x.values])
|
|
assert_identical(v, v[x])
|
|
assert_identical(v[:3], v[x < 3])
|
|
assert_identical(v[:, 3:], v[:, y >= 3])
|
|
assert_identical(v[:3, 3:], v[x < 3, y >= 3])
|
|
assert_identical(v[:3, :2], v[x[:3], y[:2]])
|
|
assert_identical(v[:3, :2], v[range(3), range(2)])
|
|
# test iteration
|
|
for n, item in enumerate(v):
|
|
assert_identical(Variable(["y"], data[n]), item)
|
|
with pytest.raises(TypeError, match=r"iteration over a 0-d"):
|
|
iter(Variable([], 0))
|
|
# test setting
|
|
v.values[:] = 0
|
|
assert np.all(v.values == 0)
|
|
# test orthogonal setting
|
|
v[range(10), range(11)] = 1
|
|
assert_array_equal(v.values, np.ones((10, 11)))
|
|
|
|
def test_getitem_basic(self):
|
|
v = self.cls(["x", "y"], [[0, 1, 2], [3, 4, 5]])
|
|
|
|
# int argument
|
|
v_new = v[0]
|
|
assert v_new.dims == ("y",)
|
|
assert_array_equal(v_new, v._data[0])
|
|
|
|
# slice argument
|
|
v_new = v[:2]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v._data[:2])
|
|
|
|
# list arguments
|
|
v_new = v[[0]]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v._data[[0]])
|
|
|
|
v_new = v[[]]
|
|
assert v_new.dims == ("x", "y")
|
|
assert_array_equal(v_new, v._data[[]])
|
|
|
|
# dict arguments
|
|
v_new = v[dict(x=0)]
|
|
assert v_new.dims == ("y",)
|
|
assert_array_equal(v_new, v._data[0])
|
|
|
|
v_new = v[dict(x=0, y=slice(None))]
|
|
assert v_new.dims == ("y",)
|
|
assert_array_equal(v_new, v._data[0])
|
|
|
|
v_new = v[dict(x=0, y=1)]
|
|
assert v_new.dims == ()
|
|
assert_array_equal(v_new, v._data[0, 1])
|
|
|
|
v_new = v[dict(y=1)]
|
|
assert v_new.dims == ("x",)
|
|
assert_array_equal(v_new, v._data[:, 1])
|
|
|
|
# tuple argument
|
|
v_new = v[(slice(None), 1)]
|
|
assert v_new.dims == ("x",)
|
|
assert_array_equal(v_new, v._data[:, 1])
|
|
|
|
# test that we obtain a modifiable view when taking a 0d slice
|
|
v_new = v[0, 0]
|
|
v_new[...] += 99
|
|
assert_array_equal(v_new, v._data[0, 0])
|
|
|
|
def test_getitem_with_mask_2d_input(self):
|
|
v = Variable(("x", "y"), [[0, 1, 2], [3, 4, 5]])
|
|
assert_identical(
|
|
v._getitem_with_mask(([-1, 0], [1, -1])),
|
|
Variable(("x", "y"), [[np.nan, np.nan], [1, np.nan]]),
|
|
)
|
|
assert_identical(v._getitem_with_mask((slice(2), [0, 1, 2])), v)
|
|
|
|
def test_isel(self):
|
|
v = Variable(["time", "x"], self.d)
|
|
assert_identical(v.isel(time=slice(None)), v)
|
|
assert_identical(v.isel(time=0), v[0])
|
|
assert_identical(v.isel(time=slice(0, 3)), v[:3])
|
|
assert_identical(v.isel(x=0), v[:, 0])
|
|
assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]])
|
|
assert_identical(v.isel(time=[]), v[[]])
|
|
with pytest.raises(
|
|
ValueError,
|
|
match=r"Dimensions {'not_a_dim'} do not exist. Expected one or more of "
|
|
r"\('time', 'x'\)",
|
|
):
|
|
v.isel(not_a_dim=0)
|
|
with pytest.warns(
|
|
UserWarning,
|
|
match=r"Dimensions {'not_a_dim'} do not exist. Expected one or more of "
|
|
r"\('time', 'x'\)",
|
|
):
|
|
v.isel(not_a_dim=0, missing_dims="warn")
|
|
assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore"))
|
|
|
|
def test_index_0d_numpy_string(self):
|
|
# regression test to verify our work around for indexing 0d strings
|
|
v = Variable([], np.bytes_("asdf"))
|
|
assert_identical(v[()], v)
|
|
|
|
v = Variable([], np.str_("asdf"))
|
|
assert_identical(v[()], v)
|
|
|
|
def test_indexing_0d_unicode(self):
|
|
# regression test for GH568
|
|
actual = Variable(("x"), ["tmax"])[0][()]
|
|
expected = Variable((), "tmax")
|
|
assert_identical(actual, expected)
|
|
|
|
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0])
|
|
def test_shift(self, fill_value):
|
|
v = Variable("x", [1, 2, 3, 4, 5])
|
|
|
|
assert_identical(v, v.shift(x=0))
|
|
assert v is not v.shift(x=0)
|
|
|
|
expected = Variable("x", [np.nan, np.nan, 1, 2, 3])
|
|
assert_identical(expected, v.shift(x=2))
|
|
|
|
if fill_value == dtypes.NA:
|
|
# if we supply the default, we expect the missing value for a
|
|
# float array
|
|
fill_value_exp = np.nan
|
|
else:
|
|
fill_value_exp = fill_value
|
|
|
|
expected = Variable("x", [fill_value_exp, 1, 2, 3, 4])
|
|
assert_identical(expected, v.shift(x=1, fill_value=fill_value))
|
|
|
|
expected = Variable("x", [2, 3, 4, 5, fill_value_exp])
|
|
assert_identical(expected, v.shift(x=-1, fill_value=fill_value))
|
|
|
|
expected = Variable("x", [fill_value_exp] * 5)
|
|
assert_identical(expected, v.shift(x=5, fill_value=fill_value))
|
|
assert_identical(expected, v.shift(x=6, fill_value=fill_value))
|
|
|
|
with pytest.raises(ValueError, match=r"dimension"):
|
|
v.shift(z=0)
|
|
|
|
v = Variable("x", [1, 2, 3, 4, 5], {"foo": "bar"})
|
|
assert_identical(v, v.shift(x=0))
|
|
|
|
expected = Variable("x", [fill_value_exp, 1, 2, 3, 4], {"foo": "bar"})
|
|
assert_identical(expected, v.shift(x=1, fill_value=fill_value))
|
|
|
|
def test_shift2d(self):
|
|
v = Variable(("x", "y"), [[1, 2], [3, 4]])
|
|
expected = Variable(("x", "y"), [[np.nan, np.nan], [np.nan, 1]])
|
|
assert_identical(expected, v.shift(x=1, y=1))
|
|
|
|
def test_roll(self):
|
|
v = Variable("x", [1, 2, 3, 4, 5])
|
|
|
|
assert_identical(v, v.roll(x=0))
|
|
assert v is not v.roll(x=0)
|
|
|
|
expected = Variable("x", [5, 1, 2, 3, 4])
|
|
assert_identical(expected, v.roll(x=1))
|
|
assert_identical(expected, v.roll(x=-4))
|
|
assert_identical(expected, v.roll(x=6))
|
|
|
|
expected = Variable("x", [4, 5, 1, 2, 3])
|
|
assert_identical(expected, v.roll(x=2))
|
|
assert_identical(expected, v.roll(x=-3))
|
|
|
|
with pytest.raises(ValueError, match=r"dimension"):
|
|
v.roll(z=0)
|
|
|
|
def test_roll_consistency(self):
|
|
v = Variable(("x", "y"), np.random.randn(5, 6))
|
|
|
|
for axis, dim in [(0, "x"), (1, "y")]:
|
|
for shift in [-3, 0, 1, 7, 11]:
|
|
expected = np.roll(v.values, shift, axis=axis)
|
|
actual = v.roll(**{dim: shift}).values
|
|
assert_array_equal(expected, actual)
|
|
|
|
def test_transpose(self):
|
|
v = Variable(["time", "x"], self.d)
|
|
v2 = Variable(["x", "time"], self.d.T)
|
|
assert_identical(v, v2.transpose())
|
|
assert_identical(v.transpose(), v.T)
|
|
x = np.random.randn(2, 3, 4, 5)
|
|
w = Variable(["a", "b", "c", "d"], x)
|
|
w2 = Variable(["d", "b", "c", "a"], np.einsum("abcd->dbca", x))
|
|
assert w2.shape == (5, 3, 4, 2)
|
|
assert_identical(w2, w.transpose("d", "b", "c", "a"))
|
|
assert_identical(w2, w.transpose("d", ..., "a"))
|
|
assert_identical(w2, w.transpose("d", "b", "c", ...))
|
|
assert_identical(w2, w.transpose(..., "b", "c", "a"))
|
|
assert_identical(w, w2.transpose("a", "b", "c", "d"))
|
|
w3 = Variable(["b", "c", "d", "a"], np.einsum("abcd->bcda", x))
|
|
assert_identical(w, w3.transpose("a", "b", "c", "d"))
|
|
|
|
# test missing dimension, raise error
|
|
with pytest.raises(ValueError):
|
|
v.transpose(..., "not_a_dim")
|
|
|
|
# test missing dimension, ignore error
|
|
actual = v.transpose(..., "not_a_dim", missing_dims="ignore")
|
|
expected_ell = v.transpose(...)
|
|
assert_identical(expected_ell, actual)
|
|
|
|
# test missing dimension, raise warning
|
|
with pytest.warns(UserWarning):
|
|
v.transpose(..., "not_a_dim", missing_dims="warn")
|
|
assert_identical(expected_ell, actual)
|
|
|
|
def test_transpose_0d(self):
|
|
for value in [
|
|
3.5,
|
|
("a", 1),
|
|
np.datetime64("2000-01-01"),
|
|
np.timedelta64(1, "h"),
|
|
None,
|
|
object(),
|
|
]:
|
|
variable = Variable([], value)
|
|
actual = variable.transpose()
|
|
assert_identical(actual, variable)
|
|
|
|
def test_pandas_categorical_dtype(self):
|
|
data = pd.Categorical(np.arange(10, dtype="int64"))
|
|
v = self.cls("x", data)
|
|
print(v) # should not error
|
|
assert pd.api.types.is_extension_array_dtype(v.dtype)
|
|
|
|
def test_pandas_categorical_no_chunk(self):
|
|
data = pd.Categorical(np.arange(10, dtype="int64"))
|
|
v = self.cls("x", data)
|
|
with pytest.raises(
|
|
ValueError, match=r".*was found to be a Pandas ExtensionArray.*"
|
|
):
|
|
v.chunk((5,))
|
|
|
|
def test_squeeze(self):
|
|
v = Variable(["x", "y"], [[1]])
|
|
assert_identical(Variable([], 1), v.squeeze())
|
|
assert_identical(Variable(["y"], [1]), v.squeeze("x"))
|
|
assert_identical(Variable(["y"], [1]), v.squeeze(["x"]))
|
|
assert_identical(Variable(["x"], [1]), v.squeeze("y"))
|
|
assert_identical(Variable([], 1), v.squeeze(["x", "y"]))
|
|
|
|
v = Variable(["x", "y"], [[1, 2]])
|
|
assert_identical(Variable(["y"], [1, 2]), v.squeeze())
|
|
assert_identical(Variable(["y"], [1, 2]), v.squeeze("x"))
|
|
with pytest.raises(ValueError, match=r"cannot select a dimension"):
|
|
v.squeeze("y")
|
|
|
|
def test_get_axis_num(self) -> None:
|
|
v = Variable(["x", "y", "z"], np.random.randn(2, 3, 4))
|
|
assert v.get_axis_num("x") == 0
|
|
assert v.get_axis_num(["x"]) == (0,)
|
|
assert v.get_axis_num(["x", "y"]) == (0, 1)
|
|
assert v.get_axis_num(["z", "y", "x"]) == (2, 1, 0)
|
|
with pytest.raises(ValueError, match=r"not found in array dim"):
|
|
v.get_axis_num("foobar")
|
|
# Test the type annotations: mypy will complain if the inferred
|
|
# type is wrong
|
|
v.get_axis_num("x") + 0
|
|
v.get_axis_num(["x"]) + ()
|
|
v.get_axis_num(("x", "y")) + ()
|
|
|
|
def test_set_dims(self):
|
|
v = Variable(["x"], [0, 1])
|
|
actual = v.set_dims(["x", "y"])
|
|
expected = Variable(["x", "y"], [[0], [1]])
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.set_dims(["y", "x"])
|
|
assert_identical(actual, expected.T)
|
|
|
|
actual = v.set_dims({"x": 2, "y": 2})
|
|
expected = Variable(["x", "y"], [[0, 0], [1, 1]])
|
|
assert_identical(actual, expected)
|
|
|
|
v = Variable(["foo"], [0, 1])
|
|
actual = v.set_dims("foo")
|
|
expected = v
|
|
assert_identical(actual, expected)
|
|
|
|
with pytest.raises(ValueError, match=r"must be a superset"):
|
|
v.set_dims(["z"])
|
|
|
|
def test_set_dims_object_dtype(self):
|
|
v = Variable([], ("a", 1))
|
|
actual = v.set_dims(("x",), (3,))
|
|
exp_values = np.empty((3,), dtype=object)
|
|
for i in range(3):
|
|
exp_values[i] = ("a", 1)
|
|
expected = Variable(["x"], exp_values)
|
|
assert_identical(actual, expected)
|
|
|
|
def test_stack(self):
|
|
v = Variable(["x", "y"], [[0, 1], [2, 3]], {"foo": "bar"})
|
|
actual = v.stack(z=("x", "y"))
|
|
expected = Variable("z", [0, 1, 2, 3], v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.stack(z=("x",))
|
|
expected = Variable(("y", "z"), v.data.T, v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.stack(z=())
|
|
assert_identical(actual, v)
|
|
|
|
actual = v.stack(X=("x",), Y=("y",)).transpose("X", "Y")
|
|
expected = Variable(("X", "Y"), v.data, v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
def test_stack_errors(self):
|
|
v = Variable(["x", "y"], [[0, 1], [2, 3]], {"foo": "bar"})
|
|
|
|
with pytest.raises(ValueError, match=r"invalid existing dim"):
|
|
v.stack(z=("x1",))
|
|
with pytest.raises(ValueError, match=r"cannot create a new dim"):
|
|
v.stack(x=("x",))
|
|
|
|
def test_unstack(self):
|
|
v = Variable("z", [0, 1, 2, 3], {"foo": "bar"})
|
|
actual = v.unstack(z={"x": 2, "y": 2})
|
|
expected = Variable(("x", "y"), [[0, 1], [2, 3]], v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.unstack(z={"x": 4, "y": 1})
|
|
expected = Variable(("x", "y"), [[0], [1], [2], [3]], v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.unstack(z={"x": 4})
|
|
expected = Variable("x", [0, 1, 2, 3], v.attrs)
|
|
assert_identical(actual, expected)
|
|
|
|
def test_unstack_errors(self):
|
|
v = Variable("z", [0, 1, 2, 3])
|
|
with pytest.raises(ValueError, match=r"invalid existing dim"):
|
|
v.unstack(foo={"x": 4})
|
|
with pytest.raises(ValueError, match=r"cannot create a new dim"):
|
|
v.stack(z=("z",))
|
|
with pytest.raises(ValueError, match=r"the product of the new dim"):
|
|
v.unstack(z={"x": 5})
|
|
|
|
def test_unstack_2d(self):
|
|
v = Variable(["x", "y"], [[0, 1], [2, 3]])
|
|
actual = v.unstack(y={"z": 2})
|
|
expected = Variable(["x", "z"], v.data)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.unstack(x={"z": 2})
|
|
expected = Variable(["y", "z"], v.data.T)
|
|
assert_identical(actual, expected)
|
|
|
|
def test_stack_unstack_consistency(self):
|
|
v = Variable(["x", "y"], [[0, 1], [2, 3]])
|
|
actual = v.stack(z=("x", "y")).unstack(z={"x": 2, "y": 2})
|
|
assert_identical(actual, v)
|
|
|
|
@pytest.mark.filterwarnings("error::RuntimeWarning")
|
|
def test_unstack_without_missing(self):
|
|
v = Variable(["z"], [0, 1, 2, 3])
|
|
expected = Variable(["x", "y"], [[0, 1], [2, 3]])
|
|
|
|
actual = v.unstack(z={"x": 2, "y": 2})
|
|
|
|
assert_identical(actual, expected)
|
|
|
|
def test_broadcasting_math(self):
|
|
x = np.random.randn(2, 3)
|
|
v = Variable(["a", "b"], x)
|
|
# 1d to 2d broadcasting
|
|
assert_identical(v * v, Variable(["a", "b"], np.einsum("ab,ab->ab", x, x)))
|
|
assert_identical(v * v[0], Variable(["a", "b"], np.einsum("ab,b->ab", x, x[0])))
|
|
assert_identical(v[0] * v, Variable(["b", "a"], np.einsum("b,ab->ba", x[0], x)))
|
|
assert_identical(
|
|
v[0] * v[:, 0], Variable(["b", "a"], np.einsum("b,a->ba", x[0], x[:, 0]))
|
|
)
|
|
# higher dim broadcasting
|
|
y = np.random.randn(3, 4, 5)
|
|
w = Variable(["b", "c", "d"], y)
|
|
assert_identical(
|
|
v * w, Variable(["a", "b", "c", "d"], np.einsum("ab,bcd->abcd", x, y))
|
|
)
|
|
assert_identical(
|
|
w * v, Variable(["b", "c", "d", "a"], np.einsum("bcd,ab->bcda", y, x))
|
|
)
|
|
assert_identical(
|
|
v * w[0], Variable(["a", "b", "c", "d"], np.einsum("ab,cd->abcd", x, y[0]))
|
|
)
|
|
|
|
@pytest.mark.filterwarnings("ignore:Duplicate dimension names")
|
|
def test_broadcasting_failures(self):
|
|
a = Variable(["x"], np.arange(10))
|
|
b = Variable(["x"], np.arange(5))
|
|
c = Variable(["x", "x"], np.arange(100).reshape(10, 10))
|
|
with pytest.raises(ValueError, match=r"mismatched lengths"):
|
|
a + b
|
|
with pytest.raises(ValueError, match=r"duplicate dimensions"):
|
|
a + c
|
|
|
|
def test_inplace_math(self):
|
|
x = np.arange(5)
|
|
v = Variable(["x"], x)
|
|
v2 = v
|
|
v2 += 1
|
|
assert v is v2
|
|
# since we provided an ndarray for data, it is also modified in-place
|
|
assert source_ndarray(v.values) is x
|
|
assert_array_equal(v.values, np.arange(5) + 1)
|
|
|
|
with pytest.raises(ValueError, match=r"dimensions cannot change"):
|
|
v += Variable("y", np.arange(5))
|
|
|
|
def test_inplace_math_error(self):
|
|
x = np.arange(5)
|
|
v = IndexVariable(["x"], x)
|
|
with pytest.raises(
|
|
TypeError, match=r"Values of an IndexVariable are immutable"
|
|
):
|
|
v += 1
|
|
|
|
def test_reduce(self):
|
|
v = Variable(["x", "y"], self.d, {"ignored": "attributes"})
|
|
assert_identical(v.reduce(np.std, "x"), Variable(["y"], self.d.std(axis=0)))
|
|
assert_identical(v.reduce(np.std, axis=0), v.reduce(np.std, dim="x"))
|
|
assert_identical(
|
|
v.reduce(np.std, ["y", "x"]), Variable([], self.d.std(axis=(0, 1)))
|
|
)
|
|
assert_identical(v.reduce(np.std), Variable([], self.d.std()))
|
|
assert_identical(
|
|
v.reduce(np.mean, "x").reduce(np.std, "y"),
|
|
Variable([], self.d.mean(axis=0).std()),
|
|
)
|
|
assert_allclose(v.mean("x"), v.reduce(np.mean, "x"))
|
|
|
|
with pytest.raises(ValueError, match=r"cannot supply both"):
|
|
v.mean(dim="x", axis=0)
|
|
|
|
@requires_bottleneck
|
|
@pytest.mark.parametrize("compute_backend", ["bottleneck"], indirect=True)
|
|
def test_reduce_use_bottleneck(self, monkeypatch, compute_backend):
|
|
def raise_if_called(*args, **kwargs):
|
|
raise RuntimeError("should not have been called")
|
|
|
|
import bottleneck as bn
|
|
|
|
monkeypatch.setattr(bn, "nanmin", raise_if_called)
|
|
|
|
v = Variable("x", [0.0, np.nan, 1.0])
|
|
with pytest.raises(RuntimeError, match="should not have been called"):
|
|
with set_options(use_bottleneck=True):
|
|
v.min()
|
|
|
|
with set_options(use_bottleneck=False):
|
|
v.min()
|
|
|
|
@pytest.mark.parametrize("skipna", [True, False, None])
|
|
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
|
|
@pytest.mark.parametrize(
|
|
"axis, dim",
|
|
zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]], strict=True),
|
|
)
|
|
def test_quantile(self, q, axis, dim, skipna):
|
|
d = self.d.copy()
|
|
d[0, 0] = np.nan
|
|
|
|
v = Variable(["x", "y"], d)
|
|
actual = v.quantile(q, dim=dim, skipna=skipna)
|
|
_percentile_func = np.nanpercentile if skipna in (True, None) else np.percentile
|
|
expected = _percentile_func(d, np.array(q) * 100, axis=axis)
|
|
np.testing.assert_allclose(actual.values, expected)
|
|
|
|
@requires_dask
|
|
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
|
|
@pytest.mark.parametrize("axis, dim", [[1, "y"], [[1], ["y"]]])
|
|
def test_quantile_dask(self, q, axis, dim):
|
|
v = Variable(["x", "y"], self.d).chunk({"x": 2})
|
|
actual = v.quantile(q, dim=dim)
|
|
assert isinstance(actual.data, dask_array_type)
|
|
expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis)
|
|
np.testing.assert_allclose(actual.values, expected)
|
|
|
|
@pytest.mark.parametrize("method", ["midpoint", "lower"])
|
|
@pytest.mark.parametrize(
|
|
"use_dask", [pytest.param(True, marks=requires_dask), False]
|
|
)
|
|
def test_quantile_method(self, method, use_dask) -> None:
|
|
v = Variable(["x", "y"], self.d)
|
|
if use_dask:
|
|
v = v.chunk({"x": 2})
|
|
|
|
q = np.array([0.25, 0.5, 0.75])
|
|
actual = v.quantile(q, dim="y", method=method)
|
|
|
|
expected = np.nanquantile(self.d, q, axis=1, method=method)
|
|
|
|
if use_dask:
|
|
assert isinstance(actual.data, dask_array_type)
|
|
|
|
np.testing.assert_allclose(actual.values, expected)
|
|
|
|
@pytest.mark.parametrize("method", ["midpoint", "lower"])
|
|
def test_quantile_interpolation_deprecation(self, method) -> None:
|
|
v = Variable(["x", "y"], self.d)
|
|
q = np.array([0.25, 0.5, 0.75])
|
|
|
|
with pytest.warns(
|
|
FutureWarning,
|
|
match="`interpolation` argument to quantile was renamed to `method`",
|
|
):
|
|
actual = v.quantile(q, dim="y", interpolation=method)
|
|
|
|
expected = v.quantile(q, dim="y", method=method)
|
|
|
|
np.testing.assert_allclose(actual.values, expected.values)
|
|
|
|
with warnings.catch_warnings(record=True):
|
|
with pytest.raises(TypeError, match="interpolation and method keywords"):
|
|
v.quantile(q, dim="y", interpolation=method, method=method)
|
|
|
|
@requires_dask
|
|
def test_quantile_chunked_dim_error(self):
|
|
v = Variable(["x", "y"], self.d).chunk({"x": 2})
|
|
|
|
if has_dask_ge_2024_11_0:
|
|
# Dask rechunks
|
|
np.testing.assert_allclose(
|
|
v.compute().quantile(0.5, dim="x"), v.quantile(0.5, dim="x")
|
|
)
|
|
|
|
else:
|
|
# this checks for ValueError in dask.array.apply_gufunc
|
|
with pytest.raises(ValueError, match=r"consists of multiple chunks"):
|
|
v.quantile(0.5, dim="x")
|
|
|
|
@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
|
|
@pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]])
|
|
def test_quantile_out_of_bounds(self, q, compute_backend):
|
|
v = Variable(["x", "y"], self.d)
|
|
|
|
# escape special characters
|
|
with pytest.raises(
|
|
ValueError,
|
|
match=r"(Q|q)uantiles must be in the range \[0, 1\]",
|
|
):
|
|
v.quantile(q, dim="x")
|
|
|
|
@requires_dask
|
|
@requires_bottleneck
|
|
def test_rank_dask(self):
|
|
# Instead of a single test here, we could parameterize the other tests for both
|
|
# arrays. But this is sufficient.
|
|
v = Variable(
|
|
["x", "y"], [[30.0, 1.0, np.nan, 20.0, 4.0], [30.0, 1.0, np.nan, 20.0, 4.0]]
|
|
).chunk(x=1)
|
|
expected = Variable(
|
|
["x", "y"], [[4.0, 1.0, np.nan, 3.0, 2.0], [4.0, 1.0, np.nan, 3.0, 2.0]]
|
|
)
|
|
assert_equal(v.rank("y").compute(), expected)
|
|
|
|
with pytest.raises(
|
|
ValueError, match=r" with dask='parallelized' consists of multiple chunks"
|
|
):
|
|
v.rank("x")
|
|
|
|
def test_rank_use_bottleneck(self):
|
|
v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0])
|
|
with set_options(use_bottleneck=False):
|
|
with pytest.raises(RuntimeError):
|
|
v.rank("x")
|
|
|
|
@requires_bottleneck
|
|
def test_rank(self):
|
|
import bottleneck as bn
|
|
|
|
# floats
|
|
v = Variable(["x", "y"], [[3, 4, np.nan, 1]])
|
|
expect_0 = bn.nanrankdata(v.data, axis=0)
|
|
expect_1 = bn.nanrankdata(v.data, axis=1)
|
|
np.testing.assert_allclose(v.rank("x").values, expect_0)
|
|
np.testing.assert_allclose(v.rank("y").values, expect_1)
|
|
# int
|
|
v = Variable(["x"], [3, 2, 1])
|
|
expect = bn.rankdata(v.data, axis=0)
|
|
np.testing.assert_allclose(v.rank("x").values, expect)
|
|
# str
|
|
v = Variable(["x"], ["c", "b", "a"])
|
|
expect = bn.rankdata(v.data, axis=0)
|
|
np.testing.assert_allclose(v.rank("x").values, expect)
|
|
# pct
|
|
v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0])
|
|
v_expect = Variable(["x"], [0.75, 0.25, np.nan, 0.5, 1.0])
|
|
assert_equal(v.rank("x", pct=True), v_expect)
|
|
# invalid dim
|
|
with pytest.raises(ValueError):
|
|
# apply_ufunc error message isn't great here — `ValueError: tuple.index(x): x not in tuple`
|
|
v.rank("y")
|
|
|
|
def test_big_endian_reduce(self):
|
|
# regression test for GH489
|
|
data = np.ones(5, dtype=">f4")
|
|
v = Variable(["x"], data)
|
|
expected = Variable([], 5)
|
|
assert_identical(expected, v.sum())
|
|
|
|
def test_reduce_funcs(self):
|
|
v = Variable("x", np.array([1, np.nan, 2, 3]))
|
|
assert_identical(v.mean(), Variable([], 2))
|
|
assert_identical(v.mean(skipna=True), Variable([], 2))
|
|
assert_identical(v.mean(skipna=False), Variable([], np.nan))
|
|
assert_identical(np.mean(v), Variable([], 2))
|
|
|
|
assert_identical(v.prod(), Variable([], 6))
|
|
assert_identical(v.cumsum(axis=0), Variable("x", np.array([1, 1, 3, 6])))
|
|
assert_identical(v.cumprod(axis=0), Variable("x", np.array([1, 1, 2, 6])))
|
|
assert_identical(v.var(), Variable([], 2.0 / 3))
|
|
assert_identical(v.median(), Variable([], 2))
|
|
|
|
v = Variable("x", [True, False, False])
|
|
assert_identical(v.any(), Variable([], True))
|
|
assert_identical(v.all(dim="x"), Variable([], False))
|
|
|
|
v = Variable("t", pd.date_range("2000-01-01", periods=3))
|
|
assert v.argmax(skipna=True, dim="t") == 2
|
|
|
|
assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))
|
|
|
|
def test_reduce_keepdims(self):
|
|
v = Variable(["x", "y"], self.d)
|
|
|
|
with set_options(use_numbagg=False):
|
|
assert_identical(
|
|
v.mean(keepdims=True), Variable(v.dims, np.mean(self.d, keepdims=True))
|
|
)
|
|
assert_identical(
|
|
v.mean(dim="x", keepdims=True),
|
|
Variable(v.dims, np.mean(self.d, axis=0, keepdims=True)),
|
|
)
|
|
assert_identical(
|
|
v.mean(dim="y", keepdims=True),
|
|
Variable(v.dims, np.mean(self.d, axis=1, keepdims=True)),
|
|
)
|
|
assert_identical(
|
|
v.mean(dim=["y", "x"], keepdims=True),
|
|
Variable(v.dims, np.mean(self.d, axis=(1, 0), keepdims=True)),
|
|
)
|
|
|
|
v = Variable([], 1.0)
|
|
assert_identical(
|
|
v.mean(keepdims=True), Variable([], np.mean(v.data, keepdims=True))
|
|
)
|
|
|
|
@requires_dask
|
|
def test_reduce_keepdims_dask(self):
|
|
import dask.array
|
|
|
|
v = Variable(["x", "y"], self.d).chunk()
|
|
|
|
actual = v.mean(keepdims=True)
|
|
assert isinstance(actual.data, dask.array.Array)
|
|
|
|
expected = Variable(v.dims, np.mean(self.d, keepdims=True))
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.mean(dim="y", keepdims=True)
|
|
assert isinstance(actual.data, dask.array.Array)
|
|
|
|
expected = Variable(v.dims, np.mean(self.d, axis=1, keepdims=True))
|
|
assert_identical(actual, expected)
|
|
|
|
def test_reduce_keep_attrs(self):
|
|
_attrs = {"units": "test", "long_name": "testing"}
|
|
|
|
v = Variable(["x", "y"], self.d, _attrs)
|
|
|
|
# Test dropped attrs
|
|
vm = v.mean()
|
|
assert len(vm.attrs) == 0
|
|
assert vm.attrs == {}
|
|
|
|
# Test kept attrs
|
|
vm = v.mean(keep_attrs=True)
|
|
assert len(vm.attrs) == len(_attrs)
|
|
assert vm.attrs == _attrs
|
|
|
|
def test_binary_ops_keep_attrs(self):
|
|
_attrs = {"units": "test", "long_name": "testing"}
|
|
a = Variable(["x", "y"], np.random.randn(3, 3), _attrs)
|
|
b = Variable(["x", "y"], np.random.randn(3, 3), _attrs)
|
|
# Test dropped attrs
|
|
d = a - b # just one operation
|
|
assert d.attrs == {}
|
|
# Test kept attrs
|
|
with set_options(keep_attrs=True):
|
|
d = a - b
|
|
assert d.attrs == _attrs
|
|
|
|
def test_count(self):
|
|
expected = Variable([], 3)
|
|
actual = Variable(["x"], [1, 2, 3, np.nan]).count()
|
|
assert_identical(expected, actual)
|
|
|
|
v = Variable(["x"], np.array(["1", "2", "3", np.nan], dtype=object))
|
|
actual = v.count()
|
|
assert_identical(expected, actual)
|
|
|
|
actual = Variable(["x"], [True, False, True]).count()
|
|
assert_identical(expected, actual)
|
|
assert actual.dtype == int
|
|
|
|
expected = Variable(["x"], [2, 3])
|
|
actual = Variable(["x", "y"], [[1, 0, np.nan], [1, 1, 1]]).count("y")
|
|
assert_identical(expected, actual)
|
|
|
|
def test_setitem(self):
|
|
v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]])
|
|
v[0, 1] = 1
|
|
assert v[0, 1] == 1
|
|
|
|
v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]])
|
|
v[dict(x=[0, 1])] = 1
|
|
assert_array_equal(v[[0, 1]], np.ones_like(v[[0, 1]]))
|
|
|
|
# boolean indexing
|
|
v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]])
|
|
v[dict(x=[True, False])] = 1
|
|
|
|
assert_array_equal(v[0], np.ones_like(v[0]))
|
|
v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]])
|
|
v[dict(x=[True, False], y=[False, True, False])] = 1
|
|
assert v[0, 1] == 1
|
|
|
|
def test_setitem_fancy(self):
|
|
# assignment which should work as np.ndarray does
|
|
def assert_assigned_2d(array, key_x, key_y, values):
|
|
expected = array.copy()
|
|
expected[key_x, key_y] = values
|
|
v = Variable(["x", "y"], array)
|
|
v[dict(x=key_x, y=key_y)] = values
|
|
assert_array_equal(expected, v)
|
|
|
|
# 1d vectorized indexing
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=Variable(["a"], [0, 1]),
|
|
key_y=Variable(["a"], [0, 1]),
|
|
values=0,
|
|
)
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=Variable(["a"], [0, 1]),
|
|
key_y=Variable(["a"], [0, 1]),
|
|
values=Variable((), 0),
|
|
)
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=Variable(["a"], [0, 1]),
|
|
key_y=Variable(["a"], [0, 1]),
|
|
values=Variable(("a"), [3, 2]),
|
|
)
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=slice(None),
|
|
key_y=Variable(["a"], [0, 1]),
|
|
values=Variable(("a"), [3, 2]),
|
|
)
|
|
|
|
# 2d-vectorized indexing
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=Variable(["a", "b"], [[0, 1]]),
|
|
key_y=Variable(["a", "b"], [[1, 0]]),
|
|
values=0,
|
|
)
|
|
assert_assigned_2d(
|
|
np.random.randn(4, 3),
|
|
key_x=Variable(["a", "b"], [[0, 1]]),
|
|
key_y=Variable(["a", "b"], [[1, 0]]),
|
|
values=[0],
|
|
)
|
|
assert_assigned_2d(
|
|
np.random.randn(5, 4),
|
|
key_x=Variable(["a", "b"], [[0, 1], [2, 3]]),
|
|
key_y=Variable(["a", "b"], [[1, 0], [3, 3]]),
|
|
values=[2, 3],
|
|
)
|
|
|
|
# vindex with slice
|
|
v = Variable(["x", "y", "z"], np.ones((4, 3, 2)))
|
|
ind = Variable(["a"], [0, 1])
|
|
v[dict(x=ind, z=ind)] = 0
|
|
expected = Variable(["x", "y", "z"], np.ones((4, 3, 2)))
|
|
expected[0, :, 0] = 0
|
|
expected[1, :, 1] = 0
|
|
assert_identical(expected, v)
|
|
|
|
# dimension broadcast
|
|
v = Variable(["x", "y"], np.ones((3, 2)))
|
|
ind = Variable(["a", "b"], [[0, 1]])
|
|
v[ind, :] = 0
|
|
expected = Variable(["x", "y"], [[0, 0], [0, 0], [1, 1]])
|
|
assert_identical(expected, v)
|
|
|
|
with pytest.raises(ValueError, match=r"shape mismatch"):
|
|
v[ind, ind] = np.zeros((1, 2, 1))
|
|
|
|
v = Variable(["x", "y"], [[0, 3, 2], [3, 4, 5]])
|
|
ind = Variable(["a"], [0, 1])
|
|
v[dict(x=ind)] = Variable(["a", "y"], np.ones((2, 3), dtype=int) * 10)
|
|
assert_array_equal(v[0], np.ones_like(v[0]) * 10)
|
|
assert_array_equal(v[1], np.ones_like(v[1]) * 10)
|
|
assert v.dims == ("x", "y") # dimension should not change
|
|
|
|
# increment
|
|
v = Variable(["x", "y"], np.arange(6).reshape(3, 2))
|
|
ind = Variable(["a"], [0, 1])
|
|
v[dict(x=ind)] += 1
|
|
expected = Variable(["x", "y"], [[1, 2], [3, 4], [4, 5]])
|
|
assert_identical(v, expected)
|
|
|
|
ind = Variable(["a"], [0, 0])
|
|
v[dict(x=ind)] += 1
|
|
expected = Variable(["x", "y"], [[2, 3], [3, 4], [4, 5]])
|
|
assert_identical(v, expected)
|
|
|
|
def test_coarsen(self):
|
|
v = self.cls(["x"], [0, 1, 2, 3, 4])
|
|
actual = v.coarsen({"x": 2}, boundary="pad", func="mean")
|
|
expected = self.cls(["x"], [0.5, 2.5, 4])
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.coarsen({"x": 2}, func="mean", boundary="pad", side="right")
|
|
expected = self.cls(["x"], [0, 1.5, 3.5])
|
|
assert_identical(actual, expected)
|
|
|
|
actual = v.coarsen({"x": 2}, func=np.mean, side="right", boundary="trim")
|
|
expected = self.cls(["x"], [1.5, 3.5])
|
|
assert_identical(actual, expected)
|
|
|
|
# working test
|
|
v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2))
|
|
for windows, func, side, boundary in [
|
|
({"x": 2}, np.mean, "left", "trim"),
|
|
({"x": 2}, np.median, {"x": "left"}, "pad"),
|
|
({"x": 2, "y": 3}, np.max, "left", {"x": "pad", "y": "trim"}),
|
|
]:
|
|
v.coarsen(windows, func, boundary, side)
|
|
|
|
def test_coarsen_2d(self):
|
|
# 2d-mean should be the same with the successive 1d-mean
|
|
v = self.cls(["x", "y"], np.arange(6 * 12).reshape(6, 12))
|
|
actual = v.coarsen({"x": 3, "y": 4}, func="mean")
|
|
expected = v.coarsen({"x": 3}, func="mean").coarsen({"y": 4}, func="mean")
|
|
assert_equal(actual, expected)
|
|
|
|
v = self.cls(["x", "y"], np.arange(7 * 12).reshape(7, 12))
|
|
actual = v.coarsen({"x": 3, "y": 4}, func="mean", boundary="trim")
|
|
expected = v.coarsen({"x": 3}, func="mean", boundary="trim").coarsen(
|
|
{"y": 4}, func="mean", boundary="trim"
|
|
)
|
|
assert_equal(actual, expected)
|
|
|
|
# if there is nan, the two should be different
|
|
v = self.cls(["x", "y"], 1.0 * np.arange(6 * 12).reshape(6, 12))
|
|
v[2, 4] = np.nan
|
|
v[3, 5] = np.nan
|
|
actual = v.coarsen({"x": 3, "y": 4}, func="mean", boundary="trim")
|
|
expected = (
|
|
v.coarsen({"x": 3}, func="sum", boundary="trim").coarsen(
|
|
{"y": 4}, func="sum", boundary="trim"
|
|
)
|
|
/ 12
|
|
)
|
|
assert not actual.equals(expected)
|
|
# adjusting the nan count
|
|
expected[0, 1] *= 12 / 11
|
|
expected[1, 1] *= 12 / 11
|
|
assert_allclose(actual, expected)
|
|
|
|
v = self.cls(("x", "y"), np.arange(4 * 4, dtype=np.float32).reshape(4, 4))
|
|
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
|
|
expected = self.cls(("x", "y"), 4 * np.ones((2, 2)))
|
|
assert_equal(actual, expected)
|
|
|
|
v[0, 0] = np.nan
|
|
v[-1, -1] = np.nan
|
|
expected[0, 0] = 3
|
|
expected[-1, -1] = 3
|
|
actual = v.coarsen(dict(x=2, y=2), func="count", boundary="exact")
|
|
assert_equal(actual, expected)
|
|
|
|
actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=False)
|
|
expected = self.cls(("x", "y"), [[np.nan, 18], [42, np.nan]])
|
|
assert_equal(actual, expected)
|
|
|
|
actual = v.coarsen(dict(x=2, y=2), func="sum", boundary="exact", skipna=True)
|
|
expected = self.cls(("x", "y"), [[10, 18], [42, 35]])
|
|
assert_equal(actual, expected)
|
|
|
|
# perhaps @pytest.mark.parametrize("operation", [f for f in duck_array_ops])
|
|
def test_coarsen_keep_attrs(self, operation="mean"):
|
|
_attrs = {"units": "test", "long_name": "testing"}
|
|
|
|
test_func = getattr(duck_array_ops, operation, None)
|
|
|
|
# Test dropped attrs
|
|
with set_options(keep_attrs=False):
|
|
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
|
|
windows={"coord": 1}, func=test_func, boundary="exact", side="left"
|
|
)
|
|
assert new.attrs == {}
|
|
|
|
# Test kept attrs
|
|
with set_options(keep_attrs=True):
|
|
new = Variable(["coord"], np.linspace(1, 10, 100), attrs=_attrs).coarsen(
|
|
windows={"coord": 1},
|
|
func=test_func,
|
|
boundary="exact",
|
|
side="left",
|
|
)
|
|
assert new.attrs == _attrs
|
|
|
|
|
|
@requires_dask
|
|
class TestVariableWithDask(VariableSubclassobjects):
|
|
def cls(self, *args, **kwargs) -> Variable:
|
|
return Variable(*args, **kwargs).chunk()
|
|
|
|
def test_chunk(self):
|
|
unblocked = Variable(["dim_0", "dim_1"], np.ones((3, 4)))
|
|
assert unblocked.chunks is None
|
|
|
|
blocked = unblocked.chunk()
|
|
assert blocked.chunks == ((3,), (4,))
|
|
first_dask_name = blocked.data.name
|
|
|
|
blocked = unblocked.chunk(chunks=((2, 1), (2, 2)))
|
|
assert blocked.chunks == ((2, 1), (2, 2))
|
|
assert blocked.data.name != first_dask_name
|
|
|
|
blocked = unblocked.chunk(chunks=(3, 3))
|
|
assert blocked.chunks == ((3,), (3, 1))
|
|
assert blocked.data.name != first_dask_name
|
|
|
|
# name doesn't change when rechunking by same amount
|
|
# this fails if ReprObject doesn't have __dask_tokenize__ defined
|
|
assert unblocked.chunk(2).data.name == unblocked.chunk(2).data.name
|
|
|
|
assert blocked.load().chunks is None
|
|
|
|
# Check that kwargs are passed
|
|
import dask.array as da
|
|
|
|
blocked = unblocked.chunk(name="testname_")
|
|
assert isinstance(blocked.data, da.Array)
|
|
assert "testname_" in blocked.data.name
|
|
|
|
# test kwargs form of chunks
|
|
blocked = unblocked.chunk(dim_0=3, dim_1=3)
|
|
assert blocked.chunks == ((3,), (3, 1))
|
|
assert blocked.data.name != first_dask_name
|
|
|
|
@pytest.mark.skip
|
|
def test_0d_object_array_with_list(self):
|
|
super().test_0d_object_array_with_list()
|
|
|
|
@pytest.mark.skip
|
|
def test_array_interface(self):
|
|
# dask array does not have `argsort`
|
|
super().test_array_interface()
|
|
|
|
@pytest.mark.skip
|
|
def test_copy_index(self):
|
|
super().test_copy_index()
|
|
|
|
@pytest.mark.skip
|
|
@pytest.mark.filterwarnings("ignore:elementwise comparison failed.*:FutureWarning")
|
|
def test_eq_all_dtypes(self):
|
|
super().test_eq_all_dtypes()
|
|
|
|
def test_getitem_fancy(self):
|
|
super().test_getitem_fancy()
|
|
|
|
def test_getitem_1d_fancy(self):
|
|
super().test_getitem_1d_fancy()
|
|
|
|
def test_getitem_with_mask_nd_indexer(self):
|
|
import dask.array as da
|
|
|
|
v = Variable(["x"], da.arange(3, chunks=3))
|
|
indexer = Variable(("x", "y"), [[0, -1], [-1, 2]])
|
|
assert_identical(
|
|
v._getitem_with_mask(indexer, fill_value=-1),
|
|
self.cls(("x", "y"), [[0, -1], [-1, 2]]),
|
|
)
|
|
|
|
@pytest.mark.parametrize("dim", ["x", "y"])
|
|
@pytest.mark.parametrize("window", [3, 8, 11])
|
|
@pytest.mark.parametrize("center", [True, False])
|
|
def test_dask_rolling(self, dim, window, center):
|
|
import dask
|
|
import dask.array as da
|
|
|
|
dask.config.set(scheduler="single-threaded")
|
|
|
|
x = Variable(("x", "y"), np.array(np.random.randn(100, 40), dtype=float))
|
|
dx = Variable(("x", "y"), da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8]))
|
|
|
|
expected = x.rolling_window(
|
|
dim, window, "window", center=center, fill_value=np.nan
|
|
)
|
|
with raise_if_dask_computes():
|
|
actual = dx.rolling_window(
|
|
dim, window, "window", center=center, fill_value=np.nan
|
|
)
|
|
assert isinstance(actual.data, da.Array)
|
|
assert actual.shape == expected.shape
|
|
assert_equal(actual, expected)
|
|
|
|
@pytest.mark.xfail(reason="https://github.com/dask/dask/issues/11585")
|
|
def test_multiindex(self):
|
|
super().test_multiindex()
|
|
|
|
@pytest.mark.parametrize(
|
|
"mode",
|
|
[
|
|
"mean",
|
|
pytest.param(
|
|
"median",
|
|
marks=pytest.mark.xfail(reason="median is not implemented by Dask"),
|
|
),
|
|
pytest.param(
|
|
"reflect", marks=pytest.mark.xfail(reason="dask.array.pad bug")
|
|
),
|
|
"edge",
|
|
"linear_ramp",
|
|
"maximum",
|
|
"minimum",
|
|
"symmetric",
|
|
"wrap",
|
|
],
|
|
)
|
|
@pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS)
|
|
@pytest.mark.filterwarnings(
|
|
r"ignore:dask.array.pad.+? converts integers to floats."
|
|
)
|
|
def test_pad(self, mode, xr_arg, np_arg):
|
|
super().test_pad(mode, xr_arg, np_arg)
|
|
|
|
def test_pandas_categorical_dtype(self):
|
|
data = pd.Categorical(np.arange(10, dtype="int64"))
|
|
with pytest.raises(ValueError, match="was found to be a Pandas ExtensionArray"):
|
|
self.cls("x", data)
|
|
|
|
|
|
@requires_sparse
|
|
class TestVariableWithSparse:
|
|
# TODO inherit VariableSubclassobjects to cover more tests
|
|
|
|
def test_as_sparse(self):
|
|
data = np.arange(12).reshape(3, 4)
|
|
var = Variable(("x", "y"), data)._as_sparse(fill_value=-1)
|
|
actual = var._to_dense()
|
|
assert_identical(var, actual)
|
|
|
|
|
|
class TestIndexVariable(VariableSubclassobjects):
|
|
def cls(self, *args, **kwargs) -> IndexVariable:
|
|
return IndexVariable(*args, **kwargs)
|
|
|
|
def test_init(self):
|
|
with pytest.raises(ValueError, match=r"must be 1-dimensional"):
|
|
IndexVariable((), 0)
|
|
|
|
def test_to_index(self):
|
|
data = 0.5 * np.arange(10)
|
|
v = IndexVariable(["time"], data, {"foo": "bar"})
|
|
assert pd.Index(data, name="time").identical(v.to_index())
|
|
|
|
def test_to_index_multiindex_level(self):
|
|
midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
|
|
ds = Dataset(coords={"x": midx})
|
|
assert ds.one.variable.to_index().equals(midx.get_level_values("one"))
|
|
|
|
def test_multiindex_default_level_names(self):
|
|
midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
|
|
v = IndexVariable(["x"], midx, {"foo": "bar"})
|
|
assert v.to_index().names == ("x_level_0", "x_level_1")
|
|
|
|
def test_data(self):
|
|
x = IndexVariable("x", np.arange(3.0))
|
|
assert isinstance(x._data, PandasIndexingAdapter)
|
|
assert isinstance(x.data, np.ndarray)
|
|
assert float == x.dtype
|
|
assert_array_equal(np.arange(3), x)
|
|
assert float == x.values.dtype
|
|
with pytest.raises(TypeError, match=r"cannot be modified"):
|
|
x[:] = 0
|
|
|
|
def test_name(self):
|
|
coord = IndexVariable("x", [10.0])
|
|
assert coord.name == "x"
|
|
|
|
with pytest.raises(AttributeError):
|
|
coord.name = "y"
|
|
|
|
def test_level_names(self):
|
|
midx = pd.MultiIndex.from_product(
|
|
[["a", "b"], [1, 2]], names=["level_1", "level_2"]
|
|
)
|
|
x = IndexVariable("x", midx)
|
|
assert x.level_names == midx.names
|
|
|
|
assert IndexVariable("y", [10.0]).level_names is None
|
|
|
|
def test_get_level_variable(self):
|
|
midx = pd.MultiIndex.from_product(
|
|
[["a", "b"], [1, 2]], names=["level_1", "level_2"]
|
|
)
|
|
x = IndexVariable("x", midx)
|
|
level_1 = IndexVariable("x", midx.get_level_values("level_1"))
|
|
assert_identical(x.get_level_variable("level_1"), level_1)
|
|
|
|
with pytest.raises(ValueError, match=r"has no MultiIndex"):
|
|
IndexVariable("y", [10.0]).get_level_variable("level")
|
|
|
|
def test_concat_periods(self):
|
|
periods = pd.period_range("2000-01-01", periods=10)
|
|
coords = [IndexVariable("t", periods[:5]), IndexVariable("t", periods[5:])]
|
|
expected = IndexVariable("t", periods)
|
|
actual = IndexVariable.concat(coords, dim="t")
|
|
assert_identical(actual, expected)
|
|
assert isinstance(actual.to_index(), pd.PeriodIndex)
|
|
|
|
positions = [list(range(5)), list(range(5, 10))]
|
|
actual = IndexVariable.concat(coords, dim="t", positions=positions)
|
|
assert_identical(actual, expected)
|
|
assert isinstance(actual.to_index(), pd.PeriodIndex)
|
|
|
|
def test_concat_multiindex(self):
|
|
idx = pd.MultiIndex.from_product([[0, 1, 2], ["a", "b"]])
|
|
coords = [IndexVariable("x", idx[:2]), IndexVariable("x", idx[2:])]
|
|
expected = IndexVariable("x", idx)
|
|
actual = IndexVariable.concat(coords, dim="x")
|
|
assert_identical(actual, expected)
|
|
assert isinstance(actual.to_index(), pd.MultiIndex)
|
|
|
|
@pytest.mark.parametrize("dtype", [str, bytes])
|
|
def test_concat_str_dtype(self, dtype):
|
|
a = IndexVariable("x", np.array(["a"], dtype=dtype))
|
|
b = IndexVariable("x", np.array(["b"], dtype=dtype))
|
|
expected = IndexVariable("x", np.array(["a", "b"], dtype=dtype))
|
|
|
|
actual = IndexVariable.concat([a, b])
|
|
assert actual.identical(expected)
|
|
assert np.issubdtype(actual.dtype, dtype)
|
|
|
|
def test_datetime64(self):
|
|
# GH:1932 Make sure indexing keeps precision
|
|
t = np.array([1518418799999986560, 1518418799999996560], dtype="datetime64[ns]")
|
|
v = IndexVariable("t", t)
|
|
assert v[0].data == t[0]
|
|
|
|
# These tests make use of multi-dimensional variables, which are not valid
|
|
# IndexVariable objects:
|
|
@pytest.mark.skip
|
|
def test_getitem_error(self):
|
|
super().test_getitem_error()
|
|
|
|
@pytest.mark.skip
|
|
def test_getitem_advanced(self):
|
|
super().test_getitem_advanced()
|
|
|
|
@pytest.mark.skip
|
|
def test_getitem_fancy(self):
|
|
super().test_getitem_fancy()
|
|
|
|
@pytest.mark.skip
|
|
def test_getitem_uint(self):
|
|
super().test_getitem_fancy()
|
|
|
|
@pytest.mark.skip
|
|
@pytest.mark.parametrize(
|
|
"mode",
|
|
[
|
|
"mean",
|
|
"median",
|
|
"reflect",
|
|
"edge",
|
|
"linear_ramp",
|
|
"maximum",
|
|
"minimum",
|
|
"symmetric",
|
|
"wrap",
|
|
],
|
|
)
|
|
@pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS)
|
|
def test_pad(self, mode, xr_arg, np_arg):
|
|
super().test_pad(mode, xr_arg, np_arg)
|
|
|
|
@pytest.mark.skip
|
|
def test_pad_constant_values(self, xr_arg, np_arg):
|
|
super().test_pad_constant_values(xr_arg, np_arg)
|
|
|
|
@pytest.mark.skip
|
|
def test_rolling_window(self):
|
|
super().test_rolling_window()
|
|
|
|
@pytest.mark.skip
|
|
def test_rolling_1d(self):
|
|
super().test_rolling_1d()
|
|
|
|
@pytest.mark.skip
|
|
def test_nd_rolling(self):
|
|
super().test_nd_rolling()
|
|
|
|
@pytest.mark.skip
|
|
def test_rolling_window_errors(self):
|
|
super().test_rolling_window_errors()
|
|
|
|
@pytest.mark.skip
|
|
def test_coarsen_2d(self):
|
|
super().test_coarsen_2d()
|
|
|
|
def test_to_index_variable_copy(self) -> None:
|
|
# to_index_variable should return a copy
|
|
# https://github.com/pydata/xarray/issues/6931
|
|
a = IndexVariable("x", ["a"])
|
|
b = a.to_index_variable()
|
|
assert a is not b
|
|
b.dims = ("y",)
|
|
assert a.dims == ("x",)
|
|
|
|
|
|
class TestAsCompatibleData(Generic[T_DuckArray]):
|
|
def test_unchanged_types(self):
|
|
types = (np.asarray, PandasIndexingAdapter, LazilyIndexedArray)
|
|
for t in types:
|
|
for data in [
|
|
np.arange(3),
|
|
pd.date_range("2000-01-01", periods=3),
|
|
pd.date_range("2000-01-01", periods=3).values,
|
|
]:
|
|
x = t(data)
|
|
assert source_ndarray(x) is source_ndarray(as_compatible_data(x))
|
|
|
|
def test_converted_types(self):
|
|
for input_array in [
|
|
[[0, 1, 2]],
|
|
pd.DataFrame([[0, 1, 2]]),
|
|
np.float64(1.4),
|
|
np.str_("abc"),
|
|
]:
|
|
actual = as_compatible_data(input_array)
|
|
assert_array_equal(np.asarray(input_array), actual)
|
|
assert np.ndarray is type(actual)
|
|
assert np.asarray(input_array).dtype == actual.dtype
|
|
|
|
def test_masked_array(self):
|
|
original = np.ma.MaskedArray(np.arange(5))
|
|
expected = np.arange(5)
|
|
actual = as_compatible_data(original)
|
|
assert_array_equal(expected, actual)
|
|
assert np.dtype(int) == actual.dtype
|
|
|
|
original = np.ma.MaskedArray(np.arange(5), mask=4 * [False] + [True])
|
|
expected = np.arange(5.0)
|
|
expected[-1] = np.nan
|
|
actual = as_compatible_data(original)
|
|
assert_array_equal(expected, actual)
|
|
assert np.dtype(float) == actual.dtype
|
|
|
|
original = np.ma.MaskedArray([1.0, 2.0], mask=[True, False])
|
|
original.flags.writeable = False
|
|
expected = [np.nan, 2.0]
|
|
actual = as_compatible_data(original)
|
|
assert_array_equal(expected, actual)
|
|
assert np.dtype(float) == actual.dtype
|
|
|
|
# GH2377
|
|
actual = Variable(dims=tuple(), data=np.ma.masked)
|
|
expected = Variable(dims=tuple(), data=np.nan)
|
|
assert_array_equal(expected, actual)
|
|
assert actual.dtype == expected.dtype
|
|
|
|
def test_datetime(self):
|
|
expected = np.datetime64("2000-01-01")
|
|
actual = as_compatible_data(expected)
|
|
assert expected == actual
|
|
assert np.ndarray is type(actual)
|
|
assert np.dtype("datetime64[s]") == actual.dtype
|
|
|
|
expected = np.array([np.datetime64("2000-01-01")])
|
|
actual = as_compatible_data(expected)
|
|
assert np.asarray(expected) == actual
|
|
assert np.ndarray is type(actual)
|
|
assert np.dtype("datetime64[s]") == actual.dtype
|
|
|
|
expected = np.array([np.datetime64("2000-01-01", "ns")])
|
|
actual = as_compatible_data(expected)
|
|
assert np.asarray(expected) == actual
|
|
assert np.ndarray is type(actual)
|
|
assert np.dtype("datetime64[ns]") == actual.dtype
|
|
assert expected is source_ndarray(np.asarray(actual))
|
|
|
|
expected = np.datetime64(
|
|
"2000-01-01",
|
|
"us" if has_pandas_3 else "ns",
|
|
)
|
|
actual = as_compatible_data(datetime(2000, 1, 1))
|
|
assert np.asarray(expected) == actual
|
|
assert np.ndarray is type(actual)
|
|
assert expected.dtype == actual.dtype
|
|
|
|
def test_tz_datetime(self) -> None:
|
|
tz = pytz.timezone("America/New_York")
|
|
times_ns = pd.date_range("2000", periods=1, tz=tz)
|
|
|
|
times_s = times_ns.astype(pd.DatetimeTZDtype("s", tz)) # type: ignore[arg-type]
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore")
|
|
actual: T_DuckArray = as_compatible_data(times_s)
|
|
assert actual.array == times_s
|
|
assert actual.array.dtype == pd.DatetimeTZDtype("s", tz) # type: ignore[arg-type]
|
|
|
|
series = pd.Series(times_s)
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore")
|
|
actual2: T_DuckArray = as_compatible_data(series)
|
|
|
|
np.testing.assert_array_equal(actual2, np.asarray(series.values))
|
|
assert actual2.dtype == np.dtype("datetime64[s]")
|
|
|
|
def test_full_like(self) -> None:
|
|
# For more thorough tests, see test_variable.py
|
|
orig = Variable(
|
|
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
|
|
)
|
|
|
|
expect = orig.copy(deep=True)
|
|
# see https://github.com/python/mypy/issues/3004 for why we need to ignore type
|
|
expect.values = [[2.0, 2.0], [2.0, 2.0]] # type: ignore[assignment]
|
|
assert_identical(expect, full_like(orig, 2))
|
|
|
|
# override dtype
|
|
expect.values = [[True, True], [True, True]] # type: ignore[assignment]
|
|
assert expect.dtype == bool
|
|
assert_identical(expect, full_like(orig, True, dtype=bool))
|
|
|
|
# raise error on non-scalar fill_value
|
|
with pytest.raises(ValueError, match=r"must be scalar"):
|
|
full_like(orig, [1.0, 2.0])
|
|
|
|
with pytest.raises(ValueError, match="'dtype' cannot be dict-like"):
|
|
full_like(orig, True, dtype={"x": bool})
|
|
|
|
@requires_dask
|
|
def test_full_like_dask(self) -> None:
|
|
orig = Variable(
|
|
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
|
|
).chunk(dict(x=(1, 1), y=(2,)))
|
|
|
|
def check(actual, expect_dtype, expect_values):
|
|
assert actual.dtype == expect_dtype
|
|
assert actual.shape == orig.shape
|
|
assert actual.dims == orig.dims
|
|
assert actual.attrs == orig.attrs
|
|
assert actual.chunks == orig.chunks
|
|
assert_array_equal(actual.values, expect_values)
|
|
|
|
check(full_like(orig, 2), orig.dtype, np.full_like(orig.values, 2))
|
|
# override dtype
|
|
check(
|
|
full_like(orig, True, dtype=bool),
|
|
bool,
|
|
np.full_like(orig.values, True, dtype=bool),
|
|
)
|
|
|
|
# Check that there's no array stored inside dask
|
|
# (e.g. we didn't create a numpy array and then we chunked it!)
|
|
dsk = full_like(orig, 1).data.dask
|
|
for v in dsk.values():
|
|
if isinstance(v, tuple):
|
|
for vi in v:
|
|
assert not isinstance(vi, np.ndarray)
|
|
else:
|
|
assert not isinstance(v, np.ndarray)
|
|
|
|
def test_zeros_like(self) -> None:
|
|
orig = Variable(
|
|
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
|
|
)
|
|
assert_identical(zeros_like(orig), full_like(orig, 0))
|
|
assert_identical(zeros_like(orig, dtype=int), full_like(orig, 0, dtype=int))
|
|
|
|
def test_ones_like(self) -> None:
|
|
orig = Variable(
|
|
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
|
|
)
|
|
assert_identical(ones_like(orig), full_like(orig, 1))
|
|
assert_identical(ones_like(orig, dtype=int), full_like(orig, 1, dtype=int))
|
|
|
|
def test_numpy_ndarray_subclass(self):
|
|
class SubclassedArray(np.ndarray):
|
|
def __new__(cls, array, foo):
|
|
obj = np.asarray(array).view(cls)
|
|
obj.foo = foo
|
|
return obj
|
|
|
|
data = SubclassedArray([1, 2, 3], foo="bar")
|
|
actual = as_compatible_data(data)
|
|
assert isinstance(actual, SubclassedArray)
|
|
assert actual.foo == "bar"
|
|
assert_array_equal(data, actual)
|
|
|
|
def test_numpy_matrix(self):
|
|
with pytest.warns(PendingDeprecationWarning):
|
|
data = np.matrix([[1, 2], [3, 4]])
|
|
actual = as_compatible_data(data)
|
|
assert isinstance(actual, np.ndarray)
|
|
assert_array_equal(data, actual)
|
|
|
|
def test_unsupported_type(self):
|
|
# Non indexable type
|
|
class CustomArray(NDArrayMixin):
|
|
def __init__(self, array):
|
|
self.array = array
|
|
|
|
class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed):
|
|
pass
|
|
|
|
# Type with data stored in values attribute
|
|
class CustomWithValuesAttr:
|
|
def __init__(self, array):
|
|
self.values = array
|
|
|
|
array = CustomArray(np.arange(3))
|
|
orig = Variable(dims=("x"), data=array, attrs={"foo": "bar"})
|
|
assert isinstance(orig._data, np.ndarray) # should not be CustomArray
|
|
|
|
array = CustomIndexable(np.arange(3))
|
|
orig = Variable(dims=("x"), data=array, attrs={"foo": "bar"})
|
|
assert isinstance(orig._data, CustomIndexable)
|
|
|
|
array = CustomWithValuesAttr(np.arange(3))
|
|
orig = Variable(dims=(), data=array)
|
|
assert isinstance(orig._data.item(), CustomWithValuesAttr)
|
|
|
|
|
|
def test_raise_no_warning_for_nan_in_binary_ops():
|
|
with assert_no_warnings():
|
|
_ = Variable("x", [1, 2, np.nan]) > 0
|
|
|
|
|
|
class TestBackendIndexing:
|
|
"""Make sure all the array wrappers can be indexed."""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def setUp(self):
|
|
self.d = np.random.random((10, 3)).astype(np.float64)
|
|
|
|
def check_orthogonal_indexing(self, v):
|
|
assert np.allclose(v.isel(x=[8, 3], y=[2, 1]), self.d[[8, 3]][:, [2, 1]])
|
|
|
|
def check_vectorized_indexing(self, v):
|
|
ind_x = Variable("z", [0, 2])
|
|
ind_y = Variable("z", [2, 1])
|
|
assert np.allclose(v.isel(x=ind_x, y=ind_y), self.d[ind_x, ind_y])
|
|
|
|
def test_NumpyIndexingAdapter(self):
|
|
v = Variable(dims=("x", "y"), data=NumpyIndexingAdapter(self.d))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
# could not doubly wrapping
|
|
with pytest.raises(TypeError, match=r"NumpyIndexingAdapter only wraps "):
|
|
v = Variable(
|
|
dims=("x", "y"), data=NumpyIndexingAdapter(NumpyIndexingAdapter(self.d))
|
|
)
|
|
|
|
def test_LazilyIndexedArray(self):
|
|
v = Variable(dims=("x", "y"), data=LazilyIndexedArray(self.d))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
# doubly wrapping
|
|
v = Variable(
|
|
dims=("x", "y"),
|
|
data=LazilyIndexedArray(LazilyIndexedArray(self.d)),
|
|
)
|
|
self.check_orthogonal_indexing(v)
|
|
# hierarchical wrapping
|
|
v = Variable(
|
|
dims=("x", "y"), data=LazilyIndexedArray(NumpyIndexingAdapter(self.d))
|
|
)
|
|
self.check_orthogonal_indexing(v)
|
|
|
|
def test_CopyOnWriteArray(self):
|
|
v = Variable(dims=("x", "y"), data=CopyOnWriteArray(self.d))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
# doubly wrapping
|
|
v = Variable(dims=("x", "y"), data=CopyOnWriteArray(LazilyIndexedArray(self.d)))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
|
|
def test_MemoryCachedArray(self):
|
|
v = Variable(dims=("x", "y"), data=MemoryCachedArray(self.d))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
# doubly wrapping
|
|
v = Variable(dims=("x", "y"), data=CopyOnWriteArray(MemoryCachedArray(self.d)))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
|
|
@requires_dask
|
|
def test_DaskIndexingAdapter(self):
|
|
import dask.array as da
|
|
|
|
da = da.asarray(self.d)
|
|
v = Variable(dims=("x", "y"), data=DaskIndexingAdapter(da))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
# doubly wrapping
|
|
v = Variable(dims=("x", "y"), data=CopyOnWriteArray(DaskIndexingAdapter(da)))
|
|
self.check_orthogonal_indexing(v)
|
|
self.check_vectorized_indexing(v)
|
|
|
|
|
|
def test_clip(var):
|
|
# Copied from test_dataarray (would there be a way to combine the tests?)
|
|
result = var.clip(min=0.5)
|
|
assert result.min(...) >= 0.5
|
|
|
|
result = var.clip(max=0.5)
|
|
assert result.max(...) <= 0.5
|
|
|
|
result = var.clip(min=0.25, max=0.75)
|
|
assert result.min(...) >= 0.25
|
|
assert result.max(...) <= 0.75
|
|
|
|
result = var.clip(min=var.mean("x"), max=var.mean("z"))
|
|
assert result.dims == var.dims
|
|
assert_array_equal(
|
|
result.data,
|
|
np.clip(
|
|
var.data,
|
|
var.mean("x").data[np.newaxis, :, :],
|
|
var.mean("z").data[:, :, np.newaxis],
|
|
),
|
|
)
|
|
|
|
|
|
@pytest.mark.parametrize("Var", [Variable, IndexVariable])
|
|
class TestNumpyCoercion:
|
|
def test_from_numpy(self, Var):
|
|
v = Var("x", [1, 2, 3])
|
|
|
|
assert_identical(v.as_numpy(), v)
|
|
np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3]))
|
|
|
|
@requires_dask
|
|
def test_from_dask(self, Var):
|
|
v = Var("x", [1, 2, 3])
|
|
v_chunked = v.chunk(1)
|
|
|
|
assert_identical(v_chunked.as_numpy(), v.compute())
|
|
np.testing.assert_equal(v.to_numpy(), np.array([1, 2, 3]))
|
|
|
|
@requires_pint
|
|
def test_from_pint(self, Var):
|
|
import pint
|
|
|
|
arr = np.array([1, 2, 3])
|
|
|
|
# IndexVariable strips the unit
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore", category=pint.UnitStrippedWarning)
|
|
v = Var("x", pint.Quantity(arr, units="m"))
|
|
|
|
assert_identical(v.as_numpy(), Var("x", arr))
|
|
np.testing.assert_equal(v.to_numpy(), arr)
|
|
|
|
@requires_sparse
|
|
def test_from_sparse(self, Var):
|
|
if Var is IndexVariable:
|
|
pytest.skip("Can't have 2D IndexVariables")
|
|
|
|
import sparse
|
|
|
|
arr = np.diagflat([1, 2, 3])
|
|
sparr = sparse.COO(coords=[[0, 1, 2], [0, 1, 2]], data=[1, 2, 3])
|
|
v = Variable(["x", "y"], sparr)
|
|
|
|
assert_identical(v.as_numpy(), Variable(["x", "y"], arr))
|
|
np.testing.assert_equal(v.to_numpy(), arr)
|
|
|
|
@requires_cupy
|
|
def test_from_cupy(self, Var):
|
|
if Var is IndexVariable:
|
|
pytest.skip("cupy in default indexes is not supported at the moment")
|
|
import cupy as cp
|
|
|
|
arr = np.array([1, 2, 3])
|
|
v = Var("x", cp.array(arr))
|
|
|
|
assert_identical(v.as_numpy(), Var("x", arr))
|
|
np.testing.assert_equal(v.to_numpy(), arr)
|
|
|
|
@requires_dask
|
|
@requires_pint
|
|
def test_from_pint_wrapping_dask(self, Var):
|
|
import dask
|
|
import pint
|
|
|
|
arr = np.array([1, 2, 3])
|
|
d = dask.array.from_array(np.array([1, 2, 3]))
|
|
|
|
# IndexVariable strips the unit
|
|
with warnings.catch_warnings():
|
|
warnings.simplefilter("ignore", category=pint.UnitStrippedWarning)
|
|
v = Var("x", pint.Quantity(d, units="m"))
|
|
|
|
result = v.as_numpy()
|
|
assert_identical(result, Var("x", arr))
|
|
np.testing.assert_equal(v.to_numpy(), arr)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
("values", "unit"),
|
|
[
|
|
(np.datetime64("2000-01-01", "ns"), "ns"),
|
|
(np.datetime64("2000-01-01", "s"), "s"),
|
|
(np.array([np.datetime64("2000-01-01", "ns")]), "ns"),
|
|
(np.array([np.datetime64("2000-01-01", "s")]), "s"),
|
|
(pd.date_range("2000", periods=1), "ns"),
|
|
(
|
|
datetime(2000, 1, 1),
|
|
"us" if has_pandas_3 else "ns",
|
|
),
|
|
(
|
|
np.array([datetime(2000, 1, 1)]),
|
|
"us" if has_pandas_3 else "ns",
|
|
),
|
|
(pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), "ns"),
|
|
(
|
|
pd.Series(
|
|
pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York"))
|
|
),
|
|
"ns",
|
|
),
|
|
],
|
|
ids=lambda x: f"{x}",
|
|
)
|
|
def test_datetime_conversion(values, unit) -> None:
|
|
# todo: check for redundancy (suggested per review)
|
|
dims = ["time"] if isinstance(values, np.ndarray | pd.Index | pd.Series) else []
|
|
var = Variable(dims, values)
|
|
if var.dtype.kind == "M":
|
|
assert var.dtype == np.dtype(f"datetime64[{unit}]")
|
|
else:
|
|
# The only case where a non-datetime64 dtype can occur currently is in
|
|
# the case that the variable is backed by a timezone-aware
|
|
# DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class.
|
|
assert isinstance(var._data, PandasIndexingAdapter)
|
|
assert var._data.array.dtype == pd.DatetimeTZDtype(
|
|
"ns", pytz.timezone("America/New_York")
|
|
)
|
|
|
|
|
|
tz_ny = pytz.timezone("America/New_York")
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
["data", "dtype"],
|
|
[
|
|
pytest.param(pd.date_range("2000", periods=1), "datetime64[s]", id="index-sec"),
|
|
pytest.param(
|
|
pd.Series(pd.date_range("2000", periods=1)),
|
|
"datetime64[s]",
|
|
id="series-sec",
|
|
),
|
|
pytest.param(
|
|
pd.date_range("2000", periods=1, tz=tz_ny),
|
|
pd.DatetimeTZDtype("s", tz_ny), # type: ignore[arg-type]
|
|
id="index-timezone",
|
|
),
|
|
pytest.param(
|
|
pd.Series(pd.date_range("2000", periods=1, tz=tz_ny)),
|
|
pd.DatetimeTZDtype("s", tz_ny), # type: ignore[arg-type]
|
|
id="series-timezone",
|
|
),
|
|
],
|
|
)
|
|
def test_pandas_two_only_datetime_conversion_warnings(
|
|
data: pd.DatetimeIndex | pd.Series, dtype: str | pd.DatetimeTZDtype
|
|
) -> None:
|
|
# todo: check for redundancy (suggested per review)
|
|
var = Variable(["time"], data.astype(dtype)) # type: ignore[arg-type]
|
|
|
|
if var.dtype.kind == "M":
|
|
assert var.dtype == np.dtype("datetime64[s]")
|
|
else:
|
|
# The only case where a non-datetime64 dtype can occur currently is in
|
|
# the case that the variable is backed by a timezone-aware
|
|
# DatetimeIndex, and thus is hidden within the PandasIndexingAdapter class.
|
|
assert isinstance(var._data, PandasIndexingAdapter)
|
|
assert var._data.array.dtype == pd.DatetimeTZDtype("s", tz_ny)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
("values", "unit"),
|
|
[
|
|
(np.timedelta64(10, "ns"), "ns"),
|
|
(np.timedelta64(10, "s"), "s"),
|
|
(np.array([np.timedelta64(10, "ns")]), "ns"),
|
|
(np.array([np.timedelta64(10, "s")]), "s"),
|
|
(pd.timedelta_range("1", periods=1), "ns"),
|
|
(timedelta(days=1), "ns"),
|
|
(np.array([timedelta(days=1)]), "ns"),
|
|
(pd.timedelta_range("1", periods=1).astype("timedelta64[s]"), "s"),
|
|
],
|
|
ids=lambda x: f"{x}",
|
|
)
|
|
def test_timedelta_conversion(values, unit) -> None:
|
|
# todo: check for redundancy
|
|
dims = ["time"] if isinstance(values, np.ndarray | pd.Index) else []
|
|
var = Variable(dims, values)
|
|
assert var.dtype == np.dtype(f"timedelta64[{unit}]")
|