CCR/.venv/lib/python3.12/site-packages/xarray/tests/test_dataarray.py

7238 lines
257 KiB
Python

from __future__ import annotations
import pickle
import re
import sys
import warnings
from collections.abc import Hashable
from copy import deepcopy
from textwrap import dedent
from typing import Any, Final, Literal, cast
import numpy as np
import pandas as pd
import pytest
# remove once numpy 2.0 is the oldest supported version
try:
from numpy.exceptions import RankWarning
except ImportError:
from numpy import RankWarning # type: ignore[no-redef,attr-defined,unused-ignore]
import xarray as xr
import xarray.core.missing
from xarray import (
DataArray,
Dataset,
IndexVariable,
Variable,
align,
broadcast,
set_options,
)
from xarray.coders import CFDatetimeCoder
from xarray.core import dtypes
from xarray.core.common import full_like
from xarray.core.coordinates import Coordinates
from xarray.core.indexes import Index, PandasIndex, filter_indexes_from_coords
from xarray.core.types import QueryEngineOptions, QueryParserOptions
from xarray.core.utils import is_scalar
from xarray.testing import _assert_internal_invariants
from xarray.tests import (
InaccessibleArray,
ReturnItem,
assert_allclose,
assert_array_equal,
assert_chunks_equal,
assert_equal,
assert_identical,
assert_no_warnings,
has_dask,
has_dask_ge_2025_1_0,
raise_if_dask_computes,
requires_bottleneck,
requires_cupy,
requires_dask,
requires_dask_expr,
requires_iris,
requires_numexpr,
requires_pint,
requires_scipy,
requires_sparse,
source_ndarray,
)
try:
from pandas.errors import UndefinedVariableError
except ImportError:
# TODO: remove once we stop supporting pandas<1.4.3
from pandas.core.computation.ops import UndefinedVariableError
# Module-level pytest marks: warnings whose message matches one of these
# patterns are turned into errors ("error:" action) rather than only reported.
pytestmark = [
pytest.mark.filterwarnings("error:Mean of empty slice"),
pytest.mark.filterwarnings("error:All-NaN (slice|axis) encountered"),
]
class TestDataArray:
@pytest.fixture(autouse=True)
def setup(self):
    """Create the shared objects the tests read from ``self``.

    Declared ``autouse=True``, so it needs no explicit request from a test.
    """
    shared_attrs = {"attr1": "value1", "attr2": 2929}
    random_block = np.random.random((10, 20))

    self.attrs = shared_attrs
    self.x = random_block
    # The same data wrapped twice: once bare, once carrying attributes.
    self.v = Variable(["x", "y"], random_block)
    self.va = Variable(["x", "y"], random_block, shared_attrs)
    self.ds = Dataset({"foo": self.v})
    self.dv = self.ds["foo"]

    # A 2x2 product MultiIndex and a uint64 array indexed by it.
    self.mindex = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=("level_1", "level_2")
    )
    multi_indexed = DataArray([0, 1, 2, 3], coords={"x": self.mindex}, dims="x")
    self.mda = multi_indexed.astype(np.uint64)
def test_repr(self) -> None:
    """Compare ``repr`` of a small named DataArray against the exact text.

    The comparison is an exact string match, so the alignment padding inside
    the expected text (numpy's continuation indent, the coordinate and
    attribute columns) is significant and must not be collapsed.
    """
    v = Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
    v = v.astype(np.uint64)
    coords = {"x": np.arange(3, dtype=np.uint64), "other": np.uint64(0)}
    data_array = DataArray(v, coords, name="my_variable")
    # ``dedent`` strips only the common leading indent; the remaining
    # whitespace is the column layout produced by the repr itself.
    expected = dedent(
        """\
        <xarray.DataArray 'my_variable' (time: 2, x: 3)> Size: 48B
        array([[1, 2, 3],
               [4, 5, 6]], dtype=uint64)
        Coordinates:
          * x        (x) uint64 24B 0 1 2
            other    uint64 8B 0
        Dimensions without coordinates: time
        Attributes:
            foo:      bar"""
    )
    assert expected == repr(data_array)
def test_repr_multiindex(self) -> None:
    """Compare ``repr`` of the MultiIndex-backed fixture against exact text.

    Object-dtype byte counts are computed from the platform's object itemsize.
    The column padding in the expected text is significant because the
    comparison is an exact string match.
    """
    obj_size = np.dtype("O").itemsize
    expected = dedent(
        f"""\
        <xarray.DataArray (x: 4)> Size: 32B
        array([0, 1, 2, 3], dtype=uint64)
        Coordinates:
          * x        (x) object {4 * obj_size}B MultiIndex
          * level_1  (x) object {4 * obj_size}B 'a' 'a' 'b' 'b'
          * level_2  (x) int64 32B 1 2 1 2"""
    )
    assert expected == repr(self.mda)
def test_repr_multiindex_long(self) -> None:
    """Compare ``repr`` of a 32-element MultiIndex array against exact text.

    The expected text keeps numpy's two-column element padding and its
    continuation-line indents, plus the ``...`` elision in the level rows;
    all of that whitespace matters for the exact string comparison.
    """
    mindex_long = pd.MultiIndex.from_product(
        [["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]],
        names=("level_1", "level_2"),
    )
    mda_long = DataArray(
        list(range(32)), coords={"x": mindex_long}, dims="x"
    ).astype(np.uint64)
    # Byte counts of the object-dtype coordinates depend on pointer size.
    obj_size = np.dtype("O").itemsize
    expected = dedent(
        f"""\
        <xarray.DataArray (x: 32)> Size: 256B
        array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
              dtype=uint64)
        Coordinates:
          * x        (x) object {32 * obj_size}B MultiIndex
          * level_1  (x) object {32 * obj_size}B 'a' 'a' 'a' 'a' 'a' 'a' ... 'd' 'd' 'd' 'd' 'd' 'd'
          * level_2  (x) int64 256B 1 2 3 4 5 6 7 8 1 2 3 4 ... 5 6 7 8 1 2 3 4 5 6 7 8"""
    )
    assert expected == repr(mda_long)
def test_properties(self) -> None:
    """The DataArray exposes the same basic properties as its Variable."""
    data_array, variable, dataset = self.dv, self.v, self.ds

    assert_equal(data_array.variable, variable)
    assert_array_equal(data_array.values, variable.values)
    for prop in ("dims", "dtype", "shape", "size", "nbytes", "ndim", "attrs"):
        assert getattr(data_array, prop) == getattr(variable, prop)
    assert len(data_array) == len(variable)
    assert_equal(data_array.variable, variable)

    # Coordinates agree with those of the parent dataset.
    assert set(data_array.coords) == set(dataset.coords)
    for coord_name, coord in data_array.coords.items():
        assert_array_equal(coord, dataset.coords[coord_name])

    # There is no ``dataset`` attribute to read.
    with pytest.raises(AttributeError):
        _ = data_array.dataset

    assert isinstance(dataset["x"].to_index(), pd.Index)
    with pytest.raises(ValueError, match=r"must be 1-dimensional"):
        dataset["foo"].to_index()

    # ``variable`` cannot be assigned.
    with pytest.raises(AttributeError):
        data_array.variable = variable
def test_data_property(self) -> None:
    """Assigning to ``.values`` and ``.data`` replaces the array contents."""
    template = DataArray(np.zeros((3, 4)))
    target = template.copy()

    target.values = np.ones((3, 4))
    assert_array_equal(np.ones((3, 4)), target.values)

    target.data = 2 * np.ones((3, 4))
    assert_array_equal(2 * np.ones((3, 4)), target.data)
    # Both accessors report the same contents.
    assert_array_equal(target.data, target.values)
def test_indexes(self) -> None:
    """``indexes`` yields pandas indexes and ``xindexes`` xarray indexes."""
    array = DataArray(np.zeros((2, 3)), [("x", [0, 1]), ("y", ["a", "b", "c"])])
    pandas_expected = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])}
    xarray_expected = {
        name: PandasIndex(index, name) for name, index in pandas_expected.items()
    }

    assert array.xindexes.keys() == xarray_expected.keys()
    assert array.indexes.keys() == pandas_expected.keys()
    assert all(isinstance(index, pd.Index) for index in array.indexes.values())
    assert all(isinstance(index, Index) for index in array.xindexes.values())

    for name in pandas_expected:
        assert array.xindexes[name].equals(xarray_expected[name])
        assert array.indexes[name].equals(pandas_expected[name])
def test_get_index(self) -> None:
    """``get_index`` works for labelled and unlabelled dims, not unknown ones."""
    array = DataArray(np.zeros((2, 3)), coords={"x": ["a", "b"]}, dims=["x", "y"])

    labelled = array.get_index("x")
    assert labelled.equals(pd.Index(["a", "b"]))

    # "y" has no coordinate; the expected index is 0..2.
    unlabelled = array.get_index("y")
    assert unlabelled.equals(pd.Index([0, 1, 2]))

    with pytest.raises(KeyError):
        array.get_index("z")
def test_get_index_size_zero(self) -> None:
    """An empty dimension yields an empty int64 index."""
    empty = DataArray(np.zeros((0,)), dims=["x"])
    wanted = pd.Index([], dtype=np.int64)

    found = empty.get_index("x")

    assert found.equals(wanted)
    assert found.dtype == wanted.dtype
def test_struct_array_dims(self) -> None:
    """
    Subtract two DataArrays whose dimension coordinate is a structured
    array, for identical, partially different and NaN-containing labels.
    """
    # GH837, GH861
    record_dtype = [("name", "|S256"), ("height", object)]

    def weights(values, participants):
        # One weight per participant, labelled by the structured records.
        return DataArray(
            values, dims=["participant"], coords={"participant": participants}
        )

    # Case 1: both operands carry the same labels.
    p_data = np.array(
        [("Abe", 180), ("Stacy", 150), ("Dick", 200)],
        dtype=record_dtype,
    )
    weights_0 = weights([80, 56, 120], p_data)
    weights_1 = weights([81, 52, 115], p_data)
    difference = weights_1 - weights_0
    wanted = weights([1, -4, -5], p_data)
    assert_identical(difference, wanted)

    # Case 2: one label differs, so only records 0 and 2 are expected.
    p_data_alt = np.array(
        [("Abe", 180), ("Stacy", 151), ("Dick", 200)],
        dtype=record_dtype,
    )
    weights_1 = weights([81, 52, 115], p_data_alt)
    difference = weights_1 - weights_0
    wanted = weights([1, -5], p_data[[0, 2]])
    assert_identical(difference, wanted)

    # Case 3: the differing label contains np.nan.
    p_data_nan = np.array(
        [("Abe", 180), ("Stacy", np.nan), ("Dick", 200)],
        dtype=record_dtype,
    )
    weights_1 = weights([81, 52, 115], p_data_nan)
    difference = weights_1 - weights_0
    wanted = weights([1, -5], p_data[[0, 2]])
    assert_identical(difference, wanted)
def test_name(self) -> None:
    """``name`` can be read and reassigned; ``assert_equal`` tolerates it."""
    array = self.dv
    assert array.name == "foo"

    snapshot = array.copy()
    array.name = "bar"
    assert array.name == "bar"
    # The copy still has the old name yet compares equal.
    assert_equal(snapshot, array)

    renamed = DataArray(IndexVariable("x", [3]))
    renamed.name = "y"
    wanted = DataArray([3], [("x", [3])], name="y")
    assert_identical(renamed, wanted)
def test_dims(self) -> None:
    """``dims`` reports the dimension names and rejects assignment."""
    array = self.dv
    assert array.dims == ("x", "y")

    with pytest.raises(AttributeError, match=r"you cannot assign"):
        array.dims = ("w", "z")
def test_sizes(self) -> None:
    """``sizes`` maps dims to lengths, in dim order, and is read-only."""
    array = DataArray(np.zeros((3, 4)), dims=["x", "y"])

    assert array.sizes == {"x": 3, "y": 4}
    assert tuple(array.sizes) == array.dims

    # Item assignment on the mapping must fail.
    with pytest.raises(TypeError):
        array.sizes["foo"] = 5  # type: ignore[index]
def test_encoding(self) -> None:
    """``encoding`` is mutable in place; assignment stores a new object."""
    array = self.dv

    in_place = {"foo": "bar"}
    array.encoding["foo"] = "bar"
    assert in_place == array.encoding

    replacement = {"baz": 0}
    array.encoding = replacement
    # The stored mapping must not be the very dict that was assigned.
    assert replacement is not array.encoding
def test_drop_encoding(self) -> None:
    """``drop_encoding`` returns a cleared copy and leaves the source alone."""
    source = self.mda
    encoding = {"scale_factor": 10}
    source.encoding = encoding
    source["x"].encoding = encoding

    assert source.encoding == encoding
    assert source["x"].encoding == encoding

    stripped = source.drop_encoding()

    # The source array was not modified in place.
    assert source.encoding == encoding
    assert source["x"].encoding == encoding

    # Both the variable and the coordinate encoding are empty on the result.
    assert stripped.encoding == {}
    assert stripped["x"].encoding == {}
def test_constructor(self) -> None:
    """Each supported coords/dims/name/attrs spelling builds the array that
    the equivalent Dataset variable yields."""
    data = np.random.random((2, 3))

    # no coords, no dims -> default dimension names
    built = DataArray(data)
    reference = Dataset({None: (["dim_0", "dim_1"], data)})[None]
    assert_identical(reference, built)

    # positional list coords, no dims
    built = DataArray(data, [["a", "b"], [-1, -2, -3]])
    reference = Dataset(
        {
            None: (["dim_0", "dim_1"], data),
            "dim_0": ("dim_0", ["a", "b"]),
            "dim_1": ("dim_1", [-1, -2, -3]),
        }
    )[None]
    assert_identical(reference, built)

    # named pd.Index coords, no dims
    built = DataArray(
        data, [pd.Index(["a", "b"], name="x"), pd.Index([-1, -2, -3], name="y")]
    )
    reference = Dataset(
        {None: (["x", "y"], data), "x": ("x", ["a", "b"]), "y": ("y", [-1, -2, -3])}
    )[None]
    assert_identical(reference, built)

    # The following spellings are all compared with the same x/y reference.

    # list coords with explicit dims
    coords1: list[Any] = [["a", "b"], [-1, -2, -3]]
    built = DataArray(data, coords1, ["x", "y"])
    assert_identical(reference, built)

    # pd.Index coords (named differently) with explicit dims
    coords2: list[pd.Index] = [
        pd.Index(["a", "b"], name="A"),
        pd.Index([-1, -2, -3], name="B"),
    ]
    built = DataArray(data, coords2, ["x", "y"])
    assert_identical(reference, built)

    # dict coords with explicit dims
    coords3 = {"x": ["a", "b"], "y": [-1, -2, -3]}
    built = DataArray(data, coords3, ["x", "y"])
    assert_identical(reference, built)

    # dict coords, no dims
    built = DataArray(data, coords3)
    assert_identical(reference, built)

    # (dim, values) tuple coords, no dims
    coords4 = [("x", ["a", "b"]), ("y", [-1, -2, -3])]
    built = DataArray(data, coords4)
    assert_identical(reference, built)

    # partial dict coords with explicit dims
    reference = Dataset({None: (["x", "y"], data), "x": ("x", ["a", "b"])})[None]
    built = DataArray(data, {"x": ["a", "b"]}, ["x", "y"])
    assert_identical(reference, built)

    # dims only
    built = DataArray(data, dims=["x", "y"])
    reference = Dataset({None: (["x", "y"], data)})[None]
    assert_identical(reference, built)

    # dims and name
    built = DataArray(data, dims=["x", "y"], name="foo")
    reference = Dataset({"foo": (["x", "y"], data)})["foo"]
    assert_identical(reference, built)

    # name only
    built = DataArray(data, name="foo")
    reference = Dataset({"foo": (["dim_0", "dim_1"], data)})["foo"]
    assert_identical(reference, built)

    # dims and attrs
    built = DataArray(data, dims=["x", "y"], attrs={"bar": 2})
    reference = Dataset({None: (["x", "y"], data, {"bar": 2})})[None]
    assert_identical(reference, built)

    # dims only; the reference variable tuple has an extra fourth element
    # (presumably encoding -- confirm against the Variable tuple form)
    built = DataArray(data, dims=["x", "y"])
    reference = Dataset({None: (["x", "y"], data, {}, {"bar": 2})})[None]
    assert_identical(reference, built)

    # list data with dict coords
    built = DataArray([1, 2, 3], coords={"x": [0, 1, 2]})
    reference = DataArray([1, 2, 3], coords=[("x", [0, 1, 2])])
    assert_identical(reference, built)
def test_constructor_invalid(self) -> None:
    """Malformed coords/dims arguments raise with a recognisable message."""
    data = np.random.randn(3, 2)
    not_subset = r"not a subset of the .* dim"
    size_conflict = r"conflicting sizes for dim"
    multiindex_conflict = r"conflicting MultiIndex"

    with pytest.raises(ValueError, match=r"coords is not dict-like"):
        DataArray(data, [[0, 1, 2]], ["x", "y"])

    with pytest.raises(ValueError, match=not_subset):
        DataArray(data, {"x": [0, 1, 2]}, ["a", "b"])
    with pytest.raises(ValueError, match=not_subset):
        DataArray(data, {"x": [0, 1, 2]})

    with pytest.raises(TypeError, match=r"is not hashable"):
        DataArray(data, dims=["x", []])  # type: ignore[list-item]

    with pytest.raises(ValueError, match=size_conflict):
        DataArray([1, 2, 3], coords=[("x", [0, 1])])
    with pytest.raises(ValueError, match=size_conflict):
        DataArray([1, 2], coords={"x": [0, 1], "y": ("x", [1])}, dims="x")

    with pytest.raises(ValueError, match=multiindex_conflict):
        DataArray(np.random.rand(4, 4), [("x", self.mindex), ("y", self.mindex)])
    with pytest.raises(ValueError, match=multiindex_conflict):
        DataArray(np.random.rand(4, 4), [("x", self.mindex), ("level_1", range(4))])
def test_constructor_from_self_described(self) -> None:
    """Build DataArrays from objects that already describe themselves:
    DataArray, DataFrame, Series, pd.Index and IndexVariable."""
    data: list[list[float]] = [[-0.1, 21], [0, 2]]
    reference = DataArray(
        data,
        coords={"x": ["a", "b"], "y": [-1, -2]},
        dims=["x", "y"],
        name="foobar",
        attrs={"bar": 2},
    )

    # from another DataArray
    rebuilt = DataArray(reference)
    assert_identical(reference, rebuilt)

    # from raw values plus the coords of the previous result
    rebuilt = DataArray(reference.values, rebuilt.coords)
    assert_equal(reference, rebuilt)

    # from a DataFrame with named index and columns
    frame = pd.DataFrame(
        data,
        index=pd.Index(["a", "b"], name="x"),
        columns=pd.Index([-1, -2], name="y"),
    )
    rebuilt = DataArray(frame)
    assert_equal(reference, rebuilt)

    # from a Series holding the first row
    series = pd.Series(data[0], index=pd.Index([-1, -2], name="y"))
    rebuilt = DataArray(series)
    assert_equal(reference[0].reset_coords("x", drop=True), rebuilt)

    # with additional scalar and non-index coordinates
    reference = DataArray(
        data,
        coords={"x": ["a", "b"], "y": [-1, -2], "a": 0, "z": ("x", [-0.5, 0.5])},
        dims=["x", "y"],
    )
    rebuilt = DataArray(reference)
    assert_identical(reference, rebuilt)

    rebuilt = DataArray(reference.values, reference.coords)
    assert_identical(reference, rebuilt)

    # from a named pd.Index and from an IndexVariable
    reference = Dataset({"foo": ("foo", ["a", "b"])})["foo"]
    rebuilt = DataArray(pd.Index(["a", "b"], name="foo"))
    assert_identical(reference, rebuilt)

    rebuilt = DataArray(IndexVariable("foo", ["a", "b"]))
    assert_identical(reference, rebuilt)
@requires_dask
def test_constructor_from_self_described_chunked(self) -> None:
    """Re-wrapping a chunked DataArray keeps both content and chunks."""
    eager = DataArray(
        [[-0.1, 21], [0, 2]],
        coords={"x": ["a", "b"], "y": [-1, -2]},
        dims=["x", "y"],
        name="foobar",
        attrs={"bar": 2},
    )
    chunked = eager.chunk()

    rewrapped = DataArray(chunked)

    assert_identical(chunked, rewrapped)
    assert_chunks_equal(chunked, rewrapped)
def test_constructor_from_0d(self) -> None:
    """A bare scalar builds the same array as a dimensionless variable."""
    reference = Dataset({None: ([], 0)})[None]
    from_scalar = DataArray(0)
    assert_identical(reference, from_scalar)
@requires_dask
def test_constructor_dask_coords(self) -> None:
    """Dask arrays are accepted as coordinate values (regression: GH1684)."""
    import dask.array as da

    lazy_coord = da.arange(8, chunks=(4,))
    data = da.random.random((8, 8), chunks=(4, 4)) + 1
    with_dask_coords = DataArray(
        data, coords={"x": lazy_coord, "y": lazy_coord}, dims=["x", "y"]
    )

    eager_coord = np.arange(8)
    with_numpy_coords = DataArray(
        data, coords={"x": eager_coord, "y": eager_coord}, dims=["x", "y"]
    )

    assert_equal(with_dask_coords, with_numpy_coords)
def test_constructor_no_default_index(self) -> None:
    """An explicit Coordinates object with ``indexes={}`` adds no default index."""
    index_free = Coordinates({"x": [1, 2, 3]}, indexes={})
    array = DataArray(range(3), coords=index_free)

    # The coordinate exists, but no index was created for it.
    assert "x" in array.coords
    assert "x" not in array.xindexes
def test_constructor_multiindex(self) -> None:
    """Coordinates built from a pandas MultiIndex are kept as given."""
    product_index = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=("one", "two")
    )
    coords = Coordinates.from_pandas_multiindex(product_index, "x")

    array = DataArray(range(4), coords=coords, dims="x")

    assert_identical(array.coords, coords)
def test_constructor_custom_index(self) -> None:
    """A user-defined Index subclass is kept; coordinate variables are copied."""

    class CustomIndex(Index):
        """Empty Index subclass used only as a type marker."""

    coords = Coordinates(
        coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()}
    )
    array = DataArray(range(3), coords=coords)

    assert isinstance(array.xindexes["x"], CustomIndex)

    # The coordinate is not the same object as the source variable.
    assert array.coords["x"] is not coords.variables["x"]
def test_equals_and_identical(self) -> None:
    """Walk through changes that break ``equals``, ``identical`` or both."""
    orig = DataArray(np.arange(5.0), {"a": 42}, dims="x")

    baseline = orig
    other = orig.copy()
    assert baseline.equals(other)
    assert baseline.identical(other)

    # A different name: still equal, no longer identical.
    other = baseline.rename("baz")
    assert baseline.equals(other)
    assert not baseline.identical(other)

    # A renamed dimension breaks both.
    other = baseline.rename({"x": "xxx"})
    assert not baseline.equals(other)
    assert not baseline.identical(other)

    # An extra attribute: still equal, no longer identical.
    other = baseline.copy()
    other.attrs["foo"] = "bar"
    assert baseline.equals(other)
    assert not baseline.identical(other)

    # An added dimension coordinate breaks both.
    other = baseline.copy()
    other["x"] = ("x", -np.arange(5))
    assert not baseline.equals(other)
    assert not baseline.identical(other)

    # Dropping the scalar coordinate breaks both.
    other = baseline.reset_coords(drop=True)
    assert not baseline.equals(other)
    assert not baseline.identical(other)

    # A NaN in the same position compares equal.
    other = orig.copy()
    other[0] = np.nan
    baseline = other.copy()
    assert baseline.equals(other)
    assert baseline.identical(other)

    # All-NaN versus a single NaN differs.
    other[:] = np.nan
    assert not baseline.equals(other)
    assert not baseline.identical(other)

    # A changed scalar coordinate value breaks both.
    other = baseline.copy()
    other["a"] = 100000
    assert not baseline.equals(other)
    assert not baseline.identical(other)
def test_equals_failures(self) -> None:
    """Comparison against non-DataArray objects returns False, not an error."""
    array = DataArray(np.arange(5.0), {"a": 42}, dims="x")

    assert not array.equals(np.arange(5))  # type: ignore[arg-type]
    assert not array.identical(123)  # type: ignore[arg-type]
    assert not array.broadcast_equals({1: 2})  # type: ignore[arg-type]
def test_broadcast_equals(self) -> None:
    """``broadcast_equals`` accepts a scalar coord against its broadcast form."""
    scalar_coord = DataArray([0, 0], {"y": 0}, dims="x")
    vector_coord = DataArray([0, 0], {"y": ("x", [0, 0])}, dims="x")

    assert scalar_coord.broadcast_equals(vector_coord)
    assert vector_coord.broadcast_equals(scalar_coord)
    # The stricter comparisons still tell them apart.
    assert not scalar_coord.equals(vector_coord)
    assert not scalar_coord.identical(vector_coord)

    other_dim = DataArray([0], coords={"x": 0}, dims="y")
    assert not scalar_coord.broadcast_equals(other_dim)
    assert not other_dim.broadcast_equals(scalar_coord)
def test_getitem(self) -> None:
    """Positional ``[]`` indexing with slices, arrays, variables and masks."""
    # String keys pull DataArrays out of the dataset and the array.
    assert_identical(self.dv, self.ds["foo"])
    x = self.dv["x"]
    y = self.dv["y"]
    assert_identical(self.ds["x"], x)
    assert_identical(self.ds["y"], y)

    key = ReturnItem()

    # Indexers that select everything.
    select_all = [
        key[:],
        key[...],
        key[x.values],
        key[x.variable],
        key[x],
        key[x, y],
        key[x.values > -1],
        key[x.variable > -1],
        key[x > -1],
        key[x > -1, y > -1],
    ]
    for indexer in select_all:
        assert_equal(self.dv, self.dv[indexer])

    # Indexers that select a subset; compare against the bare Variable.
    select_some = [
        key[0],
        key[:, 0],
        key[:3, :2],
        key[x.values[:3]],
        key[x.variable[:3]],
        key[x[:3]],
        key[x[:3], y[:4]],
        key[x.values > 3],
        key[x.variable > 3],
        key[x > 3],
        key[x > 3, y > 3],
    ]
    for indexer in select_some:
        assert_array_equal(self.v[indexer], self.dv[indexer])
def test_getitem_dict(self) -> None:
    """A dict key inside ``[]`` gives the same result as ``isel`` keywords."""
    via_isel = self.dv.isel(x=slice(3), y=0)
    via_getitem = self.dv[{"x": slice(3), "y": 0}]
    assert_identical(via_isel, via_getitem)
def test_getitem_coords(self) -> None:
    """Positional indexing carries every coordinate along correctly."""
    source_coords = {
        "x": [1, 2],
        "y": [3],
        "z": 4,
        "x2": ("x", ["a", "b"]),
        "y2": ("y", ["c"]),
        "xy": (["y", "x"], [["d", "e"]]),
    }
    orig = DataArray([[10], [20]], source_coords, dims=["x", "y"])

    # Selections that cover the whole array return it unchanged.
    assert_identical(orig, orig[:])
    assert_identical(orig, orig[:, :])
    assert_identical(orig, orig[...])
    assert_identical(orig, orig[:2, :1])
    assert_identical(orig, orig[[0, 1], [0]])

    # Scalar selection: every coordinate becomes a scalar.
    picked = orig[0, 0]
    wanted = DataArray(
        10, {"x": 1, "y": 3, "z": 4, "x2": "a", "y2": "c", "xy": "d"}
    )
    assert_identical(wanted, picked)

    # First row: coordinates along x become scalars.
    picked = orig[0, :]
    row_coords = {
        "x": 1,
        "y": [3],
        "z": 4,
        "x2": "a",
        "y2": ("y", ["c"]),
        "xy": ("y", ["d"]),
    }
    wanted = DataArray([10], row_coords, dims="y")
    assert_identical(wanted, picked)

    # First column: coordinates along y become scalars.
    picked = orig[:, 0]
    column_coords = {
        "x": [1, 2],
        "y": 3,
        "z": 4,
        "x2": ("x", ["a", "b"]),
        "y2": "c",
        "xy": ("x", ["d", "e"]),
    }
    wanted = DataArray([10, 20], column_coords, dims="x")
    assert_identical(wanted, picked)
def test_getitem_dataarray(self) -> None:
    """DataArray objects can be used as positional indexers."""
    # The indexer reuses the dim name "x"; this should not conflict.
    array = DataArray(np.arange(12).reshape((3, 4)), dims=["x", "y"])
    indexer = DataArray([[0, 1], [0, 1]], dims=["x", "z"])
    picked = array[indexer]
    assert_array_equal(picked, array.values[[[0, 1], [0, 1]], :])

    # Indexer with entirely new dimension names.
    array = DataArray(
        np.arange(12).reshape((3, 4)),
        dims=["x", "y"],
        coords={"x": [0, 1, 2], "y": ["a", "b", "c", "d"]},
    )
    indexer = xr.DataArray([[0, 1], [0, 1]], dims=["X", "Y"])
    picked = array[indexer]
    wanted = array.values[[[0, 1], [0, 1]], :]
    assert_array_equal(picked, wanted)
    assert picked.dims == ("X", "Y", "y")

    # boolean indexing
    mask = xr.DataArray([True, True, False], dims=["x"])
    assert_equal(array[mask], array[[0, 1], :])
    assert_equal(array[mask], array[[0, 1]])
    assert_equal(array[mask], array[mask.values])
def test_getitem_empty_index(self) -> None:
    """Empty indexers give zero-length results along the indexed dimension."""
    array = DataArray(np.arange(12).reshape((3, 4)), dims=["x", "y"])
    no_rows = DataArray(np.zeros((0, 4)), dims=["x", "y"])
    no_columns = DataArray(np.zeros((3, 0)), dims=["x", "y"])

    assert_identical(array[{"x": []}], no_rows)
    assert_identical(array.loc[{"y": []}], no_columns)
    assert_identical(array[[]], DataArray(np.zeros((0, 4)), dims=["x", "y"]))
def test_setitem(self) -> None:
    """Basic item assignment behaves like the same assignment on numpy."""
    everything = slice(None, None)
    keys = [
        (0, 0),
        (0, everything),
        (everything, everything),
        (everything, 0),
        ([1, 0], everything),
        (everything, [1, 0]),
    ]
    for key in keys:
        # Fresh numpy reference and DataArray for every key.
        reference = np.arange(6).reshape(3, 2)
        coords = {
            "x": [1, 2, 3],
            "y": ["a", "b"],
            "z": 4,
            "x2": ("x", ["a", "b", "c"]),
            "y2": ("y", ["d", "e"]),
        }
        array = DataArray(np.arange(6).reshape(3, 2), coords, dims=["x", "y"])

        array[key] = 1
        reference[key] = 1

        assert_array_equal(array.values, reference)
def test_setitem_fancy(self) -> None:
    """Assignment through vectorized (pointwise) indexers."""
    # 1d vectorized indexing: the points (0, 0) and (1, 1)
    array = DataArray(np.ones((3, 2)), dims=["x", "y"])
    diag = Variable(["a"], [0, 1])
    array[{"x": diag, "y": diag}] = 0
    wanted = DataArray([[0, 1], [1, 0], [1, 1]], dims=["x", "y"])
    assert_identical(wanted, array)

    # assigning a 0d variable
    array[{"x": diag, "y": diag}] = Variable((), 0)
    wanted = DataArray([[0, 1], [1, 0], [1, 1]], dims=["x", "y"])
    assert_identical(wanted, array)

    # assigning a 1d variable along the indexer's dimension
    array[{"x": diag, "y": diag}] = Variable(["a"], [2, 3])
    wanted = DataArray([[2, 1], [1, 3], [1, 1]], dims=["x", "y"])
    assert_identical(wanted, array)

    # 2d vectorized indexing
    array = DataArray(np.ones((3, 2)), dims=["x", "y"])
    rows = DataArray([[0, 1]], dims=["a", "b"])
    columns = DataArray([[1, 0]], dims=["a", "b"])
    array[{"x": rows, "y": columns}] = 0
    wanted = DataArray([[1, 0], [0, 1], [1, 1]], dims=["x", "y"])
    assert_identical(wanted, array)

    # a single Variable used directly as the key
    array = DataArray(np.ones((3, 2)), dims=["x", "y"])
    diag = Variable(["a"], [0, 1])
    array[diag] = 0
    wanted = DataArray([[0, 0], [0, 0], [1, 1]], dims=["x", "y"])
    assert_identical(wanted, array)
def test_setitem_dataarray(self) -> None:
    """Assignment with DataArray indexers/values checks the 'x' coordinate."""
    x_conflict = r"dimension coordinate 'x'"

    def fresh_array():
        # A new, unmodified target array.
        return DataArray(
            np.ones((4, 3, 2)),
            dims=["x", "y", "z"],
            coords={
                "x": np.arange(4),
                "y": ["a", "b", "c"],
                "non-dim": ("x", [1, 3, 4, 2]),
            },
        )

    def zeros_with_x(x_labels):
        # Zero-filled value to assign, labelled along x by ``x_labels``.
        return xr.DataArray(
            np.zeros((3, 3, 2)),
            dims=["x", "y", "z"],
            coords={"x": x_labels, "non-dim": ("x", [0, 2, 4])},
        )

    target = fresh_array()

    # indexer with inconsistent coordinates
    indexer = DataArray(
        np.arange(1, 4), dims=["x"], coords={"x": np.random.randn(3)}
    )
    with pytest.raises(IndexError, match=x_conflict):
        target[{"x": indexer}] = 0

    # indexer with consistent coordinates
    indexer = DataArray(np.arange(1, 4), dims=["x"], coords={"x": np.arange(1, 4)})
    target[{"x": indexer}] = 0  # should not raise
    assert np.allclose(target[{"x": indexer}].values, 0)
    assert_identical(target["x"], fresh_array()["x"])
    assert_identical(target["non-dim"], fresh_array()["non-dim"])

    target = fresh_array()

    # conflict in the assigned value's coordinate
    value = zeros_with_x([0, 1, 2])
    with pytest.raises(IndexError, match=x_conflict):
        target[{"x": indexer}] = value

    # consistent coordinate in the assigned value
    value = zeros_with_x([1, 2, 3])
    target[{"x": indexer}] = value
    assert np.allclose(target[{"x": indexer}].values, 0)
    assert_identical(target["x"], fresh_array()["x"])
    assert_identical(target["non-dim"], fresh_array()["non-dim"])

    # conflict only in the non-dimension coordinate
    value = zeros_with_x([1, 2, 3])
    target[{"x": indexer}] = value  # should not raise

    # conflict in the assigned value's coordinate (repeated)
    value = zeros_with_x([0, 1, 2])
    with pytest.raises(IndexError, match=x_conflict):
        target[{"x": indexer}] = value

    # consistent coordinate in the assigned value (repeated)
    value = zeros_with_x([1, 2, 3])
    target[{"x": indexer}] = value  # should not raise
def test_setitem_vectorized(self) -> None:
    """Vectorized assignment, positional and label-based (regression: GH:7030)."""
    dims = ["a", "b", "c", "d"]

    # Positional indexing
    target = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=dims)
    b_idx = xr.DataArray([[0, 0], [1, 0]], dims=["u", "v"])
    c_idx = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"])
    new_values = xr.DataArray([-1, -2], dims=["u"])
    key = {"b": b_idx, "c": c_idx}
    target[key] = new_values
    assert (target[key] == new_values).all()

    # Indexing with coordinates
    target = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=dims)
    target.coords["b"] = [2, 4, 6]
    b_idx = xr.DataArray([[2, 2], [4, 2]], dims=["u", "v"])
    c_idx = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"])
    new_values = xr.DataArray([-1, -2], dims=["u"])
    key = {"b": b_idx, "c": c_idx}
    target.loc[key] = new_values
    assert (target.loc[key] == new_values).all()
def test_contains(self) -> None:
    """The ``in`` operator tests membership among the array's values."""
    pair = DataArray([1, 2])
    assert 1 in pair
    assert 3 not in pair
def test_pickle(self) -> None:
    """A DataArray survives a pickle round trip unchanged."""
    original = DataArray(np.random.random((3, 3)), dims=("id", "time"))
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    assert_identical(original, restored)
@requires_dask
def test_chunk(self) -> None:
    """``chunk`` with default, tuple, int and keyword chunk specifications."""
    eager = DataArray(np.ones((3, 4)))
    assert eager.chunks is None

    chunked = eager.chunk()
    assert chunked.chunks == ((3,), (4,))
    default_name = chunked.data.name

    # Explicit per-block sizes are expected to warn.
    with pytest.warns(DeprecationWarning):
        chunked = eager.chunk(chunks=((2, 1), (2, 2)))  # type: ignore[arg-type]
    assert chunked.chunks == ((2, 1), (2, 2))
    assert chunked.data.name != default_name

    chunked = eager.chunk(chunks=(3, 3))
    assert chunked.chunks == ((3,), (3, 1))
    assert chunked.data.name != default_name

    # More chunk entries than dimensions is rejected.
    with pytest.raises(ValueError):
        chunked.chunk(chunks=(3, 3, 3))

    # name doesn't change when rechunking by same amount
    # this fails if ReprObject doesn't have __dask_tokenize__ defined
    assert eager.chunk(2).data.name == eager.chunk(2).data.name

    assert chunked.load().chunks is None

    # Check that kwargs are passed
    import dask.array as da

    chunked = eager.chunk(name_prefix="testname_")
    assert isinstance(chunked.data, da.Array)
    assert "testname_" in chunked.data.name

    # test kwargs form of chunks
    chunked = eager.chunk(dim_0=3, dim_1=3)
    assert chunked.chunks == ((3,), (3, 1))
    assert chunked.data.name != default_name
def test_isel(self) -> None:
    """``isel`` matches positional indexing and honours ``missing_dims``."""
    array = self.dv
    # The same message is expected from the error and the warning.
    missing_dim_message = (
        r"Dimensions {'not_a_dim'} do not exist. "
        r"Expected one or more of \('x', 'y'\)"
    )

    assert_identical(array[0], array.isel(x=0))
    assert_identical(array, array.isel(x=slice(None)))
    assert_identical(array[:3], array.isel(x=slice(3)))
    assert_identical(array[:3, :5], array.isel(x=slice(3), y=slice(5)))

    # Without ``missing_dims`` an unknown dimension raises.
    with pytest.raises(ValueError, match=missing_dim_message):
        array.isel(not_a_dim=0)

    with pytest.warns(UserWarning, match=missing_dim_message):
        array.isel(not_a_dim=0, missing_dims="warn")

    assert_identical(array, array.isel(not_a_dim=0, missing_dims="ignore"))
def test_isel_types(self) -> None:
    """Integer indexers of several dtypes select alike (regression: #1405)."""
    array = DataArray([1, 2, 3], dims="x")

    for dtype in ("uint64", "uint32", "int64"):
        typed = array.isel(x=np.array([0], dtype=dtype))
        default = array.isel(x=np.array([0]))
        assert_identical(typed, default)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_isel_fancy(self) -> None:
    """Pointwise ``isel`` with tuple, Dataset-variable and 2d indexers."""

    def along(dim, positions):
        # (dims, values) tuple form of an indexer along one new dimension.
        return ((dim,), positions)

    shape = (10, 7, 6)
    raw = np.random.random(shape)
    array = DataArray(
        raw, dims=["time", "y", "x"], coords={"time": np.arange(0, 100, 10)}
    )

    y_idx = [1, 3]
    x_idx = [3, 0]

    wanted = array.values[:, y_idx, x_idx]

    picked = array.isel(y=along("test_coord", y_idx), x=along("test_coord", x_idx))
    assert picked.coords["test_coord"].shape == (len(y_idx),)
    assert list(picked.coords) == ["time"]
    assert picked.dims == ("time", "test_coord")

    np.testing.assert_equal(picked, wanted)

    # a few corner cases
    array.isel(
        time=along("points", [1, 2]),
        x=along("points", [2, 2]),
        y=along("points", [3, 4]),
    )
    single_point = array.isel(time=along("p", [1]), x=along("p", [2]), y=along("p", [4]))
    np.testing.assert_allclose(
        single_point.values.squeeze(),
        raw[1, 4, 2].squeeze(),
    )
    array.isel(time=along("points", [1, 2]))

    # negative positions
    y_idx = [-1, 0]
    x_idx = [-2, 2]
    wanted2 = array.values[:, y_idx, x_idx]
    picked2 = array.isel(x=along("points", x_idx), y=along("points", y_idx)).values
    np.testing.assert_equal(picked2, wanted2)

    # test that the order of the indexers doesn't matter
    assert_identical(
        array.isel(y=along("points", y_idx), x=along("points", x_idx)),
        array.isel(x=along("points", x_idx), y=along("points", y_idx)),
    )

    # make sure we're raising errors in the right places
    with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
        array.isel(y=along("points", [1, 2]), x=along("points", [1, 2, 3]))

    # tests using index or DataArray as indexers
    stations = Dataset()
    stations["station"] = (("station",), ["A", "B", "C"])
    stations["dim1s"] = (("station",), [1, 2, 3])
    stations["dim2s"] = (("station",), [4, 5, 1])

    picked3 = array.isel(x=stations["dim1s"], y=stations["dim2s"])
    assert "station" in picked3.coords
    assert "station" in picked3.dims
    assert_identical(picked3["station"], stations["station"])

    with pytest.raises(ValueError, match=r"conflicting values/indexes on "):
        array.isel(
            x=DataArray([0, 1, 2], dims="station", coords={"station": [0, 1, 2]}),
            y=DataArray([0, 1, 2], dims="station", coords={"station": [0, 1, 3]}),
        )

    # multi-dimensional selection
    stations = Dataset()
    stations["a"] = (("a",), ["A", "B", "C"])
    stations["b"] = (("b",), [0, 1])
    stations["dim1s"] = (("a", "b"), [[1, 2], [2, 3], [3, 4]])
    stations["dim2s"] = (("a",), [4, 5, 1])

    picked4 = array.isel(x=stations["dim1s"], y=stations["dim2s"])
    for dim in ("a", "b"):
        assert dim in picked4.coords
        assert dim in picked4.dims
    assert_identical(picked4["a"], stations["a"])
    assert_identical(picked4["b"], stations["b"])

    wanted4 = array.variable[
        :, stations["dim2s"].variable, stations["dim1s"].variable
    ]
    assert_array_equal(picked4, wanted4)
def test_sel(self) -> None:
    """Label-based ``sel`` agrees with the equivalent positional indexing."""
    # Label the x dimension with the letters a..j.
    self.ds["x"] = ("x", np.array(list("abcdefghij")))
    array = self.ds["foo"]

    assert_identical(array, array.sel(x=slice(None)))
    assert_identical(array[1], array.sel(x="b"))
    assert_identical(array[:3], array.sel(x=slice("c")))
    assert_identical(array[:3], array.sel(x=["a", "b", "c"]))
    assert_identical(array[:, :4], array.sel(y=(self.ds["y"] < 4)))

    # verify that indexing with a dataarray works
    label = DataArray("b")
    assert_identical(array[1], array.sel(x=label))
    assert_identical(array[[1]], array.sel(x=slice(label, label)))
def test_sel_dataarray(self) -> None:
    """``sel`` accepts DataArray indexers along an existing or a new dim."""
    self.ds["x"] = ("x", np.array(list("abcdefghij")))
    array = self.ds["foo"]
    first_three = [0, 1, 2]

    # indexer along the existing dimension
    labels = DataArray(["a", "b", "c"], dims=["x"])
    picked = array.sel(x=labels)
    assert_identical(picked, array.isel(x=first_three))

    # along new dimension
    labels = DataArray(["a", "b", "c"], dims=["new_dim"])
    picked = array.sel(x=labels)
    assert_array_equal(picked, array.isel(x=first_three))
    assert "new_dim" in picked.dims

    # with coordinate
    labels = DataArray(
        ["a", "b", "c"], dims=["new_dim"], coords={"new_dim": [0, 1, 2]}
    )
    picked = array.sel(x=labels)
    assert_array_equal(picked, array.isel(x=first_three))
    assert "new_dim" in picked.dims
    assert "new_dim" in picked.coords
    assert_equal(picked["new_dim"].drop_vars("x"), labels["new_dim"])
def test_sel_invalid_slice(self) -> None:
    """A slice bounded by a non-scalar array is rejected."""
    positions = np.arange(10)
    array = DataArray(positions, [("x", np.arange(10))])

    with pytest.raises(ValueError, match=r"cannot use non-scalar arrays"):
        array.sel(x=slice(array.x))
def test_sel_dataarray_datetime_slice(self) -> None:
    """Slices bounded by 0-d DataArrays of times or timedeltas work (GH1240)."""
    times = pd.date_range("2000-01-01", freq="D", periods=365)

    # datetime coordinate
    by_time = DataArray(np.arange(365), [("time", times)])
    whole = by_time.sel(time=slice(by_time.time[0], by_time.time[-1]))
    assert_equal(whole, by_time)

    # timedelta coordinate (offsets from the first timestamp)
    by_delta = DataArray(np.arange(365), [("delta", times - times[0])])
    whole = by_delta.sel(delta=slice(by_delta.delta[0], by_delta.delta[-1]))
    assert_equal(whole, by_delta)
@pytest.mark.parametrize(
    ["coord_values", "indices"],
    (
        pytest.param(
            np.array([0.0, 0.111, 0.222, 0.333], dtype="float64"),
            slice(1, 3),
            id="float64",
        ),
        pytest.param(
            np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"),
            slice(1, 3),
            id="float32",
        ),
        pytest.param(
            np.array([0.0, 0.111, 0.222, 0.333], dtype="float32"),
            [2],
            id="scalar",
        ),
    ),
)
def test_sel_float(self, coord_values, indices) -> None:
    """Selecting by float labels taken from the coordinate itself succeeds."""
    payload = np.arange(4)
    array = DataArray(payload, coords={"x": coord_values}, dims="x")
    chosen_labels = coord_values[indices]

    picked = array.sel(x=chosen_labels)

    wanted = DataArray(
        payload[indices], coords={"x": coord_values[indices]}, dims="x"
    )
    assert_equal(picked, wanted)
def test_sel_float16(self) -> None:
    """Float16 coordinates warn on construction but remain selectable."""
    message = "`pandas.Index` does not support the `float16` dtype.*"
    window = slice(1, 3)
    payload = np.arange(4)
    labels = np.array([0.0, 0.111, 0.222, 0.333], dtype="float16")

    # each construction with a float16 coordinate is expected to warn
    with pytest.warns(DeprecationWarning, match=message):
        arr = DataArray(payload, coords={"x": labels}, dims="x")
    with pytest.warns(DeprecationWarning, match=message):
        expected = DataArray(payload[window], coords={"x": labels[window]}, dims="x")

    actual = arr.sel(x=labels[window])
    assert_equal(actual, expected)
def test_sel_float_multiindex(self) -> None:
    """Select on a multi-index one of whose levels is float32."""
    # regression test https://github.com/pydata/xarray/issues/5691
    # test multi-index created from coordinates, one with dtype=float32
    level_coords = {
        "lvl1": ("x", ["a", "a", "b", "b"]),
        "lvl2": ("x", np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)),
    }
    flat = xr.DataArray([1, 2, 3, 4], dims="x", coords=level_coords)
    indexed = flat.set_index(x=["lvl1", "lvl2"])

    actual = indexed.sel(lvl1="a", lvl2=0.1)
    expected = indexed.isel(x=0)
    assert_equal(actual, expected)
def test_sel_no_index(self) -> None:
    """Without an index on ``x``, ``sel`` must agree with positional indexing."""
    arr = DataArray(np.arange(10), dims="x")
    below_five = arr < 5

    assert_identical(arr[0], arr.sel(x=0))
    assert_identical(arr[:5], arr.sel(x=slice(5)))
    assert_identical(arr[[0, -1]], arr.sel(x=[0, -1]))
    assert_identical(arr[below_five], arr.sel(x=below_five))
def test_sel_method(self) -> None:
    """Exercise the ``method`` / ``tolerance`` arguments of ``sel``."""
    grid = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))])

    # an inexact label without a method fails and the error hints at ``method``
    with pytest.raises(KeyError, match="Try setting the `method`"):
        grid.sel(y="ab")

    # inexact string labels resolved with "pad"
    want = grid.sel(y=["a", "b"])
    got = grid.sel(y=["ab", "ba"], method="pad")
    assert_identical(want, got)

    # inexact numeric labels resolved with "backfill" within a tolerance
    want = grid.sel(x=[1, 2])
    got = grid.sel(x=[0.9, 1.9], method="backfill", tolerance=1)
    assert_identical(want, got)
def test_sel_drop(self) -> None:
    """``sel(..., drop=...)`` controls whether the scalar coordinate is kept."""
    indexed = DataArray([1, 2, 3], [("x", [0, 1, 2])])

    # drop=True: the selected label is discarded
    assert_identical(DataArray(1), indexed.sel(x=0, drop=True))

    # drop=False: the selected label survives as a scalar coordinate
    assert_identical(DataArray(1, {"x": 0}), indexed.sel(x=0, drop=False))

    # array without a coordinate on x
    unindexed = DataArray([1, 2, 3], dims=["x"])
    assert_identical(DataArray(1), unindexed.sel(x=0, drop=True))
def test_isel_drop(self) -> None:
    """``isel(..., drop=...)`` controls whether the scalar coordinate is kept."""
    indexed = DataArray([1, 2, 3], [("x", [0, 1, 2])])

    # drop=True: the coordinate of the selected position is discarded
    assert_identical(DataArray(1), indexed.isel(x=0, drop=True))

    # drop=False: it survives as a scalar coordinate
    assert_identical(DataArray(1, {"x": 0}), indexed.isel(x=0, drop=False))
def test_head(self) -> None:
    """Compare ``head`` with the equivalent ``isel`` slices and check validation."""
    dv = self.dv

    # per-dimension keyword form
    assert_equal(dv.isel(x=slice(5)), dv.head(x=5))
    assert_equal(dv.isel(x=slice(0)), dv.head(x=0))

    # a bare int applies to every dimension
    first_six = {dim: slice(6) for dim in dv.dims}
    assert_equal(dv.isel(first_six), dv.head(6))

    # with no argument the result must match five items per dimension
    first_five = {dim: slice(5) for dim in dv.dims}
    assert_equal(dv.isel(first_five), dv.head())

    # invalid arguments
    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        dv.head([3])
    with pytest.raises(TypeError, match=r"expected integer type"):
        dv.head(x=3.1)
    with pytest.raises(ValueError, match=r"expected positive int"):
        dv.head(-3)
def test_tail(self) -> None:
    """Compare ``tail`` with the equivalent ``isel`` slices and check validation."""
    dv = self.dv

    # per-dimension keyword form
    assert_equal(dv.isel(x=slice(-5, None)), dv.tail(x=5))
    assert_equal(dv.isel(x=slice(0)), dv.tail(x=0))

    # a bare int applies to every dimension
    last_six = {dim: slice(-6, None) for dim in dv.dims}
    assert_equal(dv.isel(last_six), dv.tail(6))

    # with no argument the result must match five items per dimension
    last_five = {dim: slice(-5, None) for dim in dv.dims}
    assert_equal(dv.isel(last_five), dv.tail())

    # invalid arguments
    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        dv.tail([3])
    with pytest.raises(TypeError, match=r"expected integer type"):
        dv.tail(x=3.1)
    with pytest.raises(ValueError, match=r"expected positive int"):
        dv.tail(-3)
def test_thin(self) -> None:
    """Compare ``thin`` with the equivalent strided ``isel`` and check validation."""
    dv = self.dv

    # per-dimension keyword form
    assert_equal(dv.isel(x=slice(None, None, 5)), dv.thin(x=5))

    # a bare int applies to every dimension
    every_sixth = {dim: slice(None, None, 6) for dim in dv.dims}
    assert_equal(dv.isel(every_sixth), dv.thin(6))

    # invalid arguments
    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        dv.thin([3])
    with pytest.raises(TypeError, match=r"expected integer type"):
        dv.thin(x=3.1)
    with pytest.raises(ValueError, match=r"expected positive int"):
        dv.thin(-3)
    with pytest.raises(ValueError, match=r"cannot be zero"):
        dv.thin(time=0)
def test_loc(self) -> None:
    """``.loc`` lookups must match the equivalent positional indexing."""
    self.ds["x"] = ("x", np.array(list("abcdefghij")))
    foo = self.ds["foo"]
    second_row = foo[1]
    first_three_rows = foo[:3]

    # typing issue: see https://github.com/python/mypy/issues/2410
    assert_identical(first_three_rows, foo.loc[:"c"])  # type: ignore[misc]

    # single label: bare, as a dict, and followed by an ellipsis
    assert_identical(second_row, foo.loc["b"])
    assert_identical(second_row, foo.loc[{"x": "b"}])
    assert_identical(second_row, foo.loc["b", ...])

    # list of labels, alone and combined with a second indexer
    assert_identical(first_three_rows, foo.loc[["a", "b", "c"]])
    assert_identical(foo[:3, :4], foo.loc[["a", "b", "c"], np.arange(4)])

    # boolean DataArray along the second dimension
    assert_identical(foo[:, :4], foo.loc[:, self.ds["y"] < 4])
def test_loc_datetime64_value(self) -> None:
    """``.loc`` accepts a ``numpy.datetime64`` scalar as the label."""
    # regression test for https://github.com/pydata/xarray/issues/4283
    stamps = np.array(["2017-09-05T12", "2017-09-05T15"], dtype="datetime64[ns]")
    series = DataArray(np.ones(stamps.shape), dims=("time",), coords=(stamps,))
    first_stamp = stamps[0]
    assert_identical(series.loc[{"time": first_stamp}], series[0])
def test_loc_assign(self) -> None:
    """Assignment through ``.loc`` writes into the underlying values."""
    self.ds["x"] = ("x", np.array(list("abcdefghij")))
    foo = self.ds["foo"]

    # assignment
    # typing issue: see https://github.com/python/mypy/issues/2410
    foo.loc["a":"j"] = 0  # type: ignore[misc]
    assert np.all(foo.values == 0)

    everything = {"x": slice("a", "j")}
    foo.loc[everything] = 2
    assert np.all(foo.values == 2)

    # same assignment repeated, exactly as in the original test
    foo.loc[{"x": slice("a", "j")}] = 2
    assert np.all(foo.values == 2)

    # Multi dimensional case
    grid = DataArray(np.arange(12).reshape(3, 4), dims=["x", "y"])
    grid.loc[0, 0] = 0
    assert grid.values[0, 0] == 0
    assert grid.values[0, 1] != 0

    grid = DataArray(np.arange(12).reshape(3, 4), dims=["x", "y"])
    grid.loc[0] = 0
    assert np.all(grid.values[0] == np.zeros(4))
    assert grid.values[1, 0] != 0
def test_loc_assign_dataarray(self) -> None:
    """``.loc`` assignment with DataArray indexers and DataArray values.

    Indexers or values whose dimension coordinates disagree with the target
    must raise; consistent ones must assign and leave the target's
    coordinates untouched.
    """

    def make_array():
        # fresh 4x3x2 array of ones with dimension and non-dimension coords
        coords = {
            "x": np.arange(4),
            "y": ["a", "b", "c"],
            "non-dim": ("x", [1, 3, 4, 2]),
        }
        return DataArray(np.ones((4, 3, 2)), dims=["x", "y", "z"], coords=coords)

    target = make_array()

    # indexer with inconsistent coordinates.
    indexer = DataArray(
        np.arange(1, 4), dims=["y"], coords={"y": np.random.randn(3)}
    )
    with pytest.raises(IndexError, match=r"dimension coordinate 'y'"):
        target.loc[dict(x=indexer)] = 0

    # indexer with consistent coordinates.
    indexer = DataArray(np.arange(1, 4), dims=["x"], coords={"x": np.arange(1, 4)})
    target.loc[dict(x=indexer)] = 0  # should not raise
    assert np.allclose(target[dict(x=indexer)].values, 0)
    assert_identical(target["x"], make_array()["x"])
    assert_identical(target["non-dim"], make_array()["non-dim"])

    target = make_array()

    # conflict in the assigning values
    conflicting = xr.DataArray(
        np.zeros((3, 3, 2)),
        dims=["x", "y", "z"],
        coords={"x": [0, 1, 2], "non-dim": ("x", [0, 2, 4])},
    )
    with pytest.raises(IndexError, match=r"dimension coordinate 'x'"):
        target.loc[dict(x=indexer)] = conflicting

    # consistent coordinate in the assigning values
    consistent = xr.DataArray(
        np.zeros((3, 3, 2)),
        dims=["x", "y", "z"],
        coords={"x": [1, 2, 3], "non-dim": ("x", [0, 2, 4])},
    )
    target.loc[dict(x=indexer)] = consistent
    assert np.allclose(target[dict(x=indexer)].values, 0)
    assert_identical(target["x"], make_array()["x"])
    assert_identical(target["non-dim"], make_array()["non-dim"])
def test_loc_single_boolean(self) -> None:
    """Boolean coordinate labels are looked up as labels by ``.loc``."""
    flags = DataArray([0, 1], coords=[[True, False]])
    # True labels the first element, False the second
    assert flags.loc[True] == 0
    assert flags.loc[False] == 1
def test_loc_dim_name_collision_with_sel_params(self) -> None:
    """``.loc`` works when a dimension is called ``method``."""
    arr = xr.DataArray(
        [[0, 0], [1, 1]],
        dims=["dim1", "method"],
        coords={"dim1": ["x", "y"], "method": ["a", "b"]},
    )
    indexers = dict(dim1=["x", "y"], method=["a"])
    np.testing.assert_array_equal(arr.loc[indexers], [[0], [1]])
def test_selection_multiindex(self) -> None:
    """Label selection on a three-level MultiIndex via ``sel`` and ``.loc``."""
    mindex = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
    )
    mdata = DataArray(range(8), [("x", mindex)])

    def check_sel(
        lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None
    ) -> None:
        # compare a label-based selection with its positional counterpart
        selected = mdata.sel(x=lab_indexer)
        reference = mdata.isel(x=pos_indexer)

        if not replaced_idx:
            assert_identical(selected, reference)
            return

        # the index was replaced: the data must match, the "x" coord must not
        if renamed_dim:
            assert selected.dims[0] == renamed_dim
            selected = selected.rename({renamed_dim: "x"})
        assert_identical(selected.variable, reference.variable)
        assert not selected["x"].equals(reference["x"])

    # tuple / scalar / list / slice indexers
    check_sel(("a", 1, -1), 0)
    check_sel(("b", 2, -2), -1)
    check_sel(("a", 1), [0, 1], replaced_idx=True, renamed_dim="three")
    check_sel(("a",), range(4), replaced_idx=True)
    check_sel("a", range(4), replaced_idx=True)
    check_sel([("a", 1, -1), ("b", 2, -2)], [0, 7])
    check_sel(slice("a", "b"), range(8))
    check_sel(slice(("a", 1), ("b", 1)), range(6))

    # dict-of-levels indexers
    check_sel({"one": "a", "two": 1, "three": -1}, 0)
    check_sel({"one": "a", "two": 1}, [0, 1], replaced_idx=True, renamed_dim="three")
    check_sel({"one": "a"}, range(4), replaced_idx=True)

    # .loc must agree with sel
    assert_identical(mdata.loc["a"], mdata.sel(x="a"))
    assert_identical(mdata.loc[("a", 1), ...], mdata.sel(x=("a", 1)))
    assert_identical(mdata.loc[{"one": "a"}, ...], mdata.sel(x={"one": "a"}))
    with pytest.raises(IndexError):
        mdata.loc[("a", 1)]

    # levels may be passed nested under "x" or directly as keywords
    assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1))
def test_selection_multiindex_remove_unused(self) -> None:
    """Levels filtered out of a stacked index must not remain selectable."""
    # GH2619. For MultiIndex, we need to call remove_unused.
    grid = xr.DataArray(
        np.arange(40).reshape(8, 5),
        dims=["x", "y"],
        coords={"x": np.arange(8), "y": np.arange(5)},
    )
    stacked = grid.stack(xy=["x", "y"])
    subset = stacked.isel(xy=stacked["x"] < 4)

    # x == 5 was filtered out, so selecting it must fail
    with pytest.raises(KeyError):
        subset.sel(x=5)

    actual = subset.unstack()

    # reference: reset the index, filter, then rebuild it and unstack
    filtered = stacked.reset_index("xy").isel(xy=stacked["x"] < 4)
    expected = filtered.set_index(xy=["x", "y"]).unstack()
    assert_identical(expected, actual)
def test_selection_multiindex_from_level(self) -> None:
    """Select on a level of a multi-index built from concatenated arrays."""
    # GH: 3512
    part_a = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"})
    part_b = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"})
    combined = xr.concat([part_a, part_b], dim="x").set_index(xy=["x", "y"])
    assert combined.dims == ("xy",)

    actual = combined.sel(y="a")
    expected = combined.isel(xy=[0, 1]).unstack("xy").squeeze("y")
    assert_equal(actual, expected)
def test_virtual_default_coords(self) -> None:
    """Looking up a coordinate-less dimension yields a 0..n-1 range."""
    arr = DataArray(np.zeros((5,)), dims="x")
    default_x = DataArray(range(5), dims="x", name="x")

    # both access paths must return the same default coordinate
    assert_identical(default_x, arr["x"])
    assert_identical(default_x, arr.coords["x"])
def test_virtual_time_components(self) -> None:
    """``"time.dayofyear"`` is resolvable as a virtual coordinate."""
    days = pd.date_range("2000-01-01", periods=10)
    # the values 1..10 coincide with the day-of-year of each date
    series = DataArray(np.arange(1, 11), [("time", days)])

    assert_array_equal(series["time.dayofyear"], series.values)
    assert_array_equal(series.coords["time.dayofyear"], series.values)
def test_coords(self) -> None:
    """Exercise the mapping interface, repr and dtypes of ``DataArray.coords``.

    Also checks deleting a dimension coordinate, and that a MultiIndex level
    coordinate cannot be overwritten through either ``da[name] = ...`` or
    ``da.coords[name] = ...``.
    """
    # use int64 to ensure repr() consistency on windows
    coords = [
        IndexVariable("x", np.array([-1, -2], "int64")),
        IndexVariable("y", np.array([0, 1, 2], "int64")),
    ]
    da = DataArray(np.random.randn(2, 3), coords, name="foo")

    # len
    assert len(da.coords) == 2

    # iter
    assert list(da.coords) == ["x", "y"]

    assert coords[0].identical(da.coords["x"])
    assert coords[1].identical(da.coords["y"])

    assert "x" in da.coords
    assert 0 not in da.coords
    assert "foo" not in da.coords

    with pytest.raises(KeyError):
        da.coords[0]
    with pytest.raises(KeyError):
        da.coords["foo"]

    # repr: the coordinate listing is column-aligned, so the whitespace
    # inside this literal is significant
    expected_repr = dedent(
        """\
        Coordinates:
          * x        (x) int64 16B -1 -2
          * y        (y) int64 24B 0 1 2"""
    )
    actual = repr(da.coords)
    assert expected_repr == actual

    # dtypes
    assert da.coords.dtypes == {"x": np.dtype("int64"), "y": np.dtype("int64")}

    del da.coords["x"]
    da._indexes = filter_indexes_from_coords(da.xindexes, set(da.coords))
    expected = DataArray(da.values, {"y": [0, 1, 2]}, dims=["x", "y"], name="foo")
    assert_identical(da, expected)

    # Each assignment gets its own context manager: pytest.raises exits at
    # the first exception, so a second statement in the same block would
    # never execute.
    overwrite_error = r"cannot drop or update coordinate.*corrupt.*index "
    with pytest.raises(ValueError, match=overwrite_error):
        self.mda["level_1"] = ("x", np.arange(4))
    with pytest.raises(ValueError, match=overwrite_error):
        self.mda.coords["level_1"] = ("x", np.arange(4))
def test_coords_to_index(self) -> None:
    """``coords.to_index`` builds a pandas (Multi)Index from the dimension coords."""
    grid = DataArray(np.zeros((2, 3)), [("x", [1, 2]), ("y", list("abc"))])

    # a scalar selection has no dimension left to index
    with pytest.raises(ValueError, match=r"no valid index"):
        grid[0, 0].coords.to_index()

    # one remaining dimension -> plain Index
    want = pd.Index(["a", "b", "c"], name="y")
    got = grid[0].coords.to_index()
    assert want.equals(got)

    # two dimensions -> MultiIndex in dimension order
    want = pd.MultiIndex.from_product([[1, 2], ["a", "b", "c"]], names=["x", "y"])
    got = grid.coords.to_index()
    assert want.equals(got)

    # explicit dimension order
    want = pd.MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["y", "x"])
    got = grid.coords.to_index(["y", "x"])
    assert want.equals(got)

    # the explicit order must list every dimension
    with pytest.raises(ValueError, match=r"ordered_dims must match"):
        grid.coords.to_index(["x"])
def test_coord_coords(self) -> None:
    """A coordinate taken from ``coords`` carries, and can edit, its own coords."""
    source = DataArray(
        [10, 20], {"x": [1, 2], "x2": ("x", ["a", "b"]), "z": 4}, dims="x"
    )

    x_coord = source.coords["x"]
    want = DataArray(
        [1, 2], {"z": 4, "x2": ("x", ["a", "b"]), "x": [1, 2]}, dims="x", name="x"
    )
    assert_identical(want, x_coord)

    # removing a coordinate from the extracted coordinate
    del x_coord.coords["x2"]
    assert_identical(want.reset_coords("x2", drop=True), x_coord)

    # adding a new coordinate to it
    x_coord.coords["x3"] = ("x", ["a", "b"])
    want = DataArray(
        [1, 2], {"z": 4, "x3": ("x", ["a", "b"]), "x": [1, 2]}, dims="x", name="x"
    )
    assert_identical(want, x_coord)
def test_reset_coords(self) -> None:
    """``reset_coords`` turns coordinates into data variables or drops them."""
    data = DataArray(
        np.zeros((3, 4)),
        {"bar": ("x", ["a", "b", "c"]), "baz": ("y", range(4)), "y": range(4)},
        dims=["x", "y"],
        name="foo",
    )

    # no argument, or both names given explicitly: both become variables
    everything_reset = Dataset(
        {
            "foo": (["x", "y"], np.zeros((3, 4))),
            "bar": ("x", ["a", "b", "c"]),
            "baz": ("y", range(4)),
            "y": range(4),
        }
    )
    assert_identical(data.reset_coords(), everything_reset)
    assert_identical(data.reset_coords(["bar", "baz"]), everything_reset)

    # a single name, given as a string or as a one-element list
    only_bar_reset = Dataset(
        {"foo": (["x", "y"], np.zeros((3, 4))), "bar": ("x", ["a", "b", "c"])},
        {"baz": ("y", range(4)), "y": range(4)},
    )
    assert_identical(data.reset_coords("bar"), only_bar_reset)
    assert_identical(data.reset_coords(["bar"]), only_bar_reset)

    # drop=True discards the coordinates and keeps a DataArray
    all_dropped = DataArray(
        np.zeros((3, 4)), coords={"y": range(4)}, dims=["x", "y"], name="foo"
    )
    assert_identical(data.reset_coords(drop=True), all_dropped)
    assert_identical(data.copy().reset_coords(drop=True), all_dropped)

    bar_dropped = DataArray(
        np.zeros((3, 4)),
        {"baz": ("y", range(4)), "y": range(4)},
        dims=["x", "y"],
        name="foo",
    )
    assert_identical(data.reset_coords("bar", drop=True), bar_dropped)

    # names that are not coordinates
    with pytest.raises(ValueError, match=r"cannot be found"):
        data.reset_coords("foo", drop=True)
    with pytest.raises(ValueError, match=r"cannot be found"):
        data.reset_coords("not_found")

    # an index coordinate cannot be reset
    with pytest.raises(ValueError, match=r"cannot remove index"):
        data.reset_coords("y")

    # non-dimension index coordinate
    midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=("lvl1", "lvl2"))
    data = DataArray([1, 2, 3, 4], coords={"x": midx}, dims="x", name="foo")
    with pytest.raises(ValueError, match=r"cannot remove index"):
        data.reset_coords("lvl1")
def test_assign_coords(self) -> None:
    """``assign_coords`` adds coordinates and rejects invalid assignments."""
    scalar = DataArray(10)
    assert_identical(scalar.assign_coords(c=42), DataArray(10, {"c": 42}))

    # a level of an existing MultiIndex cannot be replaced
    with pytest.raises(
        ValueError, match=r"cannot drop or update coordinate.*corrupt.*index "
    ):
        self.mda.assign_coords(level_1=("x", range(4)))

    # GH: 2112
    short = xr.DataArray([0, 1, 2], dims="x")
    with pytest.raises(ValueError):
        short["x"] = [0, 1, 2, 3]  # size conflict
    with pytest.raises(ValueError):
        short.coords["x"] = [0, 1, 2, 3]  # size conflict
    with pytest.raises(ValueError):
        short.coords["x"] = ("y", [1, 2, 3])  # no new dimension to a DataArray
def test_assign_coords_existing_multiindex(self) -> None:
    """Overwriting a MultiIndex dimension coordinate emits a FutureWarning."""
    expected_warning = r"updating coordinate.*MultiIndex.*inconsistent"
    multi = self.mda
    with pytest.warns(FutureWarning, match=expected_warning):
        multi.assign_coords(x=range(4))
def test_assign_coords_custom_index(self) -> None:
    """A custom index attached to ``Coordinates`` survives ``assign_coords``."""

    class CustomIndex(Index):
        # bare subclass: only its type matters for this test
        pass

    custom_coords = Coordinates(
        coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()}
    )
    plain = xr.DataArray([0, 1, 2], dims="x")

    assigned = plain.assign_coords(custom_coords)
    assert isinstance(assigned.xindexes["x"], CustomIndex)
def test_assign_coords_no_default_index(self) -> None:
    """Index-less ``Coordinates`` must not gain a default index when assigned."""
    indexless = Coordinates({"y": [1, 2, 3]}, indexes={})
    plain = DataArray([1, 2, 3], dims="y")

    assigned = plain.assign_coords(indexless)

    assert_identical(assigned.coords, indexless, check_default_indexes=False)
    assert "y" not in assigned.xindexes
def test_coords_alignment(self) -> None:
    """A DataArray assigned as a coordinate is aligned to the target's index."""
    left = DataArray([1, 2, 3], [("x", [0, 1, 2])])
    right = DataArray([2, 3, 4], [("x", [1, 2, 3])])

    left.coords["rhs"] = right

    # x=0 has no counterpart in ``right`` (NaN); x=3 is dropped
    want = DataArray(
        [1, 2, 3], coords={"rhs": ("x", [np.nan, 2, 3]), "x": [0, 1, 2]}, dims="x"
    )
    assert_identical(left, want)
def test_set_coords_update_index(self) -> None:
    """Replacing a dimension coordinate also replaces its index."""
    arr = DataArray([1, 2, 3], [("x", [1, 2, 3])])
    arr.coords["x"] = ["a", "b", "c"]

    rebuilt_index = arr.xindexes["x"].to_pandas_index()
    assert rebuilt_index.equals(pd.Index(["a", "b", "c"]))
def test_set_coords_multiindex_level(self) -> None:
    """Assigning over a MultiIndex level coordinate must raise."""
    expected_error = r"cannot drop or update coordinate.*corrupt.*index "
    with pytest.raises(ValueError, match=expected_error):
        self.mda["level_1"] = range(4)
def test_coords_replacement_alignment(self) -> None:
    """Setting a dimension coordinate from a DataArray that has its own coords."""
    # regression test for GH725
    target = DataArray([0, 1, 2], dims=["abc"])
    replacement = DataArray([1, 2, 3], dims=["abc"], coords=[[1, 2, 3]])

    target["abc"] = replacement

    want = DataArray([0, 1, 2], coords=[("abc", [1, 2, 3])])
    assert_identical(target, want)
def test_coords_non_string(self) -> None:
    """Coordinate names need not be strings; here the name is the int ``1``."""
    scalar = DataArray(0, coords={1: 2})
    looked_up = scalar.coords[1]
    want = DataArray(2, coords={1: 2}, name=1)
    assert_identical(looked_up, want)
def test_coords_delitem_delete_indexes(self) -> None:
    """Deleting a dimension coordinate must also remove its index."""
    # regression test for GH3746
    pair = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]})
    del pair.coords["x"]
    assert "x" not in pair.xindexes
def test_coords_delitem_multiindex_level(self) -> None:
    """Deleting a single MultiIndex level coordinate must raise."""
    expected_error = r"cannot remove coordinate.*corrupt.*index "
    with pytest.raises(ValueError, match=expected_error):
        del self.mda.coords["level_1"]
def test_broadcast_like(self) -> None:
    """``broadcast_like`` must agree with the module-level ``broadcast``."""
    # same dimensions, partially overlapping labels
    wide = DataArray(
        np.ones((2, 3)),
        dims=["x", "y"],
        coords={"x": ["a", "b"], "y": ["a", "b", "c"]},
    )
    tall = DataArray(
        np.ones((3, 2)),
        dims=["x", "y"],
        coords={"x": ["a", "b", "c"], "y": ["a", "b"]},
    )
    wide_ref, tall_ref = broadcast(wide, tall)
    wide_like = wide.broadcast_like(tall)
    tall_like = tall.broadcast_like(wide)
    assert_identical(wide_ref, wide_like)
    assert_identical(tall_ref, tall_like)

    # disjoint dimensions
    along_x = DataArray(np.random.randn(5), [("x", range(5))])
    along_y = DataArray(np.random.randn(6), [("y", range(6))])
    x_ref, y_ref = broadcast(along_x, along_y)
    # only the x-array result needs a transpose to match ``broadcast``
    assert_identical(along_x.broadcast_like(along_y), x_ref.transpose("y", "x"))
    assert_identical(along_y.broadcast_like(along_x), y_ref)
def test_reindex_like(self) -> None:
    """``reindex_like`` shrinks to a subset and NaN-pads back to the superset."""
    full = DataArray(np.random.randn(5, 6), [("x", range(5)), ("y", range(6))])
    corner = full[:2, :2]

    # larger -> smaller
    assert_identical(full.reindex_like(corner), corner)

    # smaller -> larger: NaN everywhere outside the corner
    padded = full.copy()
    padded[:] = np.nan
    padded[:2, :2] = corner
    assert_identical(corner.reindex_like(full), padded)
def test_reindex_like_no_index(self) -> None:
    """Unlabeled dimensions only reindex against objects of the same size."""
    unlabeled = DataArray(np.random.randn(5, 6), dims=["x", "y"])
    assert_identical(unlabeled, unlabeled.reindex_like(unlabeled))

    shorter = unlabeled[:4]
    with pytest.raises(ValueError, match=r"different size for unlabeled"):
        unlabeled.reindex_like(shorter)
def test_reindex_regressions(self) -> None:
    """Regression checks for ``reindex`` / ``reindex_like``."""
    series = DataArray(np.random.randn(5), coords=[("time", range(5))])
    other_dim_indexer = DataArray(np.arange(5), dims="time2")
    # an indexer defined along a different dimension is rejected
    with pytest.raises(ValueError):
        series.reindex(time=other_dim_indexer)

    # regression test for #736, reindex can not change complex nums dtype
    complex_values = np.array([1, 2, 3], dtype=complex)
    source = DataArray(complex_values, coords=[[0.1, 0.2, 0.3]])
    template = DataArray([2, 5, 6, 7, 8], coords=[[-1.1, 0.21, 0.31, 0.41, 0.51]])
    reindexed_dtype = source.reindex_like(template, method="pad").dtype
    assert source.dtype == reindexed_dtype
def test_reindex_method(self) -> None:
    """``reindex`` with ``method="backfill"`` and scalar or per-label tolerance."""
    source = DataArray([10, 20], dims="y", coords={"y": [0, 1]})
    targets = [-0.1, 0.5, 1.1]

    # scalar tolerance
    got = source.reindex(y=targets, method="backfill", tolerance=0.2)
    want = DataArray([10, np.nan, np.nan], coords=[("y", targets)])
    assert_identical(want, got)

    # one tolerance per target label
    got = source.reindex(y=targets, method="backfill", tolerance=[0.1, 0.1, 0.01])
    want = DataArray([10, np.nan, np.nan], coords=[("y", targets)])
    assert_identical(want, got)

    # reindex_like against a Dataset, no tolerance
    template = Dataset({"y": targets})
    got = source.reindex_like(template, method="backfill")
    want = DataArray([10, 20, np.nan], coords=[("y", targets)])
    assert_identical(want, got)
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {None: 2, "u": 1}])
def test_reindex_fill_value(self, fill_value) -> None:
    """``reindex`` fills new labels with ``fill_value``.

    A dict ``fill_value`` is read here as ``None`` -> fill for the data and
    ``"u"`` -> fill for coordinate ``u``.
    """
    source = DataArray([10, 20], dims="y", coords={"y": [0, 1], "u": ("y", [1, 2])})
    targets = [0, 1, 2]

    if fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        data_fill, u_fill = np.nan, np.nan
    elif isinstance(fill_value, dict):
        data_fill, u_fill = fill_value[None], fill_value["u"]
    else:
        data_fill, u_fill = fill_value, fill_value

    got = source.reindex(y=targets, fill_value=fill_value)
    want = DataArray(
        [10, 20, data_fill],
        dims="y",
        coords={"y": targets, "u": ("y", [1, 2, u_fill])},
    )
    assert_identical(want, got)
@pytest.mark.parametrize("dtype", [str, bytes])
def test_reindex_str_dtype(self, dtype) -> None:
    """Reindexing onto its own str/bytes coordinate leaves the array unchanged."""
    labels = np.array(["a", "b"], dtype=dtype)
    source = DataArray([1, 2], dims="x", coords={"x": labels})

    reindexed = source.reindex(x=source.x)

    assert_identical(source, reindexed)
    assert reindexed.dtype == source.dtype
def test_reindex_empty_array_dtype(self) -> None:
    """Reindexing an empty float32 array must keep float32."""
    # Dtype of reindex result should match dtype of the original DataArray.
    # See GH issue #7299
    empty = xr.DataArray([], dims=("x",), coords={"x": []}).astype("float32")
    grown = empty.reindex(x=[1.0, 2.0])

    assert (
        empty.dtype == grown.dtype
    ), "Dtype of reindexed DataArray should match dtype of the original DataArray"
    assert (
        grown.dtype == np.float32
    ), "Dtype of reindexed DataArray should remain float32"
def test_rename(self) -> None:
    """``rename`` changes the name, dimensions, coordinates, or all at once."""
    base = xr.DataArray(
        [1, 2, 3], dims="dim", name="name", coords={"coord": ("dim", [5, 6, 7])}
    )

    # change name
    with_new_name = base.rename("name_new")
    assert with_new_name.name == "name_new"
    want_new_name = base.copy()
    want_new_name.name = "name_new"
    assert_identical(with_new_name, want_new_name)

    # change name to None?
    without_name = base.rename(None)
    assert without_name.name is None
    want_no_name = base.copy()
    want_no_name.name = None
    assert_identical(without_name, want_no_name)
    # calling rename() with no arguments must give the same result
    without_name = base.rename()
    assert without_name.name is None
    assert_identical(without_name, want_no_name)

    # change dim
    want_new_dim = xr.DataArray(
        [1, 2, 3],
        dims="dim_new",
        name="name",
        coords={"coord": ("dim_new", [5, 6, 7])},
    )
    with_new_dim = base.rename({"dim": "dim_new"})
    assert with_new_dim.dims == ("dim_new",)
    assert_identical(with_new_dim, want_new_dim)

    # change dim with kwargs
    with_new_dim_kw = base.rename(dim="dim_new")
    assert with_new_dim_kw.dims == ("dim_new",)
    assert_identical(with_new_dim_kw, want_new_dim)

    # change coords
    want_new_coord = xr.DataArray(
        [1, 2, 3], dims="dim", name="name", coords={"coord_new": ("dim", [5, 6, 7])}
    )
    with_new_coord = base.rename({"coord": "coord_new"})
    assert "coord_new" in with_new_coord.coords
    assert_identical(with_new_coord, want_new_coord)

    # change coords with kwargs
    with_new_coord_kw = base.rename(coord="coord_new")
    assert "coord_new" in with_new_coord_kw.coords
    assert_identical(with_new_coord_kw, want_new_coord)

    # change coord and dim
    want_both = xr.DataArray(
        [1, 2, 3],
        dims="dim_new",
        name="name",
        coords={"coord_new": ("dim_new", [5, 6, 7])},
    )
    with_both = base.rename({"dim": "dim_new", "coord": "coord_new"})
    assert with_both.dims == ("dim_new",)
    assert "coord_new" in with_both.coords
    assert_identical(with_both, want_both)

    # change coord and dim with kwargs
    with_both_kw = base.rename(dim="dim_new", coord="coord_new")
    assert with_both_kw.dims == ("dim_new",)
    assert "coord_new" in with_both_kw.coords
    assert_identical(with_both_kw, want_both)

    # change all
    want_all = xr.DataArray(
        [1, 2, 3],
        dims="dim_new",
        name="name_new",
        coords={"coord_new": ("dim_new", [5, 6, 7])},
    )
    with_all = base.rename("name_new", dim="dim_new", coord="coord_new")
    assert with_all.name == "name_new"
    assert with_all.dims == ("dim_new",)
    assert "coord_new" in with_all.coords
    assert_identical(with_all, want_all)
def test_rename_dimension_coord_warnings(self) -> None:
    """Renames that would create a dimension coordinate must warn."""
    # create a dimension coordinate by renaming a dimension or coordinate
    # should raise a warning (no index created)
    no_index_warning = "rename 'x' to 'y' does not create an index.*"

    # coordinate "x" renamed to the existing dimension name "y"
    coord_named_x = DataArray([0, 0], coords={"x": ("y", [0, 1])}, dims="y")
    with pytest.warns(UserWarning, match=no_index_warning):
        coord_named_x.rename(x="y")

    # dimension "x" renamed to the existing coordinate name "y"
    dim_named_x = xr.DataArray([0, 0], coords={"y": ("x", [0, 1])}, dims="x")
    with pytest.warns(UserWarning, match=no_index_warning):
        dim_named_x.rename(x="y")

    # No operation should not raise a warning
    indexed = xr.DataArray(
        data=np.ones((2, 3)),
        dims=["x", "y"],
        coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])},
    )
    with warnings.catch_warnings():
        # escalate any warning to an error so that it fails the test
        warnings.simplefilter("error")
        indexed.rename(x="x")
def test_init_value(self) -> None:
    """A scalar (or missing) data argument is broadcast to the coordinate shape."""
    # scalar data with list-style coords
    want = DataArray(
        np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)]
    )
    got = DataArray(3, dims=["x", "y"], coords=[range(3), range(4)])
    assert_identical(want, got)

    # scalar data with dims/coords taken from an existing array
    want = DataArray(
        np.full((1, 10, 2), 0),
        dims=["w", "x", "y"],
        coords={"x": np.arange(10), "y": ["north", "south"]},
    )
    got = DataArray(0, dims=want.dims, coords=want.coords)
    assert_identical(want, got)

    # no data at all: filled with NaN
    want = DataArray(
        np.full((10, 2), np.nan), coords=[("x", np.arange(10)), ("y", ["a", "b"])]
    )
    got = DataArray(coords=[("x", np.arange(10)), ("y", ["a", "b"])])
    assert_identical(want, got)

    # a 0-d numpy array is not broadcast
    zero_dim = np.array(1)
    with pytest.raises(ValueError, match=r"different number of dim"):
        DataArray(zero_dim, coords={"x": np.arange(10)}, dims=["x"])
    with pytest.raises(ValueError, match=r"does not match the 0 dim"):
        DataArray(zero_dim, coords=[("x", np.arange(10))])
def test_swap_dims(self) -> None:
    """``swap_dims`` given a dict, given kwargs, and with a MultiIndex coord."""

    def assert_same_indexes(want, got) -> None:
        # every index name present on either side must compare equal
        names = set().union(want.xindexes.keys(), got.xindexes.keys())
        for name in names:
            assert got.xindexes[name].equals(want.xindexes[name])

    # mapping argument
    source = DataArray(np.random.randn(3), {"x": list("abc")}, "x")
    want = DataArray(source.values, {"x": ("y", list("abc"))}, dims="y")
    got = source.swap_dims({"x": "y"})
    assert_identical(want, got)
    assert_same_indexes(want, got)

    # as kwargs
    source = DataArray(np.random.randn(3), {"x": list("abc")}, "x")
    want = DataArray(source.values, {"x": ("y", list("abc"))}, dims="y")
    got = source.swap_dims(x="y")
    assert_identical(want, got)
    assert_same_indexes(want, got)

    # multiindex case
    idx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"])
    source = DataArray(np.random.randn(3), {"y": ("x", idx)}, "x")
    want = DataArray(source.values, {"y": idx}, "y")
    got = source.swap_dims({"x": "y"})
    assert_identical(want, got)
    assert_same_indexes(want, got)
def test_expand_dims_error(self) -> None:
    """Invalid ``expand_dims`` arguments raise; boundary axis values do not."""
    arr = DataArray(
        np.random.randn(3, 4),
        dims=["x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3)},
        attrs={"key": "entry"},
    )

    with pytest.raises(TypeError, match=r"dim should be Hashable or"):
        arr.expand_dims(0)
    with pytest.raises(ValueError, match=r"lengths of dim and axis"):
        # dims and axis argument should be the same length
        arr.expand_dims(dim=["a", "b"], axis=[1, 2, 3])
    with pytest.raises(ValueError, match=r"Dimension x already"):
        # Should not pass the already existing dimension.
        arr.expand_dims(dim=["x"])

    # raise if duplicate
    duplicate_error = r"duplicate values"
    with pytest.raises(ValueError, match=duplicate_error):
        arr.expand_dims(dim=["y", "y"])
    with pytest.raises(ValueError, match=duplicate_error):
        arr.expand_dims(dim=["y", "z"], axis=[1, 1])
    with pytest.raises(ValueError, match=duplicate_error):
        arr.expand_dims(dim=["y", "z"], axis=[2, -2])

    # out of bounds error, axis must be in [-4, 3]
    with pytest.raises(IndexError):
        arr.expand_dims(dim=["y", "z"], axis=[2, 4])
    with pytest.raises(IndexError):
        arr.expand_dims(dim=["y", "z"], axis=[2, -5])
    # Does not raise an IndexError
    arr.expand_dims(dim=["y", "z"], axis=[2, -4])
    arr.expand_dims(dim=["y", "z"], axis=[2, 3])

    # the original rebuilds an identical array for the remaining checks
    arr = DataArray(
        np.random.randn(3, 4),
        dims=["x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3)},
        attrs={"key": "entry"},
    )
    # a non-integer size is rejected
    with pytest.raises(TypeError):
        arr.expand_dims({"new_dim": 3.2})

    # Attempt to use both dim and kwargs
    with pytest.raises(ValueError):
        arr.expand_dims({"d": 4}, e=4)
def test_expand_dims(self) -> None:
    """``expand_dims`` inserts length-1 dims at the requested positions.

    Each case is compared against the numpy equivalent and then squeezed
    back to confirm the round trip reproduces the original array.
    """
    source = DataArray(
        np.random.randn(3, 4),
        dims=["x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3)},
        attrs={"key": "entry"},
    )

    def build_expected(values, dims):
        # same coords and attrs as ``source``; only shape and dims vary
        return DataArray(
            values,
            dims=dims,
            coords={"x": np.linspace(0.0, 1.0, 3)},
            attrs={"key": "entry"},
        )

    # pass only dim label
    got = source.expand_dims(dim="y")
    want = build_expected(np.expand_dims(source.values, 0), ["y", "x", "dim_0"])
    assert_identical(want, got)
    assert_identical(source, got.squeeze("y", drop=True))

    # pass multiple dims
    got = source.expand_dims(dim=["y", "z"])
    want = build_expected(
        np.expand_dims(np.expand_dims(source.values, 0), 0),
        ["y", "z", "x", "dim_0"],
    )
    assert_identical(want, got)
    assert_identical(source, got.squeeze(["y", "z"], drop=True))

    # pass multiple dims and axis. Axis is out of order
    got = source.expand_dims(dim=["z", "y"], axis=[2, 1])
    want = build_expected(
        np.expand_dims(np.expand_dims(source.values, 1), 2),
        ["x", "y", "z", "dim_0"],
    )
    assert_identical(want, got)
    # make sure the attrs are tracked
    assert got.attrs["key"] == "entry"
    assert_identical(source, got.squeeze(["z", "y"], drop=True))

    # Negative axis and they are out of order
    got = source.expand_dims(dim=["y", "z"], axis=[-1, -2])
    want = build_expected(
        np.expand_dims(np.expand_dims(source.values, -1), -1),
        ["x", "dim_0", "z", "y"],
    )
    assert_identical(want, got)
    assert got.attrs["key"] == "entry"
    assert_identical(source, got.squeeze(["y", "z"], drop=True))
def test_expand_dims_with_scalar_coordinate(self) -> None:
    """Expanding along a scalar coordinate's name makes it a length-1 dim coord."""
    source = DataArray(
        np.random.randn(3, 4),
        dims=["x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3), "z": 1.0},
        attrs={"key": "entry"},
    )

    expanded = source.expand_dims(dim="z")

    want = DataArray(
        np.expand_dims(source.values, 0),
        dims=["z", "x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3), "z": np.ones(1)},
        attrs={"key": "entry"},
    )
    assert_identical(want, expanded)

    # squeezing with drop=False must restore the scalar coordinate
    assert_identical(source, expanded.squeeze(["z"], drop=False))
def test_expand_dims_with_greater_dim_size(self) -> None:
    """``expand_dims`` accepts a size or a sequence of labels per new dim."""
    source = DataArray(
        np.random.randn(3, 4),
        dims=["x", "dim_0"],
        coords={"x": np.linspace(0.0, 1.0, 3), "z": 1.0},
        attrs={"key": "entry"},
    )

    # dict form: an int size, an existing scalar coord, and explicit labels
    got = source.expand_dims({"y": 2, "z": 1, "dim_1": ["a", "b", "c"]})

    all_coords = {
        "y": [0, 1],
        "z": [1.0],
        "dim_1": ["a", "b", "c"],
        "x": np.linspace(0, 1, 3),
        "dim_0": range(4),
    }
    want = DataArray(
        source.values * np.ones([2, 1, 3, 3, 4]),
        coords=all_coords,
        dims=list(all_coords.keys()),
        attrs={"key": "entry"},
    )
    # "y" and "dim_0" are dropped so the comparison expects no coord for them
    want = want.drop_vars(["y", "dim_0"])
    assert_identical(want, got)

    # Test with kwargs instead of passing dict to dim arg.
    got_kw = source.expand_dims(dim_1=["a", "b", "c"])
    want_kw = DataArray(
        source.values * np.ones([3, 3, 4]),
        coords={
            "dim_1": ["a", "b", "c"],
            "x": np.linspace(0, 1, 3),
            "dim_0": range(4),
            "z": 1.0,
        },
        dims=["dim_1", "x", "dim_0"],
        attrs={"key": "entry"},
    )
    want_kw = want_kw.drop_vars("dim_0")
    assert_identical(want_kw, got_kw)
def test_set_index(self) -> None:
    """Exercise ``DataArray.set_index``: building an index, appending to it, and error cases."""
    # Turn each level of self.mindex into a plain coordinate along "x".
    indexes = [self.mindex.get_level_values(n) for n in self.mindex.names]
    coords = {idx.name: ("x", idx) for idx in indexes}
    array = DataArray(self.mda.values, coords=coords, dims="x")
    expected = self.mda.copy()
    # Attach the same extra coordinate to both sides so it can be appended later.
    level_3 = ("x", [1, 2, 3, 4])
    array["level_3"] = level_3
    expected["level_3"] = level_3
    obj = array.set_index(x=self.mindex.names)
    assert_identical(obj, expected)
    # Appending "level_3" must match setting all three levels in one call.
    obj = obj.set_index(x="level_3", append=True)
    expected = array.set_index(x=["level_1", "level_2", "level_3"])
    assert_identical(obj, expected)
    array = array.set_index(x=["level_1", "level_2", "level_3"])
    assert_identical(array, expected)
    # "level" lives on dimension "y", so it cannot be used to index "x".
    array2d = DataArray(
        np.random.rand(2, 2),
        coords={"x": ("x", [0, 1]), "level": ("y", [1, 2])},
        dims=("x", "y"),
    )
    with pytest.raises(ValueError, match=r"dimension mismatch"):
        array2d.set_index(x="level")
    # Issue 3176: Ensure clear error message on key error.
    with pytest.raises(ValueError, match=r".*variable\(s\) do not exist"):
        obj.set_index(x="level_4")
def test_reset_index(self) -> None:
    """Exercise ``DataArray.reset_index`` on multi-index and single-index arrays."""
    indexes = [self.mindex.get_level_values(n) for n in self.mindex.names]
    coords = {idx.name: ("x", idx) for idx in indexes}
    expected = DataArray(self.mda.values, coords=coords, dims="x")

    # Resetting the dimension, every level, or a mix of both must all give
    # the same result with no index left.
    for names in ("x", self.mindex.names, ["x", "level_1"]):
        obj = self.mda.reset_index(names)
        assert_identical(obj, expected, check_default_indexes=False)
        assert len(obj.xindexes) == 0

    # Resetting a single level leaves a plain PandasIndex on "x".
    coords = {
        "x": ("x", self.mindex.droplevel("level_1")),
        "level_1": ("x", self.mindex.get_level_values("level_1")),
    }
    expected = DataArray(self.mda.values, coords=coords, dims="x")
    obj = self.mda.reset_index(["level_1"])
    assert_identical(obj, expected, check_default_indexes=False)
    assert list(obj.xindexes) == ["x"]
    assert type(obj.xindexes["x"]) is PandasIndex

    # drop=True removes the coordinates as well as the index.
    expected = DataArray(self.mda.values, dims="x")
    obj = self.mda.reset_index("x", drop=True)
    assert_identical(obj, expected, check_default_indexes=False)

    array = self.mda.copy()
    array = array.reset_index(["x"], drop=True)
    assert_identical(array, expected, check_default_indexes=False)

    # single index
    array = DataArray([1, 2], coords={"x": ["a", "b"]}, dims="x")
    obj = array.reset_index("x")
    assert_equal(obj.x.variable, array.x.variable.to_base_variable())
    assert len(obj.xindexes) == 0
def test_reset_index_keep_attrs(self) -> None:
    """Resetting an index must leave the coordinate's attributes in place."""
    indexed_coord = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    source = DataArray([1, 0], [indexed_coord])

    result = source.reset_index("coord_1")

    assert result.coord_1.attrs == source.coord_1.attrs
    # No index should remain after the reset.
    assert len(result.xindexes) == 0
def test_reorder_levels(self) -> None:
    """Reorder multi-index levels; reject dimensions that have no multi-index."""
    swapped_index = self.mindex.reorder_levels(["level_2", "level_1"])
    wanted = DataArray(self.mda.values, coords={"x": swapped_index}, dims="x")
    assert_identical(self.mda.reorder_levels(x=["level_2", "level_1"]), wanted)

    # A dimension without any coordinate raises KeyError ...
    plain = DataArray([1, 2], dims="x")
    with pytest.raises(KeyError):
        plain.reorder_levels(x=["level_1", "level_2"])

    # ... and one with an ordinary (non-multi) index raises ValueError.
    plain["x"] = [0, 1]
    with pytest.raises(ValueError, match=r"has no MultiIndex"):
        plain.reorder_levels(x=["level_1", "level_2"])
def test_set_xindex(self) -> None:
    """``set_xindex`` passes extra keyword options on to ``from_variables``."""

    class IndexWithOptions(Index):
        # Minimal index that only records the option it was built with.
        def __init__(self, opt):
            self.opt = opt

        @classmethod
        def from_variables(cls, variables, options):
            return cls(options["opt"])

    source = DataArray(
        [1, 2, 3, 4], coords={"foo": ("x", ["a", "a", "b", "b"])}, dims="x"
    )
    with_index = source.set_xindex("foo", IndexWithOptions, opt=1)

    assert "foo" in with_index.xindexes
    assert with_index.xindexes["foo"].opt == 1  # type: ignore[attr-defined]
def test_dataset_getitem(self) -> None:
    """Indexing the fixture dataset by ``"foo"`` yields the fixture DataArray."""
    assert_identical(self.ds["foo"], self.dv)
def test_array_interface(self) -> None:
    """Check NumPy interoperability: ``np.asarray``, ndarray-style methods and ufuncs."""
    darr, var, raw = self.dv, self.v, self.x

    assert_array_equal(np.asarray(darr), raw)

    # test patched in methods
    assert_array_equal(darr.astype(float), var.astype(float))
    assert_array_equal(darr.argsort(), var.argsort())
    assert_array_equal(darr.clip(2, 3), var.clip(2, 3))

    # test ufuncs
    sin_dataset = deepcopy(self.ds)
    sin_dataset["foo"][:] = np.sin(raw)
    assert_equal(sin_dataset["foo"], np.sin(darr))
    assert_array_equal(darr, np.maximum(var, darr))

    zeros = Variable(["x", "y"], np.zeros((10, 20)))
    assert_equal(darr, np.maximum(darr, zeros))
def test_astype_attrs(self) -> None:
    """``astype`` keeps attrs by default and drops them with ``keep_attrs=False``."""
    candidates = [self.va.copy(), self.mda.copy(), self.ds.copy()]
    for obj in candidates:
        obj.attrs["foo"] = "bar"
        kept = obj.astype(float)
        assert obj.attrs == kept.attrs
        dropped = obj.astype(float, keep_attrs=False)
        assert not dropped.attrs
def test_astype_dtype(self) -> None:
    """Casting integers to float keeps the values and changes only the dtype."""
    as_int = DataArray([-1, 1, 2, 3, 1000])
    as_float = as_int.astype(float)

    assert_array_equal(as_int, as_float)
    assert np.issubdtype(as_int.dtype, np.integer)
    assert np.issubdtype(as_float.dtype, np.floating)
def test_astype_order(self) -> None:
    """``astype(..., order="F")`` returns Fortran-contiguous data with equal values."""
    c_ordered = DataArray([[1, 2], [3, 4]])
    f_ordered = c_ordered.astype("d", order="F")

    assert_equal(c_ordered, f_ordered)
    assert c_ordered.values.flags["C_CONTIGUOUS"]
    assert f_ordered.values.flags["F_CONTIGUOUS"]
def test_astype_subok(self) -> None:
    """``subok`` decides whether ``astype`` keeps an ndarray subclass."""

    class NdArraySubclass(np.ndarray):
        pass

    source = DataArray(NdArraySubclass(np.arange(3)))
    plain_result = source.astype("d", subok=False)
    subclass_result = source.astype("d", subok=True)

    # The checks below only make sense if the subclass survived construction.
    subclass_backed = isinstance(source.data, NdArraySubclass)
    if not subclass_backed:
        pytest.xfail("DataArray cannot be backed yet by a subclasses of np.ndarray")

    assert isinstance(plain_result.data, np.ndarray)
    assert not isinstance(plain_result.data, NdArraySubclass)
    assert isinstance(subclass_result.data, NdArraySubclass)
def test_is_null(self) -> None:
    """``isnull`` / ``notnull`` must agree with ``pd.isnull`` on data containing NaNs."""
    # Draw from a normal distribution so that some values are negative.
    # Uniform samples in [0, 1) never satisfy ``x < 0``, which would leave
    # the array without a single NaN.
    x = np.random.default_rng(42).standard_normal((5, 6))
    x[x < 0] = np.nan
    # Guard against this test silently running on NaN-free data.
    assert np.isnan(x).any()
    original = DataArray(x, [-np.arange(5), np.arange(6)], ["x", "y"])
    expected = DataArray(pd.isnull(x), [-np.arange(5), np.arange(6)], ["x", "y"])
    assert_identical(expected, original.isnull())
    assert_identical(~expected, original.notnull())
def test_math(self) -> None:
    """Adding zero, in any supported operand type, must leave the array unchanged."""
    raw, var, darr = self.x, self.v, self.dv
    # variable math was already tested extensively, so let's just make sure
    # that all types are properly converted here
    unchanged = [
        +darr,
        darr + 0,
        0 + darr,
        darr + 0 * var,
        0 * var + darr,
        darr + 0 * raw,
        0 * raw + darr,
        darr + 0 * darr,
        0 * darr + darr,
    ]
    for result in unchanged:
        assert_equal(darr, result)
def test_math_automatic_alignment(self) -> None:
    """Binary math keeps only the labels the two operands share."""
    left = DataArray(range(5), [("x", range(5))])
    right = DataArray(range(5), [("x", range(1, 6))])
    difference = left - right
    wanted = DataArray(np.ones(4), [("x", [1, 2, 3, 4])])
    assert_identical(difference, wanted)
def test_non_overlapping_dataarrays_return_empty_result(self) -> None:
    """Adding arrays whose ``x`` labels do not overlap gives an empty result."""
    whole = DataArray(range(5), [("x", range(5))])
    head = whole.isel(x=slice(2))
    tail = whole.isel(x=slice(2, None))
    total = head + tail
    assert len(total["x"]) == 0
def test_empty_dataarrays_return_empty_result(self) -> None:
    """Multiplying an empty array by itself stays empty."""
    empty = DataArray(data=[])
    product = empty * empty
    assert len(product["dim_0"]) == 0
def test_inplace_math_basics(self) -> None:
    """``+=`` keeps the same DataArray, Variable and underlying buffer."""
    backing = self.x
    darr = self.dv
    var_before = darr.variable

    alias = darr
    alias += 1

    # Object identity is preserved at every level ...
    assert alias is darr
    assert alias.variable is var_before
    # ... and the original ndarray was updated in place.
    assert_array_equal(alias.values, backing)
    assert source_ndarray(alias.values) is backing
def test_inplace_math_error(self) -> None:
    """In-place math on an index coordinate raises and leaves it untouched."""
    values = np.random.rand(4)
    time_axis = np.arange(4)
    foo = DataArray(values, coords=[time_axis], dims=["time"])
    snapshot = time_axis.copy()

    expected_message = r"Values of an IndexVariable are immutable"
    with pytest.raises(TypeError, match=expected_message):
        foo.coords["time"] += 1

    # Check error throwing prevented inplace operation
    assert_array_equal(foo.coords["time"], snapshot)
def test_inplace_math_automatic_alignment(self) -> None:
    """In-place math between misaligned arrays must raise ``MergeError``."""
    left = DataArray(range(5), [("x", range(5))])
    right = DataArray(range(1, 6), [("x", range(1, 6))])
    message = "Automatic alignment is not supported"

    with pytest.raises(xr.MergeError, match=message):
        left += right
    with pytest.raises(xr.MergeError, match=message):
        right += left
def test_math_name(self) -> None:
    """Check when arithmetic keeps the array name and when it resets it to None."""
    # Verify that name is preserved only when it can be done unambiguously.
    # The rule (copied from pandas.Series) is keep the current name only if
    # the other object has the same name or no name attribute and this
    # object isn't a coordinate; otherwise reset to None.
    named = self.dv

    # Unary ops, plain numbers and same-named operands keep "foo".
    assert (+named).name == "foo"
    assert (named + 0).name == "foo"
    assert (named + named).name == "foo"

    # An unnamed or differently named operand resets the name.
    assert (named + named.rename(None)).name is None
    assert (named + named.rename("bar")).name is None

    # The same checks with the "x" coordinate as an operand.
    assert (+named["x"]).name == "x"
    assert (named["x"] + 0).name == "x"
    assert (named + named["x"]).name is None
def test_math_with_coords(self) -> None:
    """Check which coordinates the result carries after arithmetic between arrays."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray(np.random.randn(2, 3), coords, dims=["x", "y"])
    # Arithmetic with a plain number keeps every coordinate, on either side.
    actual = orig + 1
    expected = DataArray(orig.values + 1, orig.coords)
    assert_identical(expected, actual)
    actual = 1 + orig
    assert_identical(expected, actual)
    # Adding a single element: the expected result has no "lat" coordinate.
    actual = orig + orig[0, 0]
    exp_coords = {k: v for k, v in coords.items() if k != "lat"}
    expected = DataArray(
        orig.values + orig.values[0, 0], exp_coords, dims=["x", "y"]
    )
    assert_identical(expected, actual)
    actual = orig[0, 0] + orig
    assert_identical(expected, actual)
    # Two single elements: only the scalar coordinate "c" is expected.
    actual = orig[0, 0] + orig[-1, -1]
    expected = DataArray(orig.values[0, 0] + orig.values[-1, -1], {"c": -999})
    assert_identical(expected, actual)
    # A column plus a row broadcasts to 2D; operand order sets the dim order.
    actual = orig[:, 0] + orig[0, :]
    exp_values = orig[:, 0].values[:, None] + orig[0, :].values[None, :]
    expected = DataArray(exp_values, exp_coords, dims=["x", "y"])
    assert_identical(expected, actual)
    actual = orig[0, :] + orig[:, 0]
    assert_identical(expected.transpose(transpose_coords=True), actual)
    # Subtracting the transpose gives zeros, in the left operand's dim order.
    actual = orig - orig.transpose(transpose_coords=True)
    expected = DataArray(np.zeros((2, 3)), orig.coords)
    assert_identical(expected, actual)
    actual = orig.transpose(transpose_coords=True) - orig
    assert_identical(expected.transpose(transpose_coords=True), actual)
    # Scalar coords: "c" differs between the operands and is expected to be
    # absent; "d" exists on one operand only and is expected to be kept.
    alt = DataArray([1, 1], {"x": [-1, -2], "c": "foo", "d": 555}, "x")
    actual = orig + alt
    expected = orig + 1
    expected.coords["d"] = 555
    del expected.coords["c"]
    assert_identical(expected, actual)
    actual = alt + orig
    assert_identical(expected, actual)
def test_index_math(self) -> None:
    """Arithmetic and comparisons on an array named after its own dimension."""
    source = DataArray(range(3), dims="x", name="x")

    shifted = DataArray(1 + np.arange(3), dims="x", name="x")
    assert_identical(shifted, source + 1)

    # regression tests for #254
    greater_than_first = DataArray([False, True, True], dims="x", name="x")
    assert_identical(greater_than_first, source[0] < source)
    assert_identical(greater_than_first, source > source[0])
def test_dataset_math(self) -> None:
    """Arithmetic on DataArrays taken from Datasets, including in-place updates."""
    # more comprehensive tests with multiple dataset variables
    obs = Dataset(
        {"tmin": ("x", np.arange(5)), "tmax": ("x", 10 + np.arange(5))},
        {"x": ("x", 0.5 * np.arange(5)), "loc": ("x", range(-2, 3))},
    )
    # Scaling by a number: the expected result keeps the name and all coords.
    actual1 = 2 * obs["tmax"]
    expected1 = DataArray(2 * (10 + np.arange(5)), obs.coords, name="tmax")
    assert_identical(actual1, expected1)
    # Differently named operands: the expected result is unnamed.
    actual2 = obs["tmax"] - obs["tmin"]
    expected2 = DataArray(10 * np.ones(5), obs.coords)
    assert_identical(actual2, expected2)
    sim = Dataset(
        {
            "tmin": ("x", 1 + np.arange(5)),
            "tmax": ("x", 11 + np.arange(5)),
            # does *not* include 'loc' as a coordinate
            "x": ("x", 0.5 * np.arange(5)),
        }
    )
    # "loc" exists only on obs, yet the expected result still has obs.coords.
    actual3 = sim["tmin"] - obs["tmin"]
    expected3 = DataArray(np.ones(5), obs.coords, name="tmin")
    assert_identical(actual3, expected3)
    actual4 = -obs["tmin"] + sim["tmin"]
    assert_identical(actual4, expected3)
    # In-place subtraction on a copy must match the out-of-place result.
    actual5 = sim["tmin"].copy()
    actual5 -= obs["tmin"]
    assert_identical(actual5, expected3)
    # Assigning the result back into a Dataset copy.
    actual6 = sim.copy()
    actual6["tmin"] = sim["tmin"] - obs["tmin"]
    expected6 = Dataset(
        {"tmin": ("x", np.ones(5)), "tmax": ("x", sim["tmax"].values)}, obs.coords
    )
    assert_identical(actual6, expected6)
    # In-place update of a Dataset item must give the same Dataset.
    actual7 = sim.copy()
    actual7["tmin"] -= obs["tmin"]
    assert_identical(actual7, expected6)
def test_stack_unstack(self) -> None:
    """Round-trip arrays through ``stack`` / ``unstack``."""
    orig = DataArray(
        [[0, 1], [2, 3]],
        dims=["x", "y"],
        attrs={"foo": 2},
    )
    # With nothing stacked, unstack() must return an identical array.
    assert_identical(orig, orig.unstack())
    # test GH3000
    # Stacking an empty selection must still give a MultiIndex with both levels.
    a = orig[:0, :1].stack(new_dim=("x", "y")).indexes["new_dim"]
    b = pd.MultiIndex(
        levels=[pd.Index([], dtype=np.int64), pd.Index([0], dtype=np.int64)],
        codes=[[], []],
        names=["x", "y"],
    )
    pd.testing.assert_index_equal(a, b)
    # The round trip adds "x"/"y" coords, which are dropped before comparing.
    actual = orig.stack(z=["x", "y"]).unstack("z").drop_vars(["x", "y"])
    assert_identical(orig, actual)
    # Same round trip, with an ellipsis in place of the dimension names.
    actual = orig.stack(z=[...]).unstack("z").drop_vars(["x", "y"])
    assert_identical(orig, actual)
    dims = ["a", "b", "c", "d", "e"]
    coords = {
        "a": [0],
        "b": [1, 2],
        "c": [3, 4, 5],
        "d": [6, 7],
        "e": [8],
    }
    orig = xr.DataArray(np.random.rand(1, 2, 3, 2, 1), coords=coords, dims=dims)
    stacked = orig.stack(ab=["a", "b"], cd=["c", "d"])
    # Unstack both stacked dims, by name and then implicitly; the results are
    # transposed back to the original dim order before comparing.
    unstacked = stacked.unstack(["ab", "cd"])
    assert_identical(orig, unstacked.transpose(*dims))
    unstacked = stacked.unstack()
    assert_identical(orig, unstacked.transpose(*dims))
def test_stack_unstack_decreasing_coordinate(self) -> None:
    """A stack/unstack round trip keeps a decreasing coordinate as it was."""
    # regression test for GH980
    descending_y = np.arange(3, 0, -1)
    source = DataArray(
        np.random.rand(3, 4),
        dims=("y", "x"),
        coords={"x": np.arange(4), "y": descending_y},
    )
    roundtripped = source.stack(allpoints=["y", "x"]).unstack("allpoints")
    assert_identical(source, roundtripped)
def test_unstack_pandas_consistency(self) -> None:
    """Unstacking a MultiIndex-backed array matches ``pandas.Series.unstack``."""
    frame = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]})
    series = frame.set_index(["x", "y"])["foo"]

    via_pandas = DataArray(series.unstack(), name="foo")
    via_xarray = DataArray(series, dims="z").unstack("z")
    assert_identical(via_pandas, via_xarray)
def test_unstack_requires_unique(self) -> None:
    """Unstacking a MultiIndex with duplicate entries must raise ``ValueError``."""
    frame = pd.DataFrame({"foo": range(2), "x": ["a", "a"], "y": [0, 0]})
    duplicated = frame.set_index(["x", "y"])["foo"]
    message = "Cannot unstack MultiIndex containing duplicates"
    with pytest.raises(ValueError, match=message):
        DataArray(duplicated, dims="z").unstack("z")
@pytest.mark.filterwarnings("error")
def test_unstack_roundtrip_integer_array(self) -> None:
    """An integer array survives stack/unstack, with warnings turned into errors."""
    values = np.arange(6).reshape(2, 3)
    source = xr.DataArray(
        values,
        coords={"x": ["a", "b"], "y": [0, 1, 2]},
        dims=["x", "y"],
    )
    restored = source.stack(z=["x", "y"]).unstack()
    assert_identical(source, restored)
def test_stack_nonunique_consistency(self, da) -> None:
    """Stacking a 2D slice of the ``da`` fixture matches ``DataFrame.stack``."""
    plane = da.isel(time=0, drop=True)  # 2D
    via_xarray = plane.stack(z=["a", "x"])
    via_pandas = DataArray(plane.to_pandas().stack(), dims="z")
    assert_identical(via_pandas, via_xarray)
def test_to_unstacked_dataset_raises_value_error(self) -> None:
    """``to_unstacked_dataset`` rejects a dimension that is not stacked."""
    plain = DataArray([0, 1], dims="x", coords={"x": [0, 1]})
    message = "'x' is not a stacked coordinate"
    with pytest.raises(ValueError, match=message):
        plain.to_unstacked_dataset("x", 0)
def test_transpose(self) -> None:
    """Exercise ``DataArray.transpose``: ordering, ellipsis and missing dimensions."""
    da = DataArray(
        np.random.randn(3, 4, 5),
        dims=("x", "y", "z"),
        coords={
            "x": range(3),
            "y": range(4),
            "z": range(5),
            "xy": (("x", "y"), np.random.randn(3, 4)),
        },
    )
    # With no dims given the dimension order is reversed; the coordinates
    # are expected to be unchanged when transpose_coords=False.
    actual = da.transpose(transpose_coords=False)
    expected = DataArray(da.values.T, dims=("z", "y", "x"), coords=da.coords)
    assert_equal(expected, actual)
    # With transpose_coords=True the 2D coordinate "xy" is transposed as well.
    actual = da.transpose("z", "y", "x", transpose_coords=True)
    expected = DataArray(
        da.values.T,
        dims=("z", "y", "x"),
        coords={
            "x": da.x.values,
            "y": da.y.values,
            "z": da.z.values,
            "xy": (("y", "x"), da.xy.values.T),
        },
    )
    assert_equal(expected, actual)
    # same as previous but with ellipsis
    actual = da.transpose("z", ..., "x", transpose_coords=True)
    assert_equal(expected, actual)
    # same as previous but with a missing dimension
    actual = da.transpose(
        "z", "y", "x", "not_a_dim", transpose_coords=True, missing_dims="ignore"
    )
    assert_equal(expected, actual)
    # Listing only some of the dimensions, without an ellipsis, is an error.
    with pytest.raises(ValueError):
        da.transpose("x", "y")
    # Unknown dimensions raise by default and warn with missing_dims="warn".
    with pytest.raises(ValueError):
        da.transpose("not_a_dim", "z", "x", ...)
    with pytest.warns(UserWarning):
        da.transpose("not_a_dim", "y", "x", ..., missing_dims="warn")
def test_squeeze(self) -> None:
    """Squeezing the DataArray matches squeezing its underlying Variable."""
    via_variable = self.dv.variable.squeeze()
    via_dataarray = self.dv.squeeze().variable
    assert_equal(via_variable, via_dataarray)
def test_squeeze_drop(self) -> None:
    """``squeeze`` with ``drop`` and ``axis``, plus the axis/dim conflict error."""
    single = DataArray([1], [("x", [0])])

    # drop=True discards the squeezed coordinate, drop=False keeps it as a scalar.
    assert_identical(DataArray(1), single.squeeze(drop=True))
    assert_identical(DataArray(1, {"x": 0}), single.squeeze(drop=False))

    # Squeezing by a single axis number.
    cube = DataArray([[[0.0, 1.0]]], dims=["dim_0", "dim_1", "dim_2"])
    wanted = DataArray([[0.0, 1.0]], dims=["dim_1", "dim_2"])
    assert_identical(wanted, cube.squeeze(axis=0))

    # Squeezing by several axis numbers at once.
    hypercube = DataArray(
        [[[[0.0, 1.0]]]], dims=["dim_0", "dim_1", "dim_2", "dim_3"]
    )
    wanted = DataArray([[0.0, 1.0]], dims=["dim_1", "dim_3"])
    assert_identical(wanted, hypercube.squeeze(axis=(0, 2)))

    # axis and dim cannot be given together.
    cube = DataArray([[[0.0, 1.0]]], dims=["dim_0", "dim_1", "dim_2"])
    with pytest.raises(ValueError):
        cube.squeeze(axis=0, dim="dim_1")
def test_drop_coordinates(self) -> None:
    """``drop_vars`` removes coordinates and handles names that are missing."""
    bare = DataArray(np.random.randn(2, 3), dims=["x", "y"])
    with_z = bare.copy()
    with_z.coords["z"] = 2

    assert_identical(bare, with_z.drop_vars("z"))

    # Missing names raise by default and are skipped with errors="ignore".
    with pytest.raises(ValueError):
        with_z.drop_vars("not found")
    assert_identical(bare.drop_vars("not found", errors="ignore"), bare)

    with pytest.raises(ValueError, match=r"cannot be found"):
        with_z.drop_vars("w")
    assert_identical(bare.drop_vars("w", errors="ignore"), bare)

    # The array's own name is not a droppable variable.
    renamed = with_z.rename("foo")
    with pytest.raises(ValueError, match=r"cannot be found"):
        renamed.drop_vars("foo")
    assert_identical(renamed.drop_vars("foo", errors="ignore"), renamed)
def test_drop_vars_callable(self) -> None:
    """``drop_vars`` accepts a callable that selects the names to drop."""
    source = DataArray(
        np.random.randn(2, 3), dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4, 5]}
    )
    by_name = source.drop_vars(["x", "y"])
    by_callable = source.drop_vars(lambda x: x.indexes)
    assert_identical(by_name, by_callable)
def test_drop_multiindex_level(self) -> None:
    """Dropping one multi-index level warns and removes the whole index."""
    # GH6505
    wanted = self.mda.drop_vars(["x", "level_1", "level_2"])
    with pytest.warns(DeprecationWarning):
        result = self.mda.drop_vars("level_1")
    assert_identical(wanted, result)
def test_drop_all_multiindex_levels(self) -> None:
    """Dropping the dimension and all its levels removes the multi-index cleanly."""
    dim_levels = ["x", "level_1", "level_2"]
    result = self.mda.drop_vars(dim_levels)
    # no error, multi-index dropped
    still_indexed = [name for name in dim_levels if name in result.xindexes]
    assert not still_indexed
def test_drop_index_labels(self) -> None:
    """``drop_sel`` removes labels; unknown labels raise unless ignored."""
    arr = DataArray(np.random.randn(2, 3), coords={"y": [0, 1, 2]}, dims=["x", "y"])
    remaining = arr[:, 2:]

    assert_identical(arr.drop_sel(y=[0, 1]), remaining)

    # Label 3 does not exist on "y".
    with pytest.raises((KeyError, ValueError), match=r"not .* in axis"):
        arr.drop_sel(y=[0, 1, 3])
    assert_identical(arr.drop_sel(y=[0, 1, 3], errors="ignore"), remaining)

    # The older ``drop`` spelling must emit a DeprecationWarning.
    with pytest.warns(DeprecationWarning):
        arr.drop([0, 1, 3], dim="y", errors="ignore")  # type: ignore[arg-type]
def test_drop_index_positions(self) -> None:
    """``drop_isel`` removes entries by integer position."""
    arr = DataArray(np.random.randn(2, 3), dims=["x", "y"])
    trimmed = arr.drop_isel(y=[0, 1])
    assert_identical(trimmed, arr[:, 2:])
def test_drop_indexes(self) -> None:
    """``drop_indexes`` removes an index and can ignore unknown names."""
    arr = DataArray([1, 2, 3], coords={"x": ("x", [1, 2, 3])}, dims="x")

    without_index = arr.drop_indexes("x")
    assert "x" not in without_index.xindexes

    # An unknown coordinate name is a no-op with errors="ignore".
    untouched = arr.drop_indexes("not_a_coord", errors="ignore")
    assert_identical(untouched, arr)
def test_dropna(self) -> None:
    """``dropna`` along either dimension, with the ``how`` and ``thresh`` options."""
    values = np.random.randn(4, 4)
    values[::2, 0] = np.nan  # NaN in column 0 of rows 0 and 2
    arr = DataArray(values, dims=["a", "b"])

    # Only the odd rows are free of NaN.
    assert_identical(arr.dropna("a"), arr[1::2])
    # Nothing is dropped with how="all" along "b".
    assert_identical(arr.dropna("b", how="all"), arr)
    # Nothing is dropped with thresh=1 along "a".
    assert_identical(arr.dropna("a", thresh=1), arr)
    # Column 0 has two valid values, fewer than the three required.
    assert_identical(arr.dropna("b", thresh=3), arr[:, 1:])
def test_where(self) -> None:
    """``where(..., drop=True)`` keeps only the entries matching the condition."""
    arr = DataArray(np.arange(4), dims="x")
    kept = arr.where(arr.x < 2, drop=True)
    assert_identical(kept, arr.sel(x=slice(2)))
def test_where_lambda(self) -> None:
    """``where`` accepts a callable as its condition."""
    arr = DataArray(np.arange(4), dims="y")
    kept = arr.where(lambda x: x.y < 2, drop=True)
    assert_identical(kept, arr.sel(y=slice(2)))
def test_where_other_lambda(self) -> None:
    """``where`` accepts callables for both the condition and the replacement."""
    arr = DataArray(np.arange(4), dims="y")

    # The first two entries are kept; the rest are replaced by value + 1.
    kept = arr.sel(y=slice(2))
    replaced = arr.sel(y=slice(2, None)) + 1
    wanted = xr.concat([kept, replaced], dim="y")

    result = arr.where(lambda x: x.y < 2, lambda x: x + 1)
    assert_identical(result, wanted)
def test_where_string(self) -> None:
    """Masking a string array gives an object array with NaN in masked slots."""
    strings = DataArray(["a", "b"])
    masked = strings.where([True, False])
    wanted = DataArray(np.array(["a", np.nan], dtype=object))
    assert_identical(masked, wanted)
def test_cumops(self) -> None:
    """``cumsum`` / ``cumprod`` over all or one dimension keep every coordinate."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    # (method name, positional arguments, expected values)
    cases = [
        ("cumsum", (), [[-1, -1, 0], [-4, -4, 0]]),
        ("cumsum", ("x",), [[-1, 0, 1], [-4, 0, 4]]),
        ("cumsum", ("y",), [[-1, -1, 0], [-3, -3, 0]]),
        ("cumprod", ("x",), [[-1, 0, 1], [3, 0, 3]]),
        ("cumprod", ("y",), [[-1, 0, 0], [-3, 0, 0]]),
    ]
    for method_name, args, values in cases:
        actual = getattr(orig, method_name)(*args)
        expected = DataArray(values, coords, dims=["x", "y"])
        assert_identical(expected, actual)
def test_reduce(self) -> None:
    """Check ``mean`` / ``count`` / ``reduce`` results and the coordinates they keep."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    # A full reduction, implicit or with both dims named, keeps only "c".
    overall = DataArray(0, {"c": -999})
    assert_identical(overall, orig.mean())
    assert_identical(overall, orig.mean(["x", "y"]))

    # Reducing one dimension, given as a string or a list.
    along_x = DataArray([-2, 0, 2], {"y": coords["y"], "c": -999}, "y")
    assert_identical(along_x, orig.mean("x"))
    assert_identical(along_x, orig.mean(["x"]))

    along_y = DataArray([0, 0], {"x": coords["x"], "c": -999}, "x")
    assert_identical(along_y, orig.mean("y"))

    # A generic reduce must agree with the same call on the Variable.
    assert_equal(self.dv.reduce(np.mean, "x").variable, self.v.reduce(np.mean, "x"))

    # count() ignores the single NaN.
    with_nan = DataArray([[1, 0, np.nan], [3, 0, 3]], coords, dims=["x", "y"])
    assert_identical(DataArray(5, {"c": -999}), with_nan.count())

    # uint support
    unsigned = DataArray(np.arange(6).reshape(3, 2).astype("uint"), dims=["x", "y"])
    assert unsigned.dtype.kind == "u"
    unsigned_mean = unsigned.mean(dim="x", skipna=True)
    signed_mean = DataArray(unsigned.values.astype(int), dims=["x", "y"]).mean("x")
    assert_equal(unsigned_mean, signed_mean)
def test_reduce_keepdims(self) -> None:
    """``mean(keepdims=True)`` keeps reduced dimensions with size one."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    # Mean on all axes loses non-constant coordinates
    reduced_all = orig.mean(keepdims=True)
    wanted_all = DataArray(
        orig.data.mean(keepdims=True),
        dims=orig.dims,
        coords={"c": coords["c"]},
    )
    assert_equal(reduced_all, wanted_all)
    assert reduced_all.sizes["x"] == 1
    assert reduced_all.sizes["y"] == 1

    # Mean on specific axes loses coordinates not involving that axis
    reduced_y = orig.mean("y", keepdims=True)
    surviving = {
        name: value for name, value in coords.items() if name not in ["y", "lat"]
    }
    wanted_y = DataArray(
        orig.data.mean(axis=1, keepdims=True),
        dims=orig.dims,
        coords=surviving,
    )
    assert_equal(reduced_y, wanted_y)
@requires_bottleneck
def test_reduce_keepdims_bottleneck(self) -> None:
    """``reduce(..., keepdims=True)`` works with a bottleneck reduction function."""
    import bottleneck

    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    # Bottleneck does not have its own keepdims implementation
    via_bottleneck = orig.reduce(bottleneck.nanmean, keepdims=True)
    via_mean = orig.mean(keepdims=True)
    assert_equal(via_bottleneck, via_mean)
def test_reduce_dtype(self) -> None:
    """``mean(dtype=...)`` returns a result of the requested float dtype."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    for requested in (np.float16, np.float32, np.float64):
        result = orig.astype(float).mean(dtype=requested)
        assert result.dtype == requested
def test_reduce_out(self) -> None:
    """Passing ``out=`` to a reduction must raise ``TypeError``."""
    coords = {
        "x": [-1, -2],
        "y": ["ab", "cd", "ef"],
        "lat": (["x", "y"], [[1, 2, 3], [-1, -2, -3]]),
        "c": -999,
    }
    orig = DataArray([[-1, 0, 1], [-3, 0, 3]], coords, dims=["x", "y"])

    with pytest.raises(TypeError):
        orig.mean(out=np.ones(orig.shape))
@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
    "axis, dim",
    zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]], strict=True),
)
def test_quantile(self, q, axis, dim, skipna, compute_backend) -> None:
    """Compare ``DataArray.quantile`` with NumPy percentiles on data holding a NaN."""
    var = self.va.copy(deep=True)
    var[0, 0] = np.nan

    result = DataArray(var).quantile(q, dim=dim, keep_attrs=True, skipna=skipna)

    # The NaN-skipping reference is used for skipna=True and skipna=None.
    if skipna in (True, None):
        reference_func = np.nanpercentile
    else:
        reference_func = np.percentile
    reference = reference_func(var.values, np.array(q) * 100, axis=axis)
    np.testing.assert_allclose(result.values, reference)

    # Only a non-scalar q adds a "quantile" dimension.
    if not is_scalar(q):
        assert "quantile" in result.dims
    else:
        assert "quantile" not in result.dims

    assert result.attrs == self.attrs
@pytest.mark.parametrize("method", ["midpoint", "lower"])
def test_quantile_method(self, method) -> None:
    """``quantile(method=...)`` matches ``np.nanquantile`` with the same method."""
    q = [0.25, 0.5, 0.75]
    result = DataArray(self.va).quantile(q, method=method)
    reference = np.nanquantile(self.dv.values, np.array(q), method=method)
    np.testing.assert_allclose(result.values, reference)
@pytest.mark.parametrize("method", ["midpoint", "lower"])
def test_quantile_interpolation_deprecated(self, method) -> None:
    """The ``interpolation`` keyword warns, still works, and conflicts with ``method``."""
    da = DataArray(self.va)
    q = [0.25, 0.5, 0.75]

    rename_message = "`interpolation` argument to quantile was renamed to `method`"
    with pytest.warns(FutureWarning, match=rename_message):
        via_old_keyword = da.quantile(q, interpolation=method)

    via_new_keyword = da.quantile(q, method=method)
    np.testing.assert_allclose(via_old_keyword.values, via_new_keyword.values)

    # Giving both keywords is an error; warnings are recorded rather than shown.
    with warnings.catch_warnings(record=True), pytest.raises(
        TypeError, match="interpolation and method keywords"
    ):
        da.quantile(q, method=method, interpolation=method)
def test_reduce_keep_attrs(self) -> None:
    """Reductions drop attrs by default and keep them with ``keep_attrs=True``."""
    # Test dropped attrs
    without_attrs = self.va.mean()
    assert len(without_attrs.attrs) == 0
    assert without_attrs.attrs == {}

    # Test kept attrs
    with_attrs = self.va.mean(keep_attrs=True)
    assert len(with_attrs.attrs) == len(self.attrs)
    assert with_attrs.attrs == self.attrs
def test_assign_attrs(self) -> None:
    """``assign_attrs`` returns a new object and leaves the original untouched."""
    expected = DataArray([], attrs=dict(a=1, b=2))
    new = DataArray([])
    # Call assign_attrs on ``new`` itself so the check below really verifies
    # that the source object is not mutated.
    actual = new.assign_attrs(a=1, b=2)
    assert_identical(actual, expected)
    assert new.attrs == {}

    # The mapping form must likewise leave its source object unchanged.
    expected.attrs["c"] = 3
    new_actual = actual.assign_attrs({"c": 3})
    assert_identical(new_actual, expected)
    assert actual.attrs == {"a": 1, "b": 2}
def test_drop_attrs(self) -> None:
    """``drop_attrs`` returns an object with empty attrs."""
    # Mostly tested in test_dataset.py, but adding a very small test here
    with_attrs = DataArray([], attrs=dict(a=1, b=2))
    stripped = with_attrs.drop_attrs()
    assert stripped.attrs == {}
@pytest.mark.parametrize(
    "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs]
)
def test_propagate_attrs(self, func) -> None:
    """Attrs propagation through ``func`` follows the ``keep_attrs`` option."""
    source = DataArray(self.va)

    # test defaults
    assert func(source).attrs == source.attrs

    with set_options(keep_attrs=False):
        stripped = func(source)
        assert stripped.attrs == {}

    with set_options(keep_attrs=True):
        preserved = func(source)
        assert preserved.attrs == source.attrs
def test_fillna(self) -> None:
    """``fillna`` with scalars, arrays and partially overlapping DataArrays."""
    holes = DataArray([np.nan, 1, np.nan, 3], coords={"x": range(4)}, dims="x")

    # Scalar fill value.
    filled_scalar = DataArray([-1, 1, -1, 3], coords={"x": range(4)}, dims="x")
    assert_identical(filled_scalar, holes.fillna(-1))

    # Fill values taken from another DataArray, an ndarray, or a partial slice.
    filler = DataArray(range(4), coords={"x": range(4)}, dims="x")
    filled = filler.copy()
    assert_identical(filled, holes.fillna(filler))
    assert_identical(filled, holes.fillna(np.arange(4)))
    assert_identical(filled, holes.fillna(filler[:3]))

    # An empty filler changes nothing.
    assert_identical(holes, holes.fillna(filler[:0]))

    # Invalid fill values.
    with pytest.raises(TypeError, match=r"fillna on a DataArray"):
        holes.fillna({0: 0})
    with pytest.raises(ValueError, match=r"broadcast"):
        holes.fillna(np.array([1, 2]))
def test_align(self) -> None:
    """An inner join trims both arrays to the labels they share."""
    full = DataArray(
        np.random.random((6, 8)), coords={"x": list("abcdef")}, dims=["x", "y"]
    )
    shorter = full[:5]
    aligned_full, aligned_shorter = align(full, shorter, join="inner")
    assert_identical(aligned_full, full[:5])
    assert_identical(aligned_shorter, full[:5])
def test_align_dtype(self) -> None:
    """An outer join must not upcast float32 data on either aligned array."""
    # regression test for #264
    x1 = np.arange(30)
    x2 = np.arange(5, 35)
    a = DataArray(np.random.random((30,)).astype(np.float32), [("x", x1)])
    b = DataArray(np.random.random((30,)).astype(np.float32), [("x", x2)])
    c, d = align(a, b, join="outer")
    # Both outputs gain labels from the other input, so check both.
    assert c.dtype == np.float32
    assert d.dtype == np.float32
def test_align_copy(self) -> None:
    """``align(copy=...)`` controls whether unchanged inputs share their data."""
    x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])])
    y = DataArray([1, 2], coords=[("a", [3, 1])])
    wanted_x = x
    wanted_y = DataArray([2, np.nan, 1], coords=[("a", [1, 2, 3])])

    # copy=False: x already has the target index and keeps its buffer.
    shared_x, shared_y = align(x, y, join="outer", copy=False)
    assert_identical(wanted_x, shared_x)
    assert_identical(wanted_y, shared_y)
    assert source_ndarray(shared_x.data) is source_ndarray(x.data)

    # copy=True: the values match but the buffer is a new one.
    copied_x, copied_y = align(x, y, join="outer", copy=True)
    assert_identical(wanted_x, copied_x)
    assert_identical(wanted_y, copied_y)
    assert source_ndarray(copied_x.data) is not source_ndarray(x.data)

    # Trivial align - 1 element
    x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])])
    (alone_shared,) = align(x, copy=False)
    assert_identical(x, alone_shared)
    assert source_ndarray(alone_shared.data) is source_ndarray(x.data)

    (alone_copied,) = align(x, copy=True)
    assert_identical(x, alone_copied)
    assert source_ndarray(alone_copied.data) is not source_ndarray(x.data)
def test_align_override(self) -> None:
    """``join="override"``: expected index replacement, ``exclude`` and size errors."""
    left = DataArray([1, 2, 3], dims="x", coords={"x": [0, 1, 2]})
    right = DataArray(
        np.arange(9).reshape((3, 3)),
        dims=["x", "y"],
        coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]},
    )
    # Same data as ``right`` but carrying the "x" labels of ``left``.
    expected_right = DataArray(
        np.arange(9).reshape(3, 3),
        dims=["x", "y"],
        coords={"x": [0, 1, 2], "y": [1, 2, 3]},
    )
    new_left, new_right = align(left, right, join="override")
    assert_identical(left, new_left)
    assert_identical(new_right, expected_right)
    # With "x" excluded, both arrays come back unchanged.
    new_left, new_right = align(left, right, exclude="x", join="override")
    assert_identical(left, new_left)
    assert_identical(right, new_right)
    # Same, with a left operand that has no "x" dimension at all.
    new_left, new_right = xr.align(
        left.isel(x=0, drop=True), right, exclude="x", join="override"
    )
    assert_identical(left.isel(x=0, drop=True), new_left)
    assert_identical(right, new_right)
    # "x" has size 1 on the left and 3 on the right, so override must fail.
    with pytest.raises(
        ValueError, match=r"cannot align.*join.*override.*same size"
    ):
        align(left.isel(x=0).expand_dims("x"), right, join="override")
@pytest.mark.parametrize(
    "darrays",
    [
        [
            DataArray(0),
            DataArray([1], [("x", [1])]),
            DataArray([2, 3], [("x", [2, 3])]),
        ],
        [
            DataArray([2, 3], [("x", [2, 3])]),
            DataArray([1], [("x", [1])]),
            DataArray(0),
        ],
    ],
)
def test_align_override_error(self, darrays) -> None:
    """``join="override"`` rejects indexes of different sizes, in either order."""
    size_mismatch = r"cannot align.*join.*override.*same size"
    with pytest.raises(ValueError, match=size_mismatch):
        xr.align(*darrays, join="override")
def test_align_exclude(self) -> None:
    """An outer join aligns ``a`` while the excluded ``b`` is left alone."""
    x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])])
    y = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, 20]), ("b", [5, 6])])
    z = DataArray([1], dims=["a"], coords={"a": [20], "b": 7})

    aligned_x, aligned_y, aligned_z = align(x, y, z, join="outer", exclude=["b"])

    # Every result shares the "a" labels below; each keeps its own "b".
    wanted_x = DataArray(
        [[3, 4], [1, 2], [np.nan, np.nan]],
        coords=[("a", [-2, -1, 20]), ("b", [3, 4])],
    )
    wanted_y = DataArray(
        [[np.nan, np.nan], [1, 2], [3, 4]],
        coords=[("a", [-2, -1, 20]), ("b", [5, 6])],
    )
    wanted_z = DataArray(
        [np.nan, np.nan, 1], dims=["a"], coords={"a": [-2, -1, 20], "b": 7}
    )
    assert_identical(wanted_x, aligned_x)
    assert_identical(wanted_y, aligned_y)
    assert_identical(wanted_z, aligned_z)
def test_align_indexes(self) -> None:
    """An explicit ``indexes`` argument sets the labels and order of the result."""
    x = DataArray([1, 2, 3], coords=[("a", [-1, 10, -2])])
    y = DataArray([1, 2], coords=[("a", [-2, -1])])

    # Both arrays are reindexed onto the given labels, in the given order.
    aligned_x, aligned_y = align(x, y, join="outer", indexes={"a": [10, -1, -2]})
    wanted_x = DataArray([2, 1, 3], coords=[("a", [10, -1, -2])])
    wanted_y = DataArray([np.nan, 2, 1], coords=[("a", [10, -1, -2])])
    assert_identical(wanted_x, aligned_x)
    assert_identical(wanted_y, aligned_y)

    # A single array: the label it does not have (7) is filled with NaN.
    (aligned_alone,) = align(x, join="outer", indexes={"a": [-2, 7, 10, -1]})
    wanted_alone = DataArray([3, np.nan, 2, 1], coords=[("a", [-2, 7, 10, -1])])
    assert_identical(wanted_alone, aligned_alone)
def test_align_without_indexes_exclude(self) -> None:
    """Excluding an unindexed dimension lets differently sized arrays pass through."""
    longer = DataArray([1, 2, 3], dims=["x"])
    shorter = DataArray([1, 2], dims=["x"])
    aligned_longer, aligned_shorter = align(longer, shorter, exclude=["x"])
    assert_identical(aligned_longer, longer)
    assert_identical(aligned_shorter, shorter)
def test_align_mixed_indexes(self) -> None:
    """Aligning an unindexed array with an indexed one of the same size."""
    unindexed = DataArray([1, 2], dims=["x"])
    indexed = DataArray([1, 2], coords=[("x", ["a", "b"])])

    # By default the unindexed array takes on the other array's index.
    first, second = align(unindexed, indexed)
    assert_identical(first, indexed)
    assert_identical(second, indexed)

    # With "x" excluded both arrays are returned as they were.
    first, second = align(unindexed, indexed, exclude=["x"])
    assert_identical(first, unindexed)
    assert_identical(second, indexed)
def test_align_without_indexes_errors(self) -> None:
    """Conflicting sizes along a dimension raise a ValueError on align."""
    pattern = r"cannot.*align.*dimension.*conflicting.*sizes.*"
    three = DataArray([1, 2, 3], dims=["x"])

    # neither array carries an index
    with pytest.raises(ValueError, match=pattern):
        align(three, DataArray([1, 2], dims=["x"]))

    # only the shorter array carries an index
    with pytest.raises(ValueError, match=pattern):
        align(three, DataArray([1, 2], coords=[("x", [0, 1])]))

def test_align_str_dtype(self) -> None:
    """Outer alignment on string labels keeps the coordinate dtype."""
    a = DataArray([0, 1], dims=["x"], coords={"x": ["a", "b"]})
    b = DataArray([1, 2], dims=["x"], coords={"x": ["b", "c"]})

    union = ["a", "b", "c"]
    expected_a = DataArray([0, 1, np.nan], dims=["x"], coords={"x": union})
    expected_b = DataArray([np.nan, 1, 2], dims=["x"], coords={"x": union})

    actual_a, actual_b = xr.align(a, b, join="outer")

    for expected, actual in ((expected_a, actual_a), (expected_b, actual_b)):
        assert_identical(expected, actual)
        assert expected.x.dtype == actual.x.dtype

def test_broadcast_on_vs_off_global_option_different_dims(self) -> None:
    """Arithmetic over different dims obeys the ``arithmetic_broadcast`` option."""
    xda_1 = xr.DataArray([1], dims="x1")
    xda_2 = xr.DataArray([1], dims="x2")

    # enabled: operands are broadcast against each other
    with xr.set_options(arithmetic_broadcast=True):
        expected_xda = xr.DataArray([[1.0]], dims=("x1", "x2"))
        actual_xda = xda_1 / xda_2
        assert_identical(actual_xda, expected_xda)

    # disabled: the same operation must fail with an explanatory message
    message = re.escape(
        "Broadcasting is necessary but automatic broadcasting is disabled via "
        "global option `'arithmetic_broadcast'`. "
        "Use `xr.set_options(arithmetic_broadcast=True)` to enable automatic broadcasting."
    )
    with xr.set_options(arithmetic_broadcast=False):
        with pytest.raises(ValueError, match=message):
            xda_1 / xda_2

@pytest.mark.parametrize("arithmetic_broadcast", [True, False])
def test_broadcast_on_vs_off_global_option_same_dims(
    self, arithmetic_broadcast: bool
) -> None:
    """Same-shaped operands work regardless of the broadcast option."""
    # Ensure that no error is raised when arithmetic broadcasting is disabled,
    # when broadcasting is not needed. The two DataArrays have the same
    # dimensions of the same size.
    xda_1 = xr.DataArray([1], dims="x")
    xda_2 = xr.DataArray([1], dims="x")
    expected_xda = xr.DataArray([2.0], dims=("x",))

    with xr.set_options(arithmetic_broadcast=arithmetic_broadcast):
        # DataArray + DataArray
        assert_identical(xda_1 + xda_2, expected_xda)
        # DataArray + ndarray, in both operand orders
        assert_identical(xda_1 + np.array([1.0]), expected_xda)
        assert_identical(np.array([1.0]) + xda_1, expected_xda)

def test_broadcast_arrays(self) -> None:
    """``broadcast`` expands orthogonal arrays and reorders transposed ones."""
    # orthogonal 1d inputs become a common 2d grid
    x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x")
    y = DataArray([1, 2], coords=[("b", [3, 4])], name="y")
    got_x, got_y = broadcast(x, y)
    expected_coords = [("a", [-1, -2]), ("b", [3, 4])]
    want_x = DataArray([[1, 1], [2, 2]], expected_coords, name="x")
    want_y = DataArray([[1, 2], [1, 2]], expected_coords, name="y")
    assert_identical(want_x, got_x)
    assert_identical(want_y, got_y)

    # same dims in a different order: the second array is transposed
    x = DataArray(np.random.randn(2, 3), dims=["a", "b"])
    y = DataArray(np.random.randn(3, 2), dims=["b", "a"])
    got_x, got_y = broadcast(x, y)
    assert_identical(x, got_x)
    assert_identical(y.T, got_y)

def test_broadcast_arrays_misaligned(self) -> None:
    """Broadcasting arrays with different ``a`` labels aligns them first."""
    # broadcast on misaligned coords must auto-align
    x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])])
    y = DataArray([1, 2], coords=[("a", [-1, 20])])

    got_x, got_y = broadcast(x, y)

    want_x = DataArray(
        [[3, 4], [1, 2], [np.nan, np.nan]],
        coords=[("a", [-2, -1, 20]), ("b", [3, 4])],
    )
    want_y = DataArray(
        [[np.nan, np.nan], [1, 1], [2, 2]],
        coords=[("a", [-2, -1, 20]), ("b", [3, 4])],
    )
    assert_identical(want_x, got_x)
    assert_identical(want_y, got_y)

def test_broadcast_arrays_nocopy(self) -> None:
    """Inputs that need no alteration share their source array after broadcast."""
    # Test that input data is not copied over in case
    # no alteration is needed
    x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x")
    y = DataArray(3, name="y")

    got_x, got_y = broadcast(x, y)

    want_x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x")
    want_y = DataArray([3, 3], coords=[("a", [-1, -2])], name="y")
    assert_identical(want_x, got_x)
    assert_identical(want_y, got_y)
    assert source_ndarray(got_x.data) is source_ndarray(x.data)

    # single-element broadcast (trivial case)
    (got_x,) = broadcast(x)
    assert_identical(x, got_x)
    assert source_ndarray(got_x.data) is source_ndarray(x.data)

def test_broadcast_arrays_exclude(self) -> None:
    """Dimensions listed in ``exclude`` are not broadcast or aligned."""
    x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])])
    y = DataArray([1, 2], coords=[("a", [-1, 20])])
    z = DataArray(5, coords={"b": 5})

    got_x, got_y, got_z = broadcast(x, y, z, exclude=["b"])

    want_x = DataArray(
        [[3, 4], [1, 2], [np.nan, np.nan]],
        coords=[("a", [-2, -1, 20]), ("b", [3, 4])],
    )
    want_y = DataArray([np.nan, 1, 2], coords=[("a", [-2, -1, 20])])
    want_z = DataArray([5, 5, 5], dims=["a"], coords={"a": [-2, -1, 20], "b": 5})
    assert_identical(want_x, got_x)
    assert_identical(want_y, got_y)
    assert_identical(want_z, got_z)

def test_broadcast_coordinates(self) -> None:
    """Broadcast default dimension coordinates against a data variable."""
    # regression test for GH649
    ds = Dataset({"a": (["x", "y"], np.ones((5, 6)))})
    x_bc, y_bc, a_bc = broadcast(ds.x, ds.y, ds.a)

    # the data variable is already full-sized
    assert_identical(ds.a, a_bc)

    # the coordinates expand to an "ij"-indexed mesh grid
    grid_x, grid_y = np.meshgrid(np.arange(5), np.arange(6), indexing="ij")
    assert_identical(DataArray(grid_x, dims=["x", "y"], name="x"), x_bc)
    assert_identical(DataArray(grid_y, dims=["x", "y"], name="y"), y_bc)

def test_to_pandas(self) -> None:
    """``to_pandas`` handles 0d/1d/2d input, roundtrips, and rejects 5d input."""
    # 0d
    scalar_result = DataArray(42).to_pandas()
    assert_array_equal(scalar_result, np.array(42))

    # 1d
    values = np.random.randn(3)
    index = pd.Index(["a", "b", "c"], name="x")
    series = DataArray(values, coords=[index]).to_pandas()
    assert_array_equal(np.asarray(series.values), values)
    assert_array_equal(series.index, index)
    assert_array_equal(series.index.name, "x")

    # 2d
    values = np.random.randn(3, 2)
    frame = DataArray(
        values, coords=[("x", ["a", "b", "c"]), ("y", [0, 1])], name="foo"
    ).to_pandas()
    assert_array_equal(np.asarray(frame.values), values)
    assert_array_equal(frame.index, ["a", "b", "c"])
    assert_array_equal(frame.columns, [0, 1])

    # roundtrips
    for shape in [(3,), (3, 4)]:
        dims = list("abc")[: len(shape)]
        da = DataArray(np.random.randn(*shape), dims=dims)
        roundtripped = DataArray(da.to_pandas()).drop_vars(dims)
        assert_identical(da, roundtripped)

    # a 5d array cannot be converted
    with pytest.raises(ValueError, match=r"Cannot convert"):
        DataArray(np.random.randn(1, 2, 3, 4, 5)).to_pandas()

def test_to_dataframe(self) -> None:
    """``to_dataframe`` agrees with ``to_series`` and validates its inputs."""
    # regression test for #260
    arr_np = np.random.randn(3, 4)
    arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
    series_ref = arr.to_series()
    series_got = arr.to_dataframe()["foo"]
    assert_array_equal(np.asarray(series_ref.values), np.asarray(series_got.values))
    assert_array_equal(np.asarray(series_ref.name), np.asarray(series_got.name))
    assert_array_equal(series_ref.index.values, series_got.index.values)

    # an explicit dim_order changes how the values are flattened
    series_got = arr.to_dataframe(dim_order=["A", "B"])["foo"]
    assert_array_equal(arr_np.transpose().reshape(-1), np.asarray(series_got.values))

    # regression test for coords with different dimensions
    arr.coords["C"] = ("B", [-1, -2, -3])
    frame_ref = arr.to_series().to_frame()
    frame_ref["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4
    frame_ref = frame_ref[["C", "foo"]]
    frame_got = arr.to_dataframe()
    assert_array_equal(np.asarray(frame_ref.values), np.asarray(frame_got.values))
    assert_array_equal(frame_ref.columns.values, frame_got.columns.values)
    assert_array_equal(frame_ref.index.values, frame_got.index.values)

    # error cases
    with pytest.raises(ValueError, match="does not match the set of dimensions"):
        arr.to_dataframe(dim_order=["B", "A", "C"])

    with pytest.raises(ValueError, match=r"cannot convert a scalar"):
        arr.sel(A="c", B=2).to_dataframe()

    arr.name = None  # unnamed
    with pytest.raises(ValueError, match=r"unnamed"):
        arr.to_dataframe()

def test_to_dataframe_multiindex(self) -> None:
    """A MultiIndex dimension is merged with the other dims in the frame index."""
    # regression test for #3008
    arr_np = np.random.randn(4, 3)
    mindex = pd.MultiIndex.from_product([[1, 2], list("ab")], names=["A", "B"])
    arr = DataArray(arr_np, [("MI", mindex), ("C", [5, 6, 7])], name="foo")

    frame = arr.to_dataframe()
    frame_index = frame.index
    assert isinstance(frame_index, pd.MultiIndex)

    assert_array_equal(np.asarray(frame["foo"].values), arr_np.flatten())
    assert_array_equal(frame_index.names, list("ABC"))
    # check the three levels one by one
    assert_array_equal(frame_index.levels[0], [1, 2])
    assert_array_equal(frame_index.levels[1], ["a", "b"])
    assert_array_equal(frame_index.levels[2], [5, 6, 7])

def test_to_dataframe_0length(self) -> None:
    """A zero-length dimension yields an empty frame with all index names."""
    # regression test for #3008
    mindex = pd.MultiIndex.from_product([[1, 2], list("ab")], names=["A", "B"])
    empty = DataArray(
        np.random.randn(4, 0), [("MI", mindex), ("C", [])], name="foo"
    )

    frame = empty.to_dataframe()

    assert len(frame) == 0
    assert_array_equal(frame.index.names, list("ABC"))

@requires_dask_expr
@requires_dask
@pytest.mark.xfail(not has_dask_ge_2025_1_0, reason="dask-expr is broken")
def test_to_dask_dataframe(self) -> None:
    """``to_dask_dataframe`` agrees with the pandas conversion of the same array."""
    arr_np = np.arange(3 * 4).reshape(3, 4)
    arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
    series_ref = arr.to_series()

    column = arr.to_dask_dataframe()["foo"]
    assert_array_equal(column.values, np.asarray(series_ref.values))

    # an explicit dim_order changes how the values are flattened
    column = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"]
    assert_array_equal(arr_np.transpose().reshape(-1), column.values)

    # regression test for coords with different dimensions
    arr.coords["C"] = ("B", [-1, -2, -3])
    frame_ref = arr.to_series().to_frame()
    frame_ref["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4
    frame_ref = frame_ref[["C", "foo"]]
    frame_got = arr.to_dask_dataframe()[["C", "foo"]]
    assert_array_equal(frame_ref.values, np.asarray(frame_got.values))
    assert_array_equal(
        frame_ref.columns.values, np.asarray(frame_got.columns.values)
    )

    # error cases
    with pytest.raises(ValueError, match="does not match the set of dimensions"):
        arr.to_dask_dataframe(dim_order=["B", "A", "C"])

    arr.name = None
    with pytest.raises(
        ValueError,
        match="Cannot convert an unnamed DataArray",
    ):
        arr.to_dask_dataframe()

def test_to_pandas_name_matches_coordinate(self) -> None:
    """Conversion works when the array name equals its dimension name."""
    # coordinate with same name as array
    arr = DataArray([1, 2, 3], dims="x", name="x")

    as_series = arr.to_series()
    assert_array_equal([1, 2, 3], list(as_series.values))
    assert_array_equal([0, 1, 2], list(as_series.index.values))
    assert "x" == as_series.name
    assert "x" == as_series.index.name

    # the frame must match the one derived from the series
    as_frame = arr.to_dataframe()
    assert as_series.to_frame().equals(as_frame)

def test_to_and_from_series(self) -> None:
    """``to_series`` matches the frame column and roundtrips via ``from_series``."""
    column = self.dv.to_dataframe()["foo"]
    series = self.dv.to_series()
    assert_array_equal(column.values, series.values)
    assert_array_equal(column.index.values, series.index.values)
    assert "foo" == series.name

    # test roundtrip
    restored = DataArray.from_series(series).drop_vars(["x", "y"])
    assert_identical(self.dv, restored)

    # test name is None
    series.name = None
    unnamed = self.dv.rename(None)
    assert_identical(unnamed, DataArray.from_series(series).drop_vars(["x", "y"]))

def test_from_series_multiindex(self) -> None:
    """A stacked frame converts to xarray with both levels selectable."""
    # GH:3951
    frame = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]})
    frame = frame.rename_axis("num").rename_axis("alpha", axis=1)

    converted = frame.stack("alpha").to_xarray()

    assert (converted.sel(alpha="B") == [1, 2, 3]).all()
    assert (converted.sel(alpha="A") == [4, 5, 6]).all()

@requires_sparse
def test_from_series_sparse(self) -> None:
    """``from_series(sparse=True)`` gives a COO array equal to the dense result."""
    import sparse

    series = pd.Series([1, 2], index=[("a", 1), ("b", 2)])
    dense_result = DataArray.from_series(series, sparse=False)
    sparse_result = DataArray.from_series(series, sparse=True)
    assert isinstance(sparse_result.data, sparse.COO)

    # densify in place so both results can be compared directly
    sparse_result.data = sparse_result.data.todense()
    assert_identical(sparse_result, dense_result)

@requires_sparse
def test_from_multiindex_series_sparse(self) -> None:
    """Sparse coords from a sampled MultiIndex series match the dense layout."""
    # regression test for GH4019
    import sparse

    idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
    full: pd.Series = pd.Series(np.random.default_rng(0).random(len(idx)), index=idx)
    series: pd.Series = full.sample(n=5, random_state=3)

    # reference coordinates come from the dense result, with NaN as fill value
    dense = DataArray.from_series(series, sparse=False)
    expected_coords = sparse.COO.from_numpy(dense.data, np.nan).coords

    actual_sparse = xr.DataArray.from_series(series, sparse=True)
    actual_coords = actual_sparse.data.coords

    np.testing.assert_equal(actual_coords, expected_coords)

def test_nbytes_does_not_load_data(self) -> None:
    """``nbytes`` is available for an array whose data cannot be accessed."""
    backing = InaccessibleArray(np.zeros((3, 3), dtype="uint8"))
    wrapped = xr.DataArray(backing, dims=["x", "y"])

    # If xarray tries to instantiate the InaccessibleArray to compute
    # nbytes, the following will raise an error.
    # However, it should still be able to accurately give us information
    # about the number of bytes from the metadata
    assert wrapped.nbytes == 9

    # Here we confirm that this does not depend on array having the
    # nbytes property, since it isn't really required by the array
    # interface. nbytes is more a property of arrays that have been
    # cast to numpy arrays.
    assert not hasattr(backing, "nbytes")

def test_to_and_from_empty_series(self) -> None:
    """An empty float series roundtrips through DataArray unchanged."""
    # GH697
    expected: pd.Series[Any] = pd.Series([], dtype=np.float64)

    da = DataArray.from_series(expected)
    assert len(da) == 0

    actual = da.to_series()
    assert len(actual) == 0
    assert expected.equals(actual)

def test_series_categorical_index(self) -> None:
    """A series with a CategoricalIndex can be wrapped and repr'd."""
    # regression test for GH700
    if not hasattr(pd, "CategoricalIndex"):
        pytest.skip("requires pandas with CategoricalIndex")

    categories = pd.CategoricalIndex(list("aabbc"))
    arr = DataArray(pd.Series(np.arange(5), index=categories))
    assert "'a'" in repr(arr)  # should not error

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize("data", ["list", "array", True])
@pytest.mark.parametrize("encoding", [True, False])
def test_to_and_from_dict(
    self, encoding: bool, data: bool | Literal["list", "array"], use_dask: bool
) -> None:
    """Check ``to_dict``/``from_dict`` for numpy- and dask-backed arrays.

    Parameters
    ----------
    encoding : bool
        Whether ``to_dict`` is asked to include the ``encoding`` entry.
    data : bool or {"list", "array"}
        Forwarded to ``to_dict(data=...)``; selects list or array output.
    use_dask : bool
        Whether the array under test is chunked before conversion.
    """
    if use_dask and not has_dask:
        pytest.skip("requires dask")
    encoding_data = {"bar": "spam"}
    array = DataArray(
        np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo"
    )
    array.encoding = encoding_data

    return_data = array.to_numpy()
    coords_data = np.array(["a", "b"])
    if data == "list" or data is True:
        return_data = return_data.tolist()
        coords_data = coords_data.tolist()

    expected: dict[str, Any] = {
        "name": "foo",
        "dims": ("x", "y"),
        "data": return_data,
        "attrs": {},
        "coords": {"x": {"dims": ("x",), "data": coords_data, "attrs": {}}},
    }
    if encoding:
        expected["encoding"] = encoding_data

    # Chunk only when the dask variant was requested. Keying this on
    # ``has_dask`` would make the ``use_dask=False`` cases run on dask as
    # well, leaving the numpy path untested whenever dask is installed.
    if use_dask:
        da = array.chunk()
    else:
        da = array

    if data == "array" or data is False:
        # array output must not trigger a dask compute
        with raise_if_dask_computes():
            actual = da.to_dict(encoding=encoding, data=data)
    else:
        actual = da.to_dict(encoding=encoding, data=data)

    # check that they are identical
    np.testing.assert_equal(expected, actual)

    # check roundtrip
    assert_identical(da, DataArray.from_dict(actual))

    # a more bare bones representation still roundtrips
    d = {
        "name": "foo",
        "dims": ("x", "y"),
        "data": da.values.tolist(),
        "coords": {"x": {"dims": "x", "data": ["a", "b"]}},
    }
    assert_identical(da, DataArray.from_dict(d))

    # and the most bare bones representation still roundtrips
    d = {"name": "foo", "dims": ("x", "y"), "data": da.values}
    assert_identical(da.drop_vars("x"), DataArray.from_dict(d))

    # missing a dims in the coords
    d = {
        "dims": ("x", "y"),
        "data": da.values,
        "coords": {"x": {"data": ["a", "b"]}},
    }
    with pytest.raises(
        ValueError,
        match=r"cannot convert dict when coords are missing the key 'dims'",
    ):
        DataArray.from_dict(d)

    # this one is missing some necessary information
    d = {"dims": "t"}
    with pytest.raises(
        ValueError, match=r"cannot convert dict without the key 'data'"
    ):
        DataArray.from_dict(d)

    # check the data=False option
    # NOTE: ``copy`` is shallow, so the nested "coords" dict of ``expected``
    # is modified too; ``expected`` is not used again after this point.
    expected_no_data = expected.copy()
    del expected_no_data["data"]
    del expected_no_data["coords"]["x"]["data"]
    endiantype = "<U1" if sys.byteorder == "little" else ">U1"
    expected_no_data["coords"]["x"].update({"dtype": endiantype, "shape": (2,)})
    expected_no_data.update({"dtype": "float64", "shape": (2, 3)})
    actual_no_data = da.to_dict(data=False, encoding=encoding)
    assert expected_no_data == actual_no_data

def test_to_and_from_dict_with_time_dim(self) -> None:
    """A datetime coordinate roundtrips through ``to_dict``/``from_dict``."""
    values = np.random.randn(10, 3)
    times = pd.date_range("20130101", periods=10)
    lat = [77.7, 83.2, 76]
    da = DataArray(values, {"t": times, "lat": lat}, dims=["t", "lat"])

    assert_identical(da, DataArray.from_dict(da.to_dict()))

def test_to_and_from_dict_with_nan_nat(self) -> None:
    """Missing values in data and time coordinate roundtrip through a dict."""
    values = np.random.randn(10, 3)
    values[2] = np.nan  # blank out one full row of data
    times = pd.Series(pd.date_range("20130101", periods=10))
    times[2] = np.nan  # and the matching timestamp
    lat = [77.7, 83.2, 76]
    da = DataArray(values, {"t": times, "lat": lat}, dims=["t", "lat"])

    assert_identical(da, DataArray.from_dict(da.to_dict()))

def test_to_dict_with_numpy_attrs(self) -> None:
    """numpy-typed attrs come out of ``to_dict`` as plain Python values."""
    # this doesn't need to roundtrip
    attrs = {
        "created": np.float64(1998),
        "coords": np.array([37, -110.1, 100]),
        "maintainer": "bar",
    }
    da = DataArray(
        np.random.randn(10, 3),
        {"t": list("abcdefghij"), "lat": [77.7, 83.2, 76]},
        dims=["t", "lat"],
        attrs=attrs,
    )
    expected_attrs = {
        "created": attrs["created"].item(),  # type: ignore[attr-defined]
        "coords": attrs["coords"].tolist(),  # type: ignore[attr-defined]
        "maintainer": "bar",
    }

    actual = da.to_dict()

    # check that they are identical
    assert expected_attrs == actual["attrs"]

def test_to_masked_array(self) -> None:
    """``to_masked_array`` roundtrips, keeps masks, and honours ``copy``."""
    rng = np.random.default_rng(44)
    x = rng.random(size=(10, 20))
    x_masked = np.ma.masked_where(x < 0.5, x)
    da = DataArray(x_masked)

    # Test round trip
    x_masked_2 = da.to_masked_array()
    da_2 = DataArray(x_masked_2)
    assert_array_equal(x_masked, x_masked_2)
    assert_equal(da, da_2)

    copied = da.to_masked_array(copy=True)
    assert isinstance(copied, np.ma.MaskedArray)
    # Test masks
    assert_array_equal(copied.mask, x_masked.mask)
    # Test that mask is unpacked correctly
    assert_array_equal(da.values, x_masked.filled(np.nan))
    # Test that the underlying data (including nans) hasn't changed
    assert_array_equal(copied, x_masked.filled(np.nan))

    # Test that copy=False gives access to values
    view = da.to_masked_array(copy=False)
    view[0, 0] = 10.0
    assert view[0, 0] == 10.0
    assert da[0, 0].values == 10.0
    assert view.base is da.values
    assert isinstance(view, np.ma.MaskedArray)

    # Test with some odd arrays
    for v in [4, np.nan, True, "4", "four"]:
        da = DataArray(v)
        ma = da.to_masked_array()
        assert isinstance(ma, np.ma.MaskedArray)

    # Fix GH issue 684 - masked arrays mask should be an array not a scalar
    N = 4
    v = range(N)
    da = DataArray(v)
    ma = da.to_masked_array()
    assert len(ma.mask) == N

def test_to_dataset_whole(self) -> None:
    """Convert a whole array into a single-variable Dataset."""
    unnamed = DataArray([1, 2], dims="x")
    # without a name the conversion is refused ...
    with pytest.raises(ValueError, match=r"unable to convert unnamed"):
        unnamed.to_dataset()

    # ... unless one is supplied explicitly
    actual = unnamed.to_dataset(name="foo")
    expected = Dataset({"foo": ("x", [1, 2])})
    assert_identical(expected, actual)

    named = DataArray([1, 2], dims="x", name="foo", attrs={"y": "testattr"})
    actual = named.to_dataset()
    expected = Dataset({"foo": ("x", [1, 2], {"y": "testattr"})})
    assert_identical(expected, actual)

    # Test promoting attrs
    actual = named.to_dataset(promote_attrs=True)
    expected = Dataset(
        {"foo": ("x", [1, 2], {"y": "testattr"})}, attrs={"y": "testattr"}
    )
    assert_identical(expected, actual)

    # a positional argument that is not a dimension name is a TypeError
    with pytest.raises(TypeError):
        actual = named.to_dataset("bar")

def test_to_dataset_split(self) -> None:
    """Split an array along a dimension into one Dataset variable per label."""
    array = DataArray(
        [[1, 2], [3, 4], [5, 6]],
        coords=[("x", list("abc")), ("y", [0.0, 0.1])],
        attrs={"a": 1},
    )
    expected = Dataset(
        {"a": ("y", [1, 2]), "b": ("y", [3, 4]), "c": ("y", [5, 6])},
        coords={"y": [0.0, 0.1]},
        attrs={"a": 1},
    )
    split = array.to_dataset("x")
    assert_identical(expected, split)

    # ``dim`` and ``name`` cannot be combined
    with pytest.raises(TypeError):
        array.to_dataset("x", name="foo")

    # converting back restores the original array
    assert_identical(array, split.to_dataarray(dim="x"))

    # without a coordinate, the positions along the dim become variable names
    array = DataArray([1, 2, 3], dims="x")
    expected = Dataset({0: 1, 1: 2, 2: 3})
    split = array.to_dataset("x")
    assert_identical(expected, split)

def test_to_dataset_retains_keys(self) -> None:
    """Non-string coordinate labels survive a Dataset split and merge back."""
    # use dates as convenient non-str objects. Not a specific date test
    import datetime

    dates = [datetime.date(2000, 1, day) for day in range(1, 4)]
    array = DataArray([1, 2, 3], coords=[("x", dates)], attrs={"a": 1})

    # convert to dataset and back again
    as_dataset = array.to_dataset("x")
    result = as_dataset.to_dataarray(dim="x")

    assert_equal(array, result)

def test_to_dataset_coord_value_is_dim(self) -> None:
    """Splitting fails when a label of the split dim names another coordinate."""
    # github issue #7823
    array = DataArray(
        np.zeros((3, 3)),
        coords={
            # 'a' is both a coordinate value and the name of a coordinate
            "x": ["a", "b", "c"],
            "a": [1, 2, 3],
        },
    )
    single_clash = (
        re.escape("dimension 'x' would produce the variables ('a',)")
        + ".*"
        + re.escape("DataArray.rename(a=...) or DataArray.assign_coords(x=...)")
    )
    with pytest.raises(ValueError, match=single_clash):
        array.to_dataset("x")

    # test error message formatting when there are multiple ambiguous
    # values/coordinates
    array2 = DataArray(
        np.zeros((3, 3, 2)),
        coords={
            "x": ["a", "b", "c"],
            "a": [1, 2, 3],
            "b": [0.0, 0.1],
        },
    )
    double_clash = (
        re.escape("dimension 'x' would produce the variables ('a', 'b')")
        + ".*"
        + re.escape(
            "DataArray.rename(a=..., b=...) or DataArray.assign_coords(x=...)"
        )
    )
    with pytest.raises(ValueError, match=double_clash):
        array2.to_dataset("x")

def test__title_for_slice(self) -> None:
    """``_title_for_slice`` lists only the scalar coordinates left by ``isel``."""
    array = DataArray(
        np.ones((4, 3, 2)),
        dims=["a", "b", "c"],
        coords={"a": range(4), "b": range(3), "c": range(2)},
    )
    # nothing selected yet, so the title is empty
    assert "" == array._title_for_slice()
    assert "c = 0" == array.isel(c=0)._title_for_slice()

    # either ordering of the two scalar coordinates is accepted
    title = array.isel(b=1, c=0)._title_for_slice()
    assert title in ("b = 1, c = 0", "c = 0, b = 1")

    # an array without coordinates has an empty title too
    a2 = DataArray(np.ones((4, 1)), dims=["a", "b"])
    assert "" == a2._title_for_slice()

def test__title_for_slice_truncate(self) -> None:
    """A long title is cut to ``truncate`` characters and ends with an ellipsis."""
    array = DataArray(np.ones(4))
    # two 100-character scalar coordinates make the full title overly long
    array.coords["a"] = "a" * 100
    array.coords["b"] = "b" * 100

    limit = 80
    title = array._title_for_slice(truncate=limit)

    assert limit == len(title)
    assert title.endswith("...")

def test_dataarray_diff_n1(self) -> None:
    """``diff`` along ``y`` equals ``np.diff`` along axis 1."""
    da = DataArray(np.random.randn(3, 4), dims=["x", "y"])
    reference = np.diff(da.values, axis=1)
    assert_equal(DataArray(reference, dims=["x", "y"]), da.diff("y"))

def test_coordinate_diff(self) -> None:
    """``diff`` can be applied to a coordinate taken from an array."""
    # regression test for GH634
    arr = DataArray(range(0, 20, 2), dims=["lon"], coords=[range(10)])
    lon = arr.coords["lon"]

    actual = lon.diff("lon")

    expected = DataArray([1] * 9, dims=["lon"], coords=[range(1, 10)], name="lon")
    assert_equal(expected, actual)

@pytest.mark.parametrize("offset", [-5, 0, 1, 2])
@pytest.mark.parametrize("fill_value, dtype", [(2, int), (dtypes.NA, float)])
def test_shift(self, offset, fill_value, dtype) -> None:
    """``shift`` fills vacated slots and agrees with ``pandas.Series.shift``."""
    # part 1: fixed shift of one with the parametrized fill value
    arr = DataArray([1, 2, 3], dims="x")
    actual = arr.shift(x=1, fill_value=fill_value)
    if fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        fill_value = np.nan
    expected = DataArray([fill_value, 1, 2], dims="x")
    assert_identical(expected, actual)
    assert actual.dtype == dtype

    # part 2: parametrized offset, compared against pandas
    arr = DataArray([1, 2, 3], [("x", ["a", "b", "c"])])
    expected = DataArray(arr.to_pandas().shift(offset))
    actual = arr.shift(x=offset)
    assert_identical(expected, actual)

def test_roll_coords(self) -> None:
    """With ``roll_coords=True`` the coordinate rotates together with the data."""
    source = DataArray([1, 2, 3], coords={"x": range(3)}, dims="x")
    rolled = source.roll(x=1, roll_coords=True)
    assert_identical(DataArray([3, 1, 2], coords=[("x", [2, 0, 1])]), rolled)

def test_roll_no_coords(self) -> None:
    """By default ``roll`` rotates the data and leaves the coordinate in place."""
    source = DataArray([1, 2, 3], coords={"x": range(3)}, dims="x")
    rolled = source.roll(x=1)
    assert_identical(DataArray([3, 1, 2], coords=[("x", [0, 1, 2])]), rolled)

def test_copy_with_data(self) -> None:
    """``copy(data=...)`` swaps in new values and keeps all other metadata."""
    orig = DataArray(
        np.random.random(size=(2, 2)),
        dims=("x", "y"),
        attrs={"attr1": "value1"},
        coords={"x": [4, 3]},
        name="helloworld",
    )
    replacement = np.arange(4).reshape(2, 2)

    actual = orig.copy(data=replacement)

    # reference: a plain copy whose data is replaced afterwards
    expected = orig.copy()
    expected.data = replacement
    assert_identical(expected, actual)

@pytest.mark.xfail(raises=AssertionError)
@pytest.mark.parametrize(
    "deep, expected_orig",
    [
        [
            True,
            xr.DataArray(
                xr.IndexVariable("a", np.array([1, 2])),
                coords={"a": [1, 2]},
                dims=["a"],
            ),
        ],
        [
            False,
            xr.DataArray(
                xr.IndexVariable("a", np.array([999, 2])),
                coords={"a": [999, 2]},
                dims=["a"],
            ),
        ],
    ],
)
def test_copy_coords(self, deep, expected_orig) -> None:
    """The test fails for the shallow copy, and apparently only on Windows
    for some reason. In windows coords seem to be immutable unless it's one
    dataarray deep copied from another."""
    da = xr.DataArray(
        np.ones([2, 2, 2]),
        coords={"a": [1, 2], "b": ["x", "y"], "c": [0, 1]},
        dims=["a", "b", "c"],
    )
    da_cp = da.copy(deep)

    # replace coordinate ``a`` on the copy only
    replacement = np.array([999, 2])
    da_cp.coords["a"] = da_cp["a"].copy(data=replacement)

    expected_cp = xr.DataArray(
        xr.IndexVariable("a", np.array([999, 2])),
        coords={"a": [999, 2]},
        dims=["a"],
    )
    # the copy carries the new coordinate ...
    assert_identical(da_cp["a"], expected_cp)
    # ... and the original is compared with the parametrized expectation
    assert_identical(da["a"], expected_orig)

def test_real_and_imag(self) -> None:
    """``real`` and ``imag`` return the two parts of a complex scalar array."""
    z = DataArray(1 + 2j)
    assert_identical(z.real, DataArray(1))
    assert_identical(z.imag, DataArray(2))

def test_setattr_raises(self) -> None:
    """Assigning to a coordinate, attr or unknown name as attribute is refused."""
    array = DataArray(0, coords={"scalar": 1}, attrs={"foo": "bar"})
    # "scalar" is a coordinate, "foo" an attr, "other" is neither
    for attribute in ("scalar", "foo", "other"):
        with pytest.raises(AttributeError, match=r"cannot set attr"):
            setattr(array, attribute, 2)

def test_full_like(self) -> None:
    """``full_like`` keeps metadata, fills values, and can override the dtype."""
    # For more thorough tests, see test_variable.py
    template = DataArray(
        np.random.random(size=(2, 2)),
        dims=("x", "y"),
        attrs={"attr1": "value1"},
        coords={"x": [4, 3]},
        name="helloworld",
    )

    filled = full_like(template, 2)
    expect = template.copy(deep=True)
    expect.values = np.array([[2.0, 2.0], [2.0, 2.0]])
    assert_identical(expect, filled)

    # override dtype
    filled = full_like(template, fill_value=True, dtype=bool)
    expect.values = np.array([[True, True], [True, True]])
    assert expect.dtype == bool
    assert_identical(expect, filled)

    # a dict-like dtype is rejected for a DataArray
    with pytest.raises(ValueError, match="'dtype' cannot be dict-like"):
        full_like(template, fill_value=True, dtype={"x": bool})

def test_dot(self) -> None:
    """``dot`` matches ``np.tensordot`` over shared dims and rejects bad operands."""
    x = np.linspace(-3, 3, 6)
    y = np.linspace(-3, 3, 5)
    z = range(4)
    da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
    da = DataArray(da_vals, coords=[x, y, z], dims=["x", "y", "z"])

    # nd dot 1d
    dm_vals1 = range(4)
    dm1 = DataArray(dm_vals1, coords=[z], dims=["z"])
    actual1 = da.dot(dm1)
    expected_vals1 = np.tensordot(da_vals, dm_vals1, (2, 0))
    expected1 = DataArray(expected_vals1, coords=[x, y], dims=["x", "y"])
    assert_equal(expected1, actual1)

    # all shared dims
    actual2 = da.dot(da)
    expected_vals2 = np.tensordot(da_vals, da_vals, axes=([0, 1, 2], [0, 1, 2]))
    expected2 = DataArray(expected_vals2)
    assert_equal(expected2, actual2)

    # multiple shared dims
    dm_vals3 = np.arange(20 * 5 * 4).reshape((20, 5, 4))
    j = np.linspace(-3, 3, 20)
    dm3 = DataArray(dm_vals3, coords=[j, y, z], dims=["j", "y", "z"])
    actual3 = da.dot(dm3)
    expected_vals3 = np.tensordot(da_vals, dm_vals3, axes=([1, 2], [1, 2]))
    expected3 = DataArray(expected_vals3, coords=[x, j], dims=["x", "j"])
    assert_equal(expected3, actual3)

    # Ellipsis: all dims are shared
    assert_equal(da.dot(da), da.dot(da, dim=...))

    # Ellipsis: not all dims are shared
    actual5 = da.dot(dm3, dim=...)
    expected5 = da.dot(dm3, dim=("j", "x", "y", "z"))
    assert_equal(expected5, actual5)

    # unsupported operand types
    with pytest.raises(NotImplementedError):
        da.dot(dm3.to_dataset(name="dm"))
    with pytest.raises(TypeError):
        da.dot(dm3.values)  # type: ignore[type-var]

def test_dot_align_coords(self) -> None:
    """``dot`` aligns its operands, and fails under ``arithmetic_join='exact'``."""
    # GH 3694
    x = np.linspace(-3, 3, 6)
    y = np.linspace(-3, 3, 5)
    z_a = range(4)
    da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
    da = DataArray(da_vals, coords=[x, y, z_a], dims=["x", "y", "z"])

    # second operand: its ``z`` labels only partly overlap with ``da``
    z_m = range(2, 6)
    dm_vals1 = range(4)
    dm1 = DataArray(dm_vals1, coords=[z_m], dims=["z"])

    with xr.set_options(arithmetic_join="exact"):
        with pytest.raises(
            ValueError, match=r"cannot align.*join.*exact.*not equal.*"
        ):
            da.dot(dm1)

    # nd dot 1d
    lhs, rhs = xr.align(da, dm1, join="inner")
    actual1 = da.dot(dm1)
    expected_vals1 = np.tensordot(lhs.values, rhs.values, (2, 0))
    expected1 = DataArray(expected_vals1, coords=[x, lhs.y], dims=["x", "y"])
    assert_equal(expected1, actual1)

    # multiple shared dims
    dm_vals2 = np.arange(20 * 5 * 4).reshape((20, 5, 4))
    j = np.linspace(-3, 3, 20)
    dm2 = DataArray(dm_vals2, coords=[j, y, z_m], dims=["j", "y", "z"])
    lhs, rhs = xr.align(da, dm2, join="inner")
    actual2 = da.dot(dm2)
    expected_vals2 = np.tensordot(lhs.values, rhs.values, axes=([1, 2], [1, 2]))
    expected2 = DataArray(expected_vals2, coords=[x, j], dims=["x", "j"])
    assert_equal(expected2, actual2)

def test_matmul(self) -> None:
    """The ``@`` operator gives the same result as ``dot``."""
    # copied from above (could make a fixture)
    coords = [np.linspace(-3, 3, 6), np.linspace(-3, 3, 5), range(4)]
    da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4))
    da = DataArray(da_vals, coords=coords, dims=["x", "y", "z"])

    assert_identical(da @ da, da.dot(da))

def test_matmul_align_coords(self) -> None:
    """``@`` aligns like ``dot`` and fails under ``arithmetic_join='exact'``."""
    # GH 3694
    da_vals = np.arange(6)
    da_a = DataArray(da_vals, coords=[np.arange(6)], dims=["x"])
    da_b = DataArray(da_vals, coords=[np.arange(2, 8)], dims=["x"])

    # only test arithmetic_join="inner" (=default)
    assert_identical(da_a @ da_b, da_a.dot(da_b))

    # with exact joins the partially overlapping labels are an error
    with xr.set_options(arithmetic_join="exact"):
        with pytest.raises(
            ValueError, match=r"cannot align.*join.*exact.*not equal.*"
        ):
            da_a @ da_b

def test_binary_op_propagate_indexes(self) -> None:
    """Binary operations reuse the operand's index object for ``x``."""
    # regression test for GH2227
    self.dv["x"] = np.arange(self.dv.sizes["x"])
    dv_index = self.dv.xindexes["x"]

    # arithmetic
    assert dv_index is (self.dv * 10).xindexes["x"]
    # comparison
    assert dv_index is (self.dv > 10).xindexes["x"]

    # use mda for bitshift test as it's type int
    shifted_index = (self.mda << 2).xindexes["x"]
    mda_index = self.mda.xindexes["x"]
    assert mda_index is shifted_index

def test_binary_op_join_setting(self) -> None:
    """Arithmetic follows the ``arithmetic_join`` option.

    Adds two arrays with partly overlapping ``x`` labels under an "outer"
    join and checks the result against a hand-written expectation and
    against the sum of the explicitly outer-aligned operands.
    """
    dim = "x"
    align_type: Final = "outer"
    coords_l, coords_r = [0, 1, 2], [1, 2, 3]
    missing_3 = xr.DataArray(coords_l, [(dim, coords_l)])
    missing_0 = xr.DataArray(coords_r, [(dim, coords_r)])

    with xr.set_options(arithmetic_join=align_type):
        actual = missing_0 + missing_3

    # labels 0 and 3 exist in only one operand, so they become NaN
    expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])])
    assert_equal(actual, expected)

    # Aligning explicitly first and adding with the default join must agree;
    # previously the aligned arrays were computed but never checked.
    missing_0_aligned, missing_3_aligned = xr.align(
        missing_0, missing_3, join=align_type
    )
    assert_equal(actual, missing_0_aligned + missing_3_aligned)

def test_combine_first(self) -> None:
    """``combine_first`` takes the caller's values and fills gaps from the other."""
    ar0 = DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])])
    ar1 = DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])])
    ar2 = DataArray([2], [("x", ["d"])])
    union_coords = [("x", ["a", "b", "c"]), ("y", [-1, 0, 1])]

    # ar0 wins where both arrays have data
    combined = ar0.combine_first(ar1)
    wanted = DataArray([[0, 0, np.nan], [0, 0, 1], [np.nan, 1, 1]], union_coords)
    assert_equal(combined, wanted)

    # ar1 wins where both arrays have data
    combined = ar1.combine_first(ar0)
    wanted = DataArray([[0, 0, np.nan], [0, 1, 1], [np.nan, 1, 1]], union_coords)
    assert_equal(combined, wanted)

    # the other array lacks ``y`` and is broadcast along it
    combined = ar0.combine_first(ar2)
    wanted = DataArray(
        [[0, 0], [0, 0], [2, 2]], [("x", ["a", "b", "d"]), ("y", [-1, 0])]
    )
    assert_equal(combined, wanted)

def test_sortby(self) -> None:
    """``sortby`` accepts DataArrays, lists of them, and coordinate names."""
    da = DataArray(
        [[1, 2], [3, 4], [5, 6]], [("x", ["c", "b", "a"]), ("y", [1, 0])]
    )
    # expected result when only ``x`` is sorted
    sorted1d = DataArray(
        [[5, 6], [3, 4], [1, 2]], [("x", ["a", "b", "c"]), ("y", [1, 0])]
    )
    # expected result when both ``x`` and ``y`` are sorted
    sorted2d = DataArray(
        [[6, 5], [4, 3], [2, 1]], [("x", ["a", "b", "c"]), ("y", [0, 1])]
    )

    # sort by the values of a 1d array along x
    dax = DataArray([100, 99, 98], [("x", ["c", "b", "a"])])
    assert_equal(da.sortby(dax), sorted1d)

    # test descending order sort
    assert_equal(da.sortby(dax, ascending=False), da)

    # test alignment (fills in nan for 'c')
    dax_short = DataArray([98, 97], [("x", ["b", "a"])])
    assert_equal(da.sortby(dax_short), sorted1d)

    # test multi-dim sort by 1D dataarray values
    dax = DataArray([100, 99, 98], [("x", ["c", "b", "a"])])
    day = DataArray([90, 80], [("y", [1, 0])])
    assert_equal(da.sortby([day, dax]), sorted2d)

    # sort by coordinate name(s)
    assert_equal(da.sortby("x"), sorted1d)
    assert_equal(da.sortby(["x", "y"]), sorted2d)

@requires_bottleneck
def test_rank(self) -> None:
    """Check ``rank`` for float, int and str data and with ``pct=True``."""
    # floats
    ar = DataArray([[3, 4, np.nan, 1]])
    expect_0 = DataArray([[1, 1, np.nan, 1]])
    expect_1 = DataArray([[2, 3, np.nan, 1]])
    assert_equal(ar.rank("dim_0"), expect_0)
    assert_equal(ar.rank("dim_1"), expect_1)

    # int
    x = DataArray([3, 2, 1])
    assert_equal(x.rank("dim_0"), x)

    # str
    y = DataArray(["c", "b", "a"])
    assert_equal(y.rank("dim_0"), x)

    # percentage ranks: ``y`` holds the expected result for ``x``
    x = DataArray([3.0, 1.0, np.nan, 2.0, 4.0], dims=("z",))
    y = DataArray([0.75, 0.25, np.nan, 0.5, 1.0], dims=("z",))
    # ``x`` was previously assigned but never ranked; compare it with ``y``
    assert_equal(x.rank("z", pct=True), y)
    # ranking the percentages themselves leaves them unchanged
    assert_equal(y.rank("z", pct=True), y)

@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize("use_datetime", [True, False])
@pytest.mark.filterwarnings("ignore:overflow encountered in multiply")
def test_polyfit(self, use_dask, use_datetime) -> None:
    """Fit quadratics with ``polyfit`` over numeric or datetime ``x``, with and without NaN."""
    if use_dask and not has_dask:
        pytest.skip("requires dask")

    def as_backend(arr):
        # Chunk along "d" for the dask variant; otherwise hand back the array itself.
        return arr.chunk({"d": 1}) if use_dask else arr

    time_coord = xr.DataArray(
        pd.date_range("1970-01-01", freq="D", periods=10), dims=("x",), name="x"
    )
    x = xr.core.missing.get_clean_interp_index(time_coord, "x")
    fit_coord = time_coord if use_datetime else x

    samples = np.stack(
        (10 + 1e-15 * x + 2e-28 * x**2, 30 + 2e-14 * x + 1e-29 * x**2)
    )
    da_raw = DataArray(
        samples,
        dims=("d", "x"),
        coords={"x": fit_coord, "d": [0, 1]},
    )
    da = as_backend(da_raw)

    out = da.polyfit("x", 2)
    # Coefficients used to build ``samples``, highest degree first.
    expected = DataArray(
        [[2e-28, 1e-15, 10], [1e-29, 2e-14, 30]],
        dims=("d", "degree"),
        coords={"degree": [2, 1, 0], "d": [0, 1]},
    ).T
    assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3)

    # Full output and deficient rank
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RankWarning)
        out = da.polyfit("x", 12, full=True)
        assert out.polyfit_residuals.isnull().all()

    # With NaN
    da_raw[0, 1:3] = np.nan
    da = as_backend(da_raw)
    out = da.polyfit("x", 2, skipna=True, cov=True)
    assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3)
    assert "polyfit_covariance" in out

    # Skipna + Full output
    out = da.polyfit("x", 2, skipna=True, full=True)
    assert_allclose(out.polyfit_coefficients, expected, rtol=1e-3)
    assert out.x_matrix_rank == 3
    np.testing.assert_almost_equal(out.polyfit_residuals, [0, 0])

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RankWarning)
        out = da.polyfit("x", 8, full=True)
        np.testing.assert_array_equal(out.polyfit_residuals.isnull(), [True, False])
@requires_dask
def test_polyfit_nd_dask(self) -> None:
    """Compare a chunked N-D ``polyfit`` with the same fit on the computed array."""
    series = DataArray(
        np.arange(120), dims="time", coords={"time": np.arange(120)}
    )
    chunked_series = series.chunk({"time": 20})
    # Add "lat" and "lon" dimensions of length 5 and chunk them as well.
    da = chunked_series.expand_dims(lat=5, lon=5).chunk({"lat": 2, "lon": 2})

    lazy_fit = da.polyfit("time", 1, skipna=False)
    eager_fit = da.compute().polyfit("time", 1, skipna=False)
    assert_allclose(lazy_fit, eager_fit)
def test_pad_constant(self) -> None:
    """Check default (constant) padding against ``np.pad`` and simple 1-D cases."""
    values = np.arange(3 * 4 * 5).reshape(3, 4, 5)
    padded = DataArray(values).pad(dim_0=(1, 3))
    reference = DataArray(
        np.pad(
            values.astype(np.float32),
            mode="constant",
            pad_width=((1, 3), (0, 0), (0, 0)),
            constant_values=np.nan,
        )
    )
    assert padded.shape == (7, 4, 5)
    assert_identical(padded, reference)

    ar = xr.DataArray([9], dims="x")

    # Without an explicit value the pads are NaN.
    assert_identical(ar.pad(x=1), xr.DataArray([np.nan, 9, np.nan], dims="x"))

    # A float constant on integer data is expected to show up as 1.
    assert_identical(
        ar.pad(x=1, constant_values=1.23456), xr.DataArray([1, 9, 1], dims="x")
    )

    # Explicitly asking for NaN pads on integer data is an error.
    with pytest.raises(ValueError, match="cannot convert float NaN to integer"):
        ar.pad(x=1, constant_values=np.nan)
def test_pad_coords(self) -> None:
    """Check that padding a dimension also pads its coordinate with NaN."""
    values = np.arange(3 * 4 * 5).reshape(3, 4, 5)
    ar = DataArray(
        values,
        [("x", np.arange(3)), ("y", np.arange(4)), ("z", np.arange(5))],
    )
    actual = ar.pad(x=(1, 3), constant_values=1)

    # Data is padded with the requested constant ...
    padded_values = np.pad(
        values,
        mode="constant",
        pad_width=((1, 3), (0, 0), (0, 0)),
        constant_values=1,
    )
    # ... while the "x" coordinate is padded with NaN.
    padded_x = np.pad(
        np.arange(3).astype(np.float32),
        mode="constant",
        pad_width=(1, 3),
        constant_values=np.nan,
    )
    expected = DataArray(
        padded_values,
        [("x", padded_x), ("y", np.arange(4)), ("z", np.arange(5))],
    )
    assert_identical(actual, expected)
@pytest.mark.parametrize("mode", ("minimum", "maximum", "mean", "median"))
@pytest.mark.parametrize(
    "stat_length", (None, 3, (1, 3), {"dim_0": (2, 1), "dim_2": (4, 2)})
)
def test_pad_stat_length(self, mode, stat_length) -> None:
    """Check statistic padding modes and ``stat_length`` forms against ``np.pad``."""
    values = np.arange(3 * 4 * 5).reshape(3, 4, 5)
    actual = DataArray(values).pad(
        dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length
    )

    # np.pad takes one entry per axis, so expand the per-dimension mapping.
    if isinstance(stat_length, dict):
        np_stat_length = (stat_length["dim_0"], (4, 4), stat_length["dim_2"])
    else:
        np_stat_length = stat_length

    expected = DataArray(
        np.pad(
            values,
            pad_width=((1, 3), (0, 0), (2, 2)),
            mode=mode,
            stat_length=np_stat_length,
        )
    )
    assert actual.shape == (7, 4, 9)
    assert_identical(actual, expected)
@pytest.mark.parametrize(
    "end_values", (None, 3, (3, 5), {"dim_0": (2, 1), "dim_2": (4, 2)})
)
def test_pad_linear_ramp(self, end_values) -> None:
    """Check ``mode="linear_ramp"`` and ``end_values`` forms against ``np.pad``."""
    values = np.arange(3 * 4 * 5).reshape(3, 4, 5)
    actual = DataArray(values).pad(
        dim_0=(1, 3), dim_2=(2, 2), mode="linear_ramp", end_values=end_values
    )

    # Translate the xarray argument into the per-axis form np.pad expects.
    if end_values is None:
        np_end_values = 0
    elif isinstance(end_values, dict):
        np_end_values = (end_values["dim_0"], (4, 4), end_values["dim_2"])
    else:
        np_end_values = end_values

    expected = DataArray(
        np.pad(
            values,
            pad_width=((1, 3), (0, 0), (2, 2)),
            mode="linear_ramp",
            end_values=np_end_values,
        )
    )
    assert actual.shape == (7, 4, 9)
    assert_identical(actual, expected)
@pytest.mark.parametrize("mode", ("reflect", "symmetric"))
@pytest.mark.parametrize("reflect_type", (None, "even", "odd"))
def test_pad_reflect(self, mode, reflect_type) -> None:
    """Check reflect/symmetric padding and ``reflect_type`` against ``np.pad``."""
    values = np.arange(3 * 4 * 5).reshape(3, 4, 5)
    actual = DataArray(values).pad(
        dim_0=(1, 3), dim_2=(2, 2), mode=mode, reflect_type=reflect_type
    )

    # numpy does not support reflect_type=None
    optional_kwargs = {} if reflect_type is None else {"reflect_type": reflect_type}
    expected = DataArray(
        np.pad(
            values,
            pad_width=((1, 3), (0, 0), (2, 2)),
            mode=mode,
            **optional_kwargs,
        )
    )

    assert actual.shape == (7, 4, 9)
    assert_identical(actual, expected)
@pytest.mark.parametrize(
    ["keep_attrs", "attrs", "expected"],
    [
        pytest.param(None, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="default"),
        pytest.param(False, {"a": 1, "b": 2}, {}, id="False"),
        pytest.param(True, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="True"),
    ],
)
def test_pad_keep_attrs(self, keep_attrs, attrs, expected) -> None:
    """Check attrs handling of ``pad`` via the global option and the keyword.

    ``expected`` is the attrs dict that both the padded array and its
    padded coordinate ``c`` should carry.
    """
    pad_kwargs = {"mode": "constant", "constant_values": 0}
    arr = xr.DataArray(
        [1, 2], dims="x", coords={"c": ("x", [-1, 1], attrs)}, attrs=attrs
    )
    expected_da = xr.DataArray(
        [0, 1, 2, 0],
        dims="x",
        coords={"c": ("x", [np.nan, -1, 1, np.nan], expected)},
        attrs=expected,
    )

    # The global option does not accept None; it is spelled "default" there.
    option_value = keep_attrs if keep_attrs is not None else "default"
    with set_options(keep_attrs=option_value):
        via_option = arr.pad({"x": (1, 1)}, **pad_kwargs)
        xr.testing.assert_identical(via_option, expected_da)

    via_keyword = arr.pad({"x": (1, 1)}, keep_attrs=keep_attrs, **pad_kwargs)
    xr.testing.assert_identical(via_keyword, expected_da)
@pytest.mark.parametrize("parser", ["pandas", "python"])
@pytest.mark.parametrize(
    "engine", ["python", None, pytest.param("numexpr", marks=[requires_numexpr])]
)
@pytest.mark.parametrize(
    "backend", ["numpy", pytest.param("dask", marks=[requires_dask])]
)
def test_query(
    self, backend, engine: QueryEngineOptions, parser: QueryParserOptions
) -> None:
    """Test querying a DataArray through expressions on its coordinates."""
    # setup test data (seeded, so the random inputs are reproducible)
    np.random.seed(42)
    a = np.arange(0, 10, 1)
    b = np.random.randint(0, 100, size=10)
    c = np.linspace(0, 1, 20)
    d = np.random.choice(["foo", "bar", "baz"], size=30, replace=True).astype(
        object
    )
    aa = DataArray(data=a, dims=["x"], name="a", coords={"a2": ("x", a)})
    bb = DataArray(data=b, dims=["x"], name="b", coords={"b2": ("x", b)})
    cc = DataArray(data=c, dims=["y"], name="c", coords={"c2": ("y", c)})
    dd = DataArray(data=d, dims=["z"], name="d", coords={"d2": ("z", d)})

    if backend == "dask":
        import dask.array as da

        # Swap in chunked data; the coordinates keep their numpy values.
        aa = aa.copy(data=da.from_array(a, chunks=3))
        bb = bb.copy(data=da.from_array(b, chunks=3))
        cc = cc.copy(data=da.from_array(c, chunks=7))
        dd = dd.copy(data=da.from_array(d, chunks=12))

    query_opts = {"engine": engine, "parser": parser}

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = aa.query(x="a2 > 5", **query_opts)
    assert_identical(aa.isel(x=(a > 5)), actual)

    # query single dim, single variable, via dict
    with raise_if_dask_computes():
        actual = aa.query(dict(x="a2 > 5"), **query_opts)
    assert_identical(aa.isel(dict(x=(a > 5))), actual)

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = bb.query(x="b2 > 50", **query_opts)
    assert_identical(bb.isel(x=(b > 50)), actual)

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = cc.query(y="c2 < .5", **query_opts)
    assert_identical(cc.isel(y=(c < 0.5)), actual)

    # query single dim, single string variable
    if parser == "pandas":
        # N.B., this query currently only works with the pandas parser
        # xref https://github.com/pandas-dev/pandas/issues/40436
        with raise_if_dask_computes():
            actual = dd.query(z='d2 == "bar"', **query_opts)
        assert_identical(dd.isel(z=(d == "bar")), actual)

    # test error handling
    with pytest.raises(ValueError):
        aa.query("a > 5")  # type: ignore[arg-type]  # must be dict or kwargs
    with pytest.raises(ValueError):
        aa.query(x=(a > 5))  # must be query string
    with pytest.raises(UndefinedVariableError):
        aa.query(x="spam > 50")  # name not present
@requires_scipy
@pytest.mark.parametrize("use_dask", [True, False])
def test_curvefit(self, use_dask) -> None:
    """Fit exponential decays with ``curvefit``, then a ``np.power`` fit reducing "x"."""
    if use_dask and not has_dask:
        pytest.skip("requires dask")

    # Parameter names and the default are left untouched on purpose: the
    # assertions below refer to "n0" and "tau" by name.
    def exp_decay(t, n0, tau=1):
        return n0 * np.exp(-t / tau)

    t = np.arange(0, 5, 0.5)
    # Columns: two decays with known parameters and one all-NaN series.
    columns = [exp_decay(t, 3, 3), exp_decay(t, 5, 4), np.nan * t]
    da = DataArray(
        np.stack(columns, axis=-1),
        dims=("t", "x"),
        coords={"t": t, "x": [0, 1, 2]},
    )
    da[0, 0] = np.nan

    expected = DataArray(
        [[3, 3], [5, 4], [np.nan, np.nan]],
        dims=("x", "param"),
        coords={"x": [0, 1, 2], "param": ["n0", "tau"]},
    )

    if use_dask:
        da = da.chunk({"x": 1})

    decay_fit = da.curvefit(
        coords=[da.t], func=exp_decay, p0={"n0": 4}, bounds={"tau": (2, 6)}
    )
    assert_allclose(decay_fit.curvefit_coefficients, expected, rtol=1e-3)

    # Explicit parameter names, reducing over "x".
    da = da.compute()
    power_fit = da.curvefit(
        coords="t", func=np.power, reduce_dims="x", param_names=["a"]
    )
    assert "a" in power_fit.param
    assert "x" not in power_fit.dims
def test_curvefit_helpers(self) -> None:
    """Exercise the private curvefit helpers in ``xr.core.dataset``."""

    # Signature kept as is: the helper output is compared to these names.
    def exp_decay(t, n0, tau=1):
        return n0 * np.exp(-t / tau)

    get_func_args = xr.core.dataset._get_func_args
    init_params = xr.core.dataset._initialize_curvefit_params
    unbounded = (-np.inf, np.inf)

    params, func_args = get_func_args(exp_decay, [])
    assert params == ["n0", "tau"]

    # Scalar bounds, positional call
    param_defaults, bounds_defaults = init_params(
        params, {"n0": 4}, {"tau": [5, np.inf]}, func_args
    )
    assert param_defaults == {"n0": 4, "tau": 6}
    assert bounds_defaults == {"n0": unbounded, "tau": (5, np.inf)}

    # DataArray as bound, keyword call
    param_defaults, bounds_defaults = init_params(
        params=params,
        p0={"n0": 4},
        bounds={"tau": [DataArray([3, 4], coords=[("x", [1, 2])]), np.inf]},
        func_args=func_args,
    )
    expected_tau_guess = xr.DataArray([4, 5], coords=[("x", [1, 2])])
    expected_tau_lower = DataArray([3, 4], coords=[("x", [1, 2])])
    assert param_defaults["n0"] == 4
    assert (param_defaults["tau"] == expected_tau_guess).all()
    assert bounds_defaults["n0"] == unbounded
    assert (bounds_defaults["tau"][0] == expected_tau_lower).all()
    assert bounds_defaults["tau"][1] == np.inf

    # Explicit names are returned unchanged for np.power ...
    param_names = ["a"]
    params, func_args = get_func_args(np.power, param_names)
    assert params == param_names
    # ... and omitting them for np.power is an error.
    with pytest.raises(ValueError):
        get_func_args(np.power, [])
@requires_scipy
@pytest.mark.parametrize("use_dask", [True, False])
def test_curvefit_multidimensional_guess(self, use_dask: bool) -> None:
    """Check that ``p0`` entries may be DataArrays varying along a data dimension."""
    if use_dask and not has_dask:
        pytest.skip("requires dask")

    # Parameter names are part of the test: p0 and the expected "param"
    # coordinate refer to them.
    def sine(t, a, f, p):
        return a * np.sin(2 * np.pi * (f * t + p))

    t = np.arange(0, 2, 0.02)
    # The same curve for both values of x.
    da = DataArray(
        np.stack([sine(t, 1.0, 2, 0), sine(t, 1.0, 2, 0)]),
        coords={"x": [0, 1], "t": t},
    )

    # Fitting to a sine curve produces a different result depending on the
    # initial guess: either the phase is zero and the amplitude is positive
    # or the phase is 0.5 * 2pi and the amplitude is negative.
    expected = DataArray(
        [[1, 2, 0], [-1, 2, 0.5]],
        coords={"x": [0, 1], "param": ["a", "f", "p"]},
    )

    # Different initial guesses for different values of x
    amplitude_guess = DataArray([1, -1], coords=[da.x])
    phase_guess = DataArray([0, 0.5], coords=[da.x])

    if use_dask:
        da = da.chunk({"x": 1})

    fit = da.curvefit(
        coords=[da.t],
        func=sine,
        p0={"a": amplitude_guess, "p": phase_guess, "f": 2},
    )
    assert_allclose(fit.curvefit_coefficients, expected)

    # initial guess with additional dimensions should be an error
    stray_dim_guess = DataArray([1, 2], coords={"foo": [1, 2]})
    with pytest.raises(
        ValueError,
        match=r"Initial guess for 'a' has unexpected dimensions .* should only have "
        "dimensions that are in data dimensions",
    ):
        da.curvefit(
            coords=[da.t],
            func=sine,
            p0={"a": stray_dim_guess},
        )
@requires_scipy
@pytest.mark.parametrize("use_dask", [True, False])
def test_curvefit_multidimensional_bounds(self, use_dask: bool) -> None:
    """Check that ``bounds`` entries may be DataArrays varying along a data dimension."""
    if use_dask and not has_dask:
        pytest.skip("requires dask")

    # Parameter names are part of the test: p0, bounds and the expected
    # "param" coordinate refer to them.
    def sine(t, a, f, p):
        return a * np.sin(2 * np.pi * (f * t + p))

    t = np.arange(0, 2, 0.02)
    da = xr.DataArray(
        np.stack([sine(t, 1.0, 2, 0), sine(t, 1.0, 2, 0)]),
        coords={"x": [0, 1], "t": t},
    )

    # Fit a sine with different bounds: positive amplitude should result in a fit with
    # phase 0 and negative amplitude should result in phase 0.5 * 2pi.
    expected = DataArray(
        [[1, 2, 0], [-1, 2, 0.5]],
        coords={"x": [0, 1], "param": ["a", "f", "p"]},
    )

    if use_dask:
        da = da.chunk({"x": 1})

    # Array lower bound with array upper bound
    amplitude_lower = DataArray([0, -2], coords=[da.x])
    amplitude_upper = DataArray([2, 0], coords=[da.x])
    fit = da.curvefit(
        coords=[da.t],
        func=sine,
        p0={"f": 2, "p": 0.25},  # this guess is needed to get the expected result
        bounds={"a": (amplitude_lower, amplitude_upper)},
    )
    assert_allclose(fit.curvefit_coefficients, expected)

    # Scalar lower bound with array upper bound
    fit2 = da.curvefit(
        coords=[da.t],
        func=sine,
        p0={"f": 2, "p": 0.25},  # this guess is needed to get the expected result
        bounds={"a": (-2, DataArray([2, 0], coords=[da.x]))},
    )
    assert_allclose(fit2.curvefit_coefficients, expected)

    # bounds with additional dimensions should be an error
    stray_dim_upper = DataArray([1], coords={"foo": [1]})
    with pytest.raises(
        ValueError,
        match=r"Upper bound for 'a' has unexpected dimensions .* should only have "
        "dimensions that are in data dimensions",
    ):
        da.curvefit(
            coords=[da.t],
            func=sine,
            bounds={"a": (0, stray_dim_upper)},
        )
@requires_scipy
@pytest.mark.parametrize("use_dask", [True, False])
def test_curvefit_ignore_errors(self, use_dask: bool) -> None:
    """Check that ``errors="ignore"`` yields NaN where the optimization fails."""
    if use_dask and not has_dask:
        pytest.skip("requires dask")

    # nonsense function to make the optimization fail
    def line(x, a, b):
        return 0 if a > 10 else a * x + b

    da = DataArray(
        [[1, 3, 5], [0, 20, 40]],
        coords={"i": [1, 2], "x": [0.0, 1.0, 2.0]},
    )
    if use_dask:
        da = da.chunk({"i": 1})

    # Row i=1 is expected to fit (a=2, b=1); row i=2 is expected to fail.
    expected = DataArray(
        [[2, 1], [np.nan, np.nan]], coords={"i": [1, 2], "param": ["a", "b"]}
    )

    # limit maximum number of calls so the optimization fails
    few_calls = {"maxfev": 5}

    with pytest.raises(RuntimeError, match="calls to function has reached maxfev"):
        # have to compute to raise the error
        da.curvefit(coords="x", func=line, kwargs=few_calls).compute()

    fit = da.curvefit(
        coords="x", func=line, errors="ignore", kwargs=few_calls
    ).compute()

    assert_allclose(fit.curvefit_coefficients, expected)
class TestReduce:
    """Base class giving the reduction test classes a shared ``attrs`` dict."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Store the attributes that the tests attach to their input arrays."""
        self.attrs = dict(attr1="value1", attr2=2929)
@pytest.mark.parametrize(
["x", "minindex", "maxindex", "nanindex"],
[
pytest.param(np.array([0, 1, 2, 0, -2, -4, 2]), 5, 2, None, id="int"),
pytest.param(
np.array([0.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0]), 5, 2, None, id="float"
),
pytest.param(
np.array([1.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0]), 5, 2, 1, id="nan"
),
pytest.param(
np.array([1.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0]).astype("object"),
5,
2,
1,
marks=pytest.mark.filterwarnings(
"ignore:invalid value encountered in reduce:RuntimeWarning"
),
id="obj",
),
pytest.param(np.array([np.nan, np.nan]), np.nan, np.nan, 0, id="allnan"),
pytest.param(
np.array(
["2015-12-31", "2020-01-02", "2020-01-01", "2016-01-01"],
dtype="datetime64[ns]",
),
0,
1,
None,
id="datetime",
),
],
)
class TestReduce1D(TestReduce):
def test_min(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``min`` with and without attrs, and with ``skipna=False``."""
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )

    # For all-NaN input any position holds the answer; use the first.
    position = 0 if np.isnan(minindex) else minindex
    with_attrs = ar.isel(x=position, drop=True)

    assert_identical(ar.min(keep_attrs=True), with_attrs)

    actual_default = ar.min()
    without_attrs = with_attrs.copy()
    without_attrs.attrs = {}
    assert_identical(actual_default, without_attrs)

    actual_noskip = ar.min(skipna=False)
    if nanindex is None or ar.dtype.kind == "O":
        expected_noskip = without_attrs
    else:
        # Without skipping, the value at the first NaN position is expected.
        expected_noskip = ar.isel(x=nanindex, drop=True)
        expected_noskip.attrs = {}
    assert_identical(actual_noskip, expected_noskip)
def test_max(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``max`` with and without attrs, and with ``skipna=False``.

    ``maxindex`` is the position of the maximum, or NaN when the input is
    all-NaN; ``nanindex`` is the position of the first NaN, if any.
    """
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )

    # Guard on ``maxindex`` itself. The original tested ``minindex`` here (a
    # copy-paste slip from ``test_min``), which only worked because the two
    # are NaN together in the current parametrization.
    if np.isnan(maxindex):
        maxindex = 0

    expected0 = ar.isel(x=maxindex, drop=True)
    result0 = ar.max(keep_attrs=True)
    assert_identical(result0, expected0)

    # By default the attrs are dropped.
    result1 = ar.max()
    expected1 = expected0.copy()
    expected1.attrs = {}
    assert_identical(result1, expected1)

    # Without skipping, the value at the first NaN position is expected
    # (except for object arrays, which keep the skipna result).
    result2 = ar.max(skipna=False)
    if nanindex is not None and ar.dtype.kind != "O":
        expected2 = ar.isel(x=nanindex, drop=True)
        expected2.attrs = {}
    else:
        expected2 = expected1

    assert_identical(result2, expected2)
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmin(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``argmin`` without a dim, with attrs, and with ``skipna=False``."""
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    # Integer positions 0..n-1, used to build the expected results.
    positions = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])

    # All-NaN input: argmin is expected to raise and nothing else is checked.
    if np.isnan(minindex):
        with pytest.raises(ValueError):
            ar.argmin()
        return

    plain = positions[minindex]
    assert_identical(ar.argmin(), plain)

    actual_attrs = ar.argmin(keep_attrs=True)
    with_attrs = plain.copy()
    with_attrs.attrs = self.attrs
    assert_identical(actual_attrs, with_attrs)

    actual_noskip = ar.argmin(skipna=False)
    if nanindex is None or ar.dtype.kind == "O":
        expected_noskip = plain
    else:
        expected_noskip = positions.isel(x=nanindex, drop=True)
        expected_noskip.attrs = {}
    assert_identical(actual_noskip, expected_noskip)
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmax(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``argmax`` without a dim, with attrs, and with ``skipna=False``."""
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    # Integer positions 0..n-1, used to build the expected results.
    positions = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])

    # All-NaN input: argmax is expected to raise and nothing else is checked.
    if np.isnan(maxindex):
        with pytest.raises(ValueError):
            ar.argmax()
        return

    plain = positions[maxindex]
    assert_identical(ar.argmax(), plain)

    actual_attrs = ar.argmax(keep_attrs=True)
    with_attrs = plain.copy()
    with_attrs.attrs = self.attrs
    assert_identical(actual_attrs, with_attrs)

    actual_noskip = ar.argmax(skipna=False)
    if nanindex is None or ar.dtype.kind == "O":
        expected_noskip = plain
    else:
        expected_noskip = positions.isel(x=nanindex, drop=True)
        expected_noskip.attrs = {}
    assert_identical(actual_noskip, expected_noskip)
@pytest.mark.parametrize(
    "use_dask",
    [
        pytest.param(
            True, marks=pytest.mark.skipif(not has_dask, reason="no dask")
        ),
        False,
    ],
)
def test_idxmin(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
    use_dask: bool,
) -> None:
    """Check 1-D ``idxmin``: errors, attrs, ``skipna`` and ``fill_value`` variants."""
    ar0_raw = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    ar0 = ar0_raw.chunk() if use_dask else ar0_raw

    with pytest.raises(
        KeyError,
        match=r"'spam' not found in array dimensions",
    ):
        ar0.idxmin(dim="spam")

    # Scalar Dataarray
    with pytest.raises(ValueError):
        xr.DataArray(5).idxmin()

    coordarr0 = xr.DataArray(ar0.coords["x"].data, dims=["x"])
    coordarr1 = coordarr0.copy()

    hasna = np.isnan(minindex)
    if hasna:
        # All-NaN input: select position 0 and make every label 1, so that
        # multiplying by the fill value yields the fill value itself.
        minindex = 0
        coordarr1[...] = 1

    def expected_for(fill_value):
        # Label at ``minindex``, or ``fill_value`` when the input is all-NaN.
        factor = fill_value if hasna else 1
        return (coordarr1 * factor).isel(x=minindex, drop=True)

    expected0 = expected_for(np.nan).astype("float")
    expected0.name = "x"

    # Default fill value (NaN)
    assert_identical(ar0.idxmin(), expected0)

    # Manually specify NaN fill_value
    assert_identical(ar0.idxmin(fill_value=np.nan), expected0)

    # keep_attrs
    result2 = ar0.idxmin(keep_attrs=True)
    expected2 = expected0.copy()
    expected2.attrs = self.attrs
    assert_identical(result2, expected2)

    # skipna=False
    if nanindex is None or ar0.dtype.kind == "O":
        expected3 = expected0.copy()
    else:
        expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float")
        expected3.name = "x"
        expected3.attrs = {}

    assert_identical(ar0.idxmin(skipna=False), expected3)

    # fill_value should be ignored with skipna=False
    assert_identical(ar0.idxmin(skipna=False, fill_value=-100j), expected3)

    # Float fill_value
    expected5 = expected_for(-1.1)
    expected5.name = "x"
    assert_identical(ar0.idxmin(fill_value=-1.1), expected5)

    # Integer fill_value
    expected6 = expected_for(-1)
    expected6.name = "x"
    assert_identical(ar0.idxmin(fill_value=-1), expected6)

    # Complex fill_value
    expected7 = expected_for(-1j)
    expected7.name = "x"
    assert_identical(ar0.idxmin(fill_value=-1j), expected7)
@pytest.mark.parametrize("use_dask", [True, False])
def test_idxmax(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
    use_dask: bool,
) -> None:
    """Check 1-D ``idxmax``: errors, attrs, ``skipna`` and ``fill_value`` variants."""
    if use_dask:
        if not has_dask:
            pytest.skip("requires dask")
        if x.dtype.kind == "M":
            pytest.xfail(
                "dask operation 'argmax' breaks when dtype is datetime64 (M)"
            )

    ar0_raw = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    ar0 = ar0_raw.chunk({}) if use_dask else ar0_raw

    with pytest.raises(
        KeyError,
        match=r"'spam' not found in array dimensions",
    ):
        ar0.idxmax(dim="spam")

    # Scalar Dataarray
    with pytest.raises(ValueError):
        xr.DataArray(5).idxmax()

    coordarr0 = xr.DataArray(ar0.coords["x"].data, dims=["x"])
    coordarr1 = coordarr0.copy()

    hasna = np.isnan(maxindex)
    if hasna:
        # All-NaN input: select position 0 and make every label 1, so that
        # multiplying by the fill value yields the fill value itself.
        maxindex = 0
        coordarr1[...] = 1

    def expected_for(fill_value):
        # Label at ``maxindex``, or ``fill_value`` when the input is all-NaN.
        factor = fill_value if hasna else 1
        return (coordarr1 * factor).isel(x=maxindex, drop=True)

    expected0 = expected_for(np.nan).astype("float")
    expected0.name = "x"

    # Default fill value (NaN)
    assert_identical(ar0.idxmax(), expected0)

    # Manually specify NaN fill_value
    assert_identical(ar0.idxmax(fill_value=np.nan), expected0)

    # keep_attrs
    result2 = ar0.idxmax(keep_attrs=True)
    expected2 = expected0.copy()
    expected2.attrs = self.attrs
    assert_identical(result2, expected2)

    # skipna=False
    if nanindex is None or ar0.dtype.kind == "O":
        expected3 = expected0.copy()
    else:
        expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float")
        expected3.name = "x"
        expected3.attrs = {}

    assert_identical(ar0.idxmax(skipna=False), expected3)

    # fill_value should be ignored with skipna=False
    assert_identical(ar0.idxmax(skipna=False, fill_value=-100j), expected3)

    # Float fill_value
    expected5 = expected_for(-1.1)
    expected5.name = "x"
    assert_identical(ar0.idxmax(fill_value=-1.1), expected5)

    # Integer fill_value
    expected6 = expected_for(-1)
    expected6.name = "x"
    assert_identical(ar0.idxmax(fill_value=-1), expected6)

    # Complex fill_value
    expected7 = expected_for(-1j)
    expected7.name = "x"
    assert_identical(ar0.idxmax(fill_value=-1j), expected7)
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmin_dim(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``argmin(...)``, whose result is indexed by dimension name."""
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    positions = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])

    def check(result, expected_by_dim):
        # Compare the result entry for every expected dimension name.
        for dim_name, expected_da in expected_by_dim.items():
            assert_identical(result[dim_name], expected_da)

    # All-NaN input: argmin is expected to raise and nothing else is checked.
    if np.isnan(minindex):
        with pytest.raises(ValueError):
            ar.argmin()
        return

    plain = {"x": positions[minindex]}
    check(ar.argmin(...), plain)

    actual_attrs = ar.argmin(..., keep_attrs=True)
    with_attrs = deepcopy(plain)
    for entry in with_attrs.values():
        entry.attrs = self.attrs
    check(actual_attrs, with_attrs)

    actual_noskip = ar.argmin(..., skipna=False)
    if nanindex is None or ar.dtype.kind == "O":
        expected_noskip = plain
    else:
        expected_noskip = {"x": positions.isel(x=nanindex, drop=True)}
        expected_noskip["x"].attrs = {}
    check(actual_noskip, expected_noskip)
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmax_dim(
    self,
    x: np.ndarray,
    minindex: int | float,
    maxindex: int | float,
    nanindex: int | None,
) -> None:
    """Check 1-D ``argmax(...)``, whose result is indexed by dimension name."""
    ar = xr.DataArray(
        x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    )
    positions = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])

    def check(result, expected_by_dim):
        # Compare the result entry for every expected dimension name.
        for dim_name, expected_da in expected_by_dim.items():
            assert_identical(result[dim_name], expected_da)

    # All-NaN input: argmax is expected to raise and nothing else is checked.
    if np.isnan(maxindex):
        with pytest.raises(ValueError):
            ar.argmax()
        return

    plain = {"x": positions[maxindex]}
    check(ar.argmax(...), plain)

    actual_attrs = ar.argmax(..., keep_attrs=True)
    with_attrs = deepcopy(plain)
    for entry in with_attrs.values():
        entry.attrs = self.attrs
    check(actual_attrs, with_attrs)

    actual_noskip = ar.argmax(..., skipna=False)
    if nanindex is None or ar.dtype.kind == "O":
        expected_noskip = plain
    else:
        expected_noskip = {"x": positions.isel(x=nanindex, drop=True)}
        expected_noskip["x"].attrs = {}
    check(actual_noskip, expected_noskip)
@pytest.mark.parametrize(
["x", "minindex", "maxindex", "nanindex"],
[
pytest.param(
np.array(
[
[0, 1, 2, 0, -2, -4, 2],
[1, 1, 1, 1, 1, 1, 1],
[0, 0, -10, 5, 20, 0, 0],
]
),
[5, 0, 2],
[2, 0, 4],
[None, None, None],
id="int",
),
pytest.param(
np.array(
[
[2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0],
[-4.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0],
[np.nan] * 7,
]
),
[5, 0, np.nan],
[0, 2, np.nan],
[None, 1, 0],
id="nan",
),
pytest.param(
np.array(
[
[2.0, 1.0, 2.0, 0.0, -2.0, -4.0, 2.0],
[-4.0, np.nan, 2.0, np.nan, -2.0, -4.0, 2.0],
[np.nan] * 7,
]
).astype("object"),
[5, 0, np.nan],
[0, 2, np.nan],
[None, 1, 0],
marks=pytest.mark.filterwarnings(
"ignore:invalid value encountered in reduce:RuntimeWarning:"
),
id="obj",
),
pytest.param(
np.array(
[
["2015-12-31", "2020-01-02", "2020-01-01", "2016-01-01"],
["2020-01-02", "2020-01-02", "2020-01-02", "2020-01-02"],
["1900-01-01", "1-02-03", "1900-01-02", "1-02-03"],
],
dtype="datetime64[ns]",
),
[0, 0, 1],
[1, 0, 2],
[None, None, None],
id="datetime",
),
],
)
class TestReduce2D(TestReduce):
def test_min(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check 2-D ``min`` along "x" (by name and by axis) and with ``skipna=False``.

    ``minindex`` holds, per row, the position of the minimum (NaN for an
    all-NaN row); ``nanindex`` holds the position of the first NaN, if any.
    """
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
        attrs=self.attrs,
    )

    # For all-NaN rows any position holds the answer; use the first.
    minindex = [x if not np.isnan(x) else 0 for x in minindex]
    expected0list = [
        ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex)
    ]
    expected0 = xr.concat(expected0list, dim="y")

    result0 = ar.min(dim="x", keep_attrs=True)
    assert_identical(result0, expected0)

    # Copy before clearing attrs so ``expected0`` is not mutated through an
    # alias; this matches the sibling ``test_max``.
    result1 = ar.min(dim="x")
    expected1 = expected0.copy()
    expected1.attrs = {}
    assert_identical(result1, expected1)

    result2 = ar.min(axis=1)
    assert_identical(result2, expected1)

    # Without skipping, rows containing NaN yield the value at the first NaN
    # position (object arrays keep the skipna positions).
    minindex = [
        x if y is None or ar.dtype.kind == "O" else y
        for x, y in zip(minindex, nanindex, strict=True)
    ]
    expected2list = [
        ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex)
    ]
    expected2 = xr.concat(expected2list, dim="y")
    expected2.attrs = {}

    result3 = ar.min(dim="x", skipna=False)

    assert_identical(result3, expected2)
def test_max(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check 2-D ``max`` along "x" (by name and by axis) and with ``skipna=False``."""
    n_rows, n_cols = x.shape[0], x.shape[1]
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(n_cols) * 4, "y": 1 - np.arange(n_rows)},
        attrs=self.attrs,
    )

    def pick(column_per_row):
        # Select one column per row and stack the picks back along "y".
        picks = [
            ar.isel(y=row).isel(x=col, drop=True)
            for row, col in enumerate(column_per_row)
        ]
        return xr.concat(picks, dim="y")

    # For all-NaN rows any position holds the answer; use the first.
    skipna_cols = [0 if np.isnan(idx) else idx for idx in maxindex]
    with_attrs = pick(skipna_cols)

    assert_identical(ar.max(dim="x", keep_attrs=True), with_attrs)

    actual_by_dim = ar.max(dim="x")
    without_attrs = with_attrs.copy()
    without_attrs.attrs = {}
    assert_identical(actual_by_dim, without_attrs)

    assert_identical(ar.max(axis=1), without_attrs)

    # Without skipping, rows containing NaN use the first NaN position
    # (object arrays keep the skipna positions).
    noskip_cols = [
        idx if nan_idx is None or ar.dtype.kind == "O" else nan_idx
        for idx, nan_idx in zip(skipna_cols, nanindex, strict=True)
    ]
    expected_noskip = pick(noskip_cols)
    expected_noskip.attrs = {}

    assert_identical(ar.max(dim="x", skipna=False), expected_noskip)
def test_argmin(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check 2-D ``argmin`` along "x" (by name and by axis), attrs and ``skipna=False``."""
    n_rows, n_cols = x.shape[0], x.shape[1]
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(n_cols) * 4, "y": 1 - np.arange(n_rows)},
        attrs=self.attrs,
    )
    # Every row holds the integer positions 0..n_cols-1.
    position_grid = np.tile(np.arange(n_cols, dtype=np.intp), [n_rows, 1])
    positions = xr.DataArray(position_grid, dims=ar.dims, coords=ar.coords)

    def pick(column_per_row):
        # Select one position per row and stack the picks back along "y".
        picks = [
            positions.isel(y=row).isel(x=col, drop=True)
            for row, col in enumerate(column_per_row)
        ]
        return xr.concat(picks, dim="y")

    # Any all-NaN row: argmin is expected to raise and nothing else is checked.
    if np.isnan(minindex).any():
        with pytest.raises(ValueError):
            ar.argmin(dim="x")
        return

    plain = pick(minindex)

    assert_identical(ar.argmin(dim="x"), plain)
    assert_identical(ar.argmin(axis=1), plain)

    actual_attrs = ar.argmin(dim="x", keep_attrs=True)
    with_attrs = plain.copy()
    with_attrs.attrs = self.attrs
    assert_identical(actual_attrs, with_attrs)

    # Without skipping, rows containing NaN use the first NaN position
    # (object arrays keep the skipna positions).
    noskip_cols = [
        idx if nan_idx is None or ar.dtype.kind == "O" else nan_idx
        for idx, nan_idx in zip(minindex, nanindex, strict=True)
    ]
    expected_noskip = pick(noskip_cols)
    expected_noskip.attrs = {}

    assert_identical(ar.argmin(dim="x", skipna=False), expected_noskip)
def test_argmax(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check 2-D ``argmax`` along "x" (by name and by axis), attrs and ``skipna=False``."""
    n_rows, n_cols = x.shape[0], x.shape[1]
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(n_cols) * 4, "y": 1 - np.arange(n_rows)},
        attrs=self.attrs,
    )
    # Every row holds the integer positions 0..n_cols-1.
    position_grid = np.tile(np.arange(n_cols, dtype=np.intp), [n_rows, 1])
    positions = xr.DataArray(position_grid, dims=ar.dims, coords=ar.coords)

    def pick(column_per_row):
        # Select one position per row and stack the picks back along "y".
        picks = [
            positions.isel(y=row).isel(x=col, drop=True)
            for row, col in enumerate(column_per_row)
        ]
        return xr.concat(picks, dim="y")

    # Any all-NaN row: argmax is expected to raise and nothing else is checked.
    if np.isnan(maxindex).any():
        with pytest.raises(ValueError):
            ar.argmax(dim="x")
        return

    plain = pick(maxindex)

    assert_identical(ar.argmax(dim="x"), plain)
    assert_identical(ar.argmax(axis=1), plain)

    actual_attrs = ar.argmax(dim="x", keep_attrs=True)
    with_attrs = plain.copy()
    with_attrs.attrs = self.attrs
    assert_identical(actual_attrs, with_attrs)

    # Without skipping, rows containing NaN use the first NaN position
    # (object arrays keep the skipna positions).
    noskip_cols = [
        idx if nan_idx is None or ar.dtype.kind == "O" else nan_idx
        for idx, nan_idx in zip(maxindex, nanindex, strict=True)
    ]
    expected_noskip = pick(noskip_cols)
    expected_noskip.attrs = {}

    assert_identical(ar.argmax(dim="x", skipna=False), expected_noskip)
@pytest.mark.parametrize(
    "use_dask", [pytest.param(True, id="dask"), pytest.param(False, id="nodask")]
)
def test_idxmin(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
    use_dask: bool,
) -> None:
    """Check ``idxmin`` along ``x`` of a 2-D array, eagerly and with dask.

    ``minindex`` holds the expected position per row (NaN where the fill
    value is expected instead); ``nanindex`` holds the position expected with
    ``skipna=False`` (``None`` means "same as ``minindex``"). ``maxindex`` is
    accepted but not used here.
    """
    if use_dask and not has_dask:
        pytest.skip("requires dask")
    if use_dask and x.dtype.kind == "M":
        pytest.xfail("dask operation 'argmin' breaks when dtype is datetime64 (M)")

    # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices.
    max_computes = 1 if x.dtype.kind == "O" else 0

    ar0_raw = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
        attrs=self.attrs,
    )
    ar0 = ar0_raw.chunk({}) if use_dask else ar0_raw

    # No dimension specified
    with pytest.raises(ValueError):
        ar0.idxmin()

    # dim doesn't exist
    with pytest.raises(KeyError):
        ar0.idxmin(dim="Y")

    # dim exists but has no coordinate (mirrors the check in test_idxmax)
    ar1 = ar0.copy()
    del ar1.coords["y"]
    with pytest.raises(KeyError):
        ar1.idxmin(dim="y")

    coordarr0 = xr.DataArray(
        np.tile(ar0.coords["x"], [x.shape[0], 1]), dims=ar0.dims, coords=ar0.coords
    )

    hasna = [np.isnan(idx) for idx in minindex]
    # Rows without a valid minimum are set to 1 so that multiplying them by
    # the fill value below produces exactly that fill value.
    coordarr1 = coordarr0.copy()
    coordarr1[hasna, :] = 1
    minindex0 = [idx if not np.isnan(idx) else 0 for idx in minindex]

    def expected_with_fill(fill_value):
        """Expected coordinate labels, with ``fill_value`` on NaN rows."""
        nan_mult = np.array([fill_value if na else 1 for na in hasna])[:, None]
        filled = coordarr1 * nan_mult
        expected = xr.concat(
            [
                filled.isel(y=yi).isel(x=indi, drop=True)
                for yi, indi in enumerate(minindex0)
            ],
            dim="y",
        )
        expected.name = "x"
        return expected

    expected0 = expected_with_fill(np.nan)

    # Default fill value (NaN)
    with raise_if_dask_computes(max_computes=max_computes):
        result0 = ar0.idxmin(dim="x")
    assert_identical(result0, expected0)

    # Manually specify NaN fill_value
    with raise_if_dask_computes(max_computes=max_computes):
        result1 = ar0.idxmin(dim="x", fill_value=np.nan)
    assert_identical(result1, expected0)

    # keep_attrs
    with raise_if_dask_computes(max_computes=max_computes):
        result2 = ar0.idxmin(dim="x", keep_attrs=True)
    expected2 = expected0.copy()
    expected2.attrs = self.attrs
    assert_identical(result2, expected2)

    # skipna=False
    minindex3 = [
        skip_idx if nan_idx is None or ar0.dtype.kind == "O" else nan_idx
        for skip_idx, nan_idx in zip(minindex0, nanindex, strict=True)
    ]
    expected3 = xr.concat(
        [
            coordarr0.isel(y=yi).isel(x=indi, drop=True)
            for yi, indi in enumerate(minindex3)
        ],
        dim="y",
    )
    expected3.name = "x"
    expected3.attrs = {}

    with raise_if_dask_computes(max_computes=max_computes):
        result3 = ar0.idxmin(dim="x", skipna=False)
    assert_identical(result3, expected3)

    # fill_value should be ignored with skipna=False
    with raise_if_dask_computes(max_computes=max_computes):
        result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j)
    assert_identical(result4, expected3)

    # Float, integer and complex fill_value
    for fill_value in (-1.1, -1, -5j):
        expected = expected_with_fill(fill_value)
        with raise_if_dask_computes(max_computes=max_computes):
            result = ar0.idxmin(dim="x", fill_value=fill_value)
        assert_identical(result, expected)
@pytest.mark.parametrize(
    "use_dask", [pytest.param(True, id="dask"), pytest.param(False, id="nodask")]
)
def test_idxmax(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
    use_dask: bool,
) -> None:
    """Check ``idxmax`` along ``x`` of a 2-D array, eagerly and with dask.

    ``maxindex`` holds the expected position per row (NaN where the fill
    value is expected instead); ``nanindex`` holds the position expected with
    ``skipna=False`` (``None`` means "same as ``maxindex``"). ``minindex`` is
    accepted but not used here.
    """
    if use_dask and not has_dask:
        pytest.skip("requires dask")
    if use_dask and x.dtype.kind == "M":
        pytest.xfail("dask operation 'argmax' breaks when dtype is datetime64 (M)")

    # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices.
    allowed_computes = 1 if x.dtype.kind == "O" else 0

    eager = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
        attrs=self.attrs,
    )
    arr = eager.chunk({}) if use_dask else eager

    # No dimension specified
    with pytest.raises(ValueError):
        arr.idxmax()

    # dim doesn't exist
    with pytest.raises(KeyError):
        arr.idxmax(dim="Y")

    # dim exists, but its coordinate has been removed
    without_y_coord = arr.copy()
    del without_y_coord.coords["y"]
    with pytest.raises(KeyError):
        without_y_coord.idxmax(dim="y")

    def idxmax_along_x(**kwargs):
        """Run ``idxmax(dim="x")`` under the dask-compute budget."""
        with raise_if_dask_computes(max_computes=allowed_computes):
            return arr.idxmax(dim="x", **kwargs)

    def pick(source, per_row_index):
        """Take one entry per row of ``source`` and name the result "x"."""
        rows = [
            source.isel(y=row).isel(x=col, drop=True)
            for row, col in enumerate(per_row_index)
        ]
        picked = xr.concat(rows, dim="y")
        picked.name = "x"
        return picked

    labels = xr.DataArray(
        np.tile(arr.coords["x"], [x.shape[0], 1]), dims=arr.dims, coords=arr.coords
    )
    row_is_nan = [np.isnan(idx) for idx in maxindex]
    # NaN rows are set to 1 so that scaling by the fill value yields the
    # fill value itself.
    unit_labels = labels.copy()
    unit_labels[row_is_nan, :] = 1
    safe_index = [0 if np.isnan(idx) else idx for idx in maxindex]

    def expected_for_fill(fill_value):
        """Expected labels with ``fill_value`` substituted on NaN rows."""
        scale = np.array([fill_value if flag else 1 for flag in row_is_nan])
        return pick(unit_labels * scale[:, None], safe_index)

    expected_nan_fill = expected_for_fill(np.nan)

    # Default fill value (NaN)
    assert_identical(idxmax_along_x(), expected_nan_fill)

    # Manually specify NaN fill_value
    assert_identical(idxmax_along_x(fill_value=np.nan), expected_nan_fill)

    # keep_attrs
    kept = idxmax_along_x(keep_attrs=True)
    expected_kept = expected_nan_fill.copy()
    expected_kept.attrs = self.attrs
    assert_identical(kept, expected_kept)

    # skipna=False
    object_dtype = arr.dtype.kind == "O"
    noskip_index = [
        skip_idx if nan_idx is None or object_dtype else nan_idx
        for skip_idx, nan_idx in zip(safe_index, nanindex, strict=True)
    ]
    expected_noskip = pick(labels, noskip_index)
    expected_noskip.attrs = {}
    assert_identical(idxmax_along_x(skipna=False), expected_noskip)

    # fill_value should be ignored with skipna=False
    assert_identical(
        idxmax_along_x(skipna=False, fill_value=-100j), expected_noskip
    )

    # Float, integer and complex fill_value, in that order
    for fill_value in (-1.1, -1, -5j):
        expected_filled = expected_for_fill(fill_value)
        assert_identical(idxmax_along_x(fill_value=fill_value), expected_filled)
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmin_dim(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check ``argmin`` with a list of dims (dict result) on a 2-D array.

    ``minindex`` holds the expected position per row; ``nanindex`` holds the
    position expected with ``skipna=False`` (``None`` means "same as
    ``minindex``"). ``maxindex`` is accepted but not used here.
    """
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
        attrs=self.attrs,
    )
    indarrnp = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    indarr = xr.DataArray(indarrnp, dims=ar.dims, coords=ar.coords)

    if np.isnan(minindex).any():
        # A NaN expected index means argmin is expected to raise.
        with pytest.raises(ValueError):
            ar.argmin(dim="x")
        return

    def expected_for(per_row_index):
        """Expected ``{"x": ...}`` dict for one column position per row."""
        picked = [
            indarr.isel(y=yi).isel(x=indi, drop=True)
            for yi, indi in enumerate(per_row_index)
        ]
        return {"x": xr.concat(picked, dim="y")}

    expected0 = expected_for(minindex)
    result0 = ar.argmin(dim=["x"])
    for key in expected0:
        assert_identical(result0[key], expected0[key])

    result1 = ar.argmin(dim=["x"], keep_attrs=True)
    expected1 = deepcopy(expected0)
    expected1["x"].attrs = self.attrs
    for key in expected1:
        assert_identical(result1[key], expected1[key])

    # skipna=False positions are kept in their own list so the default-skipna
    # expectation further down still sees the original ``minindex``.
    minindex_noskip = [
        skip_idx if nan_idx is None or ar.dtype.kind == "O" else nan_idx
        for skip_idx, nan_idx in zip(minindex, nanindex, strict=True)
    ]
    expected2 = expected_for(minindex_noskip)
    expected2["x"].attrs = {}
    result2 = ar.argmin(dim=["x"], skipna=False)
    for key in expected2:
        assert_identical(result2[key], expected2[key])

    # Reduce over all dimensions with the default skipna.
    result3 = ar.argmin(...)
    # TODO: remove cast once argmin typing is overloaded
    min_xind = cast(DataArray, ar.isel(expected0).argmin())
    expected3 = {
        "y": DataArray(min_xind),
        # Default skipna was used for result3, so index the default-skipna
        # positions rather than the skipna=False ones.
        "x": DataArray(minindex[min_xind.item()]),
    }
    for key in expected3:
        assert_identical(result3[key], expected3[key])
@pytest.mark.filterwarnings(
    "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
)
def test_argmax_dim(
    self,
    x: np.ndarray,
    minindex: list[int | float],
    maxindex: list[int | float],
    nanindex: list[int | None],
) -> None:
    """Check ``argmax`` with a list of dims (dict result) on a 2-D array.

    ``maxindex`` holds the expected position per row; ``nanindex`` holds the
    position expected with ``skipna=False`` (``None`` means "same as
    ``maxindex``"). ``minindex`` is accepted but not used here.
    """
    ar = xr.DataArray(
        x,
        dims=["y", "x"],
        coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
        attrs=self.attrs,
    )
    indarrnp = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    indarr = xr.DataArray(indarrnp, dims=ar.dims, coords=ar.coords)

    if np.isnan(maxindex).any():
        # A NaN expected index means argmax is expected to raise.
        with pytest.raises(ValueError):
            ar.argmax(dim="x")
        return

    def expected_for(per_row_index):
        """Expected ``{"x": ...}`` dict for one column position per row."""
        picked = [
            indarr.isel(y=yi).isel(x=indi, drop=True)
            for yi, indi in enumerate(per_row_index)
        ]
        return {"x": xr.concat(picked, dim="y")}

    expected0 = expected_for(maxindex)
    result0 = ar.argmax(dim=["x"])
    for key in expected0:
        assert_identical(result0[key], expected0[key])

    result1 = ar.argmax(dim=["x"], keep_attrs=True)
    expected1 = deepcopy(expected0)
    expected1["x"].attrs = self.attrs
    for key in expected1:
        assert_identical(result1[key], expected1[key])

    # skipna=False positions are kept in their own list so the default-skipna
    # expectation further down still sees the original ``maxindex``.
    maxindex_noskip = [
        skip_idx if nan_idx is None or ar.dtype.kind == "O" else nan_idx
        for skip_idx, nan_idx in zip(maxindex, nanindex, strict=True)
    ]
    expected2 = expected_for(maxindex_noskip)
    expected2["x"].attrs = {}
    result2 = ar.argmax(dim=["x"], skipna=False)
    for key in expected2:
        assert_identical(result2[key], expected2[key])

    # Reduce over all dimensions with the default skipna.
    result3 = ar.argmax(...)
    # TODO: remove cast once argmax typing is overloaded
    max_xind = cast(DataArray, ar.isel(expected0).argmax())
    expected3 = {
        "y": DataArray(max_xind),
        # Default skipna was used for result3, so index the default-skipna
        # positions rather than the skipna=False ones.
        "x": DataArray(maxindex[max_xind.item()]),
    }
    for key in expected3:
        assert_identical(result3[key], expected3[key])
@pytest.mark.parametrize(
    "x, minindices_x, minindices_y, minindices_z, minindices_xy, "
    "minindices_xz, minindices_yz, minindices_xyz, maxindices_x, "
    "maxindices_y, maxindices_z, maxindices_xy, maxindices_xz, maxindices_yz, "
    "maxindices_xyz, nanindices_x, nanindices_y, nanindices_z, nanindices_xy, "
    "nanindices_xz, nanindices_yz, nanindices_xyz",
    [
        pytest.param(
            np.array(
                [
                    [[0, 1, 2, 0], [-2, -4, 2, 0]],
                    [[1, 1, 1, 1], [1, 1, 1, 1]],
                    [[0, 0, -10, 5], [20, 0, 0, 0]],
                ]
            ),
            {"x": np.array([[0, 2, 2, 0], [0, 0, 2, 0]])},
            {"y": np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]])},
            {"z": np.array([[0, 1], [0, 0], [2, 1]])},
            {"x": np.array([0, 0, 2, 0]), "y": np.array([1, 1, 0, 0])},
            {"x": np.array([2, 0]), "z": np.array([2, 1])},
            {"y": np.array([1, 0, 0]), "z": np.array([1, 0, 2])},
            {"x": np.array(2), "y": np.array(0), "z": np.array(2)},
            {"x": np.array([[1, 0, 0, 2], [2, 1, 0, 1]])},
            {"y": np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 1, 0]])},
            {"z": np.array([[2, 2], [0, 0], [3, 0]])},
            {"x": np.array([2, 0, 0, 2]), "y": np.array([1, 0, 0, 0])},
            {"x": np.array([2, 2]), "z": np.array([3, 0])},
            {"y": np.array([0, 0, 1]), "z": np.array([2, 0, 0])},
            {"x": np.array(2), "y": np.array(1), "z": np.array(0)},
            {"x": np.array([[None, None, None, None], [None, None, None, None]])},
            {
                "y": np.array(
                    [
                        [None, None, None, None],
                        [None, None, None, None],
                        [None, None, None, None],
                    ]
                )
            },
            {"z": np.array([[None, None], [None, None], [None, None]])},
            {
                "x": np.array([None, None, None, None]),
                "y": np.array([None, None, None, None]),
            },
            {"x": np.array([None, None]), "z": np.array([None, None])},
            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
            id="int",
        ),
        pytest.param(
            np.array(
                [
                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
                    [[-4.0, np.nan, 2.0, np.nan], [-2.0, -4.0, 2.0, 0.0]],
                    [[np.nan] * 4, [np.nan] * 4],
                ]
            ),
            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
            {
                "y": np.array(
                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]]
                )
            },
            {"z": np.array([[3, 1], [0, 1], [np.nan, np.nan]])},
            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
            {"x": np.array([1, 0]), "z": np.array([0, 1])},
            {"y": np.array([1, 0, np.nan]), "z": np.array([1, 0, np.nan])},
            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
            {
                "y": np.array(
                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]]
                )
            },
            {"z": np.array([[0, 2], [2, 2], [np.nan, np.nan]])},
            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
            {"x": np.array([0, 0]), "z": np.array([2, 2])},
            {"y": np.array([0, 0, np.nan]), "z": np.array([0, 2, np.nan])},
            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
            {
                "y": np.array(
                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
                )
            },
            {"z": np.array([[None, None], [1, None], [0, 0]])},
            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
            {"x": np.array([1, 2]), "z": np.array([1, 0])},
            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
            id="nan",
        ),
        pytest.param(
            np.array(
                [
                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
                    [[-4.0, np.nan, 2.0, np.nan], [-2.0, -4.0, 2.0, 0.0]],
                    [[np.nan] * 4, [np.nan] * 4],
                ]
            ).astype("object"),
            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
            {
                "y": np.array(
                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]]
                )
            },
            {"z": np.array([[3, 1], [0, 1], [np.nan, np.nan]])},
            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
            {"x": np.array([1, 0]), "z": np.array([0, 1])},
            {"y": np.array([1, 0, np.nan]), "z": np.array([1, 0, np.nan])},
            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
            {
                "y": np.array(
                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.nan, np.nan, np.nan, np.nan]]
                )
            },
            {"z": np.array([[0, 2], [2, 2], [np.nan, np.nan]])},
            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
            {"x": np.array([0, 0]), "z": np.array([2, 2])},
            {"y": np.array([0, 0, np.nan]), "z": np.array([0, 2, np.nan])},
            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
            {
                "y": np.array(
                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
                )
            },
            {"z": np.array([[None, None], [1, None], [0, 0]])},
            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
            {"x": np.array([1, 2]), "z": np.array([1, 0])},
            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
            id="obj",
        ),
        pytest.param(
            np.array(
                [
                    [["2015-12-31", "2020-01-02"], ["2020-01-01", "2016-01-01"]],
                    [["2020-01-02", "2020-01-02"], ["2020-01-02", "2020-01-02"]],
                    [["1900-01-01", "1-02-03"], ["1900-01-02", "1-02-03"]],
                ],
                dtype="datetime64[ns]",
            ),
            {"x": np.array([[2, 2], [2, 2]])},
            {"y": np.array([[0, 1], [0, 0], [0, 0]])},
            {"z": np.array([[0, 1], [0, 0], [1, 1]])},
            {"x": np.array([2, 2]), "y": np.array([0, 0])},
            {"x": np.array([2, 2]), "z": np.array([1, 1])},
            {"y": np.array([0, 0, 0]), "z": np.array([0, 0, 1])},
            {"x": np.array(2), "y": np.array(0), "z": np.array(1)},
            {"x": np.array([[1, 0], [1, 1]])},
            {"y": np.array([[1, 0], [0, 0], [1, 0]])},
            {"z": np.array([[1, 0], [0, 0], [0, 0]])},
            {"x": np.array([1, 0]), "y": np.array([0, 0])},
            {"x": np.array([0, 1]), "z": np.array([1, 0])},
            {"y": np.array([0, 0, 1]), "z": np.array([1, 0, 0])},
            {"x": np.array(0), "y": np.array(0), "z": np.array(1)},
            {"x": np.array([[None, None], [None, None]])},
            {"y": np.array([[None, None], [None, None], [None, None]])},
            {"z": np.array([[None, None], [None, None], [None, None]])},
            {"x": np.array([None, None]), "y": np.array([None, None])},
            {"x": np.array([None, None]), "z": np.array([None, None])},
            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
            id="datetime",
        ),
    ],
)
class TestReduce3D(TestReduce):
    """``argmin``/``argmax`` over every combination of dims of a 3-D array.

    Each parametrized case supplies the input ``x`` plus, for each of the
    seven dim combinations (x, y, z, xy, xz, yz, xyz), the expected minimum
    indices, maximum indices and ``skipna=False`` overrides (``None`` entries
    mean "same as the skipna result").
    """

    def _check_arg_reduce_dim(
        self,
        x: np.ndarray,
        op: Literal["argmin", "argmax"],
        indices: list[dict[str, np.ndarray]],
        nanindices: list[dict[str, np.ndarray]],
    ) -> None:
        """Run ``op`` over all seven dim combinations and compare the results.

        Parameters
        ----------
        x
            Input data, wrapped in a DataArray with dims ``("x", "y", "z")``.
        op
            Name of the DataArray method under test.
        indices
            Expected index dicts in the order x, y, z, xy, xz, yz, xyz.
        nanindices
            ``skipna=False`` overrides, in the same order as ``indices``.
        """
        ar = xr.DataArray(
            x,
            dims=["x", "y", "z"],
            coords={
                "x": np.arange(x.shape[0]) * 4,
                "y": 1 - np.arange(x.shape[1]),
                "z": 2 + 3 * np.arange(x.shape[2]),
            },
            attrs=self.attrs,
        )
        reduce = getattr(ar, op)

        # A NaN in any expected index means that reduction is expected to
        # raise; the first such combination is checked and the test ends.
        for inds in indices:
            if np.array([np.isnan(i) for i in inds.values()]).any():
                with pytest.raises(ValueError):
                    reduce(dim=list(inds))
                return

        # (value passed as ``dim``, dimensions remaining in the result),
        # aligned with the order of ``indices`` / ``nanindices``.
        reductions: list[tuple[Any, tuple[str, ...]]] = [
            (["x"], ("y", "z")),
            (["y"], ("x", "z")),
            (["z"], ("x", "y")),
            (("x", "y"), ("z",)),
            (("x", "z"), ("y",)),
            (("y", "z"), ("x",)),
            (..., ()),
        ]

        def assert_matches(result, expected_indices, kept):
            """Compare a dict result with expected raw index arrays."""
            assert isinstance(result, dict)
            for key, value in expected_indices.items():
                # The expected arrays carry no coordinates, so drop the ones
                # left on the result before comparing.
                expected = xr.DataArray(value, dims=kept or None)
                actual = result[key].drop_vars(list(kept)) if kept else result[key]
                assert_identical(actual, expected)

        # Default skipna.
        for (dim, kept), inds in zip(reductions, indices, strict=True):
            assert_matches(reduce(dim=dim), inds, kept)

        # skipna=False: use the override wherever one is given.
        for (dim, kept), inds, nan_inds in zip(
            reductions, indices, nanindices, strict=True
        ):
            merged = {
                key: xr.where(
                    # Elementwise comparison against None is intended here.
                    nan_inds[key] == None,  # noqa: E711
                    inds[key],
                    nan_inds[key],
                )
                for key in inds
            }
            assert_matches(reduce(dim=dim, skipna=False), merged, kept)

    def test_argmin_dim(
        self,
        x: np.ndarray,
        minindices_x: dict[str, np.ndarray],
        minindices_y: dict[str, np.ndarray],
        minindices_z: dict[str, np.ndarray],
        minindices_xy: dict[str, np.ndarray],
        minindices_xz: dict[str, np.ndarray],
        minindices_yz: dict[str, np.ndarray],
        minindices_xyz: dict[str, np.ndarray],
        maxindices_x: dict[str, np.ndarray],
        maxindices_y: dict[str, np.ndarray],
        maxindices_z: dict[str, np.ndarray],
        maxindices_xy: dict[str, np.ndarray],
        maxindices_xz: dict[str, np.ndarray],
        maxindices_yz: dict[str, np.ndarray],
        maxindices_xyz: dict[str, np.ndarray],
        nanindices_x: dict[str, np.ndarray],
        nanindices_y: dict[str, np.ndarray],
        nanindices_z: dict[str, np.ndarray],
        nanindices_xy: dict[str, np.ndarray],
        nanindices_xz: dict[str, np.ndarray],
        nanindices_yz: dict[str, np.ndarray],
        nanindices_xyz: dict[str, np.ndarray],
    ) -> None:
        """Check ``argmin`` for every dim combination (max indices unused)."""
        self._check_arg_reduce_dim(
            x,
            "argmin",
            [
                minindices_x,
                minindices_y,
                minindices_z,
                minindices_xy,
                minindices_xz,
                minindices_yz,
                minindices_xyz,
            ],
            [
                nanindices_x,
                nanindices_y,
                nanindices_z,
                nanindices_xy,
                nanindices_xz,
                nanindices_yz,
                nanindices_xyz,
            ],
        )

    def test_argmax_dim(
        self,
        x: np.ndarray,
        minindices_x: dict[str, np.ndarray],
        minindices_y: dict[str, np.ndarray],
        minindices_z: dict[str, np.ndarray],
        minindices_xy: dict[str, np.ndarray],
        minindices_xz: dict[str, np.ndarray],
        minindices_yz: dict[str, np.ndarray],
        minindices_xyz: dict[str, np.ndarray],
        maxindices_x: dict[str, np.ndarray],
        maxindices_y: dict[str, np.ndarray],
        maxindices_z: dict[str, np.ndarray],
        maxindices_xy: dict[str, np.ndarray],
        maxindices_xz: dict[str, np.ndarray],
        maxindices_yz: dict[str, np.ndarray],
        maxindices_xyz: dict[str, np.ndarray],
        nanindices_x: dict[str, np.ndarray],
        nanindices_y: dict[str, np.ndarray],
        nanindices_z: dict[str, np.ndarray],
        nanindices_xy: dict[str, np.ndarray],
        nanindices_xz: dict[str, np.ndarray],
        nanindices_yz: dict[str, np.ndarray],
        nanindices_xyz: dict[str, np.ndarray],
    ) -> None:
        """Check ``argmax`` for every dim combination (min indices unused)."""
        self._check_arg_reduce_dim(
            x,
            "argmax",
            [
                maxindices_x,
                maxindices_y,
                maxindices_z,
                maxindices_xy,
                maxindices_xz,
                maxindices_yz,
                maxindices_xyz,
            ],
            [
                nanindices_x,
                nanindices_y,
                nanindices_z,
                nanindices_xy,
                nanindices_xz,
                nanindices_yz,
                nanindices_xyz,
            ],
        )
class TestReduceND(TestReduce):
    """``idxmin``/``idxmax`` consistency checks on higher-dimensional input."""

    @pytest.mark.parametrize("op", ["idxmin", "idxmax"])
    @pytest.mark.parametrize("ndim", [3, 5])
    def test_idxminmax_dask(self, op: str, ndim: int) -> None:
        """The chunked and the in-memory array must give equal results."""
        if not has_dask:
            pytest.skip("requires dask")

        # ``ndim - 1`` single-letter leading dims followed by the reduced "x".
        dim_names = list("abcdefghij"[: ndim - 1]) + ["x"]
        in_memory = xr.DataArray(
            np.random.random_sample(size=[10] * ndim),
            dims=dim_names,
            coords={"x": np.arange(10)},
            attrs=self.attrs,
        )
        chunked = in_memory.chunk({})

        # Assert idx is the same with dask and without
        from_dask = getattr(chunked, op)(dim="x")
        from_numpy = getattr(in_memory, op)(dim="x")
        assert_equal(from_dask, from_numpy)
@pytest.mark.parametrize("da", ("repeating_ints",), indirect=True)
def test_isin(da) -> None:
    """``isin`` flags exactly the entries contained in the given values."""
    # (values tested for membership, expected mask on the y=[d, e], z=0 slice)
    cases = [
        ([3], [[0, 0, 0], [1, 0, 0]]),
        ([2, 3], [[0, 0, 1], [1, 0, 0]]),
    ]
    for test_values, mask in cases:
        expected = DataArray(
            np.asarray(mask),
            dims=list("yx"),
            coords={"x": list("abc"), "y": list("de")},
        ).astype("bool")
        result = da.isin(test_values).sel(y=list("de"), z=0)
        assert_equal(result, expected)
def test_raise_no_warning_for_nan_in_binary_ops() -> None:
    """Comparing an array that contains NaN must not emit a warning."""
    with assert_no_warnings():
        with_nan = xr.DataArray([1, 2, np.nan])
        _ = with_nan > 0
@pytest.mark.filterwarnings("error")
def test_no_warning_for_all_nan() -> None:
    """Mean of an all-NaN array must not warn (warnings are errors here)."""
    all_nan = xr.DataArray([np.nan, np.nan])
    _ = all_nan.mean()
def test_name_in_masking() -> None:
    """``where`` keeps the array's own name, whatever the condition's name."""
    name = "RingoStarr"
    da = xr.DataArray(range(10), coords=[("x", range(10))], name=name)

    for drop in (False, True):
        # First an unrenamed condition, then one carrying a different name.
        for cond in (da > 5, (da > 5).rename("YokoOno")):
            masked = da.where(cond, drop=drop)
            assert masked.name == name
class TestIrisConversion:
@requires_iris
def test_to_and_from_iris(self) -> None:
    """Convert a DataArray to an iris cube and back, checking metadata."""
    import cf_units  # iris requirement
    import iris

    # to iris
    coord_dict: dict[Hashable, Any] = {
        "distance": ("distance", [-2, 2], {"units": "meters"}),
        "time": ("time", pd.date_range("2000-01-01", periods=3)),
        "height": 10,
        "distance2": ("distance", [0, 1], {"foo": "bar"}),
        "time2": (("distance", "time"), [[0, 1, 2], [2, 3, 4]]),
    }
    original = DataArray(
        np.arange(6, dtype="float").reshape(2, 3),
        coord_dict,
        name="Temperature",
        attrs=dict(
            baz=123,
            units="Kelvin",
            standard_name="fire_temperature",
            long_name="Fire Temperature",
        ),
        dims=("distance", "time"),
    )

    # Set a bad value to test the masking logic
    original.data[0, 2] = np.nan

    original.attrs["cell_methods"] = "height: mean (comment: A cell method)"
    actual = original.to_iris()

    # Data, variable name and dimension order
    assert_array_equal(actual.data, original.data)
    assert actual.var_name == original.name
    dim_coord_names = tuple(d.var_name for d in actual.dim_coords)
    assert dim_coord_names == original.dims

    # The cell_methods attribute string becomes a CellMethod object
    expected_cell_method = iris.coords.CellMethod(
        method="mean",
        coords=("height",),
        intervals=(),
        comments=("A cell method",),
    )
    assert actual.cell_methods == (expected_cell_method,)

    # Coordinates are compared pairwise, in order
    for coord, original_key in zip((actual.coords()), original.coords, strict=True):
        original_coord = original.coords[original_key]
        assert coord.var_name == original_coord.name
        encoded_values = CFDatetimeCoder().encode(original_coord.variable).values
        assert_array_equal(coord.points, encoded_values)
        expected_axes = original.get_axis_num(original.coords[coord.var_name].dims)
        assert actual.coord_dims(coord) == expected_axes

    # Coordinate attributes and units
    distance2_attrs = actual.coord("distance2").attributes
    assert distance2_attrs["foo"] == original.coords["distance2"].attrs["foo"]
    expected_unit = cf_units.Unit(original.coords["distance"].units)
    assert actual.coord("distance").units == expected_unit

    # Array-level attributes
    assert actual.attributes["baz"] == original.attrs["baz"]
    assert actual.standard_name == original.attrs["standard_name"]

    roundtripped = DataArray.from_iris(actual)
    assert_identical(original, roundtripped)

    # Without its "time" coordinate the second dimension gets a default name
    actual.remove_coord("time")
    auto_time_dimension = DataArray.from_iris(actual)
    assert auto_time_dimension.dims == ("distance", "dim_1")
@requires_iris
@requires_dask
def test_to_and_from_iris_dask(self) -> None:
    """Round-trip a dask-backed DataArray through ``to_iris``/``from_iris``.

    Runs the same cube checks as the NumPy-backed variant, reading the cube
    payload through ``core_data()`` when the cube provides it. When it does,
    the array type is also compared at each stage of the conversion.
    """
    import cf_units  # iris requirement
    import dask.array as dask_array
    import iris

    coords: dict[Hashable, Any] = {
        "distance": ("distance", [-2, 2], {"units": "meters"}),
        "time": ("time", pd.date_range("2000-01-01", periods=3)),
        "height": 10,
        "distance2": ("distance", [0, 1], {"foo": "bar"}),
        "time2": (("distance", "time"), [[0, 1, 2], [2, 3, 4]]),
    }
    chunked_values = dask_array.from_array(
        np.arange(-1, 5, dtype="float").reshape(2, 3), 3
    )
    original = DataArray(
        chunked_values,
        coords,
        name="Temperature",
        attrs={
            "baz": 123,
            "units": "Kelvin",
            "standard_name": "fire_temperature",
            "long_name": "Fire Temperature",
            "cell_methods": "height: mean (comment: A cell method)",
        },
        dims=("distance", "time"),
    )

    # Set a bad value to test the masking logic
    original.data = dask_array.ma.masked_less(original.data, 0)

    cube = original.to_iris()

    # Be careful not to trigger the loading of the iris data
    if hasattr(cube, "core_data"):
        cube_data = cube.core_data()
    else:
        cube_data = cube.data
    assert_array_equal(cube_data, original.data)
    assert cube.var_name == original.name
    assert tuple(dim.var_name for dim in cube.dim_coords) == original.dims

    expected_cell_methods = (
        iris.coords.CellMethod(
            method="mean",
            coords=("height",),
            intervals=(),
            comments=("A cell method",),
        ),
    )
    assert cube.cell_methods == expected_cell_methods

    # Cube coordinates are compared pairwise, in order, with the xarray ones.
    for iris_coord, (key, xr_coord) in zip(
        cube.coords(), original.coords.items(), strict=True
    ):
        assert iris_coord.var_name == xr_coord.name
        encoded = CFDatetimeCoder().encode(xr_coord.variable)
        assert_array_equal(iris_coord.points, encoded.values)
        matching_dims = original.coords[iris_coord.var_name].dims
        assert cube.coord_dims(iris_coord) == original.get_axis_num(matching_dims)

    distance2_foo = original.coords["distance2"].attrs["foo"]
    assert cube.coord("distance2").attributes["foo"] == distance2_foo

    distance_units = cf_units.Unit(original.coords["distance"].units)
    assert cube.coord("distance").units == distance_units

    assert cube.attributes["baz"] == original.attrs["baz"]
    assert cube.standard_name == original.attrs["standard_name"]

    restored = DataArray.from_iris(cube)
    assert_identical(original, restored)

    # If the Iris version supports it then we should have a dask array
    # at each stage of the conversion
    if hasattr(cube, "core_data"):
        assert isinstance(original.data, type(cube.core_data()))
        assert isinstance(original.data, type(restored.data))

    cube.remove_coord("time")
    without_time = DataArray.from_iris(cube)
    assert without_time.dims == ("distance", "dim_1")
@requires_iris
@pytest.mark.parametrize(
    "var_name, std_name, long_name, name, attrs",
    [
        (
            "var_name",
            "height",
            "Height",
            "var_name",
            {"standard_name": "height", "long_name": "Height"},
        ),
        (
            None,
            "height",
            "Height",
            "height",
            {"standard_name": "height", "long_name": "Height"},
        ),
        (None, None, "Height", "Height", {"long_name": "Height"}),
        (None, None, None, None, {}),
    ],
)
def test_da_name_from_cube(
    self, std_name, long_name, var_name, name, attrs
) -> None:
    """Check the name and attrs ``from_iris`` derives from a cube's names.

    Each case supplies the cube's ``var_name``/``standard_name``/``long_name``
    together with the DataArray ``name`` and ``attrs`` expected in return.
    """
    from iris.cube import Cube

    empty_cube = Cube(
        [], var_name=var_name, standard_name=std_name, long_name=long_name
    )
    converted = xr.DataArray.from_iris(empty_cube)
    wanted = xr.DataArray([], name=name, attrs=attrs)
    xr.testing.assert_identical(converted, wanted)
@requires_iris
@pytest.mark.parametrize(
    "var_name, std_name, long_name, name, attrs",
    [
        (
            "var_name",
            "height",
            "Height",
            "var_name",
            {"standard_name": "height", "long_name": "Height"},
        ),
        (
            None,
            "height",
            "Height",
            "height",
            {"standard_name": "height", "long_name": "Height"},
        ),
        (None, None, "Height", "Height", {"long_name": "Height"}),
        (None, None, None, "unknown", {}),
    ],
)
def test_da_coord_name_from_cube(
    self, std_name, long_name, var_name, name, attrs
) -> None:
    """Check the coordinate name and attrs ``from_iris`` derives from a DimCoord.

    Each case supplies the DimCoord's names together with the coordinate
    ``name`` and ``attrs`` expected on the resulting DataArray.
    """
    from iris.coords import DimCoord
    from iris.cube import Cube

    points = [-90, 0, 90]
    values = [0, 0, 0]
    dim_coord = DimCoord(
        points, standard_name=std_name, var_name=var_name, long_name=long_name
    )
    converted = xr.DataArray.from_iris(
        Cube(values, dim_coords_and_dims=[(dim_coord, 0)])
    )
    wanted = xr.DataArray(values, coords=[(name, [-90, 0, 90], attrs)])
    xr.testing.assert_identical(converted, wanted)
@requires_iris
def test_prevent_duplicate_coord_names(self) -> None:
    """``from_iris`` raises ``ValueError`` for dim coords sharing a ``var_name``."""
    from iris.coords import DimCoord
    from iris.cube import Cube

    # Iris enforces unique coordinate names. Because we use a different
    # name resolution order a valid iris Cube with coords that have the
    # same var_name would lead to duplicate dimension names in the
    # DataArray
    shared_var_name = "duplicate"
    lon = DimCoord([0, 360], standard_name="longitude", var_name=shared_var_name)
    lat = DimCoord([-90, 0, 90], standard_name="latitude", var_name=shared_var_name)
    clashing_cube = Cube(
        [[0, 0, 0], [0, 0, 0]],
        dim_coords_and_dims=[(lon, 0), (lat, 1)],
    )

    with pytest.raises(ValueError):
        xr.DataArray.from_iris(clashing_cube)
@requires_iris
@pytest.mark.parametrize(
    "coord_values",
    [
        ["IA", "IL", "IN"],  # non-numeric values
        [0, 2, 1],  # non-monotonic values
    ],
)
def test_fallback_to_iris_AuxCoord(self, coord_values) -> None:
    """``to_iris`` yields a cube holding the coordinate as an ``AuxCoord``."""
    from iris.coords import AuxCoord
    from iris.cube import Cube

    values = [0, 0, 0]
    array = xr.DataArray(values, coords=[coord_values], dims=["space"])
    cube = array.to_iris()

    aux = AuxCoord(coord_values, var_name="space")
    wanted = Cube(values, aux_coords_and_dims=[(aux, 0)])
    assert cube == wanted
def test_no_dict() -> None:
    """Accessing ``__dict__`` on a DataArray instance raises ``AttributeError``."""
    array = DataArray()
    with pytest.raises(AttributeError):
        _ = array.__dict__
def test_subclass_slots() -> None:
    """Subclassing DataArray without declaring ``__slots__`` is rejected.

    .. note::
       As of 0.13.0, this is actually mitigated into a FutureWarning for any
       class defined outside of the xarray package.
    """
    with pytest.raises(AttributeError) as excinfo:

        class MyArray(DataArray):
            pass

    message = str(excinfo.value)
    assert message == "MyArray must explicitly define __slots__"
def test_weakref() -> None:
    """A DataArray can be the target of a weak reference.

    Classes with ``__slots__`` only work with the ``weakref`` module when
    ``__weakref__`` is listed among their slots.
    """
    import weakref

    target = DataArray(1)
    handle = weakref.ref(target)
    assert handle() is target
def test_delete_coords() -> None:
    """Deleting a coordinate from a copy must leave the source array intact.

    See issue #3899. Also checks that the deletion itself succeeds and that
    the copy ends up with the expected dims and coordinates.
    """
    source = DataArray(
        np.array([[1, 2, 3], [4, 5, 6]]),
        dims=["y", "x"],
        coords={"x": ["a", "b", "c"], "y": [-1, 1]},
    )
    assert_identical(source, source)

    trimmed = source.copy()
    del trimmed.coords["y"]

    # This test will detect certain sorts of corruption in the DataArray
    assert_identical(source, source)

    both_dims = ("y", "x")
    assert source.dims == both_dims
    assert trimmed.dims == both_dims
    assert set(source.coords) == {"x", "y"}
    assert set(trimmed.coords) == {"x"}
def test_deepcopy_nested_attrs() -> None:
    """Edits to a deep copy's attrs, nested ones included, stay off the source.

    See :issue:`2835`.
    """
    source = xr.DataArray(
        [[1, 2], [3, 4]], dims=("x", "y"), coords={"x": [10, 20]}
    )
    source.attrs["flat"] = "0"
    source.attrs["nested"] = {"level1a": "1", "level1b": "1"}

    clone = source.copy(deep=True)

    # Touch the copy through both item assignment and ``update``, at the
    # top level and inside the nested mapping.
    clone.attrs["new"] = "2"
    clone.attrs.update({"new2": "2"})
    clone.attrs["flat"] = "2"
    clone.attrs["nested"]["level1a"] = "2"
    clone.attrs["nested"].update({"level1b": "2"})

    # Coarse test
    assert not source.identical(clone)

    # Check attrs levels
    assert source.attrs["flat"] != clone.attrs["flat"]
    assert source.attrs["nested"] != clone.attrs["nested"]
    for added_key in ("new", "new2"):
        assert added_key not in source.attrs
def test_deepcopy_obj_array() -> None:
    """``deepcopy`` also copies the Python objects held in an object array."""
    source = DataArray(np.array([object()]))
    clone = deepcopy(source)
    assert source.values[0] is not clone.values[0]
def test_deepcopy_recursive() -> None:
    """Deep-copying arrays whose attrs reference each other must complete.

    GH:issue:7111. Only the absence of an error is checked.
    """
    # direct recursion
    first = xr.DataArray([1, 2], dims=["x"])
    first.attrs["other"] = first

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    first.copy(deep=True)

    # indirect recursion
    second = xr.DataArray([5, 6], dims=["y"])
    first.attrs["other"] = second
    second.attrs["other"] = first

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    first.copy(deep=True)
    second.copy(deep=True)
def test_clip(da: DataArray) -> None:
    """Exercise ``DataArray.clip`` with scalar and DataArray bounds.

    ``da`` is a fixture defined outside this function. Several calls are
    wrapped in ``raise_if_dask_computes`` so that they fail if a dask
    computation is triggered.
    """
    with raise_if_dask_computes():
        floor_clipped = da.clip(min=0.5)
    assert floor_clipped.min() >= 0.5

    ceiling_clipped = da.clip(max=0.5)
    assert ceiling_clipped.max() <= 0.5

    band_clipped = da.clip(min=0.25, max=0.75)
    assert band_clipped.min() >= 0.25
    assert band_clipped.max() <= 0.75

    with raise_if_dask_computes():
        array_clipped = da.clip(min=da.mean("x"), max=da.mean("a"))
    assert array_clipped.dims == da.dims
    reference = np.clip(
        da.data, da.mean("x").data[:, :, np.newaxis], da.mean("a").data
    )
    assert_array_equal(array_clipped.data, reference)

    with_nans = da.isel(time=[0, 1]).reindex_like(da)
    # NOTE(review): the result of this guarded call is overwritten right
    # away; possibly ``da.clip(with_nans)`` was meant to run under the
    # guard instead -- confirm before changing.
    with raise_if_dask_computes():
        nan_clipped = da.clip(min=da.mean("x"), max=da.mean("a"))
    nan_clipped = da.clip(with_nans)
    # The values should be the same where there were NaNs.
    first_two = [0, 1]
    assert_array_equal(
        nan_clipped.isel(time=first_two), with_nans.isel(time=first_two)
    )

    # Unclear whether we want this work, OK to adjust the test when we have decided.
    with pytest.raises(ValueError, match="cannot reindex or align along dimension.*"):
        da.clip(min=da.mean("x"), max=da.mean("a").isel(x=[0, 1]))
class TestDropDuplicates:
    """Tests for ``DataArray.drop_duplicates``."""

    @pytest.mark.parametrize("keep", ["first", "last", False])
    def test_drop_duplicates_1d(self, keep) -> None:
        """Drop a repeated "time" label under each ``keep`` policy.

        Also checks that naming a dimension the array does not have raises
        ``ValueError``.
        """
        da = xr.DataArray(
            [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test"
        )

        # (data, time) expected per policy; any other ``keep`` value
        # (``False`` here) is expected to drop every duplicated label.
        survivors = {
            "first": ([0, 6, 7], [0, 1, 2]),
            "last": ([5, 6, 7], [0, 1, 2]),
        }
        data, time = survivors.get(keep, ([6, 7], [1, 2]))

        expected = xr.DataArray(data, dims="time", coords={"time": time}, name="test")
        assert_equal(expected, da.drop_duplicates("time", keep=keep))

        missing_dim_message = re.escape(
            "Dimensions ('space',) not found in data dimensions ('time',)"
        )
        with pytest.raises(ValueError, match=missing_dim_message):
            da.drop_duplicates("space", keep=keep)

    def test_drop_duplicates_2d(self) -> None:
        """Drop duplicates along both dimensions, named explicitly or via ``...``."""
        da = xr.DataArray(
            [[0, 5, 6, 7], [2, 1, 3, 4]],
            dims=["space", "time"],
            coords={"space": [10, 10], "time": [0, 0, 1, 2]},
            name="test",
        )
        expected = xr.DataArray(
            [[0, 6, 7]],
            dims=["space", "time"],
            coords={"time": ("time", [0, 1, 2]), "space": ("space", [10])},
            name="test",
        )

        by_name = da.drop_duplicates(["time", "space"], keep="first")
        assert_equal(expected, by_name)

        by_ellipsis = da.drop_duplicates(..., keep="first")
        assert_equal(expected, by_ellipsis)
class TestNumpyCoercion:
    """Check ``as_numpy``/``to_numpy`` on DataArrays wrapping various array types."""

    # TODO once flexible indexes refactor complete also test coercion of dimension coords
    def test_from_numpy(self) -> None:
        """A NumPy-backed array is unchanged by ``as_numpy``."""
        da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])})

        assert_identical(da.as_numpy(), da)
        np.testing.assert_equal(da.to_numpy(), np.array([1, 2, 3]))
        np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6]))

    def test_to_numpy(self) -> None:
        """``np.asarray``/``np.array`` on a DataArray emit no warnings."""
        arr = np.array([1, 2, 3])
        da = xr.DataArray(arr, dims="x", coords={"lat": ("x", [4, 5, 6])})

        with assert_no_warnings():
            np.testing.assert_equal(np.asarray(da), arr)
            np.testing.assert_equal(np.array(da), arr)

    @requires_dask
    def test_from_dask(self) -> None:
        """A chunked array is coerced to NumPy by ``as_numpy`` and ``to_numpy``."""
        da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])})
        da_chunked = da.chunk(1)

        assert_identical(da_chunked.as_numpy(), da.compute())

        # Run ``to_numpy`` on the *chunked* array: calling it on ``da`` would
        # only repeat ``test_from_numpy`` and never touch the dask path.
        data_as_numpy = da_chunked.to_numpy()
        assert isinstance(data_as_numpy, np.ndarray)
        np.testing.assert_equal(data_as_numpy, np.array([1, 2, 3]))
        np.testing.assert_equal(da_chunked["lat"].to_numpy(), np.array([4, 5, 6]))

    @requires_pint
    def test_from_pint(self) -> None:
        """pint quantities in data and coords are coerced to plain arrays."""
        from pint import Quantity

        arr = np.array([1, 2, 3])
        da = xr.DataArray(
            Quantity(arr, units="Pa"),
            dims="x",
            coords={"lat": ("x", Quantity(arr + 3, units="m"))},
        )

        expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)})
        assert_identical(da.as_numpy(), expected)
        np.testing.assert_equal(da.to_numpy(), arr)
        np.testing.assert_equal(da["lat"].to_numpy(), arr + 3)

    @requires_sparse
    def test_from_sparse(self) -> None:
        """sparse COO data and coords are coerced to dense NumPy arrays."""
        import sparse

        arr = np.diagflat([1, 2, 3])
        sparr = sparse.COO.from_numpy(arr)
        da = xr.DataArray(
            sparr, dims=["x", "y"], coords={"elev": (("x", "y"), sparr + 3)}
        )

        expected = xr.DataArray(
            arr, dims=["x", "y"], coords={"elev": (("x", "y"), arr + 3)}
        )
        assert_identical(da.as_numpy(), expected)
        np.testing.assert_equal(da.to_numpy(), arr)

    @requires_cupy
    def test_from_cupy(self) -> None:
        """cupy data and coords are coerced to NumPy arrays."""
        import cupy as cp

        arr = np.array([1, 2, 3])
        da = xr.DataArray(
            cp.array(arr), dims="x", coords={"lat": ("x", cp.array(arr + 3))}
        )

        expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr + 3)})
        assert_identical(da.as_numpy(), expected)
        np.testing.assert_equal(da.to_numpy(), arr)

    @requires_dask
    @requires_pint
    def test_from_pint_wrapping_dask(self) -> None:
        """A pint quantity wrapping a dask array is coerced to NumPy."""
        # Import the submodule explicitly: ``import dask`` alone does not
        # guarantee that ``dask.array`` has been loaded.
        import dask.array
        from pint import Quantity

        arr = np.array([1, 2, 3])
        d = dask.array.from_array(arr)
        da = xr.DataArray(
            Quantity(d, units="Pa"),
            dims="x",
            coords={"lat": ("x", Quantity(d, units="m") * 2)},
        )

        result = da.as_numpy()
        result.name = None  # remove dask-assigned name
        expected = xr.DataArray(arr, dims="x", coords={"lat": ("x", arr * 2)})
        assert_identical(result, expected)
        np.testing.assert_equal(da.to_numpy(), arr)
class TestStackEllipsis:
    """Tests for ``...`` as a dimension spec in ``DataArray.stack``."""

    # https://github.com/pydata/xarray/issues/6051
    def test_result_as_expected(self) -> None:
        """``[...]`` stacks the same way as listing every dimension."""
        grid = DataArray([[1, 2], [1, 2]], dims=("x", "y"))
        via_ellipsis = grid.stack(flat=[...])
        via_all_dims = grid.stack(flat=grid.dims)
        assert_identical(via_ellipsis, via_all_dims)

    def test_error_on_ellipsis_without_list(self) -> None:
        """A bare ``...`` (not wrapped in a list) raises ``ValueError``."""
        grid = DataArray([[1, 2], [1, 2]], dims=("x", "y"))
        with pytest.raises(ValueError):
            grid.stack(flat=...)  # type: ignore[arg-type]
def test_nD_coord_dataarray() -> None:
    """Build arrays whose "x" coordinate is 2-D or scalar, then align/broadcast.

    The coordinate shares its name with a dimension in both cases. Internal
    invariants are checked, and so are the results of ``xr.align`` and
    ``xr.broadcast`` against 1-D arrays.
    """
    # should succeed
    gridded = DataArray(
        np.ones((2, 4)),
        dims=("x", "y"),
        coords={
            "x": (("x", "y"), np.arange(8).reshape(2, 4)),
            "y": ("y", np.arange(4)),
        },
    )
    _assert_internal_invariants(gridded, check_default_indexes=True)

    along_y = DataArray(np.ones(4), dims="y", coords={"y": ("y", np.arange(4))})
    along_z = DataArray(np.ones(4), dims="z")

    _, aligned_y = xr.align(gridded, along_y)
    assert_identical(along_y, aligned_y)

    _, broadcast_y = xr.broadcast(gridded, along_y)
    assert_identical(gridded.drop_vars("x"), broadcast_y)

    broadcast_gridded, _ = xr.broadcast(gridded, along_z)
    assert_identical(broadcast_gridded, gridded.expand_dims(z=4, axis=-1))

    scalar_x = DataArray(np.ones((2, 4)), coords={"x": 0}, dims=["x", "y"])
    _assert_internal_invariants(scalar_x, check_default_indexes=True)
    assert "x" not in scalar_x.xindexes
    assert "x" in scalar_x.coords
def test_lazy_data_variable_not_loaded():
    """Wrapping an ``InaccessibleArray``-backed Variable must not raise (GH8753)."""
    lazy_variable = Variable(
        data=InaccessibleArray(np.array([1, 2, 3])), dims="x"
    )
    # No data needs to be accessed, so no error should be raised
    wrapped = xr.DataArray(lazy_variable)
    # Re-wrapping the DataArray should not need the data either.
    xr.DataArray(wrapped)
def test_unstack_index_var() -> None:
    """Unstack the "x" coordinate array after re-indexing "x" by ("y", "z")."""
    base = xr.DataArray(range(2), dims=["x"], coords=[["a", "b"]])
    labels = base.x.assign_coords(y=("x", ["c", "d"]), z=("x", ["e", "f"]))
    stacked = labels.set_index(x=["y", "z"])

    unstacked = stacked.unstack("x")

    wanted_values = np.array([["a", np.nan], [np.nan, "b"]], dtype=object)
    wanted = xr.DataArray(
        wanted_values,
        coords={"y": ["c", "d"], "z": ["e", "f"]},
        name="x",
    )
    assert_identical(unstacked, wanted)