# CCR/.venv/lib/python3.12/site-packages/xarray/tests/test_dataset.py
#
# 7665 lines
# 279 KiB
# Python

from __future__ import annotations
import pickle
import re
import sys
import warnings
from collections.abc import Hashable
from copy import copy, deepcopy
from io import StringIO
from textwrap import dedent
from typing import Any, Literal
import numpy as np
import pandas as pd
import pytest
from pandas.core.indexes.datetimes import DatetimeIndex
# remove once numpy 2.0 is the oldest supported version
try:
from numpy.exceptions import RankWarning
except ImportError:
from numpy import RankWarning # type: ignore[no-redef,attr-defined,unused-ignore]
import xarray as xr
from xarray import (
DataArray,
Dataset,
IndexVariable,
MergeError,
Variable,
align,
backends,
broadcast,
open_dataset,
set_options,
)
from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core import dtypes, indexing, utils
from xarray.core.common import duck_array_ops, full_like
from xarray.core.coordinates import Coordinates, DatasetCoordinates
from xarray.core.indexes import Index, PandasIndex
from xarray.core.types import ArrayLike
from xarray.core.utils import is_scalar
from xarray.groupers import TimeResampler
from xarray.namedarray.pycompat import array_type, integer_types
from xarray.testing import _assert_internal_invariants
from xarray.tests import (
DuckArrayWrapper,
InaccessibleArray,
UnexpectedDataAccess,
assert_allclose,
assert_array_equal,
assert_equal,
assert_identical,
assert_no_warnings,
assert_writeable,
create_test_data,
has_cftime,
has_dask,
raise_if_dask_computes,
requires_bottleneck,
requires_cftime,
requires_cupy,
requires_dask,
requires_numexpr,
requires_pint,
requires_scipy,
requires_sparse,
source_ndarray,
)
try:
from pandas.errors import UndefinedVariableError
except ImportError:
# TODO: remove once we stop supporting pandas<1.4.3
from pandas.core.computation.ops import UndefinedVariableError
try:
import dask.array as da
except ImportError:
pass
# from numpy version 2.0 trapz is deprecated and renamed to trapezoid
# remove once numpy 2.0 is the oldest supported version
try:
from numpy import trapezoid # type: ignore[attr-defined,unused-ignore]
except ImportError:
from numpy import ( # type: ignore[arg-type,no-redef,attr-defined,unused-ignore]
trapz as trapezoid,
)
# Result of looking up the optional "sparse" package through xarray's
# ``array_type`` compatibility helper.
sparse_array_type = array_type("sparse")
# Module-wide pytest marks: warnings whose message matches one of these
# patterns are raised as errors for every test in this module.
pytestmark = [
    pytest.mark.filterwarnings("error:Mean of empty slice"),
    pytest.mark.filterwarnings("error:All-NaN (slice|axis) encountered"),
]
def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
    """Build the three datasets used by the append tests.

    Returns a base dataset with three time steps, a dataset holding the same
    variables on two later time steps, and a dataset with a single new
    variable spanning both time ranges. Random values are drawn from
    ``np.random.default_rng(seed)``.
    """
    rng = np.random.default_rng(seed)
    lat, lon = [2, 1, 0], [0, 1, 2]
    nt1, nt2 = 3, 2
    time1 = pd.date_range("2000-01-01", periods=nt1).as_unit("ns")
    time2 = pd.date_range("2000-02-01", periods=nt2).as_unit("ns")
    unicode_var = np.array(["áó", "áó", "áó"])

    # 1-D variables along "time" for the base dataset, in insertion order.
    base_series = {
        "string_var": np.array(["a", "bc", "def"], dtype=object),
        "string_var_fixed_length": np.array(["aa", "bb", "cc"], dtype="|S2"),
        "unicode_var": unicode_var,
        "datetime_var": np.array(
            ["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]"
        ),
        "bool_var": np.array([True, False, True], dtype=bool),
    }
    # Matching variables for the dataset that gets appended.
    appended_series = {
        "string_var": np.array(["asdf", "asdfg"], dtype=object),
        "string_var_fixed_length": np.array(["dd", "ee"], dtype="|S2"),
        "unicode_var": unicode_var[:nt2],
        "datetime_var": np.array(
            ["2019-01-04", "2019-01-05"], dtype="datetime64[ns]"
        ),
        "bool_var": np.array([False, True], dtype=bool),
    }

    def gridded(n_times, times):
        # Each call consumes the generator, so the call order below matters.
        return xr.DataArray(
            rng.random((3, 3, n_times)),
            coords=[lat, lon, times],
            dims=["lat", "lon", "time"],
        )

    def along_time(series):
        return {name: ("time", values) for name, values in series.items()}

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "Converting non-default")
        ds = xr.Dataset(
            data_vars={"da": gridded(nt1, time1), **along_time(base_series)}
        )
        ds_to_append = xr.Dataset(
            data_vars={"da": gridded(nt2, time2), **along_time(appended_series)}
        )
        ds_with_new_var = xr.Dataset(
            data_vars={"new_var": gridded(nt1 + nt2, time1.append(time2))}
        )

    for built in (ds, ds_to_append, ds_with_new_var):
        assert_writeable(built)
    return ds, ds_to_append, ds_with_new_var
def create_append_string_length_mismatch_test_data(dtype) -> tuple[Dataset, Dataset]:
    """Return two datasets whose "temperature" strings differ in length.

    ``dtype`` selects the string flavour: ``"U"`` uses Python string lists,
    ``"S"`` uses fixed-width byte-string arrays. Any other value raises
    ``ValueError``.
    """

    def make_datasets(data, data_to_append) -> tuple[Dataset, Dataset]:
        first, second = (
            xr.Dataset(
                {"temperature": (["time"], values)},
                coords={"time": [0, 1, 2]},
            )
            for values in (data, data_to_append)
        )
        assert_writeable(first)
        assert_writeable(second)
        return first, second

    if dtype == "U":
        return make_datasets(["ab", "cd", "ef"], ["abc", "def", "ghijk"])
    if dtype == "S":
        return make_datasets(
            np.array(["aa", "bb", "cc"], dtype="|S2"),
            np.array(["aaa", "bbb", "ccc"], dtype="|S3"),
        )
    raise ValueError(f"unsupported dtype {dtype}.")
def create_test_multiindex() -> Dataset:
    """Return a Dataset without data variables whose coordinates come from a
    two-level pandas MultiIndex assigned to dimension ``x``."""
    levels = [["a", "b"], [1, 2]]
    midx = pd.MultiIndex.from_product(levels, names=("level_1", "level_2"))
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    return Dataset({}, midx_coords)
def create_test_stacked_array() -> tuple[DataArray, DataArray]:
    """Return ``x * y`` and ``x * y * y`` for index-backed arrays ``x`` (10
    values) and ``y`` (20 values)."""
    x_axis = DataArray(pd.Index(np.r_[:10], name="x"))
    y_axis = DataArray(pd.Index(np.r_[:20], name="y"))
    first = x_axis * y_axis
    second = x_axis * y_axis * y_axis
    return first, second
class InaccessibleVariableDataStore(backends.InMemoryDataStore):
    """
    In-memory store whose non-index variables are wrapped in
    ``InaccessibleArray`` when read back.
    """

    def __init__(self):
        super().__init__()
        # names of the variables that were stored as IndexVariable
        self._indexvars = set()

    def store(self, variables, *args, **kwargs) -> None:
        """Store ``variables`` and remember which of them are index variables."""
        super().store(variables, *args, **kwargs)
        for name, var in variables.items():
            if not isinstance(var, IndexVariable):
                continue
            self._indexvars.add(name)

    def get_variables(self):
        """Return the stored variables, wrapping every non-index variable in a
        lazily indexed ``InaccessibleArray``."""
        wrapped = {}
        for name, var in self._variables.items():
            if name in self._indexvars:
                # index variables are handed back untouched
                wrapped[name] = var
                continue
            lazy = indexing.LazilyIndexedArray(InaccessibleArray(var.values))
            wrapped[name] = Variable(var.dims, lazy, var.attrs)
        return wrapped
class DuckBackendArrayWrapper(backends.common.BackendArray):
    """Mimic a BackendArray wrapper around DuckArrayWrapper"""

    def __init__(self, array):
        # shape and dtype are taken from the raw input, not from the wrapper
        wrapped = DuckArrayWrapper(array)
        self.array = wrapped
        self.shape = array.shape
        self.dtype = array.dtype

    def get_array(self):
        """Return the wrapped duck array."""
        return self.array

    def __getitem__(self, key):
        """Index the wrapped array with the tuple form of ``key``."""
        indexer = key.tuple
        return self.array[indexer]
class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore):
    """
    Store that returns a duck array, not convertible to numpy array,
    on read. Modeled after NVIDIA's kvikio.
    """

    def __init__(self):
        super().__init__()
        # names of the variables that were stored as IndexVariable
        self._indexvars = set()

    def store(self, variables, *args, **kwargs) -> None:
        """Store ``variables`` and remember which of them are index variables."""
        super().store(variables, *args, **kwargs)
        for name, var in variables.items():
            if not isinstance(var, IndexVariable):
                continue
            self._indexvars.add(name)

    def get_variables(self) -> dict[Any, xr.Variable]:
        """Return the stored variables, wrapping every non-index variable in a
        lazily indexed ``DuckBackendArrayWrapper``."""
        wrapped: dict[Any, xr.Variable] = {}
        for name, var in self._variables.items():
            if name in self._indexvars:
                # index variables are handed back untouched
                wrapped[name] = var
                continue
            lazy = indexing.LazilyIndexedArray(DuckBackendArrayWrapper(var.values))
            wrapped[name] = Variable(var.dims, lazy, var.attrs)
        return wrapped
class TestDataset:
def test_repr(self) -> None:
    """Compare ``repr(Dataset)`` with literal expectations for a populated,
    an empty and a reduced dataset, and check width and attribute limits."""
    data = create_test_data(seed=123)
    data.attrs["foo"] = "bar"
    # need to insert str dtype at runtime to handle different endianness
    expected = dedent(
        """\
<xarray.Dataset> Size: 2kB
Dimensions: (dim2: 9, dim3: 10, time: 20, dim1: 8)
Coordinates:
* dim2 (dim2) float64 72B 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
* dim3 (dim3) {} 40B 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
* time (time) datetime64[{}] 160B 2000-01-01 2000-01-02 ... 2000-01-20
numbers (dim3) int64 80B 0 1 2 0 0 1 1 2 2 3
Dimensions without coordinates: dim1
Data variables:
var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364
var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423
var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555
Attributes:
foo: bar""".format(
            data["dim3"].dtype,
            "ns",
        )
    )
    # trailing whitespace is stripped from every repr line before comparing
    actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
    assert expected == actual
    # with display_width=100 the longest repr line must fall between 90 and 100
    with set_options(display_width=100):
        max_len = max(map(len, repr(data).split("\n")))
        assert 90 < max_len < 100
    # repr of an empty Dataset
    expected = dedent(
        """\
<xarray.Dataset> Size: 0B
Dimensions: ()
Data variables:
*empty*"""
    )
    actual = "\n".join(x.rstrip() for x in repr(Dataset()).split("\n"))
    print(actual)
    assert expected == actual
    # verify that ... doesn't appear for scalar coordinates
    data = Dataset({"foo": ("x", np.ones(10))}).mean()
    expected = dedent(
        """\
<xarray.Dataset> Size: 8B
Dimensions: ()
Data variables:
foo float64 8B 1.0"""
    )
    actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
    print(actual)
    assert expected == actual
    # verify long attributes are truncated
    data = Dataset(attrs={"foo": "bar" * 1000})
    assert len(repr(data)) < 1000
def test_repr_multiindex(self) -> None:
    """Compare the repr of MultiIndex-backed datasets with literal
    expectations, including one with a long level name."""
    data = create_test_multiindex()
    # object-dtype item size is platform dependent, so sizes are computed
    obj_size = np.dtype("O").itemsize
    expected = dedent(
        f"""\
<xarray.Dataset> Size: {8 * obj_size + 32}B
Dimensions: (x: 4)
Coordinates:
* x (x) object {4 * obj_size}B MultiIndex
* level_1 (x) object {4 * obj_size}B 'a' 'a' 'b' 'b'
* level_2 (x) int64 32B 1 2 1 2
Data variables:
*empty*"""
    )
    # trailing whitespace is stripped from every repr line before comparing
    actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
    print(actual)
    assert expected == actual
    # verify that long level names are not truncated
    midx = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=("a_quite_long_level_name", "level_2")
    )
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    data = Dataset({}, midx_coords)
    expected = dedent(
        f"""\
<xarray.Dataset> Size: {8 * obj_size + 32}B
Dimensions: (x: 4)
Coordinates:
* x (x) object {4 * obj_size}B MultiIndex
* a_quite_long_level_name (x) object {4 * obj_size}B 'a' 'a' 'b' 'b'
* level_2 (x) int64 32B 1 2 1 2
Data variables:
*empty*"""
    )
    actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
    print(actual)
    assert expected == actual
def test_repr_period_index(self) -> None:
    """Building the repr of a dataset with a period-range time coordinate
    must not raise."""
    ds = create_test_data(seed=456)
    daily_periods = pd.period_range("2000-01-01", periods=20, freq="D")
    ds.coords["time"] = daily_periods
    # check that creating the repr doesn't raise an error #GH645
    repr(ds)
def test_unicode_data(self) -> None:
    """Non-ASCII variable names, values and attributes must appear verbatim
    in ``str(Dataset)``."""
    # regression test for GH834
    # The attribute value has to be "∑": the expected text below ends with
    # "å: ∑", so an empty attribute value could never match it.
    data = Dataset({"foø": ["ba®"]}, attrs={"å": "∑"})
    repr(data)  # should not raise
    # numpy prefixes the unicode dtype with the platform byte order
    byteorder = "<" if sys.byteorder == "little" else ">"
    expected = dedent(
        f"""\
<xarray.Dataset> Size: 12B
Dimensions: (foø: 1)
Coordinates:
* foø (foø) {byteorder}U3 12B {'ba®'!r}
Data variables:
*empty*
Attributes:
å: ∑"""
    )
    actual = str(data)
    assert expected == actual
def test_repr_nep18(self) -> None:
    """The repr of a dataset holding a custom array-like shows that
    object's own repr with the line break replaced by a space."""

    class Array:
        # Minimal array-like: shape/ndim/dtype attributes plus dispatch
        # hooks that both return NotImplemented.
        def __init__(self):
            self.shape = (2,)
            self.ndim = 1
            self.dtype = np.dtype(np.float64)

        def __array_function__(self, *args, **kwargs):
            return NotImplemented

        def __array_ufunc__(self, *args, **kwargs):
            return NotImplemented

        def __repr__(self):
            # multi-line on purpose; the expected text below has it on one line
            return "Custom\nArray"

    dataset = Dataset({"foo": ("x", Array())})
    expected = dedent(
        """\
<xarray.Dataset> Size: 16B
Dimensions: (x: 2)
Dimensions without coordinates: x
Data variables:
foo (x) float64 16B Custom Array"""
    )
    assert expected == repr(dataset)
def test_info(self) -> None:
    """``Dataset.info`` writes the expected summary text into a buffer."""
    ds = create_test_data(seed=123)
    ds = ds.drop_vars("dim3")  # string type prints differently in PY2 vs PY3
    ds.attrs["unicode_attr"] = "ba®"
    ds.attrs["string_attr"] = "bar"
    # info() writes into the in-memory buffer passed as ``buf``
    buf = StringIO()
    ds.info(buf=buf)
    expected = dedent(
        """\
xarray.Dataset {
dimensions:
\tdim2 = 9 ;
\ttime = 20 ;
\tdim1 = 8 ;
\tdim3 = 10 ;
variables:
\tfloat64 dim2(dim2) ;
\tdatetime64[ns] time(time) ;
\tfloat64 var1(dim1, dim2) ;
\t\tvar1:foo = variable ;
\tfloat64 var2(dim1, dim2) ;
\t\tvar2:foo = variable ;
\tfloat64 var3(dim3, dim1) ;
\t\tvar3:foo = variable ;
\tint64 numbers(dim3) ;
// global attributes:
\t:unicode_attr = ba® ;
\t:string_attr = bar ;
}"""
    )
    actual = buf.getvalue()
    assert expected == actual
    buf.close()
def test_constructor(self) -> None:
    """Constructor error cases, an n-D variable named like a dimension, and
    construction from DataArrays."""
    short_x = ("x", 2 * np.arange(100))
    long_x = ("x", np.arange(1000))
    grid = (["x", "y"], np.arange(1000).reshape(100, 10))

    with pytest.raises(ValueError, match=r"conflicting sizes"):
        Dataset({"a": short_x, "b": long_x})
    with pytest.raises(TypeError, match=r"tuple of form"):
        Dataset({"x": (1, 2, 3, 4, 5, 6, 7)})
    with pytest.raises(ValueError, match=r"already exists as a scalar"):
        Dataset({"x": 0, "y": ("x", [1, 2, 3])})

    # nD coordinate variable "x" sharing name with dimension
    nd_named = Dataset({"a": short_x, "x": grid})
    assert "x" not in nd_named.xindexes
    _assert_internal_invariants(nd_named, check_default_indexes=True)

    # verify handling of DataArrays
    want = Dataset({"x": short_x, "z": grid})
    got = Dataset({"z": want["z"]})
    assert_identical(want, got)
def test_constructor_1d(self) -> None:
    """A bare 1-D array or list under key "x" builds the same dataset as an
    explicit ``(dims, data)`` tuple."""
    want = Dataset({"x": (["x"], 5.0 + np.arange(5))})
    from_array = Dataset({"x": 5.0 + np.arange(5)})
    assert_identical(want, from_array)
    from_list = Dataset({"x": [5, 6, 7, 8, 9]})
    assert_identical(want, from_list)
def test_constructor_0d(self) -> None:
    """Scalar values passed directly build the same dataset as an explicit
    ``([], value)`` tuple."""
    scalar_ds = Dataset({"x": ([], 1)})
    for candidate in [1, np.array(1), scalar_ds["x"]]:
        built = Dataset({"x": candidate})
        assert_identical(scalar_ds, built)

    class Arbitrary:
        pass

    stamp = pd.Timestamp("2000-01-01T12")
    scalars = [
        True,
        None,
        3.4,
        np.nan,
        "hello",
        b"raw",
        np.datetime64("2000-01-01"),
        stamp,
        stamp.to_pydatetime(),
        Arbitrary(),
    ]
    for value in scalars:
        print(value)
        explicit = Dataset({"x": ([], value)})
        implicit = Dataset({"x": value})
        assert_identical(explicit, implicit)
def test_constructor_auto_align(self) -> None:
    """DataArrays with partly overlapping ``x`` labels are combined on the
    union of those labels when building a Dataset."""
    left = DataArray([1, 2], [("x", [0, 1])])
    right = DataArray([3, 4], [("x", [1, 2])])

    # verify align uses outer join
    aligned = Dataset(
        {"a": ("x", [1, 2, np.nan]), "b": ("x", [np.nan, 3, 4])}, {"x": [0, 1, 2]}
    )
    built = Dataset({"a": left, "b": right})
    assert_identical(aligned, built)

    # regression test for GH346
    assert isinstance(built.variables["x"], IndexVariable)

    # variable with different dimensions
    on_y = ("y", [3, 4])
    with_y = aligned.merge({"c": on_y})
    built = Dataset({"a": left, "b": right, "c": on_y})
    assert_identical(with_y, built)

    # variable that is only aligned against the aligned variables
    on_x = ("x", [3, 2, 1])
    with_x = aligned.merge({"d": on_x})
    built = Dataset({"a": left, "b": right, "d": on_x})
    assert_identical(with_x, built)

    wrong_length = ("x", [0, 0])
    with pytest.raises(ValueError, match=r"conflicting sizes"):
        Dataset({"a": left, "b": right, "e": wrong_length})
def test_constructor_pandas_sequence(self) -> None:
    """A Dataset rebuilt from pandas objects equals the source dataset,
    also after one pandas object has had its index reversed."""
    source = self.make_example_math_dataset()
    frames = {name: source[name].to_pandas() for name in ["foo", "bar"]}

    def assert_rebuilt_equals_source() -> None:
        rebuilt = Dataset(frames, source.coords, attrs=source.attrs)
        del rebuilt["x"]
        assert_equal(source, rebuilt)

    assert_rebuilt_equals_source()

    # reindex pandas obj, check align works
    reversed_index = reversed(frames["foo"].index)
    frames["foo"] = frames["foo"].reindex(reversed_index)
    assert_rebuilt_equals_source()
def test_constructor_pandas_single(self) -> None:
    """A Dataset built from one pandas object has integer variable names
    whose values match the pandas object."""
    series_like = DataArray(np.random.rand(4), dims=["a"])  # series
    frame_like = DataArray(np.random.rand(4, 3), dims=["a", "b"])  # df
    for arr in (series_like, frame_like):
        as_pandas = arr.to_pandas()
        built = Dataset(as_pandas)  # type: ignore[arg-type] # TODO: improve typing of __init__
        for key in built.data_vars:
            assert isinstance(key, int)
            assert_array_equal(built[key], as_pandas[key])
def test_constructor_compat(self) -> None:
    """How the constructor reconciles a coordinate carried by a DataArray
    with a same-named variable, including the conflicting case."""
    want = Dataset({"x": 0}, {"y": ("z", [1, 1, 1])})
    scalar_first = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])}
    assert_identical(want, Dataset(scalar_first))
    vector_first = {"y": ("z", [1, 1, 1]), "x": DataArray(0, coords={"y": 1})}
    assert_identical(want, Dataset(vector_first))

    source = Dataset(
        {"a": (("x", "y"), np.ones((2, 3)))},
        {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]},
    )
    want = Dataset(
        {"a": ("x", np.ones(2)), "b": ("y", np.ones(3))},
        {"c": (("x", "y"), np.zeros((2, 3))), "x": [0, 1]},
    )
    first_column = source["a"][:, 0]
    first_row = source["a"][0].drop_vars("x")
    got = Dataset({"a": first_column, "b": first_row})
    assert_identical(want, got)

    conflicting = {"x": DataArray(0, coords={"y": 3}), "y": ("z", [1, 1, 1])}
    with pytest.raises(MergeError):
        Dataset(conflicting)

    index_like = {"x": DataArray(0, coords={"y": 1}), "y": [1, 1]}
    got = Dataset(index_like)
    want = Dataset({"x": 0}, {"y": [1, 1]})
    assert_identical(want, got)
def test_constructor_with_coords(self) -> None:
    """Constructor behaviour when coordinates are passed explicitly."""
    # the same name may not be both a data variable and a coordinate
    with pytest.raises(ValueError, match=r"found in both data_vars and"):
        Dataset({"a": ("x", [1])}, {"a": ("x", [1])})
    ds = Dataset({}, {"a": ("x", [1])})
    assert not ds.data_vars
    assert list(ds.coords.keys()) == ["a"]
    mindex = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2]], names=("level_1", "level_2")
    )
    # NOTE(review): block nesting is reconstructed here. If the first call
    # raises as expected, the second call in this block never runs -- TODO
    # confirm whether it should get its own ``pytest.raises`` block.
    with pytest.raises(ValueError, match=r"conflicting MultiIndex"):
        with pytest.warns(
            FutureWarning,
            match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*",
        ):
            Dataset({}, {"x": mindex, "y": mindex})
            Dataset({}, {"x": mindex, "level_1": range(4)})
def test_constructor_no_default_index(self) -> None:
    """A ``Coordinates`` object built with ``indexes={}`` yields a dataset
    that has ``x`` but no index for it."""
    # explicitly passing a Coordinates object skips the creation of default index
    index_free = Coordinates({"x": [1, 2, 3]}, indexes={})
    ds = Dataset(coords=index_free)
    assert "x" in ds
    assert "x" not in ds.xindexes
def test_constructor_multiindex(self) -> None:
    """Coordinates built from a MultiIndex round-trip through ``Dataset``;
    passing the raw MultiIndex emits a FutureWarning."""
    midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    assert_identical(Dataset(coords=midx_coords), midx_coords.to_dataset())

    # the same warning is expected for both keyword arguments
    for keyword in ("data_vars", "coords"):
        with pytest.warns(
            FutureWarning,
            match=".*`pandas.MultiIndex`.*no longer be implicitly promoted.*",
        ):
            Dataset(**{keyword: {"x": midx}})
def test_constructor_custom_index(self) -> None:
    """A custom ``Index`` supplied through ``Coordinates`` is kept by the
    dataset, while the coordinate variable is a different object."""

    class CustomIndex(Index): ...

    source_coords = Coordinates(
        coords={"x": ("x", [1, 2, 3])}, indexes={"x": CustomIndex()}
    )
    built = Dataset(coords=source_coords)
    assert isinstance(built.xindexes["x"], CustomIndex)

    # test coordinate variables copied
    assert built.variables["x"] is not source_coords.variables["x"]
@pytest.mark.filterwarnings("ignore:return type")
def test_properties(self) -> None:
    """Check the container-like properties of the test dataset: dims, sizes,
    dtypes, data_vars, xindexes, indexes, coords and nbytes."""
    ds = create_test_data()
    # dims / sizes
    # These exact types aren't public API, but this makes sure we don't
    # change them inadvertently:
    assert isinstance(ds.dims, utils.Frozen)
    # TODO change after deprecation cycle in GH #8500 is complete
    assert isinstance(ds.dims.mapping, dict)
    assert type(ds.dims.mapping) is dict
    # comparing ``dims`` with ``sizes`` is expected to emit the FutureWarning
    with pytest.warns(
        FutureWarning,
        match=" To access a mapping from dimension names to lengths, please use `Dataset.sizes`",
    ):
        assert ds.dims == ds.sizes
    assert ds.sizes == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20}
    # dtypes
    assert isinstance(ds.dtypes, utils.Frozen)
    assert isinstance(ds.dtypes.mapping, dict)
    assert ds.dtypes == {
        "var1": np.dtype("float64"),
        "var2": np.dtype("float64"),
        "var3": np.dtype("float64"),
    }
    # data_vars
    assert list(ds) == list(ds.data_vars)
    assert list(ds.keys()) == list(ds.data_vars)
    assert "aasldfjalskdfj" not in ds.variables
    assert "dim1" in repr(ds.variables)
    assert len(ds) == 3
    assert bool(ds)
    assert list(ds.data_vars) == ["var1", "var2", "var3"]
    assert list(ds.data_vars.keys()) == ["var1", "var2", "var3"]
    assert "var1" in ds.data_vars
    assert "dim1" not in ds.data_vars
    assert "numbers" not in ds.data_vars
    assert len(ds.data_vars) == 3
    # xindexes
    assert set(ds.xindexes) == {"dim2", "dim3", "time"}
    assert len(ds.xindexes) == 3
    assert "dim2" in repr(ds.xindexes)
    assert all(isinstance(idx, Index) for idx in ds.xindexes.values())
    # indexes
    assert set(ds.indexes) == {"dim2", "dim3", "time"}
    assert len(ds.indexes) == 3
    assert "dim2" in repr(ds.indexes)
    assert all(isinstance(idx, pd.Index) for idx in ds.indexes.values())
    # coords
    assert list(ds.coords) == ["dim2", "dim3", "time", "numbers"]
    assert "dim2" in ds.coords
    assert "numbers" in ds.coords
    assert "var1" not in ds.coords
    assert "dim1" not in ds.coords
    assert len(ds.coords) == 4
    # nbytes
    # one int64 scalar (8 bytes) plus two float32 values (2 * 4 bytes)
    assert (
        Dataset({"x": np.int64(1), "y": np.array([1, 2], dtype=np.float32)}).nbytes
        == 16
    )
def test_warn_ds_dims_deprecation(self) -> None:
    """Mapping-style access on ``Dataset.dims`` warns, while ``len``,
    iteration and membership tests do not."""
    # TODO remove after deprecation cycle in GH #8500 is complete
    ds = create_test_data()

    warning_accesses = (
        lambda dims: dims["dim1"],
        lambda dims: dims.keys(),
        lambda dims: dims.values(),
        lambda dims: dims.items(),
    )
    for access in warning_accesses:
        with pytest.warns(FutureWarning, match="return type"):
            access(ds.dims)

    with assert_no_warnings():
        len(ds.dims)
        ds.dims.__iter__()
        _ = "dim1" in ds.dims
def test_asarray(self) -> None:
    """``np.asarray`` on a Dataset raises ``TypeError``."""
    scalar_ds = Dataset({"x": 0})
    with pytest.raises(TypeError, match=r"cannot directly convert"):
        np.asarray(scalar_ds)
def test_get_index(self) -> None:
    """``get_index`` returns the labels for ``x``, a 0..2 index for ``y``,
    and raises ``KeyError`` for an unknown name."""
    ds = Dataset({"foo": (("x", "y"), np.zeros((2, 3)))}, coords={"x": ["a", "b"]})
    x_index = ds.get_index("x")
    assert x_index.equals(pd.Index(["a", "b"]))
    y_index = ds.get_index("y")
    assert y_index.equals(pd.Index([0, 1, 2]))
    with pytest.raises(KeyError):
        ds.get_index("z")
def test_attr_access(self) -> None:
    """Variables and attributes are reachable through attribute access, and
    a variable wins over a same-named attribute."""
    weather = Dataset(
        {"tmin": ("x", [42], {"units": "Celsius"})}, attrs={"title": "My test data"}
    )
    assert_identical(weather.tmin, weather["tmin"])
    assert_identical(weather.tmin.x, weather.x)
    assert weather.title == weather.attrs["title"]
    assert weather.tmin.units == weather["tmin"].attrs["units"]

    dataset_names = set(dir(weather))
    assert {"tmin", "title"} <= dataset_names
    variable_names = set(dir(weather.tmin))
    assert "units" in variable_names

    # should defer to variable of same name
    weather.attrs["tmin"] = -999
    assert weather.attrs["tmin"] == -999
    assert_identical(weather.tmin, weather["tmin"])
def test_variable(self) -> None:
    """Variables assigned by item keep their creation order and values; a
    shape that conflicts with existing dimensions is rejected."""
    ds = Dataset()
    values = np.random.random((10, 3))
    ds["foo"] = (("time", "x"), values)
    assert "foo" in ds.variables
    assert "foo" in ds
    ds["bar"] = (("time", "x"), values)
    # order of creation is preserved
    assert list(ds.variables) == ["foo", "bar"]
    assert_array_equal(ds["foo"].values, values)
    # try to add variable with dim (10,3) with data that's (3,10)
    transposed = values.T
    with pytest.raises(ValueError):
        ds["qux"] = (("time", "x"), transposed)
def test_modify_inplace(self) -> None:
    """Item assignment creates and replaces coordinates in place and rejects
    conflicting or malformed data."""
    first = Dataset()
    vec = np.random.random((10,))
    attributes = {"foo": "bar"}
    first["x"] = ("x", vec, attributes)
    assert "x" in first.coords
    assert isinstance(first.coords["x"].to_index(), pd.Index)
    assert_identical(first.coords["x"].variable, first.variables["x"])

    second = Dataset()
    second["x"] = ("x", vec, attributes)
    assert_identical(first["x"], second["x"])
    assert first.sizes == second.sizes

    # this should work
    first["x"] = ("x", vec[:5])
    first["z"] = ("x", np.arange(5))
    with pytest.raises(ValueError):
        # now it shouldn't, since there is a conflicting length
        first["x"] = ("x", vec[:4])

    two_dimensional = np.random.random((10, 1))
    zero_dimensional = np.array(0)
    for bad_data in (two_dimensional, zero_dimensional):
        with pytest.raises(ValueError):
            first["y"] = ("y", bad_data)
    assert "y" not in first.dims
def test_coords_properties(self) -> None:
    """Check length, iteration order, membership, item access, repr, sizes
    and dtypes of ``Dataset.coords``."""
    # use int64 for repr consistency on windows
    data = Dataset(
        {
            "x": ("x", np.array([-1, -2], "int64")),
            "y": ("y", np.array([0, 1, 2], "int64")),
            "foo": (["x", "y"], np.random.randn(2, 3)),
        },
        {"a": ("x", np.array([4, 5], "int64")), "b": np.int64(-10)},
    )
    coords = data.coords
    assert isinstance(coords, DatasetCoordinates)
    # len
    assert len(coords) == 4
    # iter
    assert list(coords) == ["x", "y", "a", "b"]
    assert_identical(coords["x"].variable, data["x"].variable)
    assert_identical(coords["y"].variable, data["y"].variable)
    assert "x" in coords
    assert "a" in coords
    assert 0 not in coords
    assert "foo" not in coords
    # data variables and unknown keys are not reachable through coords
    with pytest.raises(KeyError):
        coords["foo"]
    with pytest.raises(KeyError):
        coords[0]
    # repr
    expected = dedent(
        """\
Coordinates:
* x (x) int64 16B -1 -2
* y (y) int64 24B 0 1 2
a (x) int64 16B 4 5
b int64 8B -10"""
    )
    actual = repr(coords)
    assert expected == actual
    # dims
    assert coords.sizes == {"x": 2, "y": 3}
    # dtypes
    assert coords.dtypes == {
        "x": np.dtype("int64"),
        "y": np.dtype("int64"),
        "a": np.dtype("int64"),
        "b": np.dtype("int64"),
    }
def test_coords_modify(self) -> None:
    """Setting, deleting and updating entries through ``Dataset.coords``."""
    base = Dataset(
        {
            "x": ("x", [-1, -2]),
            "y": ("y", [0, 1, 2]),
            "foo": (["x", "y"], np.random.randn(2, 3)),
        },
        {"a": ("x", [4, 5]), "b": -10},
    )

    work = base.copy(deep=True)
    work.coords["x"] = ("x", ["a", "b"])
    assert_array_equal(work["x"], ["a", "b"])

    work = base.copy(deep=True)
    work.coords["z"] = ("z", ["a", "b"])
    assert_array_equal(work["z"], ["a", "b"])

    work = base.copy(deep=True)
    with pytest.raises(ValueError, match=r"conflicting dimension sizes"):
        work.coords["x"] = ("x", [-1])
    assert_identical(work, base)  # should not be modified

    work = base.copy()
    del work.coords["b"]
    without_b = base.reset_coords("b", drop=True)
    assert_identical(without_b, work)

    for missing in ("not_found", "foo"):
        with pytest.raises(KeyError):
            del base.coords[missing]

    work = base.copy(deep=True)
    work.coords.update({"c": 11})
    with_c = base.merge({"c": 11}).set_coords("c")
    assert_identical(with_c, work)

    # regression test for GH3746
    del work.coords["x"]
    assert "x" not in work.xindexes
def test_update_index(self) -> None:
    """Replacing an indexed coordinate replaces its index as well."""
    ds = Dataset(coords={"x": [1, 2, 3]})
    ds["x"] = ["a", "b", "c"]
    new_index = ds.xindexes["x"].to_pandas_index()
    assert new_index.equals(pd.Index(["a", "b", "c"]))
def test_coords_setitem_with_new_dimension(self) -> None:
    """Assigning a coordinate on a not-yet-existing dimension matches
    passing it to the constructor."""
    built_in_place = Dataset()
    built_in_place.coords["foo"] = ("x", [1, 2, 3])
    built_directly = Dataset(coords={"foo": ("x", [1, 2, 3])})
    assert_identical(built_directly, built_in_place)
def test_coords_setitem_multiindex(self) -> None:
    """Overwriting a single MultiIndex level coordinate raises ``ValueError``."""
    midx_ds = create_test_multiindex()
    replacement = range(4)
    with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "):
        midx_ds.coords["level_1"] = replacement
def test_coords_set(self) -> None:
    """``set_coords`` and ``reset_coords`` move variables between data
    variables and coordinates."""
    only_x = Dataset({"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])})
    x_and_yy = Dataset({"zzz": ("x", [2])}, {"x": ("x", [0]), "yy": ("x", [1])})
    everything = Dataset(
        coords={"x": ("x", [0]), "yy": ("x", [1]), "zzz": ("x", [2])}
    )

    # "x" is already a coordinate, so promoting it changes nothing
    for names in ("x", ["x"]):
        assert_identical(only_x, only_x.set_coords(names))
    assert_identical(x_and_yy, only_x.set_coords("yy"))
    assert_identical(everything, only_x.set_coords(["yy", "zzz"]))

    # resetting without arguments always gives the dataset with only "x"
    for ds in (only_x, x_and_yy, everything):
        assert_identical(only_x, ds.reset_coords())
    assert_identical(only_x, everything.reset_coords(["yy", "zzz"]))
    assert_identical(x_and_yy, everything.reset_coords("zzz"))

    with pytest.raises(ValueError, match=r"cannot remove index"):
        only_x.reset_coords("x")

    dropped = everything.reset_coords("zzz", drop=True)
    for reference in (everything, x_and_yy):
        assert_identical(reference.drop_vars("zzz"), dropped)
def test_coords_to_dataset(self) -> None:
    """``coords.to_dataset()`` keeps the coordinates and drops the data
    variables."""
    source = Dataset({"foo": ("y", [-1, 0, 1])}, {"x": 10, "y": [2, 3, 4]})
    coords_only = Dataset(coords={"x": 10, "y": [2, 3, 4]})
    assert_identical(coords_only, source.coords.to_dataset())
def test_coords_merge(self) -> None:
    """Merging coordinate objects: compatible, conflicting-index,
    conflicting-value and scalar-NaN cases."""
    base = Dataset(coords={"a": ("x", [1, 2]), "x": [0, 1]}).coords

    def assert_merge_both_ways(other, want) -> None:
        assert_identical(want, base.merge(other))
        assert_identical(want, other.merge(base))

    other = Dataset(coords={"b": ("x", ["a", "b"]), "x": [0, 1]}).coords
    combined = Dataset(
        coords={"a": ("x", [1, 2]), "b": ("x", ["a", "b"]), "x": [0, 1]}
    )
    assert_merge_both_ways(other, combined)

    # an ``x`` index that differs from the base index cannot be merged
    for labels in (["a"], ["a", "b"], ["a", "b", "c"]):
        other = Dataset(coords={"x": ("x", labels)}).coords
        with pytest.raises(MergeError):
            base.merge(other)

    other = Dataset(coords={"a": ("x", [8, 9])}).coords
    assert_merge_both_ways(other, Dataset(coords={"x": range(2)}))

    other = Dataset(coords={"x": np.nan}).coords
    assert_merge_both_ways(other, base.to_dataset())
def test_coords_merge_mismatched_shape(self) -> None:
    """Merging same-named coordinates of different shapes."""
    base = Dataset(coords={"a": ("x", [1, 1])}).coords

    scalar = Dataset(coords={"a": 1}).coords
    assert_identical(base.to_dataset(), base.merge(scalar))

    along_y = Dataset(coords={"a": ("y", [1])}).coords
    two_dimensional = Dataset(coords={"a": (["x", "y"], [[1], [1]])})
    assert_identical(two_dimensional, base.merge(along_y))
    # merging the other way round gives the transposed layout
    assert_identical(two_dimensional.transpose(), along_y.merge(base))

    base = Dataset(coords={"a": ("x", [np.nan])}).coords
    scalar = Dataset(coords={"a": np.nan}).coords
    assert_identical(base.to_dataset(), base.merge(scalar))
def test_data_vars_properties(self) -> None:
    """Check membership, item access, repr, dtypes and length of
    ``Dataset.data_vars``."""
    ds = Dataset()
    ds["foo"] = (("x",), [1.0])
    ds["bar"] = 2.0
    # iter
    assert set(ds.data_vars) == {"foo", "bar"}
    assert "foo" in ds.data_vars
    assert "x" not in ds.data_vars
    assert_identical(ds["foo"], ds.data_vars["foo"])
    # repr
    expected = dedent(
        """\
Data variables:
foo (x) float64 8B 1.0
bar float64 8B 2.0"""
    )
    actual = repr(ds.data_vars)
    assert expected == actual
    # dtypes
    assert ds.data_vars.dtypes == {
        "foo": np.dtype("float64"),
        "bar": np.dtype("float64"),
    }
    # len
    ds.coords["x"] = [1]
    assert len(ds.data_vars) == 2
    # https://github.com/pydata/xarray/issues/7588
    # the private ``_coord_names`` set is overwritten on purpose before
    # ``len`` is called
    with pytest.raises(
        AssertionError, match="something is wrong with Dataset._coord_names"
    ):
        ds._coord_names = {"w", "x", "y", "z"}
        len(ds.data_vars)
def test_equals_and_identical(self) -> None:
    """``equals`` ignores attributes and ``identical`` does not; both notice
    removed variables and changed coordinate status."""
    reference = create_test_data(seed=42)
    assert reference.equals(reference)
    assert reference.identical(reference)

    variant = create_test_data(seed=42)
    variant.attrs["foobar"] = "baz"
    assert reference.equals(variant)
    assert not reference.identical(variant)

    del variant["time"]
    assert not reference.equals(variant)

    reference = create_test_data(seed=42).rename({"var1": None})
    assert reference.equals(reference)
    assert reference.identical(reference)

    all_data_vars = reference.reset_coords()
    assert not all_data_vars.equals(reference)
    assert not all_data_vars.identical(reference)
def test_equals_failures(self) -> None:
    """Comparing a Dataset with non-Dataset objects gives a falsy result."""
    ds = create_test_data()
    text_result = ds.equals("foo")  # type: ignore[arg-type]
    assert not text_result
    number_result = ds.identical(123)  # type: ignore[arg-type]
    assert not number_result
    mapping_result = ds.broadcast_equals({1: 2})  # type: ignore[arg-type]
    assert not mapping_result
def test_broadcast_equals(self) -> None:
    """A scalar coordinate and its length-1 counterpart are broadcast-equal
    but neither equal nor identical."""
    scalar_coord = Dataset(coords={"x": 0})
    length_one_coord = Dataset(coords={"x": [0]})
    assert scalar_coord.broadcast_equals(length_one_coord)
    assert not scalar_coord.equals(length_one_coord)
    assert not scalar_coord.identical(length_one_coord)
def test_attrs(self) -> None:
    """Assigning ``Dataset.attrs`` stores the mapping and exposes it as a
    ``dict``."""
    data = create_test_data(seed=42)
    data.attrs = {"foobar": "baz"}
    # Compare with ``==``. The previous ``assert value, "baz"`` form only
    # checked truthiness and used "baz" as the failure message.
    assert data.attrs["foobar"] == "baz"
    assert isinstance(data.attrs, dict)
def test_chunks_does_not_load_data(self) -> None:
    """Reading ``chunks`` from a dataset backed by the inaccessible store
    yields an empty mapping."""
    # regression test for GH6538
    inaccessible_store = InaccessibleVariableDataStore()
    create_test_data().dump_to_store(inaccessible_store)
    opened = open_dataset(inaccessible_store)
    assert opened.chunks == {}
@requires_dask
def test_chunk(self) -> None:
    """Exercise ``Dataset.chunk``: default chunking, explicit chunk sizes,
    dask-name changes on rechunking, and an unknown dimension name."""
    data = create_test_data()
    # before chunking every variable holds a numpy array
    for v in data.variables.values():
        assert isinstance(v.data, np.ndarray)
    assert data.chunks == {}
    reblocked = data.chunk()
    # variables named like a dimension stay numpy; the others become dask arrays
    for k, v in reblocked.variables.items():
        if k in reblocked.dims:
            assert isinstance(v.data, np.ndarray)
        else:
            assert isinstance(v.data, da.Array)
    expected_chunks: dict[Hashable, tuple[int, ...]] = {
        "dim1": (8,),
        "dim2": (9,),
        "dim3": (10,),
    }
    assert reblocked.chunks == expected_chunks
    # test kwargs form of chunks
    # NOTE(review): the mapping is passed positionally here, not as keyword
    # arguments -- confirm which form is meant to be covered.
    assert data.chunk(expected_chunks).chunks == expected_chunks

    def get_dask_names(ds):
        # map each data variable name to the name of its dask array
        return {k: v.data.name for k, v in ds.items()}

    orig_dask_names = get_dask_names(reblocked)
    reblocked = data.chunk({"time": 5, "dim1": 5, "dim2": 5, "dim3": 5})
    # time is not a dim in any of the data_vars, so it
    # doesn't get chunked
    expected_chunks = {"dim1": (5, 3), "dim2": (5, 4), "dim3": (5, 5)}
    assert reblocked.chunks == expected_chunks
    # make sure dask names change when rechunking by different amounts
    # regression test for GH3350
    new_dask_names = get_dask_names(reblocked)
    for k, v in new_dask_names.items():
        assert v != orig_dask_names[k]
    reblocked = data.chunk(expected_chunks)
    assert reblocked.chunks == expected_chunks
    # reblock on already blocked data
    orig_dask_names = get_dask_names(reblocked)
    reblocked = reblocked.chunk(expected_chunks)
    new_dask_names = get_dask_names(reblocked)
    assert reblocked.chunks == expected_chunks
    assert_identical(reblocked, data)
    # rechunking with same chunk sizes should not change names
    for k, v in new_dask_names.items():
        assert v == orig_dask_names[k]
    # a chunk key that is not a dimension is rejected
    with pytest.raises(
        ValueError,
        match=re.escape(
            "chunks keys ('foo',) not found in data dimensions ('dim2', 'dim3', 'time', 'dim1')"
        ),
    ):
        data.chunk({"foo": 10})
@requires_dask
@pytest.mark.parametrize(
    "calendar",
    (
        "standard",
        pytest.param(
            "gregorian",
            marks=pytest.mark.skipif(not has_cftime, reason="needs cftime"),
        ),
    ),
)
@pytest.mark.parametrize("freq", ["D", "W", "5ME", "YE"])
@pytest.mark.parametrize("add_gap", [True, False])
def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> None:
    """Chunk "time" with a ``TimeResampler`` and compare with resample-derived counts."""
    import dask.array

    n_steps = 365 * 2
    gap = 28
    time = xr.date_range(
        "2001-01-01", periods=n_steps + gap, freq="D", calendar=calendar
    ).to_numpy(copy=True)
    if not add_gap:
        time = time[:n_steps]
    else:
        # introduce an empty bin: blank out ``gap`` entries, then drop them
        time[31 : 31 + gap] = np.datetime64("NaT")
        time = time[~np.isnat(time)]

    variables = {
        "pr": ("time", dask.array.random.random(n_steps, chunks=20)),
        "pr2d": (("x", "time"), dask.array.random.random((10, n_steps), chunks=20)),
        "ones": ("time", np.ones((n_steps,))),
    }
    ds = Dataset(variables, coords={"time": time})

    # keyword form
    rechunked = ds.chunk(x=2, time=TimeResampler(freq))
    # summing the all-ones variable per resample bin gives the expected chunk lengths
    per_bin = ds.ones.resample(time=freq).sum().dropna("time").astype(int)
    expected = tuple(per_bin.data.tolist())
    assert rechunked.chunksizes["time"] == expected
    assert rechunked.chunksizes["x"] == (2,) * 5

    # mapping form must give the same chunk sizes
    rechunked = ds.chunk({"x": 2, "time": TimeResampler(freq)})
    assert rechunked.chunksizes["time"] == expected
    assert rechunked.chunksizes["x"] == (2,) * 5
def test_chunk_by_frequecy_errors(self) -> None:
    """``Dataset.chunk`` with a ``TimeResampler`` raises ``ValueError`` for bad targets.

    NOTE(review): the method name misspells "frequency"; it is kept unchanged
    so the test id stays the same.
    """
    ds = Dataset({"foo": ("x", [1, 2, 3])})
    # "x" has not been assigned a coordinate variable yet
    with pytest.raises(ValueError, match="virtual variable"):
        ds.chunk(x=TimeResampler("YE"))

    # "x" now holds plain integers
    ds["x"] = ("x", [1, 2, 3])
    with pytest.raises(ValueError, match="datetime variables"):
        ds.chunk(x=TimeResampler("YE"))

    # "x" holds dates, but "foo" is passed as the frequency
    ds["x"] = ("x", xr.date_range("2001-01-01", periods=3, freq="D"))
    with pytest.raises(ValueError, match="Invalid frequency"):
        ds.chunk(x=TimeResampler("foo"))
@requires_dask
def test_dask_is_lazy(self) -> None:
    """Operations on a chunked dataset must not touch the underlying store's data."""
    backing_store = InaccessibleVariableDataStore()
    create_test_data().dump_to_store(backing_store)
    lazy = open_dataset(backing_store).chunk()

    # explicitly loading or reading values is expected to fail
    with pytest.raises(UnexpectedDataAccess):
        lazy.load()
    with pytest.raises(UnexpectedDataAccess):
        _ = lazy["var1"].values

    # none of the following may raise UnexpectedDataAccess
    _ = lazy.var1.data
    lazy.isel(time=10)
    lazy.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)
    lazy.transpose()
    lazy.mean()
    lazy.fillna(0)
    lazy.rename({"dim1": "foobar"})
    lazy.set_coords("var1")
    lazy.drop_vars("var1")
def test_isel(self) -> None:
    """``isel`` with slices and scalars: sizes, values and missing-dimension handling."""
    data = create_test_data()
    slicers: dict[Hashable, slice] = {
        "dim1": slice(None, None, 2),
        "dim2": slice(0, 2),
    }
    sliced = data.isel(slicers)

    # the dimension names are unchanged; only sliced dimensions change size
    assert list(data.dims) == list(sliced.dims)
    for dim in data.dims:
        if dim in slicers:
            assert sliced.sizes[dim] == np.arange(data.sizes[dim])[slicers[dim]].size
        else:
            assert data.sizes[dim] == sliced.sizes[dim]

    # every variable must match the original values sliced with numpy
    for name in data.variables:
        assert data[name].dims == sliced[name].dims
        assert data[name].attrs == sliced[name].attrs
        # one slice per axis: the requested slicer, or a full slice otherwise
        key = tuple(slicers.get(axis, slice(None)) for axis in data[name].dims)
        np.testing.assert_array_equal(data[name].values[key], sliced[name].values)

    # an unknown dimension raises by default ...
    with pytest.raises(ValueError):
        data.isel(not_a_dim=slice(0, 2))

    missing_dim_pattern = (
        r"Dimensions {'not_a_dim'} do not exist. Expected "
        r"one or more of "
        r"[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*"
    )
    with pytest.raises(ValueError, match=missing_dim_pattern):
        data.isel(not_a_dim=slice(0, 2))
    # ... warns with the same text under missing_dims="warn" ...
    with pytest.warns(UserWarning, match=missing_dim_pattern):
        data.isel(not_a_dim=slice(0, 2), missing_dims="warn")
    # ... and leaves the dataset unchanged under missing_dims="ignore"
    assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore"))

    # (indexers, expected sizes, indexes expected to be gone from the result)
    cases: list[tuple[dict[str, Any], dict[str, int], set[str]]] = [
        ({"dim1": 0}, {"time": 20, "dim2": 9, "dim3": 10}, set()),
        (
            {"time": slice(2), "dim1": 0, "dim2": slice(5)},
            {"time": 2, "dim2": 5, "dim3": 10},
            set(),
        ),
        ({"time": 0, "dim1": 0, "dim2": slice(5)}, {"dim2": 5, "dim3": 10}, {"time"}),
    ]
    for indexers, expected_sizes, dropped_indexes in cases:
        picked = data.isel(**indexers)
        assert expected_sizes == picked.sizes
        assert set(data.data_vars) == set(picked.data_vars)
        assert set(data.coords) == set(picked.coords)
        assert set(data.xindexes) == set(picked.xindexes) | dropped_indexes
def test_isel_fancy(self) -> None:
    """Exercise ``Dataset.isel`` with array-like indexers.

    Sections, in order: ``(dims, values)`` tuple indexers, DataArray indexers
    (with and without coordinates), indexer-order independence, error cases,
    Dataset-derived and multi-dimensional indexers, and the ``drop`` flag.
    """
    # isel with fancy indexing.
    data = create_test_data()

    pdim1 = [1, 2, 3]
    pdim2 = [4, 5, 1]
    pdim3 = [1, 2, 3]
    # all three indexers share the new dimension name "test_coord"
    actual = data.isel(
        dim1=(("test_coord",), pdim1),
        dim2=(("test_coord",), pdim2),
        dim3=(("test_coord",), pdim3),
    )
    assert "test_coord" in actual.dims
    assert actual.coords["test_coord"].shape == (len(pdim1),)

    # Should work with DataArray
    actual = data.isel(
        dim1=DataArray(pdim1, dims="test_coord"),
        dim2=(("test_coord",), pdim2),
        dim3=(("test_coord",), pdim3),
    )
    assert "test_coord" in actual.dims
    assert actual.coords["test_coord"].shape == (len(pdim1),)
    expected = data.isel(
        dim1=(("test_coord",), pdim1),
        dim2=(("test_coord",), pdim2),
        dim3=(("test_coord",), pdim3),
    )
    assert_identical(actual, expected)

    # DataArray with coordinate
    idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)})
    idx2 = DataArray(pdim2, dims=["b"], coords={"b": np.random.randn(3)})
    idx3 = DataArray(pdim3, dims=["c"], coords={"c": np.random.randn(3)})
    # Should work with DataArray
    actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3)
    assert "a" in actual.dims
    assert "b" in actual.dims
    assert "c" in actual.dims
    assert "time" in actual.coords
    assert "dim2" in actual.coords
    assert "dim3" in actual.coords
    # the tuple-indexer result plus the indexers' coordinates must match
    expected = data.isel(
        dim1=(("a",), pdim1), dim2=(("b",), pdim2), dim3=(("c",), pdim3)
    )
    expected = expected.assign_coords(a=idx1["a"], b=idx2["b"], c=idx3["c"])
    assert_identical(actual, expected)

    # all indexers along "a"; only the first one carries a coordinate
    idx1 = DataArray(pdim1, dims=["a"], coords={"a": np.random.randn(3)})
    idx2 = DataArray(pdim2, dims=["a"])
    idx3 = DataArray(pdim3, dims=["a"])
    # Should work with DataArray
    actual = data.isel(dim1=idx1, dim2=idx2, dim3=idx3)
    assert "a" in actual.dims
    assert "time" in actual.coords
    assert "dim2" in actual.coords
    assert "dim3" in actual.coords
    expected = data.isel(
        dim1=(("a",), pdim1), dim2=(("a",), pdim2), dim3=(("a",), pdim3)
    )
    expected = expected.assign_coords(a=idx1["a"])
    assert_identical(actual, expected)

    # only dim1 and dim2 are indexed here; dim3 must remain a dimension
    actual = data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2))
    assert "points" in actual.dims
    assert "dim3" in actual.dims
    assert "dim3" not in actual.data_vars
    np.testing.assert_array_equal(data["dim2"][pdim2], actual["dim2"])

    # test that the order of the indexers doesn't matter
    assert_identical(
        data.isel(dim1=(("points",), pdim1), dim2=(("points",), pdim2)),
        data.isel(dim2=(("points",), pdim2), dim1=(("points",), pdim1)),
    )
    # make sure we're raising errors in the right places
    with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
        data.isel(dim1=(("points",), [1, 2]), dim2=(("points",), [1, 2, 3]))
    with pytest.raises(TypeError, match=r"cannot use a Dataset"):
        data.isel(dim1=Dataset({"points": [1, 2]}))

    # test to be sure we keep around variables that were not indexed
    ds = Dataset({"x": [1, 2, 3, 4], "y": 0})
    actual = ds.isel(x=(("points",), [0, 1, 2]))
    assert_identical(ds["y"], actual["y"])

    # tests using index or DataArray as indexers
    stations = Dataset()
    stations["station"] = (("station",), ["A", "B", "C"])
    stations["dim1s"] = (("station",), [1, 2, 3])
    stations["dim2s"] = (("station",), [4, 5, 1])

    actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"])
    assert "station" in actual.coords
    assert "station" in actual.dims
    assert_identical(actual["station"].drop_vars(["dim2"]), stations["station"])

    # the two indexers carry different "station" coordinate values
    with pytest.raises(ValueError, match=r"conflicting values/indexes on "):
        data.isel(
            dim1=DataArray(
                [0, 1, 2], dims="station", coords={"station": [0, 1, 2]}
            ),
            dim2=DataArray(
                [0, 1, 2], dims="station", coords={"station": [0, 1, 3]}
            ),
        )

    # multi-dimensional selection
    stations = Dataset()
    stations["a"] = (("a",), ["A", "B", "C"])
    stations["b"] = (("b",), [0, 1])
    stations["dim1s"] = (("a", "b"), [[1, 2], [2, 3], [3, 4]])
    stations["dim2s"] = (("a",), [4, 5, 1])
    actual = data.isel(dim1=stations["dim1s"], dim2=stations["dim2s"])
    assert "a" in actual.coords
    assert "a" in actual.dims
    assert "b" in actual.coords
    assert "b" in actual.dims
    assert "dim2" in actual.coords
    assert "a" in actual["dim2"].dims

    assert_identical(actual["a"].drop_vars(["dim2"]), stations["a"])
    assert_identical(actual["b"], stations["b"])
    # reference values obtained by indexing the underlying Variable objects directly
    expected_var1 = data["var1"].variable[
        stations["dim1s"].variable, stations["dim2s"].variable
    ]
    expected_var2 = data["var2"].variable[
        stations["dim1s"].variable, stations["dim2s"].variable
    ]
    expected_var3 = data["var3"].variable[slice(None), stations["dim1s"].variable]
    assert_equal(actual["a"].drop_vars("dim2"), stations["a"])
    assert_array_equal(actual["var1"], expected_var1)
    assert_array_equal(actual["var2"], expected_var2)
    assert_array_equal(actual["var3"], expected_var3)

    # test that drop works
    ds = xr.Dataset({"a": (("x",), [1, 2, 3])}, coords={"b": (("x",), [5, 6, 7])})

    actual = ds.isel({"x": 1}, drop=False)
    expected = xr.Dataset({"a": 2}, coords={"b": 6})
    assert_identical(actual, expected)

    actual = ds.isel({"x": 1}, drop=True)
    expected = xr.Dataset({"a": 2})
    assert_identical(actual, expected)

    # same two checks with a 0-d DataArray as the indexer
    actual = ds.isel({"x": DataArray(1)}, drop=False)
    expected = xr.Dataset({"a": 2}, coords={"b": 6})
    assert_identical(actual, expected)

    actual = ds.isel({"x": DataArray(1)}, drop=True)
    expected = xr.Dataset({"a": 2})
    assert_identical(actual, expected)
def test_isel_dataarray(self) -> None:
    """Test ``isel`` with DataArray indexers that carry their own coordinates.

    Covers indexer coordinates that share a name with the indexed dimension
    (conflicting and non-conflicting), boolean indexers, extra non-dimension
    coordinates on the indexer, and 0-d indexers.
    """
    data = create_test_data()
    # indexing with DataArray with same-name coordinates.
    indexing_da = DataArray(
        np.arange(1, 4), dims=["dim1"], coords={"dim1": np.random.randn(3)}
    )
    actual = data.isel(dim1=indexing_da)
    assert_identical(indexing_da["dim1"], actual["dim1"])
    assert_identical(data["dim2"], actual["dim2"])

    # Conflict in the dimension coordinate
    indexing_da = DataArray(
        np.arange(1, 4), dims=["dim2"], coords={"dim2": np.random.randn(3)}
    )
    with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
        data.isel(dim2=indexing_da)
    # Also the case for DataArray
    with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
        data["var2"].isel(dim2=indexing_da)
    with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
        data["dim2"].isel(dim2=indexing_da)

    # same name coordinate which does not conflict
    indexing_da = DataArray(
        np.arange(1, 4), dims=["dim2"], coords={"dim2": data["dim2"].values[1:4]}
    )
    actual = data.isel(dim2=indexing_da)
    assert_identical(actual["dim2"], indexing_da["dim2"])

    # Silently drop conflicted (non-dimensional) coordinate of indexer
    indexing_da = DataArray(
        np.arange(1, 4),
        dims=["dim2"],
        coords={
            "dim2": data["dim2"].values[1:4],
            "numbers": ("dim2", np.arange(2, 5)),
        },
    )
    actual = data.isel(dim2=indexing_da)
    assert_identical(actual["numbers"], data["numbers"])

    # boolean data array with coordinate with the same name
    indexing_da = DataArray(
        np.arange(1, 10), dims=["dim2"], coords={"dim2": data["dim2"].values}
    )
    # values 1..9 compared with 3: only the first two entries are True
    indexing_da = indexing_da < 3
    actual = data.isel(dim2=indexing_da)
    assert_identical(actual["dim2"], data["dim2"][:2])

    # boolean data array with non-dimension coordinate
    indexing_da = DataArray(
        np.arange(1, 10),
        dims=["dim2"],
        coords={
            "dim2": data["dim2"].values,
            "non_dim": (("dim2",), np.random.randn(9)),
            "non_dim2": 0,
        },
    )
    indexing_da = indexing_da < 3
    actual = data.isel(dim2=indexing_da)
    assert_identical(
        actual["dim2"].drop_vars("non_dim").drop_vars("non_dim2"), data["dim2"][:2]
    )
    assert_identical(actual["non_dim"], indexing_da["non_dim"][:2])
    assert_identical(actual["non_dim2"], indexing_da["non_dim2"])

    # non-dimension coordinate will be also attached
    indexing_da = DataArray(
        np.arange(1, 4),
        dims=["dim2"],
        coords={"non_dim": (("dim2",), np.random.randn(3))},
    )
    actual = data.isel(dim2=indexing_da)
    assert "non_dim" in actual
    assert "non_dim" in actual.coords

    # Index by a scalar DataArray
    indexing_da = DataArray(3, dims=[], coords={"station": 2})
    actual = data.isel(dim2=indexing_da)
    assert "station" in actual
    # the scalar coordinate itself is used as the indexer here
    actual = data.isel(dim2=indexing_da["station"])
    assert "station" in actual

    # indexer generated from coordinates
    indexing_ds = Dataset({}, coords={"dim2": [0, 1, 2]})
    with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
        actual = data.isel(dim2=indexing_ds["dim2"])
def test_isel_fancy_convert_index_variable(self) -> None:
    """Select index variable "x" with a DataArray whose dimension is "z".

    The test asserts that afterwards "x" has no entry in ``xindexes`` and
    that its variable is not an ``IndexVariable``.
    """
    source = xr.Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]})
    z_indexer = xr.DataArray([1], dims="z", name="x")

    result = source.isel(x=z_indexer)

    assert "x" not in result.xindexes
    assert not isinstance(result.x.variable, IndexVariable)
def test_sel(self) -> None:
    """Label-based ``sel`` must agree with the corresponding positional ``isel``."""
    data = create_test_data()
    positional = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)}
    by_label = {
        "dim1": slice(None, None, 2),
        "dim2": slice(0, 0.5),
        "dim3": slice("a", "c"),
    }
    assert_equal(data.isel(positional), data.sel(by_label))

    # datetime coordinate: strings, string slices, an index and a boolean mask
    data["time"] = ("time", pd.date_range("2000-01-01", periods=20))
    first_three = data.isel(time=slice(3))
    assert_equal(data.isel(time=0), data.sel(time="2000-01-01"))
    assert_equal(
        data.isel(time=slice(10)), data.sel(time=slice("2000-01-01", "2000-01-10"))
    )
    assert_equal(data, data.sel(time=slice("1999", "2005")))
    assert_equal(first_three, data.sel(time=pd.date_range("2000-01-01", periods=3)))
    assert_equal(first_three, data.sel(time=(data["time.dayofyear"] <= 3)))

    # timedelta coordinate
    deltas = pd.to_timedelta(np.arange(3), unit="days")
    data = Dataset({"x": ("td", np.arange(3)), "td": deltas})
    assert_equal(data, data.sel(td=deltas))
    assert_equal(data, data.sel(td=slice("3 days")))
    first = data.isel(td=0)
    assert_equal(first, data.sel(td=pd.Timedelta("0 days")))
    assert_equal(first, data.sel(td=pd.Timedelta("0h")))
    assert_equal(data.isel(td=slice(1, 3)), data.sel(td=slice("1 days", "2 days")))
def test_sel_dataarray(self) -> None:
    """Test ``sel`` along "dim2" with DataArray indexers of label values.

    Each section compares the result with the equivalent positional ``isel``
    and checks how the indexer's own dimensions and coordinates appear in it.
    """
    data = create_test_data()

    ind = DataArray([0.0, 0.5, 1.0], dims=["dim2"])
    actual = data.sel(dim2=ind)
    assert_equal(actual, data.isel(dim2=[0, 1, 2]))

    # with different dimension
    ind = DataArray([0.0, 0.5, 1.0], dims=["new_dim"])
    actual = data.sel(dim2=ind)
    expected = data.isel(dim2=Variable("new_dim", [0, 1, 2]))
    assert "new_dim" in actual.dims
    assert_equal(actual, expected)

    # Multi-dimensional
    ind = DataArray([[0.0], [0.5], [1.0]], dims=["new_dim", "new_dim2"])
    actual = data.sel(dim2=ind)
    expected = data.isel(dim2=Variable(("new_dim", "new_dim2"), [[0], [1], [2]]))
    assert "new_dim" in actual.dims
    assert "new_dim2" in actual.dims
    assert_equal(actual, expected)

    # with coordinate
    ind = DataArray(
        [0.0, 0.5, 1.0], dims=["new_dim"], coords={"new_dim": ["a", "b", "c"]}
    )
    actual = data.sel(dim2=ind)
    expected = data.isel(dim2=[0, 1, 2]).rename({"dim2": "new_dim"})
    assert "new_dim" in actual.dims
    assert "new_dim" in actual.coords
    assert_equal(
        actual.drop_vars("new_dim").drop_vars("dim2"), expected.drop_vars("new_dim")
    )
    assert_equal(actual["new_dim"].drop_vars("dim2"), ind["new_dim"])

    # with conflicted coordinate (silently ignored)
    ind = DataArray(
        [0.0, 0.5, 1.0], dims=["dim2"], coords={"dim2": ["a", "b", "c"]}
    )
    actual = data.sel(dim2=ind)
    expected = data.isel(dim2=[0, 1, 2])
    assert_equal(actual, expected)

    # with conflicted scalar coordinate "dim2" on the indexer (silently ignored)
    ind = DataArray(
        [0.0, 0.5, 1.0],
        dims=["new_dim"],
        coords={"new_dim": ["a", "b", "c"], "dim2": 3},
    )
    actual = data.sel(dim2=ind)
    assert_equal(
        actual["new_dim"].drop_vars("dim2"), ind["new_dim"].drop_vars("dim2")
    )
    expected = data.isel(dim2=[0, 1, 2])
    # re-declare the selected dim2 values along "new_dim" for the comparison
    expected["dim2"] = (("new_dim"), expected["dim2"].values)
    assert_equal(actual["dim2"].drop_vars("new_dim"), expected["dim2"])
    assert actual["var1"].dims == ("dim1", "new_dim")

    # with non-dimensional coordinate
    ind = DataArray(
        [0.0, 0.5, 1.0],
        dims=["dim2"],
        coords={
            "dim2": ["a", "b", "c"],
            "numbers": ("dim2", [0, 1, 2]),
            "new_dim": ("dim2", [1.1, 1.2, 1.3]),
        },
    )
    actual = data.sel(dim2=ind)
    expected = data.isel(dim2=[0, 1, 2])
    assert_equal(actual.drop_vars("new_dim"), expected)
    assert np.allclose(actual["new_dim"].values, ind["new_dim"].values)
def test_sel_dataarray_mindex(self) -> None:
    """``sel`` and ``isel`` with array indexers must agree on a multi-indexed dataset."""
    midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two"))
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    midx_coords["y"] = range(3)

    mds = xr.Dataset(
        {"var": (("x", "y"), np.random.rand(6, 3))}, coords=midx_coords
    )

    first_three = np.arange(3)
    # (positional indexer, label indexer, dimension expected on the resulting "x")
    cases = [
        (
            xr.DataArray(first_three, dims="x"),
            DataArray(midx[:3], dims="x"),
            "x",
        ),
        (
            xr.DataArray(first_three, dims="z"),
            Variable("z", midx[:3]),
            "z",
        ),
        # with coordinate
        (
            xr.DataArray(first_three, dims="z", coords={"z": [0, 1, 2]}),
            xr.DataArray(midx[:3], dims="z", coords={"z": [0, 1, 2]}),
            "z",
        ),
    ]
    for positional, by_label, result_dim in cases:
        actual_isel = mds.isel(x=positional)
        actual_sel = mds.sel(x=by_label)
        assert actual_isel["x"].dims == (result_dim,)
        assert actual_sel["x"].dims == (result_dim,)
        assert_identical(actual_isel, actual_sel)

    # Vectorized indexing with level-variables raises an error
    with pytest.raises(ValueError, match=r"Vectorized selection is "):
        mds.sel(one=["a", "b"])

    # a two-dimensional indexer along the multi-indexed coordinate raises as well
    two_dim_labels = [np.array(midx[:2]), np.array(midx[-2:])]
    with pytest.raises(
        ValueError,
        match=r"Vectorized selection is not available along coordinate 'x' with a multi-index",
    ):
        mds.sel(x=xr.DataArray(two_dim_labels, dims=["a", "b"]))
def test_sel_categorical(self) -> None:
    """Selecting a label on a categorical index equals the positional pick."""
    categories = pd.Series(["foo", "bar"], dtype="category")
    frame = pd.DataFrame({"ind": categories, "values": [1, 2]})
    ds = frame.set_index("ind").to_xarray()

    # "bar" is the second entry of the index
    assert_identical(ds.isel(ind=1), ds.sel(ind="bar"))
def test_sel_categorical_error(self) -> None:
    """``method`` and ``tolerance`` must raise ``ValueError`` on a categorical index."""
    categories = pd.Series(["foo", "bar"], dtype="category")
    frame = pd.DataFrame({"ind": categories, "values": [1, 2]})
    ds = frame.set_index("ind").to_xarray()

    with pytest.raises(ValueError):
        ds.sel(ind="bar", method="nearest")
    with pytest.raises(ValueError):
        ds.sel(ind="bar", tolerance="nearest")  # type: ignore[arg-type]
def test_categorical_index(self) -> None:
    """Selection, value conversion and unstacking with a ``CategoricalIndex`` coordinate."""
    cat = pd.CategoricalIndex(
        ["foo", "bar", "foo"],
        categories=["foo", "bar", "baz", "qux", "quux", "corge"],
    )
    ds = xr.Dataset(
        {"var": ("cat", np.arange(3))},
        coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 1])},
    )

    # "foo" sits at positions 0 and 2
    assert_identical(ds.isel(cat=[0, 2]), ds.sel(cat="foo"))

    # make sure the conversion to the array works
    selected_labels = ds.sel(cat="foo")["cat"].values
    assert (selected_labels == np.array(["foo", "foo"])).all()

    # stack "cat" and "c" into one index, then unstack it again
    stacked = ds.set_index(index=["cat", "c"])
    unstacked = stacked.unstack("index")
    assert unstacked["var"].shape == (2, 2)
def test_categorical_reindex(self) -> None:
    """Reindexing a categorical coordinate to ``["foo"]`` yields that single label."""
    cat = pd.CategoricalIndex(
        ["foo", "bar", "baz"],
        categories=["foo", "bar", "baz", "qux", "quux", "corge"],
    )
    ds = xr.Dataset(
        {"var": ("cat", np.arange(3))},
        coords={"cat": ("cat", cat), "c": ("cat", [0, 1, 2])},
    )

    reindexed_labels = ds.reindex(cat=["foo"])["cat"].values
    assert (reindexed_labels == np.array(["foo"])).all()
def test_categorical_multiindex(self) -> None:
    """A DataFrame indexed by an int and a categorical level converts to shape (1, 2)."""
    cat_dtype = pd.CategoricalDtype(categories=["foo", "baz", "bar"])
    level_one = pd.Series([0, 0])
    level_two = pd.Series(["baz", "bar"], dtype=cat_dtype)

    frame = pd.DataFrame({"i1": level_one, "i2": level_two, "values": [1, 2]})
    converted = frame.set_index(["i1", "i2"]).to_xarray()

    assert converted["values"].shape == (1, 2)
def test_sel_drop(self) -> None:
    """``sel(..., drop=...)`` controls whether the scalar coordinate is kept."""
    indexed = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})

    # drop=True: no "x" coordinate in the result
    assert_identical(Dataset({"foo": 1}), indexed.sel(x=0, drop=True))
    # drop=False: "x" is kept as a scalar coordinate
    assert_identical(Dataset({"foo": 1}, {"x": 0}), indexed.sel(x=0, drop=False))

    # same selection on a dataset created without an "x" coordinate
    unindexed = Dataset({"foo": ("x", [1, 2, 3])})
    assert_identical(Dataset({"foo": 1}), unindexed.sel(x=0, drop=True))
def test_sel_drop_mindex(self) -> None:
    """``drop`` decides whether a selected multi-index level stays as a coordinate."""
    midx = pd.MultiIndex.from_arrays([["a", "a"], [1, 2]], names=("foo", "bar"))
    data = Dataset(coords=Coordinates.from_pandas_multiindex(midx, "x"))

    dropped = data.sel(foo="a", drop=True)
    assert "foo" not in dropped.coords

    kept = data.sel(foo="a", drop=False)
    assert_equal(kept.foo, DataArray("a", coords={"foo": "a"}))
def test_isel_drop(self) -> None:
    """``isel(..., drop=...)`` controls whether the scalar coordinate is kept."""
    data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})

    # drop=True: no "x" coordinate in the result
    assert_identical(Dataset({"foo": 1}), data.isel(x=0, drop=True))
    # drop=False: "x" is kept as a scalar coordinate
    assert_identical(Dataset({"foo": 1}, {"x": 0}), data.isel(x=0, drop=False))
def test_head(self) -> None:
    """``head`` must match ``isel`` with leading slices and reject bad arguments."""
    data = create_test_data()

    # per-dimension counts
    assert_equal(data.isel(time=slice(5), dim2=slice(6)), data.head(time=5, dim2=6))
    # a count of zero
    assert_equal(data.isel(time=slice(0)), data.head(time=0))
    # a single int applies to every dimension
    assert_equal(data.isel(dict.fromkeys(data.dims, slice(6))), data.head(6))
    # no argument behaves like 5 for every dimension
    assert_equal(data.isel(dict.fromkeys(data.dims, slice(5))), data.head())

    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        data.head([3])  # type: ignore[arg-type]
    with pytest.raises(TypeError, match=r"expected integer type"):
        data.head(dim2=3.1)
    with pytest.raises(ValueError, match=r"expected positive int"):
        data.head(time=-3)
def test_tail(self) -> None:
    """``tail`` must match ``isel`` with trailing slices and reject bad arguments."""
    data = create_test_data()

    # per-dimension counts
    assert_equal(
        data.isel(time=slice(-5, None), dim2=slice(-6, None)),
        data.tail(time=5, dim2=6),
    )
    # a count of zero
    assert_equal(data.isel(dim1=slice(0)), data.tail(dim1=0))
    # a single int applies to every dimension
    assert_equal(data.isel(dict.fromkeys(data.dims, slice(-6, None))), data.tail(6))
    # no argument behaves like 5 for every dimension
    assert_equal(data.isel(dict.fromkeys(data.dims, slice(-5, None))), data.tail())

    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        data.tail([3])  # type: ignore[arg-type]
    with pytest.raises(TypeError, match=r"expected integer type"):
        data.tail(dim2=3.1)
    with pytest.raises(ValueError, match=r"expected positive int"):
        data.tail(time=-3)
def test_thin(self) -> None:
    """``thin`` must match ``isel`` with strided slices and reject bad arguments."""
    data = create_test_data()

    # per-dimension strides
    assert_equal(
        data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)),
        data.thin(time=5, dim2=6),
    )
    # a single int applies to every dimension
    every_sixth = dict.fromkeys(data.dims, slice(None, None, 6))
    assert_equal(data.isel(every_sixth), data.thin(6))

    with pytest.raises(TypeError, match=r"either dict-like or a single int"):
        data.thin([3])  # type: ignore[arg-type]
    with pytest.raises(TypeError, match=r"expected integer type"):
        data.thin(dim2=3.1)
    with pytest.raises(ValueError, match=r"cannot be zero"):
        data.thin(time=0)
    with pytest.raises(ValueError, match=r"expected positive int"):
        data.thin(time=-3)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_sel_fancy(self) -> None:
    """Test ``sel`` with Variable and DataArray indexers against equivalent ``isel`` calls.

    The second half builds a small 3x3 dataset to check pointwise selection,
    ``method="pad"`` lookups and the ``tolerance`` argument.
    """
    data = create_test_data()

    # add in a range() index
    data["dim1"] = data.dim1

    pdim1 = [1, 2, 3]
    pdim2 = [4, 5, 1]
    pdim3 = [1, 2, 3]
    expected = data.isel(
        dim1=Variable(("test_coord",), pdim1),
        dim2=Variable(("test_coord",), pdim2),
        dim3=Variable(("test_coord"), pdim3),
    )
    # the label indexers are the coordinate values at the same positions
    actual = data.sel(
        dim1=Variable(("test_coord",), data.dim1[pdim1]),
        dim2=Variable(("test_coord",), data.dim2[pdim2]),
        dim3=Variable(("test_coord",), data.dim3[pdim3]),
    )
    assert_identical(expected, actual)

    # DataArray Indexer
    idx_t = DataArray(
        data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
    )
    idx_2 = DataArray(
        data["dim2"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
    )
    idx_3 = DataArray(
        data["dim3"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
    )
    actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3)
    expected = data.isel(
        time=Variable(("a",), [3, 2, 1]),
        dim2=Variable(("a",), [3, 2, 1]),
        dim3=Variable(("a",), [3, 2, 1]),
    )
    expected = expected.assign_coords(a=idx_t["a"])
    assert_identical(expected, actual)

    # same idea, but each indexer has its own dimension and coordinate
    idx_t = DataArray(
        data["time"][[3, 2, 1]].values, dims=["a"], coords={"a": ["a", "b", "c"]}
    )
    idx_2 = DataArray(
        data["dim2"][[2, 1, 3]].values, dims=["b"], coords={"b": [0, 1, 2]}
    )
    idx_3 = DataArray(
        data["dim3"][[1, 2, 1]].values, dims=["c"], coords={"c": [0.0, 1.1, 2.2]}
    )
    actual = data.sel(time=idx_t, dim2=idx_2, dim3=idx_3)
    expected = data.isel(
        time=Variable(("a",), [3, 2, 1]),
        dim2=Variable(("b",), [2, 1, 3]),
        dim3=Variable(("c",), [1, 2, 1]),
    )
    expected = expected.assign_coords(a=idx_t["a"], b=idx_2["b"], c=idx_3["c"])
    assert_identical(expected, actual)

    # test from sel_points
    data = Dataset({"foo": (("x", "y"), np.arange(9).reshape(3, 3))})
    data.coords.update({"x": [0, 1, 2], "y": [0, 1, 2]})

    # selecting (0, 0), (1, 1), (2, 2) picks the diagonal values 0, 4, 8
    expected = Dataset(
        {"foo": ("points", [0, 4, 8])},
        coords={
            "x": Variable(("points",), [0, 1, 2]),
            "y": Variable(("points",), [0, 1, 2]),
        },
    )
    actual = data.sel(
        x=Variable(("points",), [0, 1, 2]), y=Variable(("points",), [0, 1, 2])
    )
    assert_identical(expected, actual)

    expected.coords.update({"x": ("points", [0, 1, 2]), "y": ("points", [0, 1, 2])})
    # inexact labels combined with method="pad" are expected to give the same result
    actual = data.sel(
        x=Variable(("points",), [0.1, 1.1, 2.5]),
        y=Variable(("points",), [0, 1.2, 2.0]),
        method="pad",
    )
    assert_identical(expected, actual)

    idx_x = DataArray([0, 1, 2], dims=["a"], coords={"a": ["a", "b", "c"]})
    idx_y = DataArray([0, 2, 1], dims=["b"], coords={"b": [0, 3, 6]})
    expected_ary = data["foo"][[0, 1, 2], [0, 2, 1]]
    actual = data.sel(x=idx_x, y=idx_y)
    assert_array_equal(expected_ary, actual["foo"])
    assert_identical(actual["a"].drop_vars("x"), idx_x["a"])
    assert_identical(actual["b"].drop_vars("y"), idx_y["b"])

    # x=2.5 with tolerance=1e-3 is expected to raise KeyError
    with pytest.raises(KeyError):
        data.sel(x=[2.5], y=[2.0], method="pad", tolerance=1e-3)
def test_sel_method(self) -> None:
    """Check the ``method`` and ``tolerance`` arguments of ``sel``."""
    data = create_test_data()

    # 0.95 resolves to label 1 under "nearest", with or without a tolerance of 1
    exact = data.sel(dim2=1)
    assert_identical(exact, data.sel(dim2=0.95, method="nearest"))
    assert_identical(exact, data.sel(dim2=0.95, method="nearest", tolerance=1))

    with pytest.raises(KeyError):
        data.sel(dim2=np.pi, method="nearest", tolerance=0)

    # "backfill" resolves 1.45 to label 1.5
    assert_identical(
        data.sel(dim2=[1.5]), data.sel(dim2=[1.45], method="backfill")
    )

    with pytest.raises(NotImplementedError, match=r"slice objects"):
        data.sel(dim2=slice(1, 3), method="ffill")

    with pytest.raises(TypeError, match=r"``method``"):
        # this should not pass silently
        data.sel(dim2=1, method=data)  # type: ignore[arg-type]

    # cannot pass method if there is no associated coordinate
    with pytest.raises(ValueError, match=r"cannot supply"):
        data.sel(dim1=0, method="nearest")
def test_loc(self) -> None:
    """``.loc`` with a dict equals ``sel``; a bare label raises ``TypeError``."""
    data = create_test_data()

    assert_identical(data.sel(dim3="a"), data.loc[dict(dim3="a")])

    with pytest.raises(TypeError, match=r"can only lookup dict"):
        data.loc["a"]  # type: ignore[index]
def test_selection_multiindex(self) -> None:
    """Label selection on a three-level multi-index versus positional selection."""
    midx = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
    )
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    mdata = Dataset(data_vars={"var": ("x", range(8))}, coords=midx_coords)

    def check_sel(
        lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None
    ) -> None:
        # Compare ``sel(x=lab_indexer)`` with ``isel(x=pos_indexer)``.
        selected = mdata.sel(x=lab_indexer)
        positional = mdata.isel(x=pos_indexer)
        if not replaced_idx:
            assert_identical(selected, positional)
            return
        # With replaced_idx the data must match while "x" itself differs;
        # renamed_dim names the dimension the result is expected to use.
        if renamed_dim:
            assert selected["var"].dims[0] == renamed_dim
            selected = selected.rename({renamed_dim: "x"})
        assert_identical(selected["var"].variable, positional["var"].variable)
        assert not selected["x"].equals(positional["x"])

    # tuple, scalar and list-of-tuples labels
    check_sel(("a", 1, -1), 0)
    check_sel(("b", 2, -2), -1)
    check_sel(("a", 1), [0, 1], replaced_idx=True, renamed_dim="three")
    check_sel(("a",), range(4), replaced_idx=True)
    check_sel("a", range(4), replaced_idx=True)
    check_sel([("a", 1, -1), ("b", 2, -2)], [0, 7])
    # slices
    check_sel(slice("a", "b"), range(8))
    check_sel(slice(("a", 1), ("b", 1)), range(6))
    # dicts keyed by level name
    check_sel({"one": "a", "two": 1, "three": -1}, 0)
    check_sel({"one": "a", "two": 1}, [0, 1], replaced_idx=True, renamed_dim="three")
    check_sel({"one": "a"}, range(4), replaced_idx=True)

    # .loc must agree with sel for the same indexers
    for label in ({"one": "a"}, "a", ("a", 1), ("a", 1, -1)):
        assert_identical(mdata.loc[{"x": label}], mdata.sel(x=label))

    # level names as keyword arguments match the dict form on "x"
    assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1))
def test_broadcast_like(self) -> None:
    """``broadcast_like`` must agree with the module-level ``broadcast``."""
    along_x = DataArray(np.random.randn(5), [("x", range(5))], name="a").to_dataset()
    along_y = DataArray(np.random.randn(6), [("y", range(6))], name="b")

    expected_x, expected_y = broadcast(along_x, along_y)

    # the Dataset result is compared after transposing the expectation to ("y", "x")
    assert_identical(along_x.broadcast_like(along_y), expected_x.transpose("y", "x"))
    assert_identical(along_y.broadcast_like(along_x), expected_y)
def test_to_pandas(self) -> None:
    """``to_pandas`` for 0-d and 1-d datasets, and the error for 2-d ones."""
    # 0D -> series
    as_series = Dataset({"a": 1, "b": 2}).to_pandas()
    assert_array_equal(as_series, pd.Series([1, 2], ["a", "b"]))

    # 1D -> dataframe
    x = np.random.randn(10)
    y = np.random.randn(10)
    labels = list("abcdefghij")
    ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", labels)})
    actual_df = ds.to_pandas()
    expected_df = ds.to_dataframe()
    assert expected_df.equals(actual_df), (expected_df, actual_df)

    # 2D -> error
    x2d = np.random.randn(10, 10)
    y2d = np.random.randn(10, 10)
    with pytest.raises(ValueError, match=r"cannot convert Datasets"):
        Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
def test_reindex_like(self) -> None:
    """``reindex_like`` against a subset and against a shifted "dim3" coordinate."""
    data = create_test_data()
    data["letters"] = ("dim3", ["a"] * 10)

    # a positional subset is reproduced exactly
    subset = data.isel(dim1=slice(10), time=slice(13))
    assert_identical(data.reindex_like(subset), subset)

    # Build the expectation for a "dim3" coordinate relabelled "c".."l": every
    # "dim3" variable is moved up by two positions and its last two entries
    # are set to NaN.
    shifted = data.copy(deep=True)
    shifted["dim3"] = ("dim3", list("cdefghijkl"))
    shifted["var3"][:-2] = shifted["var3"][2:].values
    shifted["var3"][-2:] = np.nan
    # cast first so that NaN can be stored
    shifted["letters"] = shifted["letters"].astype(object)
    shifted["letters"][-2:] = np.nan
    shifted["numbers"] = shifted["numbers"].astype(float)
    shifted["numbers"][:-2] = shifted["numbers"][2:].values
    shifted["numbers"][-2:] = np.nan

    assert_identical(data.reindex_like(shifted), shifted)
def test_reindex(self) -> None:
    """Cover the main ``Dataset.reindex`` call patterns and error paths."""
    data = create_test_data()

    # No indexers: the dataset comes back unchanged.
    assert_identical(data, data.reindex())

    # Reindexing "dim1" onto its own labels (DataArray, ndarray, pandas
    # index) is compared with assigning that coordinate.
    expected = data.assign_coords(dim1=data["dim1"])
    for indexer in (
        data["dim1"],
        data["dim1"].values,
        data["dim1"].to_index(),
    ):
        assert_identical(data.reindex(dim1=indexer), expected)

    with pytest.raises(
        ValueError, match=r"cannot reindex or align along dimension"
    ):
        data.reindex(dim1=data["dim1"][:5])

    first_five = data["dim2"][:5]
    assert_identical(data.reindex(dim2=first_five), data.isel(dim2=slice(5)))

    # test dict-like argument
    assert_identical(data.reindex({"dim2": data["dim2"]}), data)

    with pytest.raises(ValueError, match=r"cannot specify both"):
        data.reindex({"x": 0}, x=0)
    with pytest.raises(ValueError, match=r"dictionary"):
        data.reindex("foo")  # type: ignore[arg-type]

    # invalid dimension
    # TODO: (benbovy - explicit indexes): uncomment?
    # --> from reindex docstrings: "any mis-matched dimension is simply ignored"
    # with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"):
    #     data.reindex(invalid=0)

    # out of order
    reversed_tail = data["dim2"][:5:-1]
    assert_identical(
        data.reindex(dim2=reversed_tail), data.sel(dim2=reversed_tail)
    )

    # multiple fill values
    new_dim2 = [0.1, 2.1, 3.1, 4.1]
    expected = data.reindex(dim2=new_dim2).assign(
        var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)),
        var2=lambda ds: ds.var2.copy(data=[[-20, -20, -20, -20]] * len(ds.dim1)),
    )
    actual = data.reindex(dim2=new_dim2, fill_value={"var1": -10, "var2": -20})
    assert_identical(actual, expected)

    # use the default value
    expected = data.reindex(dim2=new_dim2).assign(
        var1=lambda ds: ds.var1.copy(data=[[-10, -10, -10, -10]] * len(ds.dim1)),
        var2=lambda ds: ds.var2.copy(
            data=[[np.nan, np.nan, np.nan, np.nan]] * len(ds.dim1)
        ),
    )
    actual = data.reindex(dim2=new_dim2, fill_value={"var1": -10})
    assert_identical(actual, expected)

    # regression test for #279
    expected = Dataset({"x": ("time", np.random.randn(5))}, {"time": range(5)})
    time2 = DataArray(np.arange(5), dims="time2")
    with pytest.raises(ValueError):
        actual = expected.reindex(time=time2)

    # another regression test
    ds = Dataset(
        {"foo": (["x", "y"], np.zeros((3, 4)))}, {"x": range(3), "y": range(4)}
    )
    expected = Dataset(
        {"foo": (["x", "y"], np.zeros((3, 2)))}, {"x": [0, 1, 3], "y": [0, 1]}
    )
    # x=3 is absent from ``ds``, so the last row is expected to be NaN.
    expected["foo"][-1] = np.nan
    actual = ds.reindex(x=[0, 1, 3], y=[0, 1])
    assert_identical(expected, actual)
def test_reindex_attrs_encoding(self) -> None:
    """Attrs and encoding of the index variable must survive ``reindex``."""
    source = Dataset(
        {"data": ("x", [1, 2, 3])},
        {"x": ("x", [0, 1, 2], {"foo": "bar"}, {"bar": "baz"})},
    )
    expected = Dataset(
        {"data": ("x", [1, 2])},
        {"x": ("x", [0, 1], {"foo": "bar"}, {"bar": "baz"})},
    )

    actual = source.reindex(x=[0, 1])

    assert_identical(actual, expected)
    # Encoding is asserted separately from ``assert_identical``.
    assert actual.x.encoding == expected.x.encoding
def test_reindex_warning(self) -> None:
    """Check how ``reindex`` treats DataArray indexers of differing dims.

    An indexer along a different dimension must raise ``ValueError``; an
    indexer along the reindexed dimension must not emit any warning.
    """
    data = create_test_data()

    # Build the indexer outside the ``raises`` block so that only the
    # ``reindex`` call itself can satisfy the expectation.
    mismatched = xr.DataArray([0.0, 1.0], dims=["new_dim"], name="ind")
    with pytest.raises(ValueError):
        data.reindex(dim2=mismatched)

    # Should not warn
    matching = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind")
    with assert_no_warnings():
        data.reindex(dim2=matching)
def test_reindex_variables_copied(self) -> None:
    """With ``copy=False`` the variable objects must still be new ones."""
    original = create_test_data()
    result = original.reindex(copy=False)
    for name in original.variables:
        assert result.variables[name] is not original.variables[name]
def test_reindex_method(self) -> None:
    """Check the ``method`` and ``tolerance`` options of ``reindex``."""
    ds = Dataset({"x": ("y", [10, 20]), "y": [0, 1]})
    new_y = [-0.5, 0.5, 1.5]

    # backfill, with no tolerance, a scalar tolerance and a per-label one
    result = ds.reindex(y=new_y, method="backfill")
    wanted = Dataset({"x": ("y", [10, 20, np.nan]), "y": new_y})
    assert_identical(wanted, result)

    result = ds.reindex(y=new_y, method="backfill", tolerance=0.1)
    wanted = Dataset({"x": ("y", 3 * [np.nan]), "y": new_y})
    assert_identical(wanted, result)

    result = ds.reindex(y=new_y, method="backfill", tolerance=[0.1, 0.5, 0.1])
    wanted = Dataset({"x": ("y", [np.nan, 20, np.nan]), "y": new_y})
    assert_identical(wanted, result)

    # nearest with a per-label tolerance and a repeated target label
    result = ds.reindex(y=[0.1, 0.1, 1], tolerance=[0, 0.1, 0], method="nearest")
    wanted = Dataset({"x": ("y", [np.nan, 10, 20]), "y": [0.1, 0.1, 1]})
    assert_identical(wanted, result)

    # pad, through both ``reindex`` and ``reindex_like``
    result = ds.reindex(y=new_y, method="pad")
    wanted = Dataset({"x": ("y", [np.nan, 10, 20]), "y": new_y})
    assert_identical(wanted, result)

    template = Dataset({"y": new_y})
    result = ds.reindex_like(template, method="pad")
    assert_identical(wanted, result)
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}])
def test_reindex_fill_value(self, fill_value) -> None:
    """``reindex`` must fill the new label with the requested value(s)."""
    ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]})
    new_y = [0, 1, 2]
    actual = ds.reindex(y=new_y, fill_value=fill_value)

    if isinstance(fill_value, dict):
        # per-variable fill values
        fill_value_x = fill_value["x"]
        fill_value_z = fill_value["z"]
    elif fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        fill_value_x = fill_value_z = np.nan
    else:
        fill_value_x = fill_value_z = fill_value

    expected = Dataset(
        {
            "x": ("y", [10, 20, fill_value_x]),
            "z": ("y", [-20, -10, fill_value_z]),
            "y": new_y,
        }
    )
    assert_identical(expected, actual)
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"x": 2, "z": 1}])
def test_reindex_like_fill_value(self, fill_value) -> None:
    """``reindex_like`` must fill the new label with the requested value(s)."""
    ds = Dataset({"x": ("y", [10, 20]), "z": ("y", [-20, -10]), "y": [0, 1]})
    new_y = [0, 1, 2]
    template = Dataset({"y": new_y})
    actual = ds.reindex_like(template, fill_value=fill_value)

    if isinstance(fill_value, dict):
        # per-variable fill values
        fill_value_x = fill_value["x"]
        fill_value_z = fill_value["z"]
    elif fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        fill_value_x = fill_value_z = np.nan
    else:
        fill_value_x = fill_value_z = fill_value

    expected = Dataset(
        {
            "x": ("y", [10, 20, fill_value_x]),
            "z": ("y", [-20, -10, fill_value_z]),
            "y": new_y,
        }
    )
    assert_identical(expected, actual)
@pytest.mark.parametrize("dtype", [str, bytes])
def test_reindex_str_dtype(self, dtype) -> None:
    """Reindexing onto its own str/bytes index must keep the index dtype."""
    labels = np.array(["a", "b"], dtype=dtype)
    source = Dataset({"data": ("x", [1, 2]), "x": labels})

    result = source.reindex(x=source.x)

    assert_identical(source, result)
    assert result.x.dtype == source.x.dtype
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}])
def test_align_fill_value(self, fill_value) -> None:
    """Outer ``align`` must fill the gaps with the requested value(s)."""
    x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})})
    y = Dataset({"bar": DataArray([1, 2], dims=["x"], coords={"x": [1, 3]})})
    x2, y2 = align(x, y, join="outer", fill_value=fill_value)

    if isinstance(fill_value, dict):
        # per-variable fill values
        fill_value_foo = fill_value["foo"]
        fill_value_bar = fill_value["bar"]
    elif fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        fill_value_foo = fill_value_bar = np.nan
    else:
        fill_value_foo = fill_value_bar = fill_value

    union_coords = {"x": [1, 2, 3]}
    foo_filled = DataArray([1, 2, fill_value_foo], dims=["x"], coords=union_coords)
    bar_filled = DataArray([1, fill_value_bar, 2], dims=["x"], coords=union_coords)

    assert_identical(Dataset({"foo": foo_filled}), x2)
    assert_identical(Dataset({"bar": bar_filled}), y2)
def test_align(self) -> None:
    """Check ``align`` for each join mode on datasets with shifted "dim3"."""
    left = create_test_data()

    # ``right`` has "dim3" labels shifted by two relative to ``left``.
    right = left.copy(deep=True)
    right["dim3"] = ("dim3", list("cdefghijkl"))
    right["var3"][:-2] = right["var3"][2:].values
    right["var3"][-2:] = np.random.randn(*right["var3"][-2:].shape)
    right["numbers"][:-2] = right["numbers"][2:].values
    right["numbers"][-2:] = -10

    intersection = list("cdefghij")
    union = list("abcdefghijkl")

    # inner join
    left_in, right_in = align(left, right, join="inner")
    assert_array_equal(left_in["dim3"], intersection)
    assert_identical(left_in, right_in)

    # outer join
    left_out, right_out = align(left, right, join="outer")
    assert_array_equal(left_out["dim3"], union)
    assert_equal(left_out["dim3"].variable, right_out["dim3"].variable)
    assert_identical(
        left_out.sel(dim3=intersection), right_out.sel(dim3=intersection)
    )
    assert np.isnan(left_out["var3"][-2:]).all()
    assert np.isnan(right_out["var3"][:2]).all()

    # left join
    left_l, right_l = align(left, right, join="left")
    assert_equal(left_l["dim3"].variable, right_l["dim3"].variable)
    assert_equal(left_l["dim3"].variable, left["dim3"].variable)
    assert_identical(left_l.sel(dim3=intersection), right_l.sel(dim3=intersection))
    assert np.isnan(right_l["var3"][:2]).all()

    # right join
    left_r, right_r = align(left, right, join="right")
    assert_equal(left_r["dim3"].variable, right_r["dim3"].variable)
    assert_equal(left_r["dim3"].variable, right["dim3"].variable)
    assert_identical(left_r.sel(dim3=intersection), right_r.sel(dim3=intersection))
    assert np.isnan(left_r["var3"][-2:]).all()

    # invalid arguments
    with pytest.raises(ValueError, match=r"invalid value for join"):
        align(left, right, join="foobar")  # type: ignore[call-overload]
    with pytest.raises(TypeError):
        align(left, right, foo="bar")  # type: ignore[call-overload]
def test_align_exact(self) -> None:
    """``join="exact"`` passes equal indexes through and rejects unequal ones."""
    left = xr.Dataset(coords={"x": [0, 1]})
    right = xr.Dataset(coords={"x": [1, 2]})

    first, second = xr.align(left, left, join="exact")
    for aligned in (first, second):
        assert_identical(aligned, left)

    with pytest.raises(ValueError, match=r"cannot align.*join.*exact.*not equal.*"):
        xr.align(left, right, join="exact")
def test_align_override(self) -> None:
    """Check ``join="override"`` with and without ``exclude``."""
    left = xr.Dataset(coords={"x": [0, 1, 2]})
    right = xr.Dataset(coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]})
    expected_right = xr.Dataset(coords={"x": [0, 1, 2], "y": [1, 2, 3]})

    # Plain override: ``right`` is expected to end up with ``left``'s "x".
    new_left, new_right = xr.align(left, right, join="override")
    assert_identical(left, new_left)
    assert_identical(new_right, expected_right)

    # Excluding "x": both inputs come back as they were.
    new_left, new_right = xr.align(left, right, exclude="x", join="override")
    assert_identical(left, new_left)
    assert_identical(right, new_right)

    # Same, with "x" dropped from the left operand.
    left_without_x = left.isel(x=0, drop=True)
    new_left, new_right = xr.align(
        left_without_x, right, exclude="x", join="override"
    )
    assert_identical(left.isel(x=0, drop=True), new_left)
    assert_identical(right, new_right)

    # Differently sized indexes cannot be overridden.
    with pytest.raises(
        ValueError, match=r"cannot align.*join.*override.*same size"
    ):
        xr.align(left.isel(x=0).expand_dims("x"), right, join="override")
def test_align_exclude(self) -> None:
    """Outer ``align`` with "y" excluded: only "x" is expected to be aligned."""
    foo = DataArray(
        [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]}
    )
    bar = DataArray(
        [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 3], "y": [5, 6]}
    )
    x = Dataset({"foo": foo})
    y = Dataset({"bar": bar})

    x2, y2 = align(x, y, exclude=["y"], join="outer")

    # Expected: "x" becomes [1, 2, 3]; each dataset keeps its own "y".
    foo_aligned = DataArray(
        [[1, 2], [3, 4], [np.nan, np.nan]],
        dims=["x", "y"],
        coords={"x": [1, 2, 3], "y": [3, 4]},
    )
    bar_aligned = DataArray(
        [[1, 2], [np.nan, np.nan], [3, 4]],
        dims=["x", "y"],
        coords={"x": [1, 2, 3], "y": [5, 6]},
    )
    assert_identical(Dataset({"foo": foo_aligned}), x2)
    assert_identical(Dataset({"bar": bar_aligned}), y2)
def test_align_nocopy(self) -> None:
    """Check that ``copy`` controls whether ``align`` reuses the input buffer."""
    x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [1, 2, 3])])})
    y = Dataset({"foo": DataArray([1, 2], coords=[("x", [1, 2])])})
    expected_x2 = x
    expected_y2 = Dataset(
        {"foo": DataArray([1, 2, np.nan], coords=[("x", [1, 2, 3])])}
    )

    # copy=False: the aligned ``x`` shares its source array with the input.
    x2, y2 = align(x, y, copy=False, join="outer")
    assert_identical(expected_x2, x2)
    assert_identical(expected_y2, y2)
    assert source_ndarray(x["foo"].data) is source_ndarray(x2["foo"].data)

    # copy=True: the aligned ``x`` is backed by a different array.
    x2, y2 = align(x, y, copy=True, join="outer")
    assert source_ndarray(x["foo"].data) is not source_ndarray(x2["foo"].data)
    assert_identical(expected_x2, x2)
    assert_identical(expected_y2, y2)
def test_align_indexes(self) -> None:
    """``align`` with explicit ``indexes`` must reorder along those labels."""
    source = Dataset(
        {"foo": DataArray([1, 2, 3], dims="x", coords=[("x", [1, 2, 3])])}
    )
    reordered = Dataset(
        {"foo": DataArray([2, 3, 1], dims="x", coords={"x": [2, 3, 1]})}
    )

    (aligned,) = align(source, indexes={"x": [2, 3, 1]})

    assert_identical(reordered, aligned)
def test_align_non_unique(self) -> None:
    """A non-unique index aligns with itself but not with a different index."""
    duplicated = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]})

    first, second = align(duplicated, duplicated)
    assert_identical(first, duplicated)
    assert_identical(second, duplicated)

    unique = Dataset({"bar": ("x", [6, 7]), "x": [0, 1]})
    with pytest.raises(ValueError, match=r"cannot reindex or align"):
        align(duplicated, unique)
def test_align_str_dtype(self) -> None:
    """Outer ``align`` on string indexes must keep the index dtype."""
    a = Dataset({"foo": ("x", [0, 1])}, coords={"x": ["a", "b"]})
    b = Dataset({"foo": ("x", [1, 2])}, coords={"x": ["b", "c"]})

    expected_a = Dataset(
        {"foo": ("x", [0, 1, np.nan])}, coords={"x": ["a", "b", "c"]}
    )
    expected_b = Dataset(
        {"foo": ("x", [np.nan, 1, 2])}, coords={"x": ["a", "b", "c"]}
    )

    actual_a, actual_b = xr.align(a, b, join="outer")

    # The dtype of "x" is checked separately from ``assert_identical``.
    assert_identical(expected_a, actual_a)
    assert expected_a.x.dtype == actual_a.x.dtype

    assert_identical(expected_b, actual_b)
    assert expected_b.x.dtype == actual_b.x.dtype
@pytest.mark.parametrize("join", ["left", "override"])
def test_align_index_var_attrs(self, join) -> None:
    """Aligning must leave the inputs' index-variable attrs untouched."""
    # regression test https://github.com/pydata/xarray/issues/6852
    # aligning two objects should have no side effect on their index variable
    # metadata.
    with_attrs = Dataset(coords={"x": ("x", [1, 2, 3], {"units": "m"})})
    without_attrs = Dataset(coords={"x": ("x", [1, 2, 3])})

    xr.align(without_attrs, with_attrs, join=join)

    assert with_attrs.x.attrs == {"units": "m"}
    assert without_attrs.x.attrs == {}
def test_broadcast(self) -> None:
    """Check ``broadcast`` on one dataset, two datasets, and a mixed pair."""
    # Single dataset: every data variable is expanded to ("x", "y").
    ds = Dataset(
        {"foo": 0, "bar": ("x", [1]), "baz": ("y", [2, 3])}, {"c": ("x", [4])}
    )
    expected = Dataset(
        {
            "foo": (("x", "y"), [[0, 0]]),
            "bar": (("x", "y"), [[1, 1]]),
            "baz": (("x", "y"), [[2, 3]]),
        },
        {"c": ("x", [4])},
    )
    (actual,) = broadcast(ds)
    assert_identical(expected, actual)

    # Two datasets with disjoint dimensions.
    ds_x = Dataset({"foo": ("x", [1])})
    ds_y = Dataset({"bar": ("y", [2, 3])})
    expected_x = Dataset({"foo": (("x", "y"), [[1, 1]])})
    expected_y = Dataset({"bar": (("x", "y"), [[2, 3]])})
    actual_x, actual_y = broadcast(ds_x, ds_y)
    assert_identical(expected_x, actual_x)
    assert_identical(expected_y, actual_y)

    # A dataset together with a DataArray.
    array_y = ds_y["bar"]
    expected_y2 = expected_y["bar"]
    actual_x2, actual_y2 = broadcast(ds_x, array_y)
    assert_identical(expected_x, actual_x2)
    assert_identical(expected_y2, actual_y2)
def test_broadcast_nocopy(self) -> None:
    """``broadcast`` must reuse the input buffer when no expansion is needed."""
    # Test that data is not copied if not needed
    x = Dataset({"foo": (("x", "y"), [[1, 1]])})
    y = Dataset({"bar": ("y", [2, 3])})

    # Broadcasting against nothing else.
    (alone,) = broadcast(x)
    assert_identical(x, alone)
    assert source_ndarray(alone["foo"].data) is source_ndarray(x["foo"].data)

    # Broadcasting against ``y``, whose only dimension ``x`` already has.
    paired, _ = broadcast(x, y)
    assert_identical(x, paired)
    assert source_ndarray(paired["foo"].data) is source_ndarray(x["foo"].data)
def test_broadcast_exclude(self) -> None:
    """``broadcast`` with "y" excluded: each input keeps its own "y" labels."""
    x = Dataset(
        {
            "foo": DataArray(
                [[1, 2], [3, 4]], dims=["x", "y"], coords={"x": [1, 2], "y": [3, 4]}
            ),
            "bar": DataArray(5),
        }
    )
    y = Dataset(
        {
            "foo": DataArray(
                [[1, 2]], dims=["z", "y"], coords={"z": [1], "y": [5, 6]}
            )
        }
    )

    x2, y2 = broadcast(x, y, exclude=["y"])

    # Expected: both gain the other's non-excluded dimension ("z" or "x").
    foo_x = DataArray(
        [[[1, 2]], [[3, 4]]],
        dims=["x", "z", "y"],
        coords={"z": [1], "x": [1, 2], "y": [3, 4]},
    )
    bar_x = DataArray([[5], [5]], dims=["x", "z"], coords={"x": [1, 2], "z": [1]})
    expected_x2 = Dataset({"foo": foo_x, "bar": bar_x})

    foo_y = DataArray(
        [[[1, 2]], [[1, 2]]],
        dims=["x", "z", "y"],
        coords={"z": [1], "x": [1, 2], "y": [5, 6]},
    )
    expected_y2 = Dataset({"foo": foo_y})

    assert_identical(expected_x2, x2)
    assert_identical(expected_y2, y2)
def test_broadcast_misaligned(self) -> None:
    """``broadcast`` of inputs with different "x" labels and dimension order."""
    x = Dataset({"foo": DataArray([1, 2, 3], coords=[("x", [-1, -2, -3])])})
    bar = DataArray(
        [[1, 2], [3, 4]],
        dims=["y", "x"],
        coords={"y": [1, 2], "x": [10, -3]},
    )
    y = Dataset({"bar": bar})

    x2, y2 = broadcast(x, y)

    # Expected: a shared sorted "x" of [-3, -2, -1, 10] with NaN where a
    # dataset has no value for a label.
    shared_coords = {"y": [1, 2], "x": [-3, -2, -1, 10]}
    expected_x2 = Dataset(
        {
            "foo": DataArray(
                [[3, 3], [2, 2], [1, 1], [np.nan, np.nan]],
                dims=["x", "y"],
                coords=shared_coords,
            )
        }
    )
    expected_y2 = Dataset(
        {
            "bar": DataArray(
                [[2, 4], [np.nan, np.nan], [np.nan, np.nan], [1, 3]],
                dims=["x", "y"],
                coords=shared_coords,
            )
        }
    )
    assert_identical(expected_x2, x2)
    assert_identical(expected_y2, y2)
def test_broadcast_multi_index(self) -> None:
    """After ``broadcast``, the level coords must share the stacked index."""
    # GH6430
    ds = Dataset(
        {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))},
        {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]},
    )
    stacked = ds.stack(space=["x", "y"])

    broadcasted, _ = broadcast(stacked, stacked.space)

    space_index = broadcasted.xindexes["space"]
    assert broadcasted.xindexes["x"] is space_index
    assert broadcasted.xindexes["y"] is space_index
def test_variable_indexing(self) -> None:
    """Index "var1" with arrays, boolean masks, ranges and ``.loc``."""
    data = create_test_data()
    var1 = data["var1"]
    dim1 = data["dim1"]
    dim2 = data["dim2"]

    # Indexing with the full coordinate (as values or DataArray) is a no-op.
    assert_equal(var1, var1[dim1.values])
    assert_equal(var1, var1[dim1])

    # Boolean masks built from the coordinates match the equivalent slices.
    assert_equal(var1[:3], var1[dim1 < 3])
    assert_equal(var1[:, 3:], var1[:, dim2 >= 1.5])
    assert_equal(var1[:3, 3:], var1[dim1 < 3, dim2 >= 1.5])

    # ``range`` indexers and label-based ``.loc`` match slices as well.
    assert_equal(var1[:3, :2], var1[range(3), range(2)])
    assert_equal(var1[:3, :2], var1.loc[dim1[:3], dim2[:2]])
def test_drop_variables(self) -> None:
    """Check ``drop_vars`` and the deprecated ``drop`` for variable names."""
    data = create_test_data()

    # Dropping nothing returns an identical dataset.
    assert_identical(data, data.drop_vars([]))

    without_time = Dataset({k: data[k] for k in data.variables if k != "time"})
    assert_identical(without_time, data.drop_vars("time"))
    assert_identical(without_time, data.drop_vars(["time"]))

    # An unknown name raises unless ``errors="ignore"`` is passed.
    with pytest.raises(
        ValueError,
        match=re.escape(
            "These variables cannot be found in this dataset: ['not_found_here']"
        ),
    ):
        data.drop_vars("not_found_here")

    result = data.drop_vars("not_found_here", errors="ignore")
    assert_identical(data, result)

    result = data.drop_vars(["not_found_here"], errors="ignore")
    assert_identical(data, result)

    result = data.drop_vars(["time", "not_found_here"], errors="ignore")
    assert_identical(without_time, result)

    # deprecated approach with `drop` works (straight copy paste from above)
    with pytest.warns(DeprecationWarning):
        result = data.drop("not_found_here", errors="ignore")
    assert_identical(data, result)

    with pytest.warns(DeprecationWarning):
        result = data.drop(["not_found_here"], errors="ignore")
    assert_identical(data, result)

    with pytest.warns(DeprecationWarning):
        result = data.drop(["time", "not_found_here"], errors="ignore")
    assert_identical(without_time, result)

    with pytest.warns(DeprecationWarning):
        result = data.drop({"time", "not_found_here"}, errors="ignore")
    assert_identical(without_time, result)
def test_drop_multiindex_level(self) -> None:
    """Dropping one multi-index level warns and is compared with dropping all."""
    midx_data = create_test_multiindex()
    everything_dropped = midx_data.drop_vars(["x", "level_1", "level_2"])

    with pytest.warns(DeprecationWarning):
        level_dropped = midx_data.drop_vars("level_1")

    assert_identical(everything_dropped, level_dropped)
def test_drop_index_labels(self) -> None:
    """Check label dropping via deprecated ``drop(..., dim=)`` and ``drop_sel``.

    Covers existing labels, missing labels with both ``errors`` modes, an
    invalid ``errors`` value, and DataArray labels.
    """
    data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]})

    with pytest.warns(DeprecationWarning):
        actual = data.drop(["a"], dim="x")
    expected = data.isel(x=[1])
    assert_identical(expected, actual)

    with pytest.warns(DeprecationWarning):
        actual = data.drop(["a", "b"], dim="x")
    expected = data.isel(x=slice(0, 0))
    assert_identical(expected, actual)

    with pytest.raises(KeyError):
        # not contained in axis
        with pytest.warns(DeprecationWarning):
            data.drop(["c"], dim="x")

    with pytest.warns(DeprecationWarning):
        actual = data.drop(["c"], dim="x", errors="ignore")
    assert_identical(data, actual)

    with pytest.raises(ValueError):
        data.drop(["c"], dim="x", errors="wrong_value")  # type: ignore[arg-type]

    with pytest.warns(DeprecationWarning):
        actual = data.drop(["a", "b", "c"], "x", errors="ignore")
    expected = data.isel(x=slice(0, 0))
    assert_identical(expected, actual)

    # DataArrays as labels are a nasty corner case as they are not
    # Iterable[Hashable] - DataArray.__iter__ yields scalar DataArrays.
    actual = data.drop_sel(x=DataArray(["a", "b", "c"]), errors="ignore")
    expected = data.isel(x=slice(0, 0))
    assert_identical(expected, actual)

    # Capture the result of the deprecated call so the assertion below checks
    # it, rather than re-checking the ``drop_sel`` result from above.
    with pytest.warns(DeprecationWarning):
        actual = data.drop(DataArray(["a", "b", "c"]), dim="x", errors="ignore")
    assert_identical(expected, actual)

    actual = data.drop_sel(y=[1])
    expected = data.isel(y=[0, 2])
    assert_identical(expected, actual)

    with pytest.raises(KeyError, match=r"not found in axis"):
        data.drop_sel(x=0)
def test_drop_labels_by_keyword(self) -> None:
    """Check keyword-style label dropping and conflicting-argument errors."""
    data = Dataset(
        {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
    )
    # Basic functionality.
    assert len(data.coords["x"]) == 2

    with pytest.warns(DeprecationWarning):
        ds1 = data.drop(["a"], dim="x")
    ds2 = data.drop_sel(x="a")
    ds3 = data.drop_sel(x=["a"])
    ds4 = data.drop_sel(x=["a", "b"])
    ds5 = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))

    # Passing coordinates / indexes mappings to ``drop`` must warn.
    arr = DataArray(range(3), dims=["c"])
    with pytest.warns(DeprecationWarning):
        data.drop(arr.coords)
    with pytest.warns(DeprecationWarning):
        data.drop(arr.xindexes)

    assert_array_equal(ds1.coords["x"], ["b"])
    assert_array_equal(ds2.coords["x"], ["b"])
    assert_array_equal(ds3.coords["x"], ["b"])
    assert ds4.coords["x"].size == 0
    assert ds5.coords["x"].size == 0
    assert_array_equal(ds5.coords["y"], [1, 3, 5])

    # Error handling if user tries both approaches.
    with pytest.raises(ValueError):
        data.drop(labels=["a"], x="a")
    with pytest.raises(ValueError):
        data.drop(labels=["a"], dim="x", x="a")
    # Scope the "ignore" filter so it does not stay installed on the global
    # warnings filter list after this test finishes.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", r"\W*drop")
        with pytest.raises(ValueError):
            data.drop(dim="x", x="a")
def test_drop_labels_by_position(self) -> None:
    """``drop_isel`` must agree with ``drop_sel`` on the matching labels."""
    data = Dataset(
        {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
    )
    # Basic functionality.
    assert len(data.coords["x"]) == 2

    # scalar position
    by_position = data.drop_isel(x=0)
    by_label = data.drop_sel(x="a")
    assert_identical(by_label, by_position)

    # single-element list
    by_position = data.drop_isel(x=[0])
    by_label = data.drop_sel(x=["a"])
    assert_identical(by_label, by_position)

    # every position along "x"
    by_position = data.drop_isel(x=[0, 1])
    by_label = data.drop_sel(x=["a", "b"])
    assert_identical(by_label, by_position)
    assert by_position.coords["x"].size == 0

    # two dimensions at once
    by_position = data.drop_isel(x=[0, 1], y=range(0, 6, 2))
    by_label = data.drop_sel(x=["a", "b"], y=range(0, 6, 2))
    assert_identical(by_label, by_position)
    assert by_position.coords["x"].size == 0

    # "z" is not part of the dataset
    with pytest.raises(KeyError):
        data.drop_isel(z=1)
def test_drop_indexes(self) -> None:
    """Check ``drop_indexes`` on valid, unknown, unindexed and multi-index coords."""
    ds = Dataset(
        coords={
            "x": ("x", [0, 1, 2]),
            "y": ("y", [3, 4, 5]),
            "foo": ("x", ["a", "a", "b"]),
        }
    )

    # Dropping one index turns the coordinate into a plain ``Variable``.
    result = ds.drop_indexes("x")
    assert "x" not in result.xindexes
    assert type(result.x.variable) is Variable

    # Dropping several indexes at once.
    result = ds.drop_indexes(["x", "y"])
    for name in ("x", "y"):
        assert name not in result.xindexes
    assert type(result.x.variable) is Variable
    assert type(result.y.variable) is Variable

    with pytest.raises(
        ValueError,
        match=r"The coordinates \('not_a_coord',\) are not found in the dataset coordinates",
    ):
        ds.drop_indexes("not_a_coord")

    with pytest.raises(ValueError, match="those coordinates do not have an index"):
        ds.drop_indexes("foo")

    # With errors="ignore" both problem cases leave the dataset untouched.
    result = ds.drop_indexes(["foo", "not_a_coord"], errors="ignore")
    assert_identical(result, ds)

    # test index corrupted
    midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    ds = Dataset(coords=midx_coords)

    with pytest.raises(ValueError, match=".*would corrupt the following index.*"):
        ds.drop_indexes("a")
def test_drop_dims(self) -> None:
    """Check ``drop_dims`` against ``drop_vars`` and its ``errors`` handling."""
    data = xr.Dataset(
        {
            "A": (["x", "y"], np.random.randn(2, 3)),
            "B": ("x", np.random.randn(2)),
            "x": ["a", "b"],
            "z": np.pi,
        }
    )

    # Dropping a dimension is compared with dropping the variables using it.
    assert_identical(data.drop_vars(["A", "B", "x"]), data.drop_dims("x"))
    assert_identical(data.drop_vars("A"), data.drop_dims("y"))
    assert_identical(data.drop_vars(["A", "B", "x"]), data.drop_dims(["x", "y"]))

    with pytest.raises((ValueError, KeyError)):
        data.drop_dims("z")  # not a dimension

    with pytest.raises((ValueError, KeyError)):
        data.drop_dims(None)  # type:ignore[arg-type]

    actual = data.drop_dims("z", errors="ignore")
    assert_identical(data, actual)

    # should this be allowed?
    actual = data.drop_dims(None, errors="ignore")  # type:ignore[arg-type]
    assert_identical(data, actual)

    with pytest.raises(ValueError):
        actual = data.drop_dims("z", errors="wrong_value")  # type: ignore[arg-type]

    # A mix of existing and missing dimensions with errors="ignore".
    actual = data.drop_dims(["x", "y", "z"], errors="ignore")
    expected = data.drop_vars(["A", "B", "x"])
    assert_identical(expected, actual)
def test_copy(self) -> None:
    """Compare shallow and deep copies of a dataset with the original."""
    data = create_test_data()
    data.attrs["Test"] = [1, 2, 3]

    shallow_copies = [data.copy(deep=False), copy(data)]
    for shallow in shallow_copies:
        assert_identical(data, shallow)
        assert data.encoding == shallow.encoding

        # Note: IndexVariable objects with string dtype are always
        # copied because of xarray.core.indexes.safe_cast_to_index.
        # Limiting the test to data variables.
        for name in data.data_vars:
            original_var = data.variables[name]
            copied_var = shallow.variables[name]
            assert source_ndarray(original_var.data) is source_ndarray(
                copied_var.data
            )

        # Changes to the copy's variables and attrs must not reach ``data``...
        shallow["foo"] = ("z", np.arange(5))
        assert "foo" not in data

        shallow.attrs["foo"] = "bar"
        assert "foo" not in data.attrs
        # ...while existing attribute values are still the same objects.
        assert data.attrs["Test"] is shallow.attrs["Test"]

    deep_copies = [data.copy(deep=True), deepcopy(data)]
    for deep in deep_copies:
        assert_identical(data, deep)
        for name, original_var in data.variables.items():
            assert original_var is not deep.variables[name]

        assert data.attrs["Test"] is not deep.attrs["Test"]
def test_copy_with_data(self) -> None:
    """``copy(data=...)`` must substitute the given arrays for the data vars."""
    orig = create_test_data()
    replacements = {
        name: np.random.randn(*var.shape) for name, var in orig.data_vars.items()
    }

    actual = orig.copy(data=replacements)

    # Build the reference by assigning the new arrays on a plain copy.
    expected = orig.copy()
    for name, values in replacements.items():
        expected[name].data = values

    assert_identical(expected, actual)
@pytest.mark.xfail(raises=AssertionError)
@pytest.mark.parametrize(
    "deep, expected_orig",
    [
        [
            True,
            xr.DataArray(
                xr.IndexVariable("a", np.array([1, 2])),
                coords={"a": [1, 2]},
                dims=["a"],
            ),
        ],
        [
            False,
            xr.DataArray(
                xr.IndexVariable("a", np.array([999, 2])),
                coords={"a": [999, 2]},
                dims=["a"],
            ),
        ],
    ],
)
def test_copy_coords(self, deep, expected_orig) -> None:
    """Replace coordinate "a" on a copy and compare both datasets' "a".

    Note carried over from the original author: the test fails for the
    shallow copy, and apparently only on Windows for some reason. In
    windows coords seem to be immutable unless it's one dataset deep
    copied from another.
    """
    source = xr.DataArray(
        np.ones([2, 2, 2]),
        coords={"a": [1, 2], "b": ["x", "y"], "c": [0, 1]},
        dims=["a", "b", "c"],
        name="value",
    ).to_dataset()
    duplicate = source.copy(deep=deep)

    # Swap the "a" coordinate of the copy for new labels.
    new_a = np.array([999, 2])
    duplicate.coords["a"] = duplicate.a.copy(data=new_a)

    expected_cp = xr.DataArray(
        xr.IndexVariable("a", new_a),
        coords={"a": [999, 2]},
        dims=["a"],
    )
    assert_identical(duplicate.coords["a"], expected_cp)
    assert_identical(source.coords["a"], expected_orig)
def test_copy_with_data_errors(self) -> None:
    """``copy(data=...)`` must reject malformed ``data`` arguments."""
    orig = create_test_data()
    var1 = orig["var1"]
    new_var1 = np.arange(var1.size).reshape(var1.shape)

    # not a mapping
    with pytest.raises(ValueError, match=r"Data must be dict-like"):
        orig.copy(data=new_var1)  # type: ignore[arg-type]

    # a key that is not in the dataset
    with pytest.raises(ValueError, match=r"only contain variables in original"):
        orig.copy(data={"not_in_original": new_var1})

    # only some of the data variables supplied
    with pytest.raises(ValueError, match=r"contain all variables in original"):
        orig.copy(data={"var1": new_var1})
def test_drop_encoding(self) -> None:
    """``drop_encoding`` must clear dataset and variable encodings only."""
    orig = create_test_data()
    vencoding = {"scale_factor": 10}
    orig.encoding = {"foo": "bar"}
    for name in orig.variables.keys():
        orig[name].encoding = vencoding

    stripped = orig.drop_encoding()

    assert stripped.encoding == {}
    for var in stripped.variables.values():
        assert var.encoding == {}

    # Values must still compare equal to the original.
    assert_equal(stripped, orig)
def test_rename(self) -> None:
    """Check ``Dataset.rename`` for variables, dimensions and error cases."""
    data = create_test_data()
    newnames = {
        "var1": "renamed_var1",
        "dim2": "renamed_dim2",
    }
    renamed = data.rename(newnames)

    # Rebuild the expected name -> variable mapping by hand.
    variables = dict(data.variables)
    for old_name, new_name in newnames.items():
        variables[new_name] = variables.pop(old_name)

    for key, var in variables.items():
        # Dimension names of the original variable, with renames applied.
        dims = [newnames.get(dim, dim) for dim in var.dims]

        assert_equal(
            Variable(dims, var.values, var.attrs),
            renamed[key].variable.to_base_variable(),
        )
        assert var.encoding == renamed[key].encoding
        assert type(var) is type(renamed.variables[key])

    assert "var1" not in renamed
    assert "dim2" not in renamed

    with pytest.raises(ValueError, match=r"cannot rename 'not_a_var'"):
        data.rename({"not_a_var": "nada"})

    with pytest.raises(ValueError, match=r"'var1' conflicts"):
        data.rename({"var2": "var1"})

    # verify that we can rename a variable without accessing the data
    var1 = data["var1"]
    data["var1"] = (var1.dims, InaccessibleArray(var1.values))
    renamed = data.rename(newnames)
    with pytest.raises(UnexpectedDataAccess):
        _ = renamed["renamed_var1"].values

    # https://github.com/python/mypy/issues/10008
    renamed_kwargs = data.rename(**newnames)  # type: ignore[arg-type]
    assert_identical(renamed, renamed_kwargs)
def test_rename_old_name(self) -> None:
    """Two names may be swapped, but two names may not share one target."""
    # regtest for GH1477
    data = create_test_data()

    colliding = {"var1": "samecol", "var2": "samecol"}
    with pytest.raises(ValueError, match=r"'samecol' conflicts"):
        data.rename(colliding)

    # This shouldn't cause any problems.
    swapped = {"var1": "var2", "var2": "var1"}
    data.rename(swapped)
def test_rename_same_name(self) -> None:
    """Renaming names to themselves must return an identical dataset."""
    data = create_test_data()
    identity_mapping = {"var1": "var1", "dim2": "dim2"}
    assert_identical(data.rename(identity_mapping), data)
def test_rename_dims(self) -> None:
    """Check ``rename_dims`` with dict and keyword input, plus error cases."""
    original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42})
    expected = Dataset(
        {"x": ("x_new", [0, 1, 2]), "y": ("x_new", [10, 11, 12]), "z": 42}
    )
    # TODO: (benbovy - explicit indexes) update when set_index supports
    # setting index for non-dimension variables
    expected = expected.set_coords("x")

    from_dict = original.rename_dims({"x": "x_new"})
    assert_identical(expected, from_dict, check_default_indexes=False)

    from_kwargs = original.rename_dims(x="x_new")
    assert_identical(expected, from_kwargs, check_default_indexes=False)

    # Test to raise ValueError
    dims_dict_bad = {"x_bad": "x_new"}
    with pytest.raises(ValueError):
        original.rename_dims(dims_dict_bad)

    # "z" is already the name of a variable in ``original``.
    with pytest.raises(ValueError):
        original.rename_dims({"x": "z"})
def test_rename_vars(self) -> None:
    """Check ``rename_vars`` with dict and keyword input, plus an error case."""
    original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42})
    expected = Dataset(
        {"x_new": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}
    )
    # TODO: (benbovy - explicit indexes) update when set_index supports
    # setting index for non-dimension variables
    expected = expected.set_coords("x_new")

    from_dict = original.rename_vars({"x": "x_new"})
    assert_identical(expected, from_dict, check_default_indexes=False)

    from_kwargs = original.rename_vars(x="x_new")
    assert_identical(expected, from_kwargs, check_default_indexes=False)

    # Test to raise ValueError
    names_dict_bad = {"x_bad": "x_new"}
    with pytest.raises(ValueError):
        original.rename_vars(names_dict_bad)
def test_rename_dimension_coord(self) -> None:
    """Renaming a dimension coordinate or its dimension must keep the index."""
    # rename a dimension coordinate to a non-dimension coordinate
    # should preserve index
    original = Dataset(coords={"x": ("x", [0, 1, 2])})

    var_renamed = original.rename_vars({"x": "x_new"})
    assert "x_new" in var_renamed.xindexes

    dim_renamed = original.rename_dims({"x": "x_new"})
    assert "x" in dim_renamed.xindexes
def test_rename_dimension_coord_warnings(self) -> None:
    """Renames that would create a dimension coordinate must warn."""
    # create a dimension coordinate by renaming a dimension or coordinate
    # should raise a warning (no index created)
    expected_message = "rename 'x' to 'y' does not create an index.*"

    # coordinate "x" along dimension "y"
    ds = Dataset(coords={"x": ("y", [0, 1])})
    with pytest.warns(UserWarning, match=expected_message):
        ds.rename(x="y")

    # coordinate "y" along dimension "x"
    ds = Dataset(coords={"y": ("x", [0, 1])})
    with pytest.warns(UserWarning, match=expected_message):
        ds.rename(x="y")

    # No operation should not raise a warning
    ds = Dataset(
        data_vars={"data": (("x", "y"), np.ones((2, 3)))},
        coords={"x": range(2), "y": range(3), "a": ("x", [3, 4])},
    )
    with warnings.catch_warnings():
        # Turn any warning into an exception for the duration of the call.
        warnings.simplefilter("error")
        ds.rename(x="x")
def test_rename_multiindex(self) -> None:
    """Check renaming multi-index levels, including conflicting names."""
    midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"])
    original = Dataset({}, Coordinates.from_pandas_multiindex(midx, "x"))

    # Renaming level "b" to "c" is compared with a multi-index built that way.
    midx_renamed = midx.rename(["a", "c"])
    expected = Dataset({}, Coordinates.from_pandas_multiindex(midx_renamed, "x"))
    actual = original.rename({"b": "c"})
    assert_identical(expected, actual)

    # Renames that collide with an existing name must raise.
    with pytest.raises(ValueError, match=r"'a' conflicts"):
        with pytest.warns(UserWarning, match="does not create an index anymore"):
            original.rename({"x": "a"})

    with pytest.raises(ValueError, match=r"'x' conflicts"):
        with pytest.warns(UserWarning, match="does not create an index anymore"):
            original.rename({"a": "x"})

    with pytest.raises(ValueError, match=r"'b' conflicts"):
        original.rename({"a": "b"})
def test_rename_preserve_attrs_encoding(self) -> None:
    """Attrs and encoding set on "x" are expected on "y" after the rename."""
    # test propagate attrs/encoding to new variable(s) created from Index object
    original = Dataset(coords={"x": ("x", [0, 1, 2])})
    expected = Dataset(coords={"y": ("y", [0, 1, 2])})

    # Attach the same attrs and encoding to the coordinate of each dataset.
    pairs = zip([original, expected], ["x", "y"], strict=True)
    for dataset, dim_name in pairs:
        dataset[dim_name].attrs = {"foo": "bar"}
        dataset[dim_name].encoding = {"foo": "bar"}

    actual = original.rename({"x": "y"})
    assert_identical(actual, expected)
@requires_cftime
def test_rename_does_not_change_CFTimeIndex_type(self) -> None:
    """The pandas index behind "time" must stay a ``CFTimeIndex`` on rename."""
    # make sure CFTimeIndex is not converted to DatetimeIndex #3522

    time = xr.cftime_range(start="2000", periods=6, freq="2MS", calendar="noleap")
    orig = Dataset(coords={"time": time})

    renamed = orig.rename(time="time_new")
    assert "time_new" in renamed.xindexes
    # TODO: benbovy - flexible indexes: update when CFTimeIndex
    # inherits from xarray.Index
    new_index = renamed.xindexes["time_new"]
    assert isinstance(new_index.to_pandas_index(), CFTimeIndex)
    assert new_index.to_pandas_index().name == "time_new"

    # check original has not changed
    assert "time" in orig.xindexes
    old_index = orig.xindexes["time"]
    assert isinstance(old_index.to_pandas_index(), CFTimeIndex)
    assert old_index.to_pandas_index().name == "time"

    # note: rename_dims(time="time_new") drops "ds.indexes"
    renamed = orig.rename_dims()
    assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)

    renamed = orig.rename_vars()
    assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex)
def test_rename_does_not_change_DatetimeIndex_type(self) -> None:
    """Renaming must conserve a pandas DatetimeIndex."""
    time = pd.date_range(start="2000", periods=6, freq="2MS")
    orig = Dataset(coords={"time": time})

    renamed = orig.rename(time="time_new")
    assert "time_new" in renamed.xindexes
    # TODO: benbovy - flexible indexes: update when DatetimeIndex
    # inherits from xarray.Index?
    renamed_index = renamed.xindexes["time_new"].to_pandas_index()
    assert isinstance(renamed_index, DatetimeIndex)
    assert renamed_index.name == "time_new"

    # check original has not changed
    assert "time" in orig.xindexes
    orig_index = orig.xindexes["time"].to_pandas_index()
    assert isinstance(orig_index, DatetimeIndex)
    assert orig_index.name == "time"

    # note: rename_dims(time="time_new") drops "ds.indexes"
    for noop_rename in (orig.rename_dims, orig.rename_vars):
        unchanged = noop_rename()
        assert isinstance(unchanged.xindexes["time"].to_pandas_index(), DatetimeIndex)
def test_swap_dims(self) -> None:
    """Swap dimensions onto existing variables, new names and a multi-index."""
    original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42})
    expected = Dataset({"z": 42}, {"x": ("y", [1, 2, 3]), "y": list("abc")})
    swapped = original.swap_dims({"x": "y"})
    assert_identical(expected, swapped)
    assert isinstance(swapped.variables["y"], IndexVariable)
    assert isinstance(swapped.variables["x"], Variable)
    assert swapped.xindexes["y"].equals(expected.xindexes["y"])

    # swapping back yields the original with "y" set as a coordinate
    assert_identical(original.set_coords("y"), swapped.swap_dims({"y": "x"}))

    invalid_swaps = (
        ({"y": "x"}, r"cannot swap"),
        ({"x": "z"}, r"replacement dimension"),
    )
    for dims_dict, error_pattern in invalid_swaps:
        with pytest.raises(ValueError, match=error_pattern):
            original.swap_dims(dims_dict)

    # swap onto a dimension name "u" that is not a variable of the dataset
    expected_u = Dataset(
        {"y": ("u", list("abc")), "z": 42}, coords={"x": ("u", [1, 2, 3])}
    )
    assert_identical(expected_u, original.swap_dims({"x": "u"}))

    # as kwargs
    assert_identical(expected_u, original.swap_dims(x="u"))

    # handle multiindex case
    midx = pd.MultiIndex.from_arrays([list("aab"), list("yzz")], names=["y1", "y2"])
    original = Dataset({"x": [1, 2, 3], "y": ("x", midx), "z": 42})
    expected_coords = Coordinates.from_pandas_multiindex(midx, "y")
    expected_coords["x"] = ("y", [1, 2, 3])
    expected = Dataset({"z": 42}, expected_coords)
    swapped = original.swap_dims({"x": "y"})
    assert_identical(expected, swapped)
    assert isinstance(swapped.variables["y"], IndexVariable)
    assert isinstance(swapped.variables["x"], Variable)
    assert swapped.xindexes["y"].equals(expected.xindexes["y"])
def test_expand_dims_error(self) -> None:
    """Check the errors raised for invalid ``expand_dims`` arguments."""

    def build_dataset() -> Dataset:
        # fresh dataset with three variables along "a"/"b" and three coordinates
        return Dataset(
            {
                "x": ("a", np.random.randn(3)),
                "y": (["b", "a"], np.random.randn(4, 3)),
                "z": ("a", np.random.randn(3)),
            },
            coords={
                "a": np.linspace(0, 1, 3),
                "b": np.linspace(0, 1, 4),
                "c": np.linspace(0, 1, 5),
            },
            attrs={"key": "entry"},
        )

    original = build_dataset()
    with pytest.raises(ValueError, match=r"already exists"):
        original.expand_dims(dim=["x"])

    # Make sure it raises true error also for non-dimensional coordinates
    # which has dimension.
    with_z_coord = original.set_coords("z")
    with pytest.raises(ValueError, match=r"already exists"):
        with_z_coord.expand_dims(dim=["z"])

    rebuilt = build_dataset()
    with pytest.raises(TypeError, match=r"value of new dimension"):
        rebuilt.expand_dims({"d": 3.2})
    with pytest.raises(ValueError, match=r"both keyword and positional"):
        rebuilt.expand_dims({"d": 4}, e=4)
def test_expand_dims_int(self) -> None:
    """Insert a new dimension "z" at a positive and at a negative axis."""

    def build_coords() -> dict[str, np.ndarray]:
        return {
            "a": np.linspace(0, 1, 3),
            "b": np.linspace(0, 1, 4),
            "c": np.linspace(0, 1, 5),
        }

    original = Dataset(
        {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
        coords=build_coords(),
        attrs={"key": "entry"},
    )

    # axis 1 first, then another test with a negative axis
    for axis in (1, -1):
        actual = original.expand_dims(["z"], [axis])
        expected = Dataset(
            {
                "x": original["x"].expand_dims("z", axis),
                "y": original["y"].expand_dims("z", axis),
            },
            coords=build_coords(),
            attrs={"key": "entry"},
        )
        assert_identical(expected, actual)

        # make sure squeeze restores the original data set.
        assert_identical(original, actual.squeeze("z"))
def test_expand_dims_coords(self) -> None:
    """Expand along a new labelled dimension without touching the input."""
    original = Dataset({"x": ("a", np.array([1, 2, 3]))})
    actual = original.expand_dims({"b": [1, 2]})

    tiled = np.array([[1, 2, 3], [1, 2, 3]])
    expected = Dataset({"x": (("b", "a"), tiled)}, coords={"b": [1, 2]})
    assert_identical(expected, actual)

    # the source dataset must not have gained the new coordinate
    assert "b" not in original._coord_names
def test_expand_dims_existing_scalar_coord(self) -> None:
    """Expanding along a scalar coordinate turns it into a length-1 dimension."""
    scalar_ds = Dataset({"x": 1}, {"a": 2})
    expanded = scalar_ds.expand_dims("a")
    assert_identical(Dataset({"x": (("a",), [1])}, {"a": [2]}), expanded)
def test_isel_expand_dims_roundtrip(self) -> None:
    """``isel`` on a length-1 dimension followed by ``expand_dims`` is a no-op."""
    original = Dataset({"x": (("a",), [1])}, {"a": [2]})
    selected = original.isel(a=0)
    roundtripped = selected.expand_dims("a")
    assert_identical(roundtripped, original)
def test_expand_dims_mixed_int_and_coords(self) -> None:
    """Expand one dimension given by size and another given by labels."""
    # Test expanding one dimension to have size > 1 that doesn't have
    # coordinates, and also expanding another dimension to have size > 1
    # that DOES have coordinates.
    original = Dataset(
        {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
        coords={
            "a": np.linspace(0, 1, 3),
            "b": np.linspace(0, 1, 4),
            "c": np.linspace(0, 1, 5),
        },
    )
    actual = original.expand_dims({"d": 4, "e": ["l", "m", "n"]})

    # broadcast the original values by hand; "d" must end up without a coordinate
    expected_x = xr.DataArray(
        original["x"].values * np.ones([4, 3, 3]),
        coords={"d": range(4), "e": ["l", "m", "n"], "a": np.linspace(0, 1, 3)},
        dims=["d", "e", "a"],
    ).drop_vars("d")
    expected_y = xr.DataArray(
        original["y"].values * np.ones([4, 3, 4, 3]),
        coords={
            "d": range(4),
            "e": ["l", "m", "n"],
            "b": np.linspace(0, 1, 4),
            "a": np.linspace(0, 1, 3),
        },
        dims=["d", "e", "b", "a"],
    ).drop_vars("d")
    expected = Dataset(
        {"x": expected_x, "y": expected_y},
        coords={"c": np.linspace(0, 1, 5)},
    )
    assert_identical(actual, expected)
def test_expand_dims_kwargs_python36plus(self) -> None:
    """Expand along a labelled dimension passed as a keyword argument."""
    original = Dataset(
        {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))},
        coords={
            "a": np.linspace(0, 1, 3),
            "b": np.linspace(0, 1, 4),
            "c": np.linspace(0, 1, 5),
        },
        attrs={"key": "entry"},
    )
    other_way = original.expand_dims(e=["l", "m", "n"])

    # expected variables: original values broadcast along the new leading "e"
    expected_x = xr.DataArray(
        original["x"].values * np.ones([3, 3]),
        coords={"e": ["l", "m", "n"], "a": np.linspace(0, 1, 3)},
        dims=["e", "a"],
    )
    expected_y = xr.DataArray(
        original["y"].values * np.ones([3, 4, 3]),
        coords={
            "e": ["l", "m", "n"],
            "b": np.linspace(0, 1, 4),
            "a": np.linspace(0, 1, 3),
        },
        dims=["e", "b", "a"],
    )
    other_way_expected = Dataset(
        {"x": expected_x, "y": expected_y},
        coords={"c": np.linspace(0, 1, 5)},
        attrs={"key": "entry"},
    )
    assert_identical(other_way_expected, other_way)
@pytest.mark.parametrize("create_index_for_new_dim_flag", [True, False])
def test_expand_dims_create_index_data_variable(
    self, create_index_for_new_dim_flag
):
    """Data variables should not gain an index ever."""
    ds = Dataset({"x": 0})

    def expand():
        return ds.expand_dims(
            "x", create_index_for_new_dim=create_index_for_new_dim_flag
        )

    if not create_index_for_new_dim_flag:
        expanded = expand()
    else:
        # requesting an index here is answered with a warning
        with pytest.warns(UserWarning, match="No index created"):
            expanded = expand()

    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    indexed = Dataset({"x": ("x", [0])})
    expected = indexed.drop_indexes("x").reset_coords("x")

    assert_identical(expanded, expected, check_default_indexes=False)
    assert expanded.indexes == {}
def test_expand_dims_create_index_coordinate_variable(self):
    """Coordinate variables gain an index only if ``create_index_for_new_dim``.

    ``create_index_for_new_dim`` is True by default.
    """
    ds = Dataset(coords={"x": 0})

    # default: the expanded coordinate is indexed
    with_index = ds.expand_dims("x")
    assert_identical(with_index, Dataset({"x": ("x", [0])}))

    # opt-out: same coordinate, but without any index
    expanded_no_index = ds.expand_dims("x", create_index_for_new_dim=False)
    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    expected_no_index = Dataset(coords={"x": ("x", [0])}).drop_indexes("x")
    assert_identical(
        expanded_no_index, expected_no_index, check_default_indexes=False
    )
    assert expanded_no_index.indexes == {}
def test_expand_dims_create_index_from_iterable(self):
    """Expand a scalar coordinate with labels, with and without an index."""
    ds = Dataset(coords={"x": 0})

    # default behaviour: the new dimension coordinate is indexed
    expanded = ds.expand_dims(x=[0, 1])
    expected = Dataset({"x": ("x", [0, 1])})
    assert_identical(expanded, expected)

    expanded_no_index = ds.expand_dims(x=[0, 1], create_index_for_new_dim=False)
    # TODO Can't just create the expected dataset directly using constructor because of GH issue 8959
    expected = Dataset(coords={"x": ("x", [0, 1])}).drop_indexes("x")
    # Compare the index-free result; the indexed ``expanded`` was already
    # checked above, so re-checking it would leave this case unverified.
    assert_identical(expanded_no_index, expected, check_default_indexes=False)
    assert expanded_no_index.indexes == {}
def test_expand_dims_non_nanosecond_conversion(self) -> None:
    """Expanding with a minute-resolution datetime gives a ``datetime64[s]`` coord."""
    # Regression test for https://github.com/pydata/xarray/issues/7493#issuecomment-1953091000
    # todo: test still needed?
    minute_stamp = np.datetime64("2018-01-01", "m")
    ds = Dataset().expand_dims({"time": [minute_stamp]})
    assert ds.time.dtype == np.dtype("datetime64[s]")
def test_set_index(self) -> None:
    """Build indexes with ``set_index`` and check the errors it raises."""
    expected = create_test_multiindex()
    mindex = expected["x"].to_index()
    # one plain coordinate along "x" per multi-index level
    level_coords = {
        level.name: ("x", level)
        for level in (mindex.get_level_values(n) for n in mindex.names)
    }
    ds = Dataset({}, coords=level_coords)

    assert_identical(ds.set_index(x=mindex.names), expected)

    # ensure pre-existing indexes involved are removed
    # (level_2 should be a coordinate with no index)
    ds = create_test_multiindex()
    expected = Dataset(
        {},
        coords={"x": level_coords["level_1"], "level_2": level_coords["level_2"]},
    )
    assert_identical(ds.set_index(x="level_1"), expected)

    # ensure set_index with no existing index and a single data var given
    # doesn't return multi-index
    ds = Dataset(data_vars={"x_var": ("x", [0, 1, 2])})
    expected = Dataset(coords={"x": [0, 1, 2]})
    assert_identical(ds.set_index(x="x_var"), expected)

    with pytest.raises(ValueError, match=r"bar variable\(s\) do not exist"):
        ds.set_index(foo="bar")

    with pytest.raises(ValueError, match=r"dimension mismatch.*"):
        ds.set_index(y="x_var")

    scalar_ds = Dataset(coords={"x": 1})
    with pytest.raises(
        ValueError, match=r".*cannot set a PandasIndex.*scalar variable.*"
    ):
        scalar_ds.set_index(x="x")
def test_set_index_deindexed_coords(self) -> None:
    """De-indexed coordinates are converted to base variables."""
    # https://github.com/pydata/xarray/issues/6969
    one = ["a", "a", "b", "b"]
    two = [1, 2, 1, 2]
    three = ["c", "c", "d", "d"]
    four = [3, 4, 3, 4]

    # start with a (one, two) multi-index plus two plain coordinates
    midx_12 = pd.MultiIndex.from_arrays([one, two], names=["one", "two"])
    initial_coords = Coordinates.from_pandas_multiindex(midx_12, "x")
    initial_coords["three"] = ("x", three)
    initial_coords["four"] = ("x", four)
    ds = xr.Dataset(coords=initial_coords)

    actual = ds.set_index(x=["three", "four"])

    # the roles flip: (three, four) is indexed, one/two become plain coordinates
    midx_34 = pd.MultiIndex.from_arrays([three, four], names=["three", "four"])
    flipped_coords = Coordinates.from_pandas_multiindex(midx_34, "x")
    flipped_coords["one"] = ("x", one)
    flipped_coords["two"] = ("x", two)
    expected = xr.Dataset(coords=flipped_coords)

    assert_identical(actual, expected)
def test_reset_index(self) -> None:
    """Resetting a multi-index leaves its levels as coordinates without index."""
    ds = create_test_multiindex()
    mindex = ds["x"].to_index()
    level_coords = {
        level.name: ("x", level)
        for level in (mindex.get_level_values(n) for n in mindex.names)
    }
    expected = Dataset({}, coords=level_coords)

    reset = ds.reset_index("x")
    assert_identical(reset, expected, check_default_indexes=False)
    assert len(reset.xindexes) == 0

    # a coordinate without an index cannot be reset
    unindexed = Dataset(coords={"y": ("x", [1, 2, 3])})
    with pytest.raises(ValueError, match=r".*not coordinates with an index"):
        unindexed.reset_index("y")
def test_reset_index_keep_attrs(self) -> None:
    """Coordinate attrs survive ``reset_index``."""
    indexed_coord = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    ds = Dataset({}, {"coord_1": indexed_coord})

    reset = ds.reset_index("coord_1")

    assert ds.coord_1.attrs == reset.coord_1.attrs
    assert len(reset.xindexes) == 0
def test_reset_index_drop_dims(self) -> None:
    """Dropping the only indexed coordinate leaves a dataset without dimensions."""
    indexed = Dataset(coords={"x": [1, 2]})
    dropped = indexed.reset_index("x", drop=True)
    assert len(dropped.dims) == 0
@pytest.mark.parametrize(
    ["arg", "drop", "dropped", "converted", "renamed"],
    [
        ("foo", False, [], [], {"bar": "x"}),
        ("foo", True, ["foo"], [], {"bar": "x"}),
        ("x", False, ["x"], ["foo", "bar"], {}),
        ("x", True, ["x", "foo", "bar"], [], {}),
        (["foo", "bar"], False, ["x"], ["foo", "bar"], {}),
        (["foo", "bar"], True, ["x", "foo", "bar"], [], {}),
        (["x", "foo"], False, ["x"], ["foo", "bar"], {}),
        (["foo", "x"], True, ["x", "foo", "bar"], [], {}),
    ],
)
def test_reset_index_drop_convert(
    self,
    arg: str | list[str],
    drop: bool,
    dropped: list[str],
    converted: list[str],
    renamed: dict[str, str],
) -> None:
    """Check which multi-index coordinates ``reset_index`` drops, converts or renames.

    Regressions https://github.com/pydata/xarray/issues/6946 and
    https://github.com/pydata/xarray/issues/6989: multi-index dimension or
    level coordinates are dropped, converted from IndexVariable to Variable
    or renamed to dimension as expected.
    """
    midx = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("foo", "bar"))
    ds = xr.Dataset(coords=Coordinates.from_pandas_multiindex(midx, "x"))

    reset = ds.reset_index(arg, drop=drop)

    for gone in dropped:
        assert gone not in reset.variables
    for kept in converted:
        base_variable = ds[kept].variable.to_base_variable()
        assert_identical(reset[kept].variable, base_variable)
    for before, after in renamed.items():
        assert_identical(ds[before].variable, reset[after].variable)
def test_reorder_levels(self) -> None:
    """Reorder multi-index levels; datasets without a MultiIndex are rejected."""
    ds = create_test_multiindex()
    mindex = ds["x"].to_index()
    assert isinstance(mindex, pd.MultiIndex)

    new_order = ["level_2", "level_1"]
    reordered_index = mindex.reorder_levels(new_order)
    expected = Dataset(
        {}, coords=Coordinates.from_pandas_multiindex(reordered_index, "x")
    )

    # check attrs propagated
    ds["level_1"].attrs["foo"] = "bar"
    expected["level_1"].attrs["foo"] = "bar"

    assert_identical(ds.reorder_levels(x=new_order), expected)

    single_index = Dataset({}, coords={"x": [1, 2]})
    with pytest.raises(ValueError, match=r"has no MultiIndex"):
        single_index.reorder_levels(x=["level_1", "level_2"])
def test_set_xindex(self) -> None:
    """Compare ``set_xindex`` with ``set_index`` and check its error cases."""
    ds = Dataset(
        coords={"foo": ("x", ["a", "a", "b", "b"]), "bar": ("x", [0, 1, 2, 3])}
    )

    # a single coordinate
    single_expected = ds.set_index(x="foo").rename_vars(x="foo")
    assert_identical(
        ds.set_xindex("foo"), single_expected, check_default_indexes=False
    )

    # several coordinates at once
    multi_expected = ds.set_index(x=["foo", "bar"])
    assert_identical(ds.set_xindex(["foo", "bar"]), multi_expected)

    class NotAnIndex: ...

    with pytest.raises(TypeError, match=".*not a subclass of xarray.Index"):
        ds.set_xindex("foo", NotAnIndex)  # type: ignore[arg-type]

    with pytest.raises(ValueError, match="those variables don't exist"):
        ds.set_xindex("not_a_coordinate", PandasIndex)

    ds["data_var"] = ("x", [1, 2, 3, 4])
    with pytest.raises(ValueError, match="those variables are data variables"):
        ds.set_xindex("data_var", PandasIndex)

    already_indexed = Dataset(coords={"x": ("x", [0, 1, 2, 3])})
    with pytest.raises(ValueError, match="those coordinates already have an index"):
        already_indexed.set_xindex("x", PandasIndex)
def test_set_xindex_options(self) -> None:
    """Extra keyword arguments of ``set_xindex`` reach ``from_variables``."""

    class IndexWithOptions(Index):
        # minimal index that only records the option it was built with
        def __init__(self, opt):
            self.opt = opt

        @classmethod
        def from_variables(cls, variables, options):
            return cls(options["opt"])

    ds = Dataset(coords={"foo": ("x", ["a", "a", "b", "b"])})
    indexed = ds.set_xindex("foo", IndexWithOptions, opt=1)
    assert indexed.xindexes["foo"].opt == 1  # type: ignore[attr-defined]
def test_stack(self) -> None:
    """Stack two dimensions into "z" in both orders and via ellipsis forms."""
    ds = Dataset(
        data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
        coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
    )
    # check attrs propagated
    ds["x"].attrs["foo"] = "bar"

    xy_index = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["x", "y"])
    expected = Dataset(
        data_vars={"b": ("z", [0, 1, 2, 3])},
        coords=Coordinates.from_pandas_multiindex(xy_index, "z"),
    )
    expected["x"].attrs["foo"] = "bar"

    actual = ds.stack(z=["x", "y"])
    assert_identical(expected, actual)
    assert list(actual.xindexes) == ["z", "x", "y"]

    # ellipsis in a list, in a non list, and combined with a given dim
    for ellipsis_dims in ([...], (...,), [..., "y"]):
        assert_identical(expected, ds.stack(z=ellipsis_dims))

    # reversed stacking order
    yx_index = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["y", "x"])
    expected = Dataset(
        data_vars={"b": ("z", [0, 2, 1, 3])},
        coords=Coordinates.from_pandas_multiindex(yx_index, "z"),
    )
    expected["x"].attrs["foo"] = "bar"

    actual = ds.stack(z=["y", "x"])
    assert_identical(expected, actual)
    assert list(actual.xindexes) == ["z", "y", "x"]
@pytest.mark.parametrize(
    "create_index,expected_keys",
    [
        (True, ["z", "x", "y"]),
        (False, []),
        (None, ["z", "x", "y"]),
    ],
)
def test_stack_create_index(self, create_index, expected_keys) -> None:
    """The ``create_index`` option decides which indexes the stacked result has."""
    two_dim = Dataset(
        data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
        coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
    )

    stacked = two_dim.stack(z=["x", "y"], create_index=create_index)

    assert list(stacked.xindexes) == expected_keys
# TODO: benbovy (flexible indexes) - test error multiple indexes found
# along dimension + create_index=True
def test_stack_multi_index(self) -> None:
    """Multi-index on a dimension to stack is discarded too."""
    midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=("lvl1", "lvl2"))
    source_coords = Coordinates.from_pandas_multiindex(midx, "x")
    source_coords["y"] = [0, 1]
    ds = xr.Dataset(
        data_vars={"b": (("x", "y"), [[0, 1], [2, 3], [4, 5], [6, 7]])},
        coords=source_coords,
    )

    # each "x" entry is repeated once per "y" value along the stacked "z"
    repeated_levels = {
        level: ("z", np.repeat(midx.get_level_values(level), 2))
        for level in ("lvl1", "lvl2")
    }
    expected = Dataset(
        data_vars={"b": ("z", [0, 1, 2, 3, 4, 5, 6, 7])},
        coords={
            "x": ("z", np.repeat(midx.values, 2)),
            **repeated_levels,
            "y": ("z", [0, 1, 0, 1] * 2),
        },
    )

    actual = ds.stack(z=["x", "y"], create_index=False)
    assert_identical(expected, actual)
    assert len(actual.xindexes) == 0

    with pytest.raises(ValueError, match=r"cannot create.*wraps a multi-index"):
        ds.stack(z=["x", "y"], create_index=True)
def test_stack_non_dim_coords(self) -> None:
    """Stacking uses the indexed coordinate "xx" as the level for dimension "x"."""
    base = Dataset(
        data_vars={"b": (("x", "y"), [[0, 1], [2, 3]])},
        coords={"x": ("x", [0, 1]), "y": ["a", "b"]},
    )
    ds = base.rename_vars(x="xx")

    level_index = pd.MultiIndex.from_product(
        [[0, 1], ["a", "b"]], names=["xx", "y"]
    )
    expected = Dataset(
        data_vars={"b": ("z", [0, 1, 2, 3])},
        coords=Coordinates.from_pandas_multiindex(level_index, "z"),
    )

    actual = ds.stack(z=["x", "y"])
    assert_identical(expected, actual)
    assert list(actual.xindexes) == ["z", "xx", "y"]
def test_unstack(self) -> None:
    """Unstack "z" given as a name, as a list, or implicitly with ``None``."""
    stacked_index = pd.MultiIndex.from_product(
        [[0, 1], ["a", "b"]], names=["x", "y"]
    )
    ds = Dataset(
        data_vars={"b": ("z", [0, 1, 2, 3])},
        coords=Coordinates.from_pandas_multiindex(stacked_index, "z"),
    )
    expected = Dataset(
        {"b": (("x", "y"), [[0, 1], [2, 3]]), "x": [0, 1], "y": ["a", "b"]}
    )

    # check attrs propagated
    ds["x"].attrs["foo"] = "bar"
    expected["x"].attrs["foo"] = "bar"

    for dim_spec in ("z", ["z"], None):
        assert_identical(ds.unstack(dim_spec), expected)
def test_unstack_errors(self) -> None:
    """Check the errors raised by invalid ``unstack`` calls."""
    ds = Dataset({"x": [1, 2, 3]})

    # unknown dimension
    missing_dim_message = re.escape(
        "Dimensions ('foo',) not found in data dimensions ('x',)"
    )
    with pytest.raises(ValueError, match=missing_dim_message):
        ds.unstack("foo")

    # "x" exists but does not carry a multi-index
    with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"):
        ds.unstack("x")

    # multi-index built from duplicated (y, z) pairs
    duplicated = Dataset(
        {"da": [1, 2]}, coords={"y": ("x", [1, 1]), "z": ("x", [0, 0])}
    ).set_index(x=("y", "z"))
    with pytest.raises(
        ValueError, match="Cannot unstack MultiIndex containing duplicates"
    ):
        duplicated.unstack("x")
def test_unstack_fill_value(self) -> None:
    """``fill_value`` in ``unstack`` matches unstacking and then ``fillna``."""
    full = xr.Dataset(
        {"var": (("x",), np.arange(6)), "other_var": (("x",), np.arange(3, 9))},
        coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)},
    )
    # make ds incomplete
    ds = full.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])

    # scalar fill value on the whole dataset
    dataset_filled = ds.unstack("index", fill_value=-1)
    dataset_expected = ds.unstack("index").fillna(-1).astype(int)
    assert dataset_filled["var"].dtype == int
    assert_equal(dataset_filled, dataset_expected)

    # scalar fill value on a single variable
    array_filled = ds["var"].unstack("index", fill_value=-1)
    array_expected = ds["var"].unstack("index").fillna(-1).astype(int)
    assert_equal(array_filled, array_expected)

    # one fill value per variable
    per_variable = {"var": -1, "other_var": 1}
    mapping_filled = ds.unstack("index", fill_value=per_variable)
    mapping_expected = (
        ds.unstack("index").fillna({"var": -1, "other_var": 1}).astype(int)
    )
    assert_equal(mapping_filled, mapping_expected)
@requires_sparse
def test_unstack_sparse(self) -> None:
    """``unstack(sparse=True)`` returns sparse arrays equal to the dense result."""
    full = xr.Dataset(
        {"var": (("x",), np.arange(6))},
        coords={"x": [0, 1, 2] * 2, "y": (("x",), ["a"] * 3 + ["b"] * 3)},
    )
    # make ds incomplete
    ds = full.isel(x=[0, 2, 3, 4]).set_index(index=["x", "y"])

    # sparse unstacking of the dataset
    sparse_ds = ds.unstack("index", sparse=True)
    dense_ds = ds.unstack("index")
    assert isinstance(sparse_ds["var"].data, sparse_array_type)
    assert sparse_ds["var"].variable._to_dense().equals(dense_ds["var"].variable)
    assert sparse_ds["var"].data.density < 1.0

    # sparse unstacking of a single variable
    sparse_da = ds["var"].unstack("index", sparse=True)
    dense_da = ds["var"].unstack("index")
    assert isinstance(sparse_da.data, sparse_array_type)
    assert sparse_da.variable._to_dense().equals(dense_da.variable)
    assert sparse_da.data.density < 1.0

    # multi-index holding only the (i, i) pairs, plus two ordinary dimensions
    midx = pd.MultiIndex.from_arrays([np.arange(3), np.arange(3)], names=["a", "b"])
    eye_coords = Coordinates.from_pandas_multiindex(midx, "z")
    eye_coords["foo"] = np.arange(4)
    eye_coords["bar"] = np.arange(5)
    ds_eye = Dataset(
        {"var": (("z", "foo", "bar"), np.ones((3, 4, 5)))}, coords=eye_coords
    )
    sparse_eye = ds_eye.unstack(sparse=True, fill_value=0)
    assert isinstance(sparse_eye["var"].data, sparse_array_type)

    # unstacked result: an identity matrix over (a, b) for every (foo, bar)
    expected_eye = xr.Dataset(
        {
            "var": (
                ("foo", "bar", "a", "b"),
                np.broadcast_to(np.eye(3, 3), (4, 5, 3, 3)),
            )
        },
        coords={
            "foo": np.arange(4),
            "bar": np.arange(5),
            "a": np.arange(3),
            "b": np.arange(3),
        },
    )
    # densify before comparing
    sparse_eye["var"].data = sparse_eye["var"].data.todense()
    assert_equal(expected_eye, sparse_eye)
def test_stack_unstack_fast(self) -> None:
    """Stacking and directly unstacking gives back an equivalent dataset."""
    ds = Dataset(
        {
            "a": ("x", [0, 1]),
            "b": (("x", "y"), [[0, 1], [2, 3]]),
            "x": [0, 1],
            "y": ["a", "b"],
        }
    )

    # whole dataset: equal up to broadcasting
    roundtripped = ds.stack(z=["x", "y"]).unstack("z")
    assert roundtripped.broadcast_equals(ds)

    # only the 2D variable: the roundtrip is exact
    only_b = ds[["b"]]
    roundtripped = only_b.stack(z=["x", "y"]).unstack("z")
    assert roundtripped.identical(ds[["b"]])
def test_stack_unstack_slow(self) -> None:
    """Unstacking a reversed stacked dimension still restores the dataset."""
    ds = Dataset(
        data_vars={
            "a": ("x", [0, 1]),
            "b": (("x", "y"), [[0, 1], [2, 3]]),
        },
        coords={"x": [0, 1], "y": ["a", "b"]},
    )
    reverse = slice(None, None, -1)

    # whole dataset: equal up to broadcasting
    reversed_stack = ds.stack(z=["x", "y"]).isel(z=reverse)
    assert reversed_stack.unstack("z").broadcast_equals(ds)

    # only the 2D variable: the roundtrip is exact
    reversed_stack = ds[["b"]].stack(z=["x", "y"]).isel(z=reverse)
    assert reversed_stack.unstack("z").identical(ds[["b"]])
def test_to_stacked_array_invalid_sample_dims(self) -> None:
    """A variable lacking one of the ``sample_dims`` is reported by name."""
    data = xr.Dataset(
        data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])},
        coords={"y": ["u", "v", "w"]},
    )
    expected_message = r"Variables in the dataset must contain all ``sample_dims`` \(\['y'\]\) but 'b' misses \['y'\]"
    with pytest.raises(ValueError, match=expected_message):
        data.to_stacked_array("features", sample_dims=["y"])
def test_to_stacked_array_name(self) -> None:
    """The ``name`` argument becomes the name of the stacked array."""
    # make a two dimensional dataset
    a, b = create_test_stacked_array()
    dataset = xr.Dataset({"a": a, "b": b})

    stacked = dataset.to_stacked_array("features", ["x"], name="adf9d")

    assert stacked.name == "adf9d"
def test_to_stacked_array_dtype_dims(self) -> None:
    """Check the level dtype and the dimension order of the stacked array."""
    # make a two dimensional dataset
    a, b = create_test_stacked_array()
    dataset = xr.Dataset({"a": a, "b": b})

    stacked = dataset.to_stacked_array("features", ["x"])

    features_index = stacked.xindexes["features"].to_pandas_index()
    assert isinstance(features_index, pd.MultiIndex)
    assert features_index.levels[1].dtype == dataset.y.dtype
    assert stacked.dims == ("x", "features")
def test_to_stacked_array_to_unstacked_dataset(self) -> None:
    """``to_stacked_array`` followed by ``to_unstacked_dataset`` is a roundtrip."""
    # single dimension: regression test for GH4049
    arr = xr.DataArray(np.arange(3), coords=[("x", [0, 1, 2])])
    one_dim = xr.Dataset({"a": arr, "b": arr})
    one_dim_stacked = one_dim.to_stacked_array("y", sample_dims=["x"])
    assert_identical(one_dim_stacked.to_unstacked_dataset("y"), one_dim)

    # make a two dimensional dataset
    a, b = create_test_stacked_array()
    two_dim = xr.Dataset({"a": a, "b": b})
    stacked = two_dim.to_stacked_array("features", ["x"]).transpose(
        "x", "features"
    )
    assert_identical(two_dim, stacked.to_unstacked_dataset("features"))

    # test on just one sample
    first_sample = stacked[0].to_unstacked_dataset("features")
    assert_identical(two_dim.isel(x=0), first_sample)
def test_to_stacked_array_to_unstacked_dataset_different_dimension(self) -> None:
    """Roundtrip when variables have different dimensionality."""
    a, b = create_test_stacked_array()
    mixed = xr.Dataset({"a": a, "b": b.isel(y=0)})

    stacked = mixed.to_stacked_array("features", ["x"])
    restored = stacked.to_unstacked_dataset("features")

    assert_identical(mixed, restored)
def test_to_stacked_array_preserves_dtype(self) -> None:
    """The "variable" coordinate of a stacked array has string dtype."""
    # regression test for bug found in https://github.com/pydata/xarray/pull/8872#issuecomment-2081218616
    ds = xr.Dataset(
        data_vars={
            "a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]),
            "b": ("x", [6, 7]),
        },
        coords={"y": ["u", "v", "w"]},
    )
    stacked = ds.to_stacked_array("z", sample_dims=["x"])

    # coordinate created from variables names should be of string dtype
    expected_stacked_variable = DataArray(
        name="variable",
        data=np.array(["a", "a", "a", "b"], dtype="<U1"),
        dims="z",
    )
    actual_stacked_variable = stacked.coords["variable"].drop_vars(
        ["z", "variable", "y"]
    )
    assert_identical(actual_stacked_variable, expected_stacked_variable)
def test_update(self) -> None:
    """``update`` replaces variables in place and ignores the other's attrs."""
    data = create_test_data(seed=0)
    expected = data.copy()
    replacement = Variable("dim1", np.arange(8))

    # update() works in place, so ``data`` itself carries the new "var2" below
    data.update({"var2": replacement})
    expected["var2"] = replacement
    assert_identical(expected, data)

    # updating a copy with the dataset it was copied from changes nothing
    same_content = data.copy()
    same_content.update(data)
    assert_identical(expected, same_content)

    # a dataset holding only attrs leaves the target unchanged
    attrs_only = Dataset(attrs={"new": "attr"})
    untouched = data.copy()
    untouched.update(attrs_only)
    assert_identical(expected, untouched)
def test_update_overwrite_coords(self) -> None:
    """Check when ``update`` overwrites an existing scalar coordinate."""

    def fresh() -> Dataset:
        return Dataset({"a": ("x", [1, 2])}, {"b": 3})

    # coordinates of a Dataset argument overwrite existing ones
    data = fresh()
    data.update(Dataset(coords={"b": 4}))
    assert_identical(data, Dataset({"a": ("x", [1, 2])}, {"b": 4}))

    # also when the Dataset brings a new data variable along
    data = fresh()
    data.update(Dataset({"c": 5}, coords={"b": 4}))
    assert_identical(data, Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 4}))

    # coordinates attached to a DataArray value do not overwrite
    data = fresh()
    data.update({"c": DataArray(5, coords={"b": 4})})
    assert_identical(data, Dataset({"a": ("x", [1, 2]), "c": 5}, {"b": 3}))
def test_update_multiindex_level(self) -> None:
    """Updating a multi-index level coordinate raises ``ValueError``."""
    multi_indexed = create_test_multiindex()
    corrupt_index_error = r"cannot set or update variable.*corrupt.*index "
    with pytest.raises(ValueError, match=corrupt_index_error):
        multi_indexed.update({"level_1": range(4)})
def test_update_auto_align(self) -> None:
    """``update`` with a Dataset aligns the new variables to the existing index."""
    ds = Dataset({"x": ("t", [3, 4])}, {"t": [0, 1]})

    # overlapping label: the value lands on the matching position
    expected_overlap = Dataset(
        {"x": ("t", [3, 4]), "y": ("t", [np.nan, 5])}, {"t": [0, 1]}
    )
    target = ds.copy()
    overlapping = {"y": ("t", [5]), "t": [1]}
    # the same content passed as a plain dict fails instead
    with pytest.raises(ValueError, match=r"conflicting sizes"):
        target.update(overlapping)
    target.update(Dataset(overlapping))
    assert_identical(expected_overlap, target)

    # disjoint label: the new variable is all NaN
    target = ds.copy()
    disjoint = Dataset({"y": ("t", [5]), "t": [100]})
    target.update(disjoint)
    expected_disjoint = Dataset(
        {"x": ("t", [3, 4]), "y": ("t", [np.nan] * 2)}, {"t": [0, 1]}
    )
    assert_identical(expected_disjoint, target)
def test_getitem(self) -> None:
    """Index a dataset by name, by list of names and by a dict of indexers."""
    data = create_test_data()

    # single name -> DataArray wrapping the stored variable
    assert isinstance(data["var1"], DataArray)
    assert_equal(data["var1"].variable, data.variables["var1"])

    # unknown names, alone or inside a list
    for missing_key in ("notfound", ["var1", "notfound"]):
        with pytest.raises(KeyError):
            data[missing_key]

    # a tuple of names fails with a hint to use a list
    with pytest.raises(
        KeyError,
        match=r"Hint: use a list to select multiple variables, for example `ds\[\['var1', 'var2'\]\]`",
    ):
        data["var1", "var2"]

    # list of names -> Dataset
    subset = data[["var1", "var2"]]
    assert_equal(Dataset({"var1": data["var1"], "var2": data["var2"]}), subset)

    # a coordinate comes back together with the coordinates along its dimension
    numbers = data["numbers"]
    expected_numbers = DataArray(
        data["numbers"].variable,
        {"dim3": data["dim3"], "numbers": data["numbers"]},
        dims="dim3",
        name="numbers",
    )
    assert_identical(expected_numbers, numbers)

    # dict key -> positional indexing
    assert_identical(data.isel(dim1=0), data[dict(dim1=0)])
def test_getitem_hashable(self) -> None:
    """A tuple is a valid variable name; a missing tuple key raises ``KeyError``."""
    data = create_test_data()

    # store and read back a variable named by the tuple (3, 4)
    data[(3, 4)] = data["var1"] + 1
    expected = data["var1"] + 1
    expected.name = (3, 4)
    assert_identical(expected, data[(3, 4)])

    # Escape the tuple repr: unescaped parentheses would be a regex group, so
    # the pattern would match the message without requiring the parentheses.
    with pytest.raises(KeyError, match=re.escape("('var1', 'var2')")):
        data[("var1", "var2")]
def test_getitem_multiple_dtype(self) -> None:
    """Selecting with a list of keys of mixed type (str and int) works."""
    mixed_keys = ["foo", 1]
    variables = {}
    for key in mixed_keys:
        variables[key] = ("dim0", range(1))
    dataset = Dataset(variables)
    assert_identical(dataset, dataset[mixed_keys])
def test_virtual_variables_default_coords(self) -> None:
    """A dimension without a coordinate can still be accessed as ``ds["x"]``."""
    dataset = Dataset({"foo": ("x", range(10))})

    # accessed alone: a range-valued array backed by an IndexVariable
    default_coord = dataset["x"]
    assert_identical(DataArray(range(10), dims="x", name="x"), default_coord)
    assert isinstance(default_coord.variable, IndexVariable)

    # accessed in a list: same as assigning the coordinate explicitly
    with_coord = dataset[["x", "foo"]]
    assert_identical(dataset.assign_coords(x=range(10)), with_coord)
def test_virtual_variables_time(self) -> None:
    """Access datetime components through ``"<name>.<component>"`` keys."""
    # access virtual variables
    data = create_test_data()
    time_index = data.variables["time"].to_index()
    assert isinstance(time_index, pd.DatetimeIndex)
    assert_array_equal(data["time.month"].values, time_index.month)
    assert_array_equal(data["time.season"].values, "DJF")

    # test virtual variable math
    day_of_year = 1 + np.arange(20)
    assert_array_equal(data["time.dayofyear"] + 1, 2 + np.arange(20))
    assert_array_equal(np.sin(data["time.dayofyear"]), np.sin(day_of_year))

    # ensure they become coordinates
    selected = data[["time.dayofyear"]]
    assert_equal(Dataset({}, {"dayofyear": data["time.dayofyear"]}), selected)

    # non-coordinate variables
    ds = Dataset({"t": ("x", pd.date_range("2000-01-01", periods=3))})
    assert (ds["t.year"] == 2000).all()
def test_virtual_variable_same_name(self) -> None:
    """``ds["time.time"]`` works when the component shares the variable's name."""
    # regression test for GH367
    times = pd.date_range("2000-01-01", freq="h", periods=5)
    data = Dataset({"time": times})

    time_of_day = data["time.time"]

    expected = DataArray(times.time, [("time", times)], name="time")
    assert_identical(time_of_day, expected)
def test_time_season(self) -> None:
    """``"t.season"`` gives the season label of each of twelve month ends."""
    time = xr.date_range("2000-01-01", periods=12, freq="ME", use_cftime=False)
    ds = Dataset({"t": time})

    # (season, number of consecutive months), starting in January
    season_runs = (("DJF", 2), ("MAM", 3), ("JJA", 3), ("SON", 3), ("DJF", 1))
    seas = [season for season, count in season_runs for _ in range(count)]

    assert_array_equal(seas, ds["t.season"])
def test_slice_virtual_variable(self) -> None:
    """Virtual variables can be sliced and indexed like ordinary arrays."""
    data = create_test_data()
    day_of_year = data["time.dayofyear"]

    # a slice keeps the "time" dimension
    first_ten = Variable(["time"], 1 + np.arange(10))
    assert_equal(day_of_year[:10].variable, first_ten)

    # a single element is a scalar variable
    assert_equal(data["time.dayofyear"][0].variable, Variable([], 1))
def test_setitem(self) -> None:
# Walks through Dataset item assignment: whole-variable assignment by name,
# the error cases for invalid values, and dict-keyed (positional and .loc)
# partial assignment. The four datasets data1..data4 are mutated step by step,
# so the statement order below matters.
# assign a variable
var = Variable(["dim1"], np.random.randn(8))
data1 = create_test_data()
data1["A"] = var
data2 = data1.copy()
data2["A"] = var
assert_identical(data1, data2)
# assign a dataset array
dv = 2 * data2["A"]
data1["B"] = dv.variable
data2["B"] = dv
assert_identical(data1, data2)
# can't assign an ND array without dimensions
with pytest.raises(ValueError, match=r"without explicit dimension names"):
data2["C"] = var.values.reshape(2, 4)
# but can assign a 1D array
data1["C"] = var.values
data2["C"] = ("C", var.values)
assert_identical(data1, data2)
# can assign a scalar
data1["scalar"] = 0
data2["scalar"] = ([], 0)
assert_identical(data1, data2)
# can't use the same dimension name as a scalar var
with pytest.raises(ValueError, match=r"already exists as a scalar"):
data1["newvar"] = ("scalar", [3, 4, 5])
# can't resize a used dimension
with pytest.raises(ValueError, match=r"conflicting dimension sizes"):
data1["dim1"] = data1["dim1"][:5]
# override an existing value
data1["A"] = 3 * data2["A"]
assert_equal(data1["A"], 3 * data2["A"])
# can't assign a dataset to a single key
with pytest.raises(TypeError, match="Cannot assign a Dataset to a single key"):
data1["D"] = xr.Dataset()
# test assignment with positional and label-based indexing
# data3 is the reference subset; data4 is the copy that receives the
# dict-keyed assignments checked at the end.
data3 = data1[["var1", "var2"]]
data3["var3"] = data3.var1.isel(dim1=0)
data4 = data3.copy()
err_msg = (
"can only set locations defined by dictionaries from Dataset.loc. Got: a"
)
with pytest.raises(TypeError, match=err_msg):
data1.loc["a"] = 0
err_msg = r"Variables \['A', 'B', 'scalar'\] in new values not available in original dataset:"
with pytest.raises(ValueError, match=err_msg):
data4[{"dim2": 1}] = data1[{"dim2": 2}]
err_msg = "Variable 'var3': indexer {'dim2': 0} not available"
with pytest.raises(ValueError, match=err_msg):
data1[{"dim2": 0}] = 0.0
err_msg = "Variable 'var1': indexer {'dim2': 10} not available"
with pytest.raises(ValueError, match=err_msg):
data4[{"dim2": 10}] = data3[{"dim2": 2}]
err_msg = "Variable 'var1': dimension 'dim2' appears in new values"
with pytest.raises(KeyError, match=err_msg):
data4[{"dim2": 2}] = data3[{"dim2": [2]}]
err_msg = (
"Variable 'var2': dimension order differs between original and new data"
)
# transpose var2 only for the duration of this check, then transpose it back
data3["var2"] = data3["var2"].T
with pytest.raises(ValueError, match=err_msg):
data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3]}]
data3["var2"] = data3["var2"].T
err_msg = r"cannot align objects.*not equal along these coordinates.*"
with pytest.raises(ValueError, match=err_msg):
data4[{"dim2": [2, 3]}] = data3[{"dim2": [2, 3, 4]}]
err_msg = "Dataset assignment only accepts DataArrays, Datasets, and scalars."
with pytest.raises(TypeError, match=err_msg):
data4[{"dim2": [2, 3]}] = data3["var1"][{"dim2": [3, 4]}].values
data5 = data4.astype(str)
data5["var4"] = data4["var1"]
# convert to `np.str_('a')` once `numpy<2.0` has been dropped
err_msg = "could not convert string to float: .*'a'.*"
with pytest.raises(ValueError, match=err_msg):
data5[{"dim2": 1}] = "a"
# the successful partial assignments; each one is verified per variable below
data4[{"dim2": 0}] = 0.0
data4[{"dim2": 1}] = data3[{"dim2": 2}]
data4.loc[{"dim2": 1.5}] = 1.0
data4.loc[{"dim2": 2.0}] = data3.loc[{"dim2": 2.5}]
for v, dat3 in data3.items():
dat4 = data4[v]
assert_array_equal(dat4[{"dim2": 0}], 0.0)
assert_array_equal(dat4[{"dim2": 1}], dat3[{"dim2": 2}])
assert_array_equal(dat4.loc[{"dim2": 1.5}], 1.0)
assert_array_equal(dat4.loc[{"dim2": 2.0}], dat3.loc[{"dim2": 2.5}])
# dim2 labels that none of the assignments above wrote to
unchanged = [1.0, 2.5, 3.0, 3.5, 4.0]
assert_identical(
dat4.loc[{"dim2": unchanged}], dat3.loc[{"dim2": unchanged}]
)
def test_setitem_pandas(self) -> None:
    """Assigning the pandas form of a variable leaves the dataset equal."""
    reference = self.make_example_math_dataset()
    reference["x"] = np.arange(3)

    roundtrip = reference.copy()
    bar_as_pandas = reference["bar"].to_pandas()
    roundtrip["bar"] = bar_as_pandas

    assert_equal(reference, roundtrip)
def test_setitem_auto_align(self) -> None:
    """Values assigned by key are aligned against the dataset's ``y`` index."""
    target = Dataset()
    target["x"] = ("y", range(3))
    target["y"] = 1 + np.arange(3)
    wanted = Dataset({"x": ("y", range(3)), "y": 1 + np.arange(3)})
    assert_identical(target, wanted)

    # replace the index coordinate itself
    target["y"] = DataArray(range(3), dims="y")
    wanted = Dataset({"x": ("y", range(3))}, {"y": range(3)})
    assert_identical(target, wanted)

    # shorter input: the label it does not cover comes out as NaN
    target["x"] = DataArray([1, 2], coords=[("y", [0, 1])])
    wanted = Dataset({"x": ("y", [1, 2, np.nan])}, {"y": range(3)})
    assert_identical(target, wanted)

    # scalar input replaces the whole variable
    target["x"] = 42
    wanted = Dataset({"x": 42, "y": range(3)})
    assert_identical(target, wanted)

    # longer input: the label the dataset does not have is discarded
    target["x"] = DataArray([4, 5, 6, 7], coords=[("y", [0, 1, 2, 3])])
    wanted = Dataset({"x": ("y", [4, 5, 6])}, {"y": range(3)})
    assert_identical(target, wanted)
def test_setitem_dimension_override(self) -> None:
    """Regression test for GH-3377: a dimension coordinate may be shortened."""
    shortened = Dataset({"x": [0, 1]})

    # via a sliced DataArray
    from_slice = xr.Dataset({"x": [0, 1, 2]})
    from_slice["x"] = from_slice["x"][:2]
    assert_identical(from_slice, shortened)

    # via a plain numpy array
    from_array = xr.Dataset({"x": [0, 1, 2]})
    from_array["x"] = np.array([0, 1])
    assert_identical(from_array, shortened)

    # via the coords mapping
    from_coords = xr.Dataset({"x": [0, 1, 2]})
    from_coords.coords["x"] = [0, 1]
    assert_identical(from_coords, shortened)
def test_setitem_with_coords(self) -> None:
    """Regression test for GH:2068, plus the GH:2099 case at the end."""
    base = create_test_data()
    incoming = DataArray(
        np.arange(10), dims="dim3", coords={"numbers": ("dim3", np.arange(10))}
    )
    want = base.copy()
    want["var3"] = incoming.drop_vars("numbers")
    got = base.copy()
    got["var3"] = incoming
    assert_identical(want, got)
    assert "numbers" in incoming.coords  # should not change other

    # with alignment
    interior = slice(1, -1)
    incoming = base["var3"].isel(dim3=interior)
    incoming["numbers"] = ("dim3", np.arange(8))
    got = base.copy()
    got["var3"] = incoming
    assert "numbers" in incoming.coords  # should not change other
    want = base.copy()
    want["var3"] = base["var3"].isel(dim3=interior)
    assert_identical(want, got)

    # with non-duplicate coords
    incoming = base["var3"].isel(dim3=interior)
    incoming["numbers"] = ("dim3", np.arange(8))
    incoming["position"] = ("dim3", np.arange(8))
    got = base.copy()
    got["var3"] = incoming
    assert "position" in got
    assert "position" in incoming.coords

    # assigning a coordinate-only dataarray
    got = base.copy()
    numbers = got["numbers"]
    numbers[0] = 10
    got["numbers"] = numbers
    assert got["numbers"][0] == 10

    # GH: 2099
    doubled = Dataset(
        {"var": ("x", [1, 2, 3])},
        coords={"x": [0, 1, 2], "z1": ("x", [1, 2, 3]), "z2": ("x", [1, 2, 3])},
    )
    doubled["var"] = doubled["var"] * 2
    assert np.allclose(doubled["var"], [2, 4, 6])
def test_setitem_align_new_indexes(self) -> None:
    """A new variable with shifted labels is aligned to the existing ``x``."""
    x_labels = {"x": [0, 1, 2]}
    ds = Dataset({"foo": ("x", [1, 2, 3])}, x_labels)

    shifted = DataArray([2, 3, 4], [("x", [1, 2, 3])])
    ds["bar"] = shifted

    wanted_vars = {"foo": ("x", [1, 2, 3]), "bar": ("x", [np.nan, 2, 3])}
    wanted = Dataset(wanted_vars, {"x": [0, 1, 2]})
    assert_identical(ds, wanted)
def test_setitem_vectorized(self) -> None:
    """Regression test for GH:7030."""
    # Positional indexing
    cube = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"])
    ds = xr.Dataset({"da": cube})
    b_idx = xr.DataArray([[0, 0], [1, 0]], dims=["u", "v"])
    c_idx = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"])
    fill = xr.DataArray([-1, -2], dims=["u"])
    indexer = {"b": b_idx, "c": c_idx}
    ds[indexer] = xr.Dataset({"da": fill})
    written = ds[indexer]["da"]
    assert (written == fill).all()

    # Indexing with coordinates
    cube = xr.DataArray(np.r_[:120].reshape(2, 3, 4, 5), dims=["a", "b", "c", "d"])
    ds = xr.Dataset({"da": cube})
    ds.coords["b"] = [2, 4, 6]
    b_idx = xr.DataArray([[2, 2], [4, 2]], dims=["u", "v"])
    c_idx = xr.DataArray([[0, 1], [2, 3]], dims=["u", "v"])
    fill = xr.DataArray([-1, -2], dims=["u"])
    indexer = {"b": b_idx, "c": c_idx}
    ds.loc[indexer] = xr.Dataset({"da": fill}, coords={"b": ds.coords["b"]})
    written = ds.loc[indexer]["da"]
    assert (written == fill).all()
@pytest.mark.parametrize("dtype", [str, bytes])
def test_setitem_str_dtype(self, dtype) -> None:
    """Adding a data variable keeps the string/bytes dtype of coordinate ``x``."""
    labels = np.array(["x", "y"], dtype=dtype)
    ds = xr.Dataset(coords={"x": labels})

    # test Dataset update
    zeros = xr.DataArray(np.array([0, 0]), dims=["x"])
    ds["foo"] = zeros

    assert np.issubdtype(ds.x.dtype, dtype)
def test_setitem_using_list(self) -> None:
    """A list of keys can be assigned in one statement."""
    # assign a list of variables
    first = Variable(["dim1"], np.random.randn(8))
    second = Variable(["dim1"], np.random.randn(8))
    got = create_test_data()
    want = got.copy()
    want["A"] = first
    want["B"] = second
    got[["A", "B"]] = [first, second]
    assert_identical(got, want)

    # assign a list of dataset arrays
    doubled = 2 * want[["A", "B"]]
    got[["C", "D"]] = [arr.variable for arr in doubled.data_vars.values()]
    want[["C", "D"]] = doubled
    assert_identical(got, want)
@pytest.mark.parametrize(
    "var_list, data, error_regex",
    [
        # two keys but only one value
        (
            ["A", "B"],
            [Variable(["dim1"], np.random.randn(8))],
            r"Different lengths",
        ),
        # no keys at all
        (
            [],
            [Variable(["dim1"], np.random.randn(8))],
            r"Empty list of variables",
        ),
        # a single DataArray offered for several keys
        (
            ["A", "B"],
            xr.DataArray([1, 2]),
            r"assign single DataArray",
        ),
    ],
)
def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None:
    """Invalid list assignments raise ``ValueError`` matching ``error_regex``."""
    ds = create_test_data()
    with pytest.raises(ValueError, match=error_regex):
        ds[var_list] = data
def test_assign(self) -> None:
    """``assign`` returns a new dataset and accepts values or callables."""
    empty = Dataset()
    assigned = empty.assign(x=[0, 1, 2], y=2)
    assert_identical(assigned, Dataset({"x": [0, 1, 2], "y": 2}))
    assert list(assigned.variables) == ["x", "y"]
    # the dataset that assign was called on is still empty
    assert_identical(empty, Dataset())

    # a callable receives the dataset it is being assigned to
    squared = assigned.assign(y=lambda ds: ds.x**2)
    assert_identical(squared, Dataset({"y": ("x", [0, 1, 4]), "x": [0, 1, 2]}))

    with_scalar_coord = squared.assign_coords(z=2)
    wanted = Dataset({"y": ("x", [0, 1, 4])}, {"z": 2, "x": [0, 1, 2]})
    assert_identical(with_scalar_coord, wanted)
def test_assign_coords(self) -> None:
    """``assign_coords`` accepts keyword arguments as well as a mapping."""
    empty = Dataset()

    # keyword form
    relabeled = empty.assign(x=[0, 1, 2], y=2)
    relabeled = relabeled.assign_coords(x=list("abc"))
    assert_identical(relabeled, Dataset({"x": list("abc"), "y": 2}))

    # mapping form
    as_float = empty.assign(x=[0, 1, 2], y=[2, 3])
    as_float = as_float.assign_coords({"y": [2.0, 3.0]})
    assert_identical(as_float, empty.assign(x=[0, 1, 2], y=[2.0, 3.0]))
def test_assign_attrs(self) -> None:
    """``assign_attrs`` returns a new object and leaves its receiver alone."""
    blank = Dataset()
    wanted = Dataset(attrs={"a": 1, "b": 2})

    with_ab = blank.assign_attrs(a=1, b=2)
    assert_identical(with_ab, wanted)
    assert blank.attrs == {}

    wanted.attrs["c"] = 3
    with_abc = with_ab.assign_attrs({"c": 3})
    assert_identical(with_abc, wanted)
    assert with_ab.attrs == {"a": 1, "b": 2}
def test_drop_attrs(self) -> None:
    """Check ``Dataset.drop_attrs`` in its default form and with ``deep=False``.

    Both forms must clear the dataset-level attrs without modifying the
    dataset they are called on; only the default form also clears the attrs
    of the contained variables and coordinates.
    """
    # Simple example
    ds = Dataset().assign_attrs(a=1, b=2)
    original = ds.copy()
    expected = Dataset()
    result = ds.drop_attrs()
    assert_identical(result, expected)

    # Doesn't change original
    assert_identical(ds, original)

    # Example with variables and coords with attrs, and a multiindex. (arguably
    # should have used a canonical dataset with all the features we should
    # support...)
    var = Variable("x", [1, 2, 3], attrs=dict(x=1, y=2))
    idx = IndexVariable("y", [1, 2, 3], attrs=dict(c=1, d=2))
    mx = xr.Coordinates.from_pandas_multiindex(
        pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["d", "e"]), "z"
    )
    ds = Dataset(dict(var1=var), coords=dict(y=idx, z=mx)).assign_attrs(a=1, b=2)
    assert ds.attrs != {}
    assert ds["var1"].attrs != {}
    assert ds["y"].attrs != {}
    assert ds.coords["y"].attrs != {}

    original = ds.copy(deep=True)
    result = ds.drop_attrs()

    assert result.attrs == {}
    assert result["var1"].attrs == {}
    assert result["y"].attrs == {}
    assert list(result.data_vars) == list(ds.data_vars)
    assert list(result.coords) == list(ds.coords)

    # Doesn't change original
    assert_identical(ds, original)
    # Specifically test that the attrs on the coords are still there. (The index
    # can't currently contain `attrs`, so we can't test those.)
    assert ds.coords["y"].attrs != {}

    # Test for deep=False
    result_shallow = ds.drop_attrs(deep=False)
    assert result_shallow.attrs == {}
    assert result_shallow["var1"].attrs != {}
    assert result_shallow["y"].attrs != {}
    # These checks must look at the shallow result; the earlier version
    # re-checked `result` from the deep call above.
    assert list(result_shallow.data_vars) == list(ds.data_vars)
    assert list(result_shallow.coords) == list(ds.coords)
def test_assign_multiindex_level(self) -> None:
    """Overwriting a single multi-index level must raise ``ValueError``.

    Each call gets its own ``pytest.raises`` block: with both calls in one
    block, the first exception ends the block and the second call never runs.
    """
    data = create_test_multiindex()
    with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "):
        data.assign(level_1=range(4))
    with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "):
        data.assign_coords(level_1=range(4))
def test_assign_new_multiindex(self) -> None:
    """Assigning a raw ``pd.MultiIndex`` still works but emits a FutureWarning."""
    level_values = [["a", "a", "b", "b"], [0, 1, 0, 1]]
    midx = pd.MultiIndex.from_arrays(level_values)
    wanted = Dataset(coords=Coordinates.from_pandas_multiindex(midx, "x"))

    ds = Dataset(coords={"x": [1, 2]})
    promotion_warning = ".*`pandas.MultiIndex`.*no longer be implicitly promoted.*"
    with pytest.warns(FutureWarning, match=promotion_warning):
        got = ds.assign(x=midx)

    assert_identical(got, wanted)
@pytest.mark.parametrize("orig_coords", [{}, {"x": range(4)}])
def test_assign_coords_new_multiindex(self, orig_coords) -> None:
    """Assign a multi-index as a raw pandas object and as ``Coordinates``."""
    ds = Dataset(coords=orig_coords)
    level_values = [["a", "a", "b", "b"], [0, 1, 0, 1]]
    midx = pd.MultiIndex.from_arrays(level_values, names=("one", "two"))
    midx_coords = Coordinates.from_pandas_multiindex(midx, "x")
    wanted = Dataset(coords=midx_coords)

    # the raw pandas object triggers the deprecation warning
    promotion_warning = ".*`pandas.MultiIndex`.*no longer be implicitly promoted.*"
    with pytest.warns(FutureWarning, match=promotion_warning):
        from_pandas = ds.assign_coords({"x": midx})
    assert_identical(from_pandas, wanted)

    # an explicit Coordinates object
    from_coords = ds.assign_coords(midx_coords)
    assert_identical(from_coords, wanted)
def test_assign_coords_existing_multiindex(self) -> None:
    """Replacing a multi-index dimension coordinate warns and leaves one coord."""
    data = create_test_multiindex()
    inconsistency_warning = r"updating coordinate.*MultiIndex.*inconsistent"

    with pytest.warns(FutureWarning, match=inconsistency_warning):
        via_assign_coords = data.assign_coords(x=range(4))
    # https://github.com/pydata/xarray/issues/7097 (coord names updated)
    assert len(via_assign_coords.coords) == 1

    with pytest.warns(FutureWarning, match=inconsistency_warning):
        via_assign = data.assign(x=range(4))
    # https://github.com/pydata/xarray/issues/7097 (coord names updated)
    assert len(via_assign.coords) == 1
def test_assign_all_multiindex_coords(self) -> None:
    """Replacing the dimension and both levels together raises no error."""
    data = create_test_multiindex()
    replaced = data.assign(x=range(4), level_1=range(4), level_2=range(4))

    # no error but multi-index dropped in favor of single indexes for each level
    x_index = replaced.xindexes["x"]
    level_1_index = replaced.xindexes["level_1"]
    assert (
        x_index is not level_1_index
        and level_1_index is not replaced.xindexes["level_2"]
    )
def test_assign_coords_custom_index_side_effect(self) -> None:
    """Adding an unrelated coordinate keeps the custom index on ``x``."""
    # test that assigning new coordinates do not reset other dimension coord indexes
    # to default (pandas) index (https://github.com/pydata/xarray/issues/7346)

    class CustomIndex(PandasIndex):
        pass

    plain = Dataset(coords={"x": [1, 2, 3]})
    unindexed = plain.drop_indexes("x")
    ds = unindexed.set_xindex("x", CustomIndex)

    with_y = ds.assign_coords(y=[4, 5, 6])
    assert isinstance(with_y.xindexes["x"], CustomIndex)
def test_assign_coords_custom_index(self) -> None:
    """A custom index passed in through ``Coordinates`` is kept by ``assign_coords``."""

    class CustomIndex(Index):
        pass

    x_variable = {"x": ("x", [1, 2, 3])}
    x_index = {"x": CustomIndex()}
    coords = Coordinates(coords=x_variable, indexes=x_index)

    assigned = Dataset().assign_coords(coords)
    assert isinstance(assigned.xindexes["x"], CustomIndex)
def test_assign_coords_no_default_index(self) -> None:
    """Coordinates built with ``indexes={}`` do not gain an index on assignment."""
    indexless = Coordinates({"y": [1, 2, 3]}, indexes={})

    assigned = Dataset().assign_coords(indexless)
    wanted = indexless.to_dataset()

    assert_identical(wanted, assigned, check_default_indexes=False)
    assert "y" not in assigned.xindexes
def test_merge_multiindex_level(self) -> None:
    """Merge datasets whose ``level_1`` clashes with a multi-index level name."""
    data = create_test_multiindex()

    # data variable with a different length along "x"
    wrong_size = Dataset({"level_1": ("x", [0, 1])})
    with pytest.raises(ValueError, match=r".*conflicting dimension sizes.*"):
        data.merge(wrong_size)

    # data variable with a matching length
    as_data_var = Dataset({"level_1": ("x", range(4))})
    with pytest.raises(
        ValueError, match=r"unable to determine.*coordinates or not.*"
    ):
        data.merge(as_data_var)

    # `other` Dataset coordinates are ignored (bug or feature?)
    as_coord = Dataset(coords={"level_1": ("x", range(4))})
    merged = data.merge(as_coord)
    assert_identical(merged, data)
def test_setitem_original_non_unique_index(self) -> None:
    """Regression test for GH943: a non-unique index can be overwritten."""
    with_duplicates = Dataset(
        {"data": ("x", np.arange(5))}, coords={"x": [0, 1, 2, 0, 1]}
    )
    wanted = Dataset({"data": ("x", np.arange(5))}, {"x": range(5)})

    # bare list
    via_list = with_duplicates.copy()
    via_list["x"] = list(range(5))
    assert_identical(via_list, wanted)

    # (dims, values) tuple
    via_tuple = with_duplicates.copy()
    via_tuple["x"] = ("x", list(range(5)))
    assert_identical(via_tuple, wanted)

    # through the coords mapping
    via_coords = with_duplicates.copy()
    via_coords.coords["x"] = list(range(5))
    assert_identical(via_coords, wanted)
def test_setitem_both_non_unique_index(self) -> None:
    """Regression test for GH956: both sides share a non-unique index."""
    names = ["joaquin", "manolo", "joaquin"]
    pixels = np.random.randint(0, 256, (3, 4, 4))
    image = DataArray(
        pixels,
        dims=["name", "row", "column"],
        coords=[names, range(4), range(4)],
    )
    wanted = Dataset({"first": image, "second": image})

    built = image.rename("first").to_dataset()
    built["second"] = image

    assert_identical(wanted, built)
def test_setitem_multiindex_level(self) -> None:
    """Overwriting a single multi-index level by key raises ``ValueError``."""
    data = create_test_multiindex()
    corruption_error = r"cannot set or update variable.*corrupt.*index "
    with pytest.raises(ValueError, match=corruption_error):
        data["level_1"] = range(4)
def test_delitem(self) -> None:
    """``del ds[name]`` removes data variables and coordinates alike."""
    data = create_test_data()
    names_before = set(data.variables)
    assert set(data.variables) == names_before

    removed = {"var1"}
    del data["var1"]
    assert set(data.variables) == names_before - removed

    removed = {"var1", "numbers"}
    del data["numbers"]
    assert set(data.variables) == names_before - removed
    assert "numbers" not in data.coords

    # deleting the only variable leaves an empty dataset
    emptied = Dataset({"y": ("x", [1, 2])})
    del emptied["y"]
    assert_identical(Dataset(), emptied)
def test_delitem_multiindex_level(self) -> None:
    """Deleting a single multi-index level raises ``ValueError``."""
    data = create_test_multiindex()
    corruption_error = r"cannot remove coordinate.*corrupt.*index "
    with pytest.raises(ValueError, match=corruption_error):
        del data["level_1"]
def test_squeeze(self) -> None:
    """``Dataset.squeeze`` agrees with squeezing every variable on its own."""
    data = Dataset({"foo": (["x", "y", "z"], [[[1], [2]]])})

    def per_variable_args(args, var):
        # keep only the requested dims that this variable actually has
        if not args:
            return []
        return [set(args[0]) & set(var.dims)]

    cases: list[list] = [[], [["x"]], [["x", "z"]]]
    for args in cases:
        squeezed_vars = {
            name: var.squeeze(*per_variable_args(args, var))
            for name, var in data.variables.items()
        }
        wanted = Dataset(squeezed_vars)
        wanted = wanted.set_coords(data.coords)
        assert_identical(wanted, data.squeeze(*args))

    # invalid squeeze
    with pytest.raises(ValueError, match=r"cannot select a dimension"):
        data.squeeze("y")
def test_squeeze_drop(self) -> None:
    """``drop`` decides whether squeezed coordinates stay on as scalars."""
    one_dim = Dataset({"foo": ("x", [1])}, {"x": [0]})

    # drop=True discards the squeezed coordinate
    assert_identical(Dataset({"foo": 1}), one_dim.squeeze(drop=True))

    # drop=False keeps it as a scalar coordinate
    assert_identical(Dataset({"foo": 1}, {"x": 0}), one_dim.squeeze(drop=False))

    two_dim = Dataset({"foo": (("x", "y"), [[1]])}, {"x": [0], "y": [0]})
    assert_identical(Dataset({"foo": 1}), two_dim.squeeze(drop=True))

    # squeezing only "y" leaves "x" in place
    only_x = Dataset({"foo": ("x", [1])}, {"x": [0]})
    assert_identical(only_x, two_dim.squeeze(dim="y", drop=True))

    # a zero-length dimension comes back unchanged
    zero_length = Dataset({"foo": (("x",), [])}, {"x": []})
    assert_identical(zero_length, zero_length.squeeze(drop=True))
def test_to_dataarray(self) -> None:
    """``to_dataarray`` stacks the data variables along a new dimension."""
    ds = Dataset(
        {"a": 1, "b": ("x", [1, 2, 3])},
        coords={"c": 42},
        attrs={"Conventions": "None"},
    )

    # default name for the new dimension
    stacked_values = [[1, 1, 1], [1, 2, 3]]
    stacked_coords = {"c": 42, "variable": ["a", "b"]}
    wanted = DataArray(
        stacked_values, stacked_coords, ("variable", "x"), attrs=ds.attrs
    )
    assert_identical(wanted, ds.to_dataarray())

    # custom dimension name and array name
    renamed = ds.to_dataarray("abc", name="foo")
    wanted = wanted.rename({"variable": "abc"}).rename("foo")
    assert_identical(wanted, renamed)
def test_to_and_from_dataframe(self) -> None:
# Round-trips datasets through pandas DataFrames: a single index, a
# MultiIndex with both dim_order permutations, invalid dim_order values, and
# a set of empty / scalar-coordinate edge cases. `ds`, `expected` and `actual`
# are rebound from section to section, so the statement order matters.
x = np.random.randn(10)
y = np.random.randn(10)
t = list("abcdefghij")
cat = pd.Categorical(["a", "b"] * 5)
ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t), "cat": ("t", cat)})
expected = pd.DataFrame(
np.array([x, y]).T, columns=["a", "b"], index=pd.Index(t, name="t")
)
expected["cat"] = cat
actual = ds.to_dataframe()
# use the .equals method to check all DataFrame metadata
assert expected.equals(actual), (expected, actual)
# verify coords are included
actual = ds.set_coords("b").to_dataframe()
assert expected.equals(actual), (expected, actual)
# check roundtrip
assert_identical(ds, Dataset.from_dataframe(actual))
assert isinstance(ds["cat"].variable.data.dtype, pd.CategoricalDtype)
# test a case with a MultiIndex
w = np.random.randn(2, 3)
cat = pd.Categorical(["a", "a", "c"])
ds = Dataset({"w": (("x", "y"), w), "cat": ("y", cat)})
ds["y"] = ("y", list("abc"))
exp_index = pd.MultiIndex.from_arrays(
[[0, 0, 0, 1, 1, 1], ["a", "b", "c", "a", "b", "c"]], names=["x", "y"]
)
expected = pd.DataFrame(
{"w": w.reshape(-1), "cat": pd.Categorical(["a", "a", "c", "a", "a", "c"])},
index=exp_index,
)
actual = ds.to_dataframe()
assert expected.equals(actual)
# check roundtrip
# from_dataframe attempts to broadcast across because it doesn't know better, so cat must be converted
ds["cat"] = (("x", "y"), np.stack((ds["cat"].to_numpy(), ds["cat"].to_numpy())))
assert_identical(ds.assign_coords(x=[0, 1]), Dataset.from_dataframe(actual))
# Check multiindex reordering
new_order = ["x", "y"]
# revert broadcasting fix above for 1d arrays
ds["cat"] = ("y", cat)
actual = ds.to_dataframe(dim_order=new_order)
assert expected.equals(actual)
# with the dimensions swapped, the rows are expected in y-major order
new_order = ["y", "x"]
exp_index = pd.MultiIndex.from_arrays(
[["a", "a", "b", "b", "c", "c"], [0, 1, 0, 1, 0, 1]], names=["y", "x"]
)
expected = pd.DataFrame(
{
"w": w.transpose().reshape(-1),
"cat": pd.Categorical(["a", "a", "a", "a", "c", "c"]),
},
index=exp_index,
)
actual = ds.to_dataframe(dim_order=new_order)
assert expected.equals(actual)
# dim_order that leaves out a dimension
invalid_order = ["x"]
with pytest.raises(
ValueError, match="does not match the set of dimensions of this"
):
ds.to_dataframe(dim_order=invalid_order)
# dim_order that names a dimension the dataset does not have
invalid_order = ["x", "z"]
with pytest.raises(
ValueError, match="does not match the set of dimensions of this"
):
ds.to_dataframe(dim_order=invalid_order)
# check pathological cases
df = pd.DataFrame([1])
actual_ds = Dataset.from_dataframe(df)
expected_ds = Dataset({0: ("index", [1])}, {"index": [0]})
assert_identical(expected_ds, actual_ds)
df = pd.DataFrame()
actual_ds = Dataset.from_dataframe(df)
expected_ds = Dataset(coords={"index": []})
assert_identical(expected_ds, actual_ds)
# GH697
df = pd.DataFrame({"A": []})
actual_ds = Dataset.from_dataframe(df)
expected_ds = Dataset({"A": DataArray([], dims=("index",))}, {"index": []})
assert_identical(expected_ds, actual_ds)
# regression test for GH278
# use int64 to ensure consistent results for the pandas .equals method
# on windows (which requires the same dtype)
ds = Dataset({"x": pd.Index(["bar"]), "a": ("y", np.array([1], "int64"))}).isel(
x=0
)
# use .loc to ensure consistent results on Python 3
actual = ds.to_dataframe().loc[:, ["a", "x"]]
expected = pd.DataFrame(
[[1, "bar"]], index=pd.Index([0], name="y"), columns=["a", "x"]
)
assert expected.equals(actual), (expected, actual)
# a dataset holding only the two index coordinates: the expected frame has
# a MultiIndex and no columns
ds = Dataset({"x": np.array([0], "int64"), "y": np.array([1], "int64")})
actual = ds.to_dataframe()
idx = pd.MultiIndex.from_arrays([[0], [1]], names=["x", "y"])
expected = pd.DataFrame([[]], index=idx)
assert expected.equals(actual), (expected, actual)
def test_from_dataframe_categorical_index(self) -> None:
    """Categorical index columns convert to coordinates of the asserted lengths."""
    categories = ["foo", "bar", "baz", "qux", "quux", "corge"]
    cat_dtype = pd.CategoricalDtype(categories=categories)
    first = pd.Series(["foo", "bar", "foo"], dtype=cat_dtype)
    second = pd.Series(["bar", "bar", "baz"], dtype=cat_dtype)
    df = pd.DataFrame({"i1": first, "i2": second, "values": [1, 2, 3]})

    # single categorical index
    single = df.set_index("i1").to_xarray()
    assert len(single["i1"]) == 3

    # two categorical index levels
    double = df.set_index(["i1", "i2"]).to_xarray()
    assert len(double["i1"]) == 2
    assert len(double["i2"]) == 2
def test_from_dataframe_categorical_index_string_categories(self) -> None:
    """A categorical index with ``"string"``-dtype categories becomes object dtype."""
    codes = np.array([1, 1, 0, 2], dtype=np.int64)
    string_categories = pd.Index(["foo", "bar", "baz"], dtype="string")
    categorical = pd.Categorical.from_codes(
        codes,  # type: ignore[arg-type]
        categories=string_categories,
    )
    series = pd.Series(1, index=pd.CategoricalIndex(categorical))

    converted = series.to_xarray()

    assert converted.coords.dtypes["index"] == np.dtype("O")
@requires_sparse
def test_from_dataframe_sparse(self) -> None:
    """``sparse=True`` gives ``sparse.COO`` data equal to the dense result."""
    import sparse

    df_base = pd.DataFrame(
        {"x": range(10), "y": list("abcdefghij"), "z": np.arange(0, 100, 10)}
    )

    # single index
    sparse_ds = Dataset.from_dataframe(df_base.set_index("x"), sparse=True)
    dense_ds = Dataset.from_dataframe(df_base.set_index("x"), sparse=False)
    for name in ("y", "z"):
        assert isinstance(sparse_ds[name].data, sparse.COO)
    for name in ("y", "z"):
        sparse_ds[name].data = sparse_ds[name].data.todense()
    assert_identical(dense_ds, sparse_ds)

    # two-level index
    sparse_ds = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=True)
    dense_ds = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=False)
    assert isinstance(sparse_ds["z"].data, sparse.COO)
    sparse_ds["z"].data = sparse_ds["z"].data.todense()
    assert_identical(dense_ds, sparse_ds)
def test_to_and_from_empty_dataframe(self) -> None:
    """GH697: a DataFrame with one empty column round-trips."""
    empty_frame = pd.DataFrame({"foo": []})

    ds = Dataset.from_dataframe(empty_frame)
    assert len(ds["foo"]) == 0

    back = ds.to_dataframe()
    assert len(back) == 0
    assert empty_frame.equals(back)
def test_from_dataframe_multiindex(self) -> None:
    """A MultiIndex DataFrame unstacks into one dimension per level."""
    levels = pd.MultiIndex.from_product([["a", "b"], [1, 2, 3]], names=["x", "y"])
    full = pd.DataFrame({"z": np.arange(6)}, index=levels)
    grid_coords = {"x": ["a", "b"], "y": [1, 2, 3]}
    wanted = Dataset(
        {"z": (("x", "y"), [[0, 1, 2], [3, 4, 5]])},
        coords=grid_coords,
    )
    assert_identical(Dataset.from_dataframe(full), wanted)

    # shuffled rows give the same dataset
    shuffled = full.iloc[[3, 2, 1, 0, 4, 5], :]
    assert_identical(Dataset.from_dataframe(shuffled), wanted)

    # rows that are absent become NaN
    partial = full.iloc[:4, :]
    wanted_partial = Dataset(
        {"z": (("x", "y"), [[0, 1, 2], [3, np.nan, np.nan]])},
        coords={"x": ["a", "b"], "y": [1, 2, 3]},
    )
    assert_identical(Dataset.from_dataframe(partial), wanted_partial)

    # repeated index entries are rejected
    repeated = full.iloc[[0, 0], :]
    with pytest.raises(ValueError, match=r"non-unique MultiIndex"):
        Dataset.from_dataframe(repeated)
def test_from_dataframe_unsorted_levels(self) -> None:
    """Regression test for GH-4186: level order is kept as given."""
    unsorted_index = pd.MultiIndex(
        levels=[["b", "a"], ["foo"]],
        codes=[[0, 1], [0, 0]],
        names=["lev1", "lev2"],
    )
    df = pd.DataFrame({"c1": [0, 2], "c2": [1, 3]}, index=unsorted_index)

    dims = ("lev1", "lev2")
    wanted = Dataset(
        {"c1": (dims, [[0], [2]]), "c2": (dims, [[1], [3]])},
        coords={"lev1": ["b", "a"], "lev2": ["foo"]},
    )

    assert_identical(Dataset.from_dataframe(df), wanted)
def test_from_dataframe_non_unique_columns(self) -> None:
    """Regression test for GH449: duplicate column names are rejected."""
    frame = pd.DataFrame(np.zeros((2, 2)))
    duplicate_names = ["foo", "foo"]
    frame.columns = duplicate_names  # type: ignore[assignment]
    with pytest.raises(ValueError, match=r"non-unique columns"):
        Dataset.from_dataframe(frame)
def test_convert_dataframe_with_many_types_and_multiindex(self) -> None:
    """Regression test for GH737: mixed column dtypes under a MultiIndex."""
    columns = {
        "a": list("abc"),
        "b": list(range(1, 4)),
        "c": np.arange(3, 6).astype("u1"),
        "d": np.arange(4.0, 7.0, dtype="float64"),
        "e": [True, False, True],
        "f": pd.Categorical(list("abc")),
        "g": pd.date_range("20130101", periods=3),
        "h": pd.date_range("20130101", periods=3, tz="America/New_York"),
    }
    df = pd.DataFrame(columns)
    df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"])

    as_dataset = Dataset.from_dataframe(df)
    roundtripped = as_dataset.to_dataframe()

    # we can't do perfectly, but we should be at least as faithful as
    # np.asarray
    wanted = df.apply(np.asarray)
    assert roundtripped.equals(wanted)
@pytest.mark.parametrize("encoding", [True, False])
@pytest.mark.parametrize("data", [True, "list", "array"])
def test_to_and_from_dict(
self, encoding: bool, data: bool | Literal["list", "array"]
) -> None:
# Checks Dataset.to_dict / Dataset.from_dict for every combination of the
# `data` and `encoding` parameters: the exact dict produced, the round trip,
# the data=False form, and from_dict on several hand-written partial dicts.
# <xarray.Dataset>
# Dimensions: (t: 10)
# Coordinates:
# * t (t) <U1 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
# Data variables:
# a (t) float64 0.6916 -1.056 -1.163 0.9792 -0.7865 ...
# b (t) float64 1.32 0.1954 1.91 1.39 0.519 -0.2772 ...
x = np.random.randn(10)
y = np.random.randn(10)
t = list("abcdefghij")
ds = Dataset({"a": ("t", x), "b": ("t", y), "t": ("t", t)})
expected: dict[str, dict[str, Any]] = {
"coords": {"t": {"dims": ("t",), "data": t, "attrs": {}}},
"attrs": {},
"dims": {"t": 10},
"data_vars": {
"a": {"dims": ("t",), "data": x.tolist(), "attrs": {}},
"b": {"dims": ("t",), "data": y.tolist(), "attrs": {}},
},
}
# with encoding=True the expected dict gains an "encoding" entry at the top
# level and on every variable
if encoding:
ds.t.encoding.update({"foo": "bar"})
expected["encoding"] = {}
expected["coords"]["t"]["encoding"] = ds.t.encoding
for vvs in ["a", "b"]:
expected["data_vars"][vvs]["encoding"] = {}
actual = ds.to_dict(data=data, encoding=encoding)
# check that they are identical
np.testing.assert_equal(expected, actual)
# check roundtrip
ds_rt = Dataset.from_dict(actual)
assert_identical(ds, ds_rt)
if encoding:
assert set(ds_rt.variables) == set(ds.variables)
for vv in ds.variables:
np.testing.assert_equal(ds_rt[vv].encoding, ds[vv].encoding)
# check the data=False option
# NOTE: dict.copy() is shallow, so the `del` statements below also remove the
# "data" keys from the nested dicts of `expected`; `expected` is not read
# again after this point.
expected_no_data = expected.copy()
del expected_no_data["coords"]["t"]["data"]
del expected_no_data["data_vars"]["a"]["data"]
del expected_no_data["data_vars"]["b"]["data"]
# the dtype string of the unicode coordinate depends on the platform byte order
endiantype = "<U1" if sys.byteorder == "little" else ">U1"
expected_no_data["coords"]["t"].update({"dtype": endiantype, "shape": (10,)})
expected_no_data["data_vars"]["a"].update({"dtype": "float64", "shape": (10,)})
expected_no_data["data_vars"]["b"].update({"dtype": "float64", "shape": (10,)})
actual_no_data = ds.to_dict(data=False, encoding=encoding)
assert expected_no_data == actual_no_data
# verify coords are included roundtrip
expected_ds = ds.set_coords("b")
actual2 = Dataset.from_dict(expected_ds.to_dict(data=data, encoding=encoding))
assert_identical(expected_ds, actual2)
if encoding:
assert set(expected_ds.variables) == set(actual2.variables)
for vv in ds.variables:
np.testing.assert_equal(expected_ds[vv].encoding, actual2[vv].encoding)
# test some incomplete dicts:
# this one has no attrs field, the dims are strings, and x, y are
# np.arrays
d = {
"coords": {"t": {"dims": "t", "data": t}},
"dims": "t",
"data_vars": {"a": {"dims": "t", "data": x}, "b": {"dims": "t", "data": y}},
}
assert_identical(ds, Dataset.from_dict(d))
# this is kind of a flattened version with no coords, or data_vars
d = {
"a": {"dims": "t", "data": x},
"t": {"data": t, "dims": "t"},
"b": {"dims": "t", "data": y},
}
assert_identical(ds, Dataset.from_dict(d))
# this one is missing some necessary information
d = {
"a": {"data": x},
"t": {"data": t, "dims": "t"},
"b": {"dims": "t", "data": y},
}
with pytest.raises(
ValueError, match=r"cannot convert dict without the key 'dims'"
):
Dataset.from_dict(d)
def test_to_and_from_dict_with_time_dim(self) -> None:
    """A dataset with a datetime dimension survives ``to_dict``/``from_dict``."""
    a_values = np.random.randn(10, 3)
    b_values = np.random.randn(10, 3)
    days = pd.date_range("20130101", periods=10)
    latitudes = [77.7, 83.2, 76]
    dims = ["t", "lat"]
    ds = Dataset(
        {
            "a": (dims, a_values),
            "b": (dims, b_values),
            "t": ("t", days),
            "lat": ("lat", latitudes),
        }
    )

    as_dict = ds.to_dict()
    assert_identical(ds, Dataset.from_dict(as_dict))
@pytest.mark.parametrize("data", [True, "list", "array"])
def test_to_and_from_dict_with_nan_nat(
    self, data: bool | Literal["list", "array"]
) -> None:
    """Missing float and datetime entries survive the dict round trip."""
    a_values = np.random.randn(10, 3)
    b_values = np.random.randn(10, 3)
    b_values[2] = np.nan
    days = pd.Series(pd.date_range("20130101", periods=10))
    days[2] = np.nan
    latitudes = [77.7, 83.2, 76]
    dims = ["t", "lat"]
    ds = Dataset(
        {
            "a": (dims, a_values),
            "b": (dims, b_values),
            "t": ("t", days),
            "lat": ("lat", latitudes),
        }
    )

    as_dict = ds.to_dict(data=data)
    assert_identical(ds, Dataset.from_dict(as_dict))
def test_to_dict_with_numpy_attrs(self) -> None:
    """numpy-valued attrs come out of ``to_dict`` as plain Python values."""
    # this doesn't need to roundtrip
    a_values = np.random.randn(10)
    b_values = np.random.randn(10)
    labels = list("abcdefghij")

    created = np.float64(1998)
    location = np.array([37, -110.1, 100])
    attrs = {"created": created, "coords": location, "maintainer": "bar"}
    ds = Dataset(
        {
            "a": ("t", a_values, attrs),
            "b": ("t", b_values, attrs),
            "t": ("t", labels),
        }
    )

    wanted_attrs = {
        "created": created.item(),
        "coords": location.tolist(),
        "maintainer": "bar",
    }
    as_dict = ds.to_dict()

    # check that they are identical
    assert wanted_attrs == as_dict["data_vars"]["a"]["attrs"]
def test_pickle(self) -> None:
    """A dataset is identical after a pickle round trip."""
    data = create_test_data()
    payload = pickle.dumps(data)
    restored = pickle.loads(payload)
    assert_identical(data, restored)
    # regression test for #167:
    assert data.sizes == restored.sizes
def test_lazy_load(self) -> None:
    """Reading values from an inaccessible store raises; indexing alone does not."""
    store = InaccessibleVariableDataStore()
    create_test_data().dump_to_store(store)

    for decode_cf in (True, False):
        ds = open_dataset(store, decode_cf=decode_cf)

        with pytest.raises(UnexpectedDataAccess):
            ds.load()
        with pytest.raises(UnexpectedDataAccess):
            _ = ds["var1"].values

        # these should not raise UnexpectedDataAccess:
        ds.isel(time=10)
        first_ten = ds.isel(time=slice(10), dim1=[0])
        first_ten.isel(dim1=0, dim2=-1)
def test_lazy_load_duck_array(self) -> None:
# Opens the test data from an AccessibleAsDuckArrayDataStore and checks that
# `.values` raises UnexpectedDataAccess while `.data`, `isel` and `repr` do
# not, and that loaded data is still a DuckArrayWrapper.
store = AccessibleAsDuckArrayDataStore()
create_test_data().dump_to_store(store)
for decode_cf in [True, False]:
ds = open_dataset(store, decode_cf=decode_cf)
with pytest.raises(UnexpectedDataAccess):
_ = ds["var1"].values
# these should not raise UnexpectedDataAccess:
_ = ds.var1.data
ds.isel(time=10)
ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1)
repr(ds)
# preserve the duck array type and don't cast to array
assert isinstance(ds["var1"].load().data, DuckArrayWrapper)
assert isinstance(
ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper
)
# NOTE(review): indentation is not visible in this view; presumably this close()
# belongs to the loop body so each opened dataset is closed - confirm.
ds.close()
def test_dropna(self) -> None:
    """Exercise ``Dataset.dropna`` with the ``subset``, ``how`` and ``thresh`` options.

    The fixture has a 4x4 variable ``foo`` over ``(a, b)`` with NaN at
    ``foo[0, 0]`` and ``foo[2, 0]``, and a length-4 variable ``bar`` over
    ``b`` with NaN in its last element.
    """
    x = np.random.randn(4, 4)
    x[::2, 0] = np.nan
    y = np.random.randn(4)
    y[-1] = np.nan
    ds = Dataset({"foo": (("a", "b"), x), "bar": (("b", y))})

    # default behaviour: drop every label that has any NaN
    expected = ds.isel(a=slice(1, None, 2))
    actual = ds.dropna("a")
    assert_identical(actual, expected)

    expected = ds.isel(b=slice(1, 3))
    actual = ds.dropna("b")
    assert_identical(actual, expected)

    actual = ds.dropna("b", subset=["foo", "bar"])
    assert_identical(actual, expected)

    # restricting to a subset of the variables
    expected = ds.isel(b=slice(1, None))
    actual = ds.dropna("b", subset=["foo"])
    assert_identical(actual, expected)

    expected = ds.isel(b=slice(3))
    actual = ds.dropna("b", subset=["bar"])
    assert_identical(actual, expected)

    actual = ds.dropna("a", subset=[])
    assert_identical(actual, ds)

    actual = ds.dropna("a", subset=["bar"])
    assert_identical(actual, ds)

    # how="all"
    actual = ds.dropna("a", how="all")
    assert_identical(actual, ds)

    actual = ds.dropna("b", how="all", subset=["bar"])
    expected = ds.isel(b=[0, 1, 2])
    assert_identical(actual, expected)

    # thresh
    actual = ds.dropna("b", thresh=1, subset=["bar"])
    assert_identical(actual, expected)

    actual = ds.dropna("b", thresh=2)
    assert_identical(actual, ds)

    actual = ds.dropna("b", thresh=4)
    expected = ds.isel(b=[1, 2, 3])
    assert_identical(actual, expected)

    # Rows 0 and 2 of "foo" still hold three non-NaN values each, so a
    # threshold of 3 along "a" is expected to keep the dataset unchanged.
    actual = ds.dropna("a", thresh=3)
    assert_identical(actual, ds)

    # invalid arguments
    with pytest.raises(
        ValueError,
        match=r"'foo' not found in data dimensions \('a', 'b'\)",
    ):
        ds.dropna("foo")
    with pytest.raises(ValueError, match=r"invalid how"):
        ds.dropna("a", how="somehow")  # type: ignore[arg-type]
    with pytest.raises(TypeError, match=r"must specify how or thresh"):
        ds.dropna("a", how=None)  # type: ignore[arg-type]
def test_fillna(self) -> None:
    """Exercise ``fillna`` with scalar, mapping, array and dataset fill values."""
    ds = Dataset({"a": ("x", [np.nan, 1, np.nan, 3])}, {"x": [0, 1, 2, 3]})

    # fill with -1
    expected = Dataset({"a": ("x", [-1, 1, -1, 3])}, {"x": [0, 1, 2, 3]})
    other = Dataset({"a": -1})
    for fill in (-1, {"a": -1}, other, {"a": other.a}):
        assert_identical(expected, ds.fillna(fill))

    # fill with range(4)
    b = DataArray(range(4), coords=[("x", range(4))])
    expected = b.rename("a").to_dataset()
    for fill in (b, expected, np.arange(4), b[:3]):
        assert_identical(expected, ds.fillna(fill))

    # okay to only include some data variables
    ds["b"] = np.nan
    partially_filled = ds.fillna({"a": -1})
    expected = Dataset(
        {"a": ("x", [-1, 1, -1, 3]), "b": np.nan}, {"x": [0, 1, 2, 3]}
    )
    assert_identical(expected, partially_filled)

    # but new data variables is not okay
    with pytest.raises(ValueError, match=r"must be contained"):
        ds.fillna({"x": 0})

    # empty argument should be OK
    assert_identical(ds, ds.fillna({}))

    with_coord = ds.fillna(Dataset(coords={"c": 42}))
    assert_identical(ds.assign_coords(c=42), with_coord)

    # name and attrs of a DataArray are kept
    da = DataArray(range(5), name="a", attrs={"attr": "da"})
    filled_da = da.fillna(1)
    assert filled_da.name == "a"
    assert filled_da.attrs == da.attrs

    # dataset-level and variable-level attrs are kept
    ds = Dataset({"a": da}, attrs={"attr": "ds"})
    filled_ds = ds.fillna({"a": 1})
    assert filled_ds.attrs == ds.attrs
    assert filled_ds.a.name == "a"
    assert filled_ds.a.attrs == ds.a.attrs
@pytest.mark.parametrize(
    "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs]
)
def test_propagate_attrs(self, func) -> None:
    """Check attrs propagation through ``func`` under each ``keep_attrs`` setting.

    ``func`` is one of the parametrized callables applied to a dataset; the
    test compares the dataset-level and variable-level attrs of the result
    against the input.
    """
    da = DataArray(range(5), name="a", attrs={"attr": "da"})
    ds = Dataset({"a": da}, attrs={"attr": "ds"})

    # test defaults
    assert func(ds).attrs == ds.attrs

    # attrs are dropped when keep_attrs is disabled
    with set_options(keep_attrs=False):
        assert func(ds).attrs != ds.attrs
        assert func(ds).a.attrs != ds.a.attrs

    # attrs are kept when keep_attrs is enabled
    with set_options(keep_attrs=True):
        assert func(ds).attrs == ds.attrs
        assert func(ds).a.attrs == ds.a.attrs
def test_where(self) -> None:
    """Exercise ``where`` with dataset, array, scalar and callable conditions."""
    ds = Dataset({"a": ("x", range(5))})
    expected1 = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})

    # the same mask expressed as a Dataset, a DataArray and a numpy array
    for cond in (ds > 1, ds.a > 1, ds.a.values > 1):
        assert_identical(expected1, ds.where(cond))

    # scalar conditions
    assert_identical(ds, ds.where(True))

    all_nan = ds.copy(deep=True)
    all_nan["a"].values = np.array([np.nan] * 5)
    assert_identical(all_nan, ds.where(False))

    # 2d
    ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
    expected_2d = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])})
    assert_identical(expected_2d, ds.where(ds > 0))

    # attrs
    da = DataArray(range(5), name="a", attrs={"attr": "da"})
    masked_da = da.where(da.values > 1)
    assert masked_da.name == "a"
    assert masked_da.attrs == da.attrs

    ds = Dataset({"a": da}, attrs={"attr": "ds"})
    masked_ds = ds.where(ds > 0)
    assert masked_ds.attrs == ds.attrs
    assert masked_ds.a.name == "a"
    assert masked_ds.a.attrs == ds.a.attrs

    # lambda
    ds = Dataset({"a": ("x", range(5))})
    expected_lambda = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})
    assert_identical(expected_lambda, ds.where(lambda x: x > 1))
def test_where_other(self) -> None:
    """Exercise ``where`` with an explicit replacement value ``other``."""
    ds = Dataset({"a": ("x", range(5))}, {"x": range(5)})
    expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)})

    replaced = ds.where(ds > 1, -1)
    assert_equal(expected, replaced)
    assert replaced.a.dtype == int

    # callable condition
    assert_equal(expected, ds.where(lambda x: x > 1, -1))

    # with drop=True the result should match the undropped result
    # left-aligned onto the dropped one
    dropped = ds.where(ds > 1, other=-1, drop=True)
    expected_nodrop = ds.where(ds > 1, -1)
    _, aligned_expected = xr.align(dropped, expected_nodrop, join="left")
    assert_equal(dropped, aligned_expected)
    assert dropped.a.dtype == int

    # `other` with a shorter x dimension is rejected
    with pytest.raises(ValueError, match=r"cannot align .* are not equal"):
        ds.where(ds > 1, ds.isel(x=slice(3)))

    # `other` with an extra data variable is rejected
    with pytest.raises(ValueError, match=r"exact match required"):
        ds.where(ds > 1, ds.assign(b=2))
def test_where_drop(self) -> None:
    """Exercise ``where(..., drop=True)`` on 1d, 2d and mixed-dimension inputs."""
    # if drop=True

    # 1d
    # data array case
    array = DataArray(range(5), coords=[range(5)], dims=["x"])
    expected_da = DataArray(range(5)[2:], coords=[range(5)[2:]], dims=["x"])
    assert_identical(expected_da, array.where(array > 1, drop=True))

    # dataset case
    ds = Dataset({"a": array})
    expected_ds = Dataset({"a": expected_da})
    assert_identical(expected_ds, ds.where(ds > 1, drop=True))
    assert_identical(expected_ds, ds.where(ds.a > 1, drop=True))

    # a bare numpy condition is rejected when dropping
    with pytest.raises(TypeError, match=r"must be a"):
        ds.where(np.arange(5) > 1, drop=True)

    # 1d with odd coordinates
    array = DataArray(
        np.array([2, 7, 1, 8, 3]), coords=[np.array([3, 1, 4, 5, 9])], dims=["x"]
    )
    expected_odd = DataArray(
        np.array([7, 8, 3]), coords=[np.array([1, 5, 9])], dims=["x"]
    )
    assert_identical(expected_odd, array.where(array > 2, drop=True))

    # 1d multiple variables
    ds = Dataset({"a": (("x"), [0, 1, 2, 3]), "b": (("x"), [4, 5, 6, 7])})
    expected_multi = Dataset(
        {"a": (("x"), [np.nan, 1, 2, 3]), "b": (("x"), [4, 5, 6, np.nan])}
    )
    assert_identical(expected_multi, ds.where((ds > 0) & (ds < 7), drop=True))

    # 2d
    ds = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
    expected_2d = Dataset({"a": (("x", "y"), [[np.nan, 1], [2, 3]])})
    assert_identical(expected_2d, ds.where(ds > 0, drop=True))

    # 2d with odd coordinates
    ds = Dataset(
        {"a": (("x", "y"), [[0, 1], [2, 3]])},
        coords={
            "x": [4, 3],
            "y": [1, 2],
            "z": (["x", "y"], [[np.e, np.pi], [np.pi * np.e, np.pi * 3]]),
        },
    )
    expected_2d_odd = Dataset(
        {"a": (("x", "y"), [[3]])},
        coords={"x": [3], "y": [2], "z": (["x", "y"], [[np.pi * 3]])},
    )
    assert_identical(expected_2d_odd, ds.where(ds > 2, drop=True))

    # 2d multiple variables
    ds = Dataset(
        {"a": (("x", "y"), [[0, 1], [2, 3]]), "b": (("x", "y"), [[4, 5], [6, 7]])}
    )
    expected_2d_multi = Dataset(
        {
            "a": (("x", "y"), [[np.nan, 1], [2, 3]]),
            "b": (("x", "y"), [[4, 5], [6, 7]]),
        }
    )
    assert_identical(expected_2d_multi, ds.where(ds > 0, drop=True))

    # mixed dimensions: PR#6690, Issue#6227
    ds = xr.Dataset(
        {
            "a": ("x", [1, 2, 3]),
            "b": ("y", [2, 3, 4]),
            "c": (("x", "y"), np.arange(9).reshape((3, 3))),
        }
    )
    expected_mixed = xr.Dataset(
        {
            "a": ("x", [np.nan, 3]),
            "b": ("y", [np.nan, 3, 4]),
            "c": (("x", "y"), np.arange(3.0, 9.0).reshape((2, 3))),
        }
    )
    dropped_mixed = ds.where(ds > 2, drop=True)
    assert dropped_mixed.sizes["x"] == 2
    assert_identical(expected_mixed, dropped_mixed)
def test_where_drop_empty(self) -> None:
    """An all-False mask with ``drop=True`` leaves a zero-length dimension."""
    # regression test for GH1341
    dims = ["nCells", "nVertLevels"]
    array = DataArray(np.random.rand(100, 10), dims=dims)
    mask = DataArray(np.zeros((100,), dtype="bool"), dims="nCells")

    result = array.where(mask, drop=True)

    assert_identical(DataArray(np.zeros((0, 10)), dims=dims), result)
def test_where_drop_no_indexes(self) -> None:
    """``where(..., drop=True)`` works on a dimension without a coordinate."""
    ds = Dataset({"foo": ("x", [0.0, 1.0])})
    kept = ds.where(ds == 1, drop=True)
    assert_identical(Dataset({"foo": ("x", [1.0])}), kept)
def test_reduce(self) -> None:
    """Check reductions over all, some and no dimensions of the test dataset."""
    data = create_test_data()

    assert len(data.mean().coords) == 0

    per_variable_max = Dataset(
        {name: var.max() for name, var in data.data_vars.items()}
    )
    assert_equal(per_variable_max, data.max())

    # a one-element list and a bare string name the same dimension
    assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1"))

    dim_cases = [
        ("dim2", ["dim3", "time", "dim1"]),
        (["dim2", "time"], ["dim3", "dim1"]),
        (("dim2", "time"), ["dim3", "dim1"]),
        ((), ["dim2", "dim3", "time", "dim1"]),
    ]
    for reduct, expected_dims in dim_cases:
        remaining_dims = list(data.min(dim=reduct).dims)
        assert remaining_dims == expected_dims

    assert_equal(data.mean(dim=[]), data)

    with pytest.raises(ValueError):
        data.mean(axis=0)
def test_reduce_coords(self) -> None:
    """Scalar coordinates are kept when reducing a dataset."""
    # regression test for GH1470
    data = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"b": 4})
    expected = xr.Dataset({"a": 2}, coords={"b": 4})

    assert_identical(data.mean("x"), expected)

    # should be consistent
    via_dataarray = data["a"].mean("x").to_dataset()
    assert_identical(via_dataarray, expected)
def test_mean_uint_dtype(self) -> None:
    """Dataset ``mean(skipna=True)`` agrees with per-variable means for uint data."""
    uint_values = np.arange(6).reshape(3, 2).astype("uint")
    float_values = np.array([0.1, 0.2, np.nan])
    data = xr.Dataset({"a": (("x", "y"), uint_values), "b": (("x",), float_values)})

    result = data.mean("x", skipna=True)

    expected = xr.Dataset(
        {"a": data["a"].mean("x"), "b": data["b"].mean("x", skipna=True)}
    )
    assert_identical(result, expected)
def test_reduce_bad_dim(self) -> None:
    """Reducing over a dimension that does not exist raises ``ValueError``."""
    data = create_test_data()
    message = re.escape("Dimension(s) 'bad_dim' do not exist")
    with pytest.raises(ValueError, match=message):
        data.mean(dim="bad_dim")
def test_reduce_cumsum(self) -> None:
    """Check ``cumsum`` along one dimension and along all dimensions."""
    data = xr.Dataset(
        {"a": 1, "b": ("x", [1, 2]), "c": (("x", "y"), [[np.nan, 3], [0, 4]])}
    )

    # along "y"
    assert_identical(data.fillna(0), data.cumsum("y"))

    # along every dimension
    cumulative_all = xr.Dataset(
        {"a": 1, "b": ("x", [1, 3]), "c": (("x", "y"), [[0, 3], [0, 7]])}
    )
    assert_identical(cumulative_all, data.cumsum())
@pytest.mark.parametrize(
    "reduct, expected",
    [
        ("dim1", ["dim2", "dim3", "time", "dim1"]),
        ("dim2", ["dim3", "time", "dim1", "dim2"]),
        ("dim3", ["dim2", "time", "dim1", "dim3"]),
        ("time", ["dim2", "dim3", "dim1"]),
    ],
)
@pytest.mark.parametrize("func", ["cumsum", "cumprod"])
def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None:
    """Check the dimension order produced by ``cumsum``/``cumprod`` along ``reduct``."""
    data = create_test_data()
    cumulative = getattr(data, func)

    message = re.escape("Dimension(s) 'bad_dim' do not exist")
    with pytest.raises(ValueError, match=message):
        cumulative(dim="bad_dim")

    # ensure dimensions are correct
    result_dims = cumulative(dim=reduct).dims
    assert list(result_dims) == expected
def test_reduce_non_numeric(self) -> None:
    """Variables that are not numeric are left out of ``mean`` results."""
    data1 = create_test_data(seed=44, use_extension_array=True)
    data2 = create_test_data(seed=44)

    # add string-typed variables to data1 only
    add_vars = {"var5": ["dim1", "dim2"], "var6": ["dim1"]}
    for name, dims in sorted(add_vars.items()):
        shape = tuple(data1.sizes[d] for d in dims)
        strings = np.random.randint(0, 100, size=shape).astype(np.str_)
        data1[name] = (dims, strings, {"foo": "variable"})

    # var4 is extension array categorical and should be dropped
    assert "var4" not in data1.mean()
    assert "var5" not in data1.mean()
    assert "var6" not in data1.mean()

    assert_equal(data1.mean(), data2.mean())
    assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1"))

    assert "var5" not in data1.mean(dim="dim2")
    assert "var6" in data1.mean(dim="dim2")
@pytest.mark.filterwarnings(
    "ignore:Once the behaviour of DataArray:DeprecationWarning"
)
def test_reduce_strings(self) -> None:
    """Check min/max/arg*/idx* reductions on string-valued variables."""
    ds = Dataset({"x": ("y", ["a", "b"])})
    ds.coords["y"] = [-10, 10]

    # value reductions
    assert_identical(Dataset({"x": "a"}), ds.min())
    assert_identical(Dataset({"x": "b"}), ds.max())

    # positional reductions
    assert_identical(Dataset({"x": 0}), ds.argmin())
    assert_identical(Dataset({"x": 1}), ds.argmax())

    # label reductions
    assert_identical(Dataset({"x": -10}), ds.idxmin())
    assert_identical(Dataset({"x": 10}), ds.idxmax())

    # explicit bytes dtype
    bytes_ds = Dataset({"x": ("y", np.array(["a", "b"], "S1"))})
    assert_identical(Dataset({"x": b"a"}), bytes_ds.min())

    # explicit unicode dtype
    unicode_ds = Dataset({"x": ("y", np.array(["a", "b"], "U1"))})
    assert_identical(Dataset({"x": "a"}), unicode_ds.min())
def test_reduce_dtypes(self) -> None:
    """Check ``sum`` results for bool, uint16 and complex inputs."""
    # regression test for GH342
    bool_sum = Dataset({"x": True}).sum()
    assert_identical(Dataset({"x": 1}), bool_sum)

    # regression test for GH505
    uint16_sum = Dataset({"x": ("y", np.array([1, 2], "uint16"))}).sum()
    assert_identical(Dataset({"x": 3}), uint16_sum)

    complex_sum = Dataset({"x": ("y", [1, 1j])}).sum()
    assert_identical(Dataset({"x": 1 + 1j}), complex_sum)
def test_reduce_keep_attrs(self) -> None:
    """``mean`` drops attrs by default and keeps them with ``keep_attrs=True``."""
    data = create_test_data()
    attrs = {"attr1": "value1", "attr2": 2929}
    data.attrs = attrs

    # Test dropped attrs
    reduced = data.mean()
    assert reduced.attrs == {}
    for var in reduced.data_vars.values():
        assert var.attrs == {}

    # Test kept attrs
    reduced = data.mean(keep_attrs=True)
    assert reduced.attrs == attrs
    for name, var in reduced.data_vars.items():
        assert var.attrs == data[name].attrs
@pytest.mark.filterwarnings(
    "ignore:Once the behaviour of DataArray:DeprecationWarning"
)
def test_reduce_argmin(self) -> None:
    """``argmin`` gives the same scalar result with and without a dimension."""
    # regression test for #205
    ds = Dataset({"a": ("x", [0, 1])})
    expected = Dataset({"a": ([], 0)})

    assert_identical(expected, ds.argmin())
    assert_identical(expected, ds.argmin("x"))
def test_reduce_scalars(self) -> None:
    """``var`` handles scalar variables and variables lacking the reduced dim."""
    ds = Dataset({"x": ("a", [2, 2]), "y": 2, "z": ("b", [2])})

    # reduce over everything
    assert_identical(Dataset({"x": 0, "y": 0, "z": 0}), ds.var())

    # reduce over "a" only: "z" keeps its "b" dimension
    assert_identical(Dataset({"x": 0, "y": 0, "z": ("b", [0])}), ds.var("a"))
def test_reduce_only_one_axis(self) -> None:
    """``reduce`` works with a function that requires a single integer axis."""

    def mean_only_one_axis(x, axis):
        # accept integer axes only
        if isinstance(axis, integer_types):
            return x.mean(axis)
        raise TypeError("non-integer axis")

    ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])})

    reduced = ds.reduce(mean_only_one_axis, "y")
    assert_identical(Dataset({"a": ("x", [2])}), reduced)

    # without a dimension the function is called without ``axis``
    with pytest.raises(
        TypeError, match=r"missing 1 required positional argument: 'axis'"
    ):
        ds.reduce(mean_only_one_axis)
def test_reduce_no_axis(self) -> None:
    """``reduce`` works with a function that takes no ``axis`` argument."""

    def total_sum(x):
        flattened = x.flatten()
        return np.sum(flattened)

    ds = Dataset({"a": (["x", "y"], [[0, 1, 2, 3, 4]])})

    reduced = ds.reduce(total_sum)
    assert_identical(Dataset({"a": ((), 10)}), reduced)

    # naming a dimension passes ``axis``, which total_sum does not accept
    with pytest.raises(TypeError, match=r"unexpected keyword argument 'axis'"):
        ds.reduce(total_sum, dim="x")
def test_reduce_keepdims(self) -> None:
    """Check ``mean(keepdims=True)`` over all dimensions and over one dimension."""
    ds = Dataset(
        {"a": (["x", "y"], [[0, 1, 2, 3, 4]])},
        coords={
            "y": [0, 1, 2, 3, 4],
            "x": [0],
            "lat": (["x", "y"], [[0, 1, 2, 3, 4]]),
            "c": -999.0,
        },
    )

    # Shape should match behaviour of numpy reductions with keepdims=True
    # Coordinates involved in the reduction should be removed
    reduced_all = ds.mean(keepdims=True)
    mean_all = np.mean(ds.a, keepdims=True).data
    expected_all = Dataset({"a": (["x", "y"], mean_all)}, coords={"c": ds.c})
    assert_identical(expected_all, reduced_all)

    reduced_x = ds.mean("x", keepdims=True)
    mean_x = np.mean(ds.a, axis=0, keepdims=True).data
    expected_x = Dataset(
        {"a": (["x", "y"], mean_x)},
        coords={"y": ds.y, "c": ds.c},
    )
    assert_identical(expected_x, reduced_x)
@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
def test_quantile(self, q, skipna, compute_backend) -> None:
    """Dataset ``quantile`` must agree with the per-variable DataArray results."""
    ds = create_test_data(seed=123)
    ds.var1.data[0, 0] = np.nan

    for dim in [None, "dim1", ["dim1"]]:
        ds_quantile = ds.quantile(q, dim=dim, skipna=skipna)

        # a "quantile" dimension appears only for non-scalar q
        if not is_scalar(q):
            assert "quantile" in ds_quantile.dims
        else:
            assert "quantile" not in ds_quantile.dims

        for name, dar in ds.data_vars.items():
            assert name in ds_quantile
            per_variable = dar.quantile(q, dim=dim, skipna=skipna)
            assert_identical(ds_quantile[name], per_variable)

    # reducing over two dimensions removes both and leaves dim3
    reduced_dims = ["dim1", "dim2"]
    ds_quantile = ds.quantile(q, dim=reduced_dims, skipna=skipna)
    assert "dim3" in ds_quantile.dims
    assert all(d not in ds_quantile.dims for d in reduced_dims)
@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False])
def test_quantile_skipna(self, skipna, compute_backend) -> None:
    """``skipna`` decides whether a NaN in the data makes the quantile NaN."""
    q = 0.1
    dim = "time"

    ds = Dataset({"a": ([dim], np.arange(0, 11))})
    # masks the leading 0, leaving one NaN in the data
    ds = ds.where(ds >= 1)

    result = ds.quantile(q=q, dim=dim, skipna=skipna)

    if skipna:
        value = 1.9
    else:
        value = np.nan
    assert_identical(result, Dataset({"a": value}, coords={"quantile": q}))
@pytest.mark.parametrize("method", ["midpoint", "lower"])
def test_quantile_method(self, method) -> None:
    """The ``method`` keyword gives the same result on Dataset and DataArray."""
    ds = create_test_data(seed=123)
    q = [0.25, 0.5, 0.75]

    result = ds.quantile(q, method=method)

    for name in ("var1", "var2", "var3"):
        assert_identical(
            getattr(result, name), getattr(ds, name).quantile(q, method=method)
        )
@pytest.mark.parametrize("method", ["midpoint", "lower"])
def test_quantile_interpolation_deprecated(self, method) -> None:
    """``interpolation=`` warns once and cannot be combined with ``method=``."""
    ds = create_test_data(seed=123)
    q = [0.25, 0.5, 0.75]

    with warnings.catch_warnings(record=True) as caught:
        ds.quantile(q, interpolation=method)

        # ensure the warning is only raised once
        assert len(caught) == 1

    # passing both keywords is an error
    with warnings.catch_warnings(record=True), pytest.raises(
        TypeError, match="interpolation and method keywords"
    ):
        ds.quantile(q, method=method, interpolation=method)
@requires_bottleneck
def test_rank(self) -> None:
    """Check ``Dataset.rank`` against ``DataArray.rank`` and its error message."""
    ds = create_test_data(seed=1234)

    # only ds.var3 depends on dim3
    ranked = ds.rank("dim3")
    assert ["var3"] == list(ranked.data_vars)

    # same as dataarray version
    from_dataset = ranked.var3
    from_dataarray = ds.var3.rank("dim3")
    assert_equal(from_dataset, from_dataarray)

    # coordinates stick
    assert list(ranked.coords) == list(ds.coords)
    assert list(from_dataset.coords) == list(from_dataarray.coords)

    # invalid dim
    message = re.escape(
        "Dimension 'invalid_dim' not found in data dimensions ('dim3', 'dim1')"
    )
    with pytest.raises(ValueError, match=message):
        from_dataset.rank("invalid_dim")
def test_rank_use_bottleneck(self) -> None:
    """``rank`` raises ``RuntimeError`` when ``use_bottleneck`` is disabled."""
    ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])})
    with xr.set_options(use_bottleneck=False), pytest.raises(RuntimeError):
        ds.rank("x")
def test_count(self) -> None:
    """``count`` reports the number of non-NaN values in each variable."""
    ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan})
    counts = ds.count()
    assert_identical(Dataset({"x": 1, "y": 1, "z": 0}), counts)
def test_map(self) -> None:
    """Exercise ``Dataset.map`` with reductions, keyword arguments and attrs."""
    data = create_test_data()
    data.attrs["foo"] = "bar"

    assert_identical(data.map(np.mean), data.mean())

    # attrs kept at both the variable and the dataset level
    mapped_mean = data.map(lambda x: x.mean(keep_attrs=True), keep_attrs=True)
    assert_identical(data.mean(keep_attrs=True), mapped_mean)

    # identity mapping
    assert_identical(data.map(lambda x: x, keep_attrs=True), data.drop_vars("time"))

    def scale(x, multiple=1):
        return multiple * x

    # extra keyword arguments are forwarded to the function
    scaled = data.map(scale, multiple=2)
    assert_equal(scaled["var1"], 2 * data["var1"])
    assert_identical(scaled["numbers"], data["numbers"])

    as_arrays = data.map(np.asarray)
    without_time = data.drop_vars("time")  # time is not used on a data var
    assert_equal(without_time, as_arrays)
def test_apply_pending_deprecated_map(self) -> None:
    """``Dataset.apply`` still works but emits ``PendingDeprecationWarning``."""
    data = create_test_data()
    data.attrs["foo"] = "bar"

    with pytest.warns(PendingDeprecationWarning):
        applied = data.apply(np.mean)
        assert_identical(applied, data.mean())
def make_example_math_dataset(self):
    """Return a dataset with ``bar`` over x and ``foo`` over (x, y).

    ``foo[0, 0]`` is set to NaN; ``abc`` (over x) and ``y`` are coordinates.
    """
    ds = Dataset(
        {
            "bar": ("x", np.arange(100, 400, 100)),
            "foo": (("x", "y"), 1.0 * np.arange(12).reshape(3, 4)),
        },
        {"abc": ("x", ["a", "b", "c"]), "y": 10 * np.arange(4)},
    )
    ds["foo"][0, 0] = np.nan
    return ds
def test_dataset_number_math(self) -> None:
    """Unary plus and adding zero leave the dataset unchanged."""
    ds = self.make_example_math_dataset()

    # unary plus, then zero as a Python int and a 0-d array on either side
    for unchanged in (+ds, ds + 0, 0 + ds, ds + np.array(0), np.array(0) + ds):
        assert_identical(ds, unchanged)

    # in-place addition
    in_place = ds.copy(deep=True)
    in_place += 0
    assert_identical(ds, in_place)
def test_unary_ops(self) -> None:
    """Unary operations on a dataset match mapping them over its variables."""
    ds = self.make_example_math_dataset()

    assert_identical(ds.map(abs), abs(ds))
    assert_identical(ds.map(lambda x: x + 4), ds + 4)

    method_calls = [
        lambda x: x.isnull(),
        lambda x: x.round(),
        lambda x: x.astype(int),
    ]
    for func in method_calls:
        assert_identical(ds.map(func), func(ds))

    assert_identical(ds.isnull(), ~ds.notnull())

    # don't actually patch these methods in
    for missing in ("item", "searchsorted"):
        with pytest.raises(AttributeError):
            _ = getattr(ds, missing)
def test_dataset_array_math(self) -> None:
    """Arithmetic between a dataset and a DataArray, Variable or numpy array."""
    ds = self.make_example_math_dataset()
    foo = ds["foo"]
    bar = ds["bar"]

    # subtraction, with the operand on either side
    expected = ds.map(lambda x: x - ds["foo"])
    assert_identical(expected, ds - foo)
    assert_identical(expected, -foo + ds)
    assert_identical(expected, ds - foo.variable)
    assert_identical(expected, -foo.variable + ds)
    in_place = ds.copy(deep=True)
    in_place -= foo
    assert_identical(expected, in_place)

    # addition with a lower-dimensional array
    expected = ds.map(lambda x: x + ds["bar"])
    assert_identical(expected, ds + bar)
    in_place = ds.copy(deep=True)
    in_place += bar
    assert_identical(expected, in_place)

    # addition with a plain numpy array
    expected = Dataset({"bar": ds["bar"] + np.arange(3)})
    assert_identical(expected, ds[["bar"]] + np.arange(3))
    assert_identical(expected, np.arange(3) + ds[["bar"]])
def test_dataset_dataset_math(self) -> None:
    """Arithmetic between a dataset and another dataset or mapping."""
    ds = self.make_example_math_dataset()

    assert_identical(ds, ds + 0 * ds)
    assert_identical(ds, ds + {"foo": 0, "bar": 0})

    doubled = ds.map(lambda x: 2 * x)
    assert_identical(doubled, 2 * ds)
    assert_identical(doubled, ds + ds)
    assert_identical(doubled, ds + ds.data_vars)
    assert_identical(doubled, ds + dict(ds.data_vars))

    # in-place addition must keep the same object
    in_place = ds.copy(deep=True)
    id_before = id(in_place)
    in_place += ds
    assert_identical(doubled, in_place)
    assert id_before == id(in_place)

    assert_identical(ds == ds, ds.notnull())

    # operands that differ along y
    subsampled = ds.isel(y=slice(2))
    doubled_subsample = 2 * subsampled
    assert_identical(doubled_subsample, subsampled + ds)
    assert_identical(doubled_subsample, ds + subsampled)
def test_dataset_math_auto_align(self) -> None:
    """Dataset arithmetic between operands with different labels or variables."""
    ds = self.make_example_math_dataset()

    subset = ds.isel(y=[1, 3])
    assert_identical(2 * subset, ds + subset)

    # operands with no y labels in common
    disjoint_sum = ds.isel(y=slice(1)) + ds.isel(y=slice(1, None))
    assert_equal(disjoint_sum, 2 * ds.drop_sel(y=ds.y))

    # operand holding only one of the data variables
    bar_sum = ds + ds[["bar"]]
    assert_identical((2 * ds[["bar"]]).merge(ds.coords), bar_sum)

    # empty operands
    assert_identical(ds + Dataset(), ds.coords.to_dataset())
    assert_identical(Dataset() + Dataset(), Dataset())

    coords_only = Dataset(coords={"bar": 42})
    assert_identical(ds + coords_only, ds.coords.merge(coords_only))

    # maybe unary arithmetic with empty datasets should raise instead?
    assert_identical(Dataset() + 1, Dataset())

    # in-place addition
    in_place = ds.copy(deep=True)
    other = ds.isel(y=slice(2))
    in_place += other
    assert_identical(ds + other.reindex_like(ds), in_place)
def test_dataset_math_errors(self) -> None:
    """Invalid in-place arithmetic raises and leaves the dataset untouched."""
    ds = self.make_example_math_dataset()

    # a dataset cannot be added in place to an array or variable
    with pytest.raises(TypeError):
        ds["foo"] += ds
    with pytest.raises(TypeError):
        ds["foo"].variable += ds

    # in-place arithmetic with a dataset holding only "bar"
    with pytest.raises(ValueError, match=r"must have the same"):
        ds += ds[["bar"]]

    # verify we can rollback in-place operations if something goes wrong
    # nb. inplace datetime64 math actually will work with an integer array
    # but not floats thanks to numpy's inconsistent handling
    other = DataArray(np.datetime64("2000-01-01"), coords={"c": 2})
    target = ds.copy(deep=True)
    with pytest.raises(TypeError):
        target += other
    assert_identical(target, ds)
def test_dataset_transpose(self) -> None:
    """Exercise ``transpose`` with no args, explicit order, ellipsis and missing dims."""
    ds = Dataset(
        {
            "a": (("x", "y"), np.random.randn(3, 4)),
            "b": (("y", "x"), np.random.randn(4, 3)),
        },
        coords={
            "x": range(3),
            "y": range(4),
            "xy": (("x", "y"), np.random.randn(3, 4)),
        },
    )

    # no arguments: every variable is reversed
    reversed_expected = Dataset(
        {"a": (("y", "x"), ds.a.values.T), "b": (("x", "y"), ds.b.values.T)},
        coords={
            "x": ds.x.values,
            "y": ds.y.values,
            "xy": (("y", "x"), ds.xy.values.T),
        },
    )
    assert_identical(reversed_expected, ds.transpose())

    # a lone ellipsis changes nothing
    assert_identical(ds, ds.transpose(...))

    # explicit order
    explicit_expected = ds.map(
        lambda x: x.transpose("x", "y", transpose_coords=True)
    )
    assert_identical(explicit_expected, ds.transpose("x", "y"))

    ds = create_test_data()
    transposed = ds.transpose()
    for name in ds.variables:
        assert transposed[name].dims[::-1] == ds[name].dims

    new_order = ("dim2", "dim3", "dim1", "time")
    transposed = ds.transpose(*new_order)
    for name in ds.variables:
        expected_dims = tuple(d for d in new_order if d in ds[name].dims)
        assert transposed[name].dims == expected_dims

    # same as above but with ellipsis
    transposed = ds.transpose("dim2", "dim3", ...)
    for name in ds.variables:
        expected_dims = tuple(d for d in new_order if d in ds[name].dims)
        assert transposed[name].dims == expected_dims

    # test missing dimension, raise error
    with pytest.raises(ValueError):
        ds.transpose(..., "not_a_dim")

    # test missing dimension, ignore error
    ignored = ds.transpose(..., "not_a_dim", missing_dims="ignore")
    expected_ell = ds.transpose(...)
    assert_identical(expected_ell, ignored)

    # test missing dimension, raise warning
    with pytest.warns(UserWarning):
        warned = ds.transpose(..., "not_a_dim", missing_dims="warn")
        assert_identical(expected_ell, warned)

    assert "T" not in dir(ds)
def test_dataset_ellipsis_transpose_different_ordered_vars(self) -> None:
    """Ellipsis transpose keeps each variable's own order for the leading dims."""
    # https://github.com/pydata/xarray/issues/1081#issuecomment-544350457
    variables = {
        "a": (("w", "x", "y", "z"), np.ones((2, 3, 4, 5))),
        "b": (("x", "w", "y", "z"), np.zeros((3, 2, 4, 5))),
    }
    ds = Dataset(variables)

    result = ds.transpose(..., "z", "y")

    assert list(result["a"].dims) == list("wxzy")
    assert list(result["b"].dims) == list("xwzy")
def test_dataset_retains_period_index_on_transpose(self) -> None:
    """A ``PeriodIndex`` time coordinate is still a ``PeriodIndex`` after transpose."""
    ds = create_test_data()
    ds["time"] = pd.period_range("2000-01-01", periods=20)

    time_index = ds.transpose().time.to_index()

    assert isinstance(time_index, pd.PeriodIndex)
def test_dataset_diff_n1_simple(self) -> None:
    """First-order ``diff`` on a dataset that has no coordinates."""
    ds = Dataset({"foo": ("x", [5, 5, 6, 6])})
    differences = ds.diff("x")
    assert_equal(Dataset({"foo": ("x", [0, 1, 0])}), differences)
def test_dataset_diff_n1_label(self) -> None:
    """``label`` chooses which coordinate labels the differences carry."""
    ds = Dataset({"foo": ("x", [5, 5, 6, 6])}, {"x": [0, 1, 2, 3]})

    lower = ds.diff("x", label="lower")
    assert_equal(Dataset({"foo": ("x", [0, 1, 0])}, {"x": [0, 1, 2]}), lower)

    upper = ds.diff("x", label="upper")
    assert_equal(Dataset({"foo": ("x", [0, 1, 0])}, {"x": [1, 2, 3]}), upper)
def test_dataset_diff_n1(self) -> None:
    """First-order ``diff`` along dim2 matches ``np.diff`` on each affected variable."""
    ds = create_test_data(seed=1)
    result = ds.diff("dim2")

    # var1 and var2 are differenced along axis 1
    expected_vars = {
        name: DataArray(
            np.diff(ds[name].values, axis=1),
            {"dim2": ds["dim2"].values[1:]},
            ["dim1", "dim2"],
        )
        for name in ("var1", "var2")
    }
    # var3 is expected to come through unchanged
    expected_vars["var3"] = ds["var3"]

    expected = Dataset(expected_vars, coords={"time": ds["time"].values})
    expected.coords["numbers"] = ("dim3", ds["numbers"].values)
    assert_equal(expected, result)
def test_dataset_diff_n2(self) -> None:
    """Second-order ``diff`` along dim2 matches ``np.diff(..., n=2)``."""
    ds = create_test_data(seed=1)
    result = ds.diff("dim2", n=2)

    # var1 and var2 are differenced twice along axis 1
    expected_vars = {
        name: DataArray(
            np.diff(ds[name].values, axis=1, n=2),
            {"dim2": ds["dim2"].values[2:]},
            ["dim1", "dim2"],
        )
        for name in ("var1", "var2")
    }
    # var3 is expected to come through unchanged
    expected_vars["var3"] = ds["var3"]

    expected = Dataset(expected_vars, coords={"time": ds["time"].values})
    expected.coords["numbers"] = ("dim3", ds["numbers"].values)
    assert_equal(expected, result)
def test_dataset_diff_exception_n_neg(self) -> None:
    """A negative ``n`` is rejected by ``diff``."""
    dataset = create_test_data(seed=1)
    with pytest.raises(ValueError, match=r"must be non-negative"):
        dataset.diff("dim2", n=-1)
def test_dataset_diff_exception_label_str(self) -> None:
    """An unknown ``label`` value is rejected by ``diff``."""
    dataset = create_test_data(seed=1)
    with pytest.raises(ValueError, match=r"'label' argument has to"):
        dataset.diff("dim2", label="raise_me")  # type: ignore[arg-type]
@pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": -10}])
def test_shift(self, fill_value) -> None:
    """``shift`` moves data by one step and fills the gap with ``fill_value``."""
    coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]}
    attrs = {"meta": "data"}
    ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs)

    shifted = ds.shift(x=1, fill_value=fill_value)

    if fill_value == dtypes.NA:
        # if we supply the default, we expect the missing value for a
        # float array
        expected_fill = np.nan
    elif isinstance(fill_value, dict):
        # per-variable fill values
        expected_fill = fill_value.get("foo", np.nan)
    else:
        expected_fill = fill_value
    expected = Dataset({"foo": ("x", [expected_fill, 1, 2])}, coords, attrs)
    assert_identical(expected, shifted)

    # "foo" is not a dimension
    with pytest.raises(ValueError, match=r"dimensions"):
        ds.shift(foo=123)
def test_roll_coords(self) -> None:
    """With ``roll_coords=True`` coordinates are rolled together with the data."""
    attrs = {"meta": "data"}
    ds = Dataset(
        {"foo": ("x", [1, 2, 3])},
        {"bar": ("x", list("abc")), "x": [-4, 3, 2]},
        attrs,
    )

    rolled = ds.roll(x=1, roll_coords=True)

    rolled_coords = {"bar": ("x", list("cab")), "x": [2, -4, 3]}
    expected = Dataset({"foo": ("x", [3, 1, 2])}, rolled_coords, attrs)
    assert_identical(expected, rolled)

    # "foo" is not a dimension
    with pytest.raises(ValueError, match=r"dimensions"):
        ds.roll(foo=123, roll_coords=True)
def test_roll_no_coords(self) -> None:
    """By default ``roll`` moves the data and leaves coordinates in place."""
    coords = {"bar": ("x", list("abc")), "x": [-4, 3, 2]}
    attrs = {"meta": "data"}
    ds = Dataset({"foo": ("x", [1, 2, 3])}, coords, attrs)

    rolled = ds.roll(x=1)

    assert_identical(Dataset({"foo": ("x", [3, 1, 2])}, coords, attrs), rolled)

    # "abc" is not a dimension
    with pytest.raises(ValueError, match=r"dimensions"):
        ds.roll(abc=321)
def test_roll_multidim(self) -> None:
    """Rolling along one dimension of a 2d array with ``roll_coords=True``."""
    # regression test for 2445
    arr = xr.DataArray(
        [[1, 2, 3], [4, 5, 6]],
        coords={"x": range(3), "y": range(2)},
        dims=("y", "x"),
    )

    rolled = arr.roll(x=1, roll_coords=True)

    expected = xr.DataArray(
        [[3, 1, 2], [6, 4, 5]],
        coords=[("y", [0, 1]), ("x", [2, 0, 1])],
    )
    assert_identical(expected, rolled)
def test_real_and_imag(self) -> None:
    """``real`` and ``imag`` split a complex dataset and keep its attrs."""
    attrs = {"foo": "bar"}
    ds = Dataset({"x": ((), 1 + 2j, attrs)}, attrs=attrs)

    assert_identical(ds.real, Dataset({"x": ((), 1, attrs)}, attrs=attrs))
    assert_identical(ds.imag, Dataset({"x": ((), 2, attrs)}, attrs=attrs))
def test_setattr_raises(self) -> None:
    """Attribute assignment on a dataset raises ``AttributeError``."""
    ds = Dataset({}, coords={"scalar": 1}, attrs={"foo": "bar"})

    # a coordinate name, an attrs key, and a name that is neither
    for name in ("scalar", "foo", "other"):
        with pytest.raises(AttributeError, match=r"cannot set attr"):
            setattr(ds, name, 2)
def test_filter_by_attrs(self) -> None:
    """Exercise ``Dataset.filter_by_attrs`` with values, callables and combinations.

    The dataset holds two temperature variables that differ only in their
    ``height`` attribute, one precipitation variable without ``height``, and
    a ``time`` coordinate carrying its own attributes.
    """
    precip = dict(standard_name="convective_precipitation_flux")
    temp0 = dict(standard_name="air_potential_temperature", height="0 m")
    temp10 = dict(standard_name="air_potential_temperature", height="10 m")
    ds = Dataset(
        {
            "temperature_0": (["t"], [0], temp0),
            "temperature_10": (["t"], [0], temp10),
            "precipitation": (["t"], [0], precip),
        },
        coords={"time": (["t"], [0], dict(axis="T", long_name="time_in_seconds"))},
    )

    # Test return empty Dataset.
    new_ds = ds.filter_by_attrs(standard_name="invalid_standard_name")
    assert not bool(new_ds.data_vars)

    # Test return one DataArray.
    new_ds = ds.filter_by_attrs(standard_name="convective_precipitation_flux")
    assert new_ds["precipitation"].standard_name == "convective_precipitation_flux"
    assert_equal(new_ds["precipitation"], ds["precipitation"])

    # Test filter coordinates
    new_ds = ds.filter_by_attrs(long_name="time_in_seconds")
    assert new_ds["time"].long_name == "time_in_seconds"
    assert not bool(new_ds.data_vars)

    # Test return more than one DataArray.
    new_ds = ds.filter_by_attrs(standard_name="air_potential_temperature")
    assert len(new_ds.data_vars) == 2
    for var in new_ds.data_vars:
        assert new_ds[var].standard_name == "air_potential_temperature"

    # Test callable.
    new_ds = ds.filter_by_attrs(height=lambda v: v is not None)
    assert len(new_ds.data_vars) == 2
    for var in new_ds.data_vars:
        assert new_ds[var].standard_name == "air_potential_temperature"

    new_ds = ds.filter_by_attrs(height="10 m")
    assert len(new_ds.data_vars) == 1
    for var in new_ds.data_vars:
        assert new_ds[var].height == "10 m"

    # Test return empty Dataset due to conflicting filters
    new_ds = ds.filter_by_attrs(
        standard_name="convective_precipitation_flux", height="0 m"
    )
    assert not bool(new_ds.data_vars)

    # Test return one DataArray with two filter conditions
    new_ds = ds.filter_by_attrs(
        standard_name="air_potential_temperature", height="0 m"
    )
    # Without this count check the loop below would pass vacuously if the
    # filter wrongly returned nothing.
    assert len(new_ds.data_vars) == 1
    for var in new_ds.data_vars:
        assert new_ds[var].standard_name == "air_potential_temperature"
        assert new_ds[var].height == "0 m"
        assert new_ds[var].height != "10 m"

    # Test return empty Dataset due to conflicting callables
    new_ds = ds.filter_by_attrs(
        standard_name=lambda v: False, height=lambda v: True
    )
    assert not bool(new_ds.data_vars)
def test_binary_op_propagate_indexes(self) -> None:
    """Multiplying by a scalar keeps the very same index object for ``x``."""
    ds = Dataset(
        {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})}
    )

    index_before = ds.xindexes["x"]
    doubled = ds * 2

    # identity, not just equality: the index must be reused rather than rebuilt
    assert index_before is doubled.xindexes["x"]
def test_binary_op_join_setting(self) -> None:
    """The ``arithmetic_join`` option governs coordinates and data variables."""
    # arithmetic_join applies to data array coordinates
    missing_2 = xr.Dataset({"x": [0, 1]})
    missing_0 = xr.Dataset({"x": [1, 2]})
    with xr.set_options(arithmetic_join="outer"):
        joined = missing_2 + missing_0
    assert_equal(joined, xr.Dataset({"x": [0, 1, 2]}))

    # arithmetic join also applies to data_vars
    ds1 = xr.Dataset({"foo": 1, "bar": 2})
    ds2 = xr.Dataset({"bar": 2, "baz": 3})

    # default is inner joining
    assert_equal(ds1 + ds2, xr.Dataset({"bar": 4}))

    # variables present on only one side come out as NaN when they are kept
    cases = (
        ("outer", {"foo": np.nan, "bar": 4, "baz": np.nan}),
        ("left", {"foo": np.nan, "bar": 4}),
        ("right", {"bar": 4, "baz": np.nan}),
    )
    for join, wanted_vars in cases:
        with xr.set_options(arithmetic_join=join):
            assert_equal(ds1 + ds2, xr.Dataset(wanted_vars))
@pytest.mark.parametrize(
    ["keep_attrs", "expected"],
    (
        pytest.param(False, {}, id="False"),
        pytest.param(True, {"foo": "a", "bar": "b"}, id="True"),
    ),
)
def test_binary_ops_keep_attrs(self, keep_attrs, expected) -> None:
    """Dataset-level attrs of a sum follow the ``keep_attrs`` option."""
    left = xr.Dataset({"a": 1}, attrs={"foo": "a", "bar": "b"})
    right = xr.Dataset({"a": 1}, attrs={"foo": "a", "baz": "c"})

    with xr.set_options(keep_attrs=keep_attrs):
        total = left + right

    # with keep_attrs=True the expected attrs are those of the left operand
    assert total.attrs == expected
def test_full_like(self) -> None:
    """Check ``full_like`` on a Dataset for scalar/dict fill values and dtypes.

    Four scenarios are covered: a single scalar fill, a scalar fill with a
    dtype override, per-variable fill values, and per-variable fill values
    combined with a per-variable dtype override.
    """
    # For more thorough tests, see test_variable.py
    # Note: testing data_vars with mismatched dtypes
    ds = Dataset(
        {
            "d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]}),
            "d2": DataArray([1.1, 2.2, 3.3], dims=["y"]),
        },
        attrs={"foo": "bar"},
    )
    actual = full_like(ds, 2)

    # build the expectation by overwriting the values of a deep copy in place
    expected = ds.copy(deep=True)
    # https://github.com/python/mypy/issues/3004
    expected["d1"].values = [2, 2, 2]  # type: ignore[assignment]
    expected["d2"].values = [2.0, 2.0, 2.0]  # type: ignore[assignment]
    # sanity-check the expectation itself: each variable keeps its own dtype
    assert expected["d1"].dtype == int
    assert expected["d2"].dtype == float
    assert_identical(expected, actual)

    # override dtype
    actual = full_like(ds, fill_value=True, dtype=bool)
    expected = ds.copy(deep=True)
    expected["d1"].values = [True, True, True]  # type: ignore[assignment]
    expected["d2"].values = [True, True, True]  # type: ignore[assignment]
    assert expected["d1"].dtype == bool
    assert expected["d2"].dtype == bool
    assert_identical(expected, actual)

    # with multiple fill values
    actual = full_like(ds, {"d1": 1, "d2": 2.3})
    expected = ds.assign(d1=("x", [1, 1, 1]), d2=("y", [2.3, 2.3, 2.3]))
    assert expected["d1"].dtype == int
    assert expected["d2"].dtype == float
    assert_identical(expected, actual)

    # override multiple dtypes
    # only "d1" gets a dtype override; "d2" is expected to stay float
    actual = full_like(ds, fill_value={"d1": 1, "d2": 2.3}, dtype={"d1": bool})
    expected = ds.assign(d1=("x", [True, True, True]), d2=("y", [2.3, 2.3, 2.3]))
    assert expected["d1"].dtype == bool
    assert expected["d2"].dtype == float
    assert_identical(expected, actual)
def test_combine_first(self) -> None:
    """``combine_first`` on datasets with distinct variables matches ``xr.merge``."""
    left = DataArray([0, 0], [("x", ["a", "b"])]).to_dataset(name="dsx0")
    right = DataArray([1, 1], [("x", ["b", "c"])]).to_dataset(name="dsx1")

    combined = left.combine_first(right)

    # "x" becomes the union of both indexes; missing entries are NaN
    wanted = Dataset(
        {"dsx0": ("x", [0, 0, np.nan]), "dsx1": ("x", [np.nan, 1, 1])},
        coords={"x": ["a", "b", "c"]},
    )
    assert_equal(combined, wanted)
    assert_equal(combined, xr.merge([left, right]))

    # works just like xr.merge([self, other])
    other = DataArray([2, 2, 2], [("x", ["b", "c", "d"])]).to_dataset(name="dsy2")
    combined = left.combine_first(other)
    assert_equal(combined, xr.merge([other, left]))
def test_sortby(self) -> None:
    """Check ``Dataset.sortby`` with DataArrays, names, lists and a MultiIndex.

    ``ds`` is stored in descending order along both ``x`` and ``y``;
    ``sorted1d`` is the same data sorted along ``x`` only and ``sorted2d``
    is sorted along both dimensions.
    """
    ds = Dataset(
        {
            "A": DataArray(
                [[1, 2], [3, 4], [5, 6]], [("x", ["c", "b", "a"]), ("y", [1, 0])]
            ),
            "B": DataArray([[5, 6], [7, 8], [9, 10]], dims=["x", "y"]),
        }
    )

    sorted1d = Dataset(
        {
            "A": DataArray(
                [[5, 6], [3, 4], [1, 2]], [("x", ["a", "b", "c"]), ("y", [1, 0])]
            ),
            "B": DataArray([[9, 10], [7, 8], [5, 6]], dims=["x", "y"]),
        }
    )

    sorted2d = Dataset(
        {
            "A": DataArray(
                [[6, 5], [4, 3], [2, 1]], [("x", ["a", "b", "c"]), ("y", [0, 1])]
            ),
            "B": DataArray([[10, 9], [8, 7], [6, 5]], dims=["x", "y"]),
        }
    )

    expected = sorted1d
    # sort keys decrease along the stored order, so ascending sort reverses "x"
    dax = DataArray([100, 99, 98], [("x", ["c", "b", "a"])])
    actual = ds.sortby(dax)
    assert_equal(actual, expected)

    # test descending order sort
    actual = ds.sortby(dax, ascending=False)
    assert_equal(actual, ds)

    # test alignment (fills in nan for 'c')
    dax_short = DataArray([98, 97], [("x", ["b", "a"])])
    actual = ds.sortby(dax_short)
    assert_equal(actual, expected)

    # test 1-D lexsort
    # dax0 is sorted first to give indices of [1, 2, 0]
    # and then dax1 would be used to move index 2 ahead of 1
    dax0 = DataArray([100, 95, 95], [("x", ["c", "b", "a"])])
    dax1 = DataArray([0, 1, 0], [("x", ["c", "b", "a"])])
    actual = ds.sortby([dax0, dax1])  # lexsort underneath gives [2, 1, 0]
    assert_equal(actual, expected)

    expected = sorted2d
    # test multi-dim sort by 1D dataarray values
    day = DataArray([90, 80], [("y", [1, 0])])
    actual = ds.sortby([day, dax])
    assert_equal(actual, expected)

    # test exception-raising
    with pytest.raises(KeyError):
        actual = ds.sortby("z")

    # a 2-D DataArray cannot be used as a sort key
    with pytest.raises(ValueError) as excinfo:
        actual = ds.sortby(ds["A"])
    assert "DataArray is not 1-D" in str(excinfo.value)

    # sort by the name of a coordinate
    expected = sorted1d
    actual = ds.sortby("x")
    assert_equal(actual, expected)

    # test pandas.MultiIndex
    indices = (("b", 1), ("b", 0), ("a", 1), ("a", 0))
    midx = pd.MultiIndex.from_tuples(indices, names=["one", "two"])
    ds_midx = Dataset(
        {
            "A": DataArray(
                [[1, 2], [3, 4], [5, 6], [7, 8]], [("x", midx), ("y", [1, 0])]
            ),
            "B": DataArray([[5, 6], [7, 8], [9, 10], [11, 12]], dims=["x", "y"]),
        }
    )
    actual = ds_midx.sortby("x")
    # the tuples above are in descending order, so sorting reverses them
    midx_reversed = pd.MultiIndex.from_tuples(
        tuple(reversed(indices)), names=["one", "two"]
    )
    expected = Dataset(
        {
            "A": DataArray(
                [[7, 8], [5, 6], [3, 4], [1, 2]],
                [("x", midx_reversed), ("y", [1, 0])],
            ),
            "B": DataArray([[11, 12], [9, 10], [7, 8], [5, 6]], dims=["x", "y"]),
        }
    )
    assert_equal(actual, expected)

    # multi-dim sort by coordinate objects
    expected = sorted2d
    actual = ds.sortby(["x", "y"])
    assert_equal(actual, expected)

    # test descending order sort
    actual = ds.sortby(["x", "y"], ascending=False)
    assert_equal(actual, ds)
def test_attribute_access(self) -> None:
    """Variables, coordinates and attrs are reachable as attributes and via ``dir``."""
    ds = create_test_data(seed=1)

    dataset_names = (
        "var1",
        "var2",
        "var3",
        "time",
        "dim1",
        "dim2",
        "dim3",
        "numbers",
    )
    for name in dataset_names:
        assert_equal(ds[name], getattr(ds, name))
        assert name in dir(ds)

    # the same holds for the coordinates of a single variable
    for name in ("dim3", "dim1", "numbers"):
        assert_equal(ds["var3"][name], getattr(ds.var3, name))
        assert name in dir(ds["var3"])

    # attrs
    assert ds["var3"].attrs["foo"] == ds.var3.foo
    assert "foo" in dir(ds["var3"])
def test_ipython_key_completion(self) -> None:
    """Check ``_ipython_key_completions_`` on datasets, arrays and mappings.

    For each object the completions are compared, order-insensitively, with
    an expected list of keys, and every completion is used as a key to
    confirm the lookup does not raise.
    """
    ds = create_test_data(seed=1)
    actual = ds._ipython_key_completions_()
    expected = ["var1", "var2", "var3", "time", "dim1", "dim2", "dim3", "numbers"]
    for item in actual:
        ds[item]  # should not raise
    assert sorted(actual) == sorted(expected)

    # for dataarray
    actual = ds["var3"]._ipython_key_completions_()
    expected = ["dim3", "dim1", "numbers"]
    for item in actual:
        ds["var3"][item]  # should not raise
    assert sorted(actual) == sorted(expected)

    # MultiIndex
    # the stacked dimension and both of its levels are expected as keys
    ds_midx = ds.stack(dim12=["dim2", "dim3"])
    actual = ds_midx._ipython_key_completions_()
    expected = [
        "var1",
        "var2",
        "var3",
        "time",
        "dim1",
        "dim2",
        "dim3",
        "numbers",
        "dim12",
    ]
    for item in actual:
        ds_midx[item]  # should not raise
    assert sorted(actual) == sorted(expected)

    # coords
    actual = ds.coords._ipython_key_completions_()
    expected = ["time", "dim1", "dim2", "dim3", "numbers"]
    for item in actual:
        ds.coords[item]  # should not raise
    assert sorted(actual) == sorted(expected)

    actual = ds["var3"].coords._ipython_key_completions_()
    expected = ["dim1", "dim3", "numbers"]
    for item in actual:
        ds["var3"].coords[item]  # should not raise
    assert sorted(actual) == sorted(expected)

    # a standalone Coordinates object is expected NOT to offer "dim1",
    # unlike ds.coords above
    # NOTE(review): presumably "dim1" has no coordinate variable in the test
    # data -- confirm against create_test_data
    coords = Coordinates(ds.coords)
    actual = coords._ipython_key_completions_()
    expected = ["time", "dim2", "dim3", "numbers"]
    for item in actual:
        coords[item]  # should not raise
    assert sorted(actual) == sorted(expected)

    # data_vars
    # "dim1" is expected among the data_vars completions as well
    actual = ds.data_vars._ipython_key_completions_()
    expected = ["var1", "var2", "var3", "dim1"]
    for item in actual:
        ds.data_vars[item]  # should not raise
    assert sorted(actual) == sorted(expected)
def test_polyfit_output(self) -> None:
    """Check which variables ``Dataset.polyfit`` returns for several dimensions."""
    ds = create_test_data(seed=1)

    plain_fit = ds.polyfit("dim2", 2, full=False)
    assert "var1_polyfit_coefficients" in plain_fit

    # full=True additionally reports the matrix rank for the fitted dimension
    full_fit = ds.polyfit("dim1", 2, full=True)
    assert "var1_polyfit_coefficients" in full_fit
    assert "dim1_matrix_rank" in full_fit

    # fitting along "time" is expected to produce no data variables at all
    time_fit = ds.polyfit("time", 2)
    assert len(time_fit.data_vars) == 0
def test_polyfit_weighted(self) -> None:
    """Unit weights give the unweighted fit and leave the input untouched."""
    ds = create_test_data(seed=1)
    ds = ds.broadcast_like(ds)  # test more than 2 dimensions (issue #9972)
    snapshot = ds.copy(deep=True)

    unweighted = ds.polyfit("dim2", 2)
    unit_weights = np.ones(ds.sizes["dim2"])
    weighted = ds.polyfit("dim2", 2, w=unit_weights)
    xr.testing.assert_identical(unweighted, weighted)

    # Make sure weighted polyfit does not change the original object (issue #5644)
    xr.testing.assert_identical(ds, snapshot)
def test_polyfit_coord(self) -> None:
    """Make sure polyfit works when given a non-dimension coordinate."""
    ds = create_test_data(seed=1)

    fit = ds.polyfit("numbers", 2, full=False)

    assert "var3_polyfit_coefficients" in fit
    assert "dim1" in fit.dims
    # neither of the other dimension names may appear in the result
    for absent in ("dim2", "dim3"):
        assert absent not in fit
def test_polyfit_coord_output(self) -> None:
    """Fit a line against the numeric coordinate ``y`` and check both coefficients."""
    da = xr.DataArray(
        [1, 3, 2], dims=["x"], coords=dict(x=["a", "b", "c"], y=("x", [0, 1, 2]))
    )

    coefficients = da.polyfit("y", deg=1)["polyfit_coefficients"]

    # expected least-squares line through (0, 1), (1, 3), (2, 2): 0.5 * y + 1.5
    for degree, value in ((0, 1.5), (1, 0.5)):
        assert coefficients.sel(degree=degree).item() == pytest.approx(value)
def test_polyfit_warnings(self) -> None:
    """A degree-10 fit records exactly one RankWarning, and none with ``full=True``."""
    ds = create_test_data(seed=1)

    with warnings.catch_warnings(record=True) as ws:
        ds.var1.polyfit("dim2", 10, full=False)
        assert len(ws) == 1
        assert ws[0].category == RankWarning
        # the count must stay at one: full=True is expected to add no warning
        ds.var1.polyfit("dim2", 10, full=True)
        assert len(ws) == 1
def test_polyfit_polyval(self) -> None:
    """Evaluating fitted coefficients with ``polyval`` reproduces the data."""
    series = xr.DataArray(
        np.arange(1, 10).astype(np.float64), dims=["x"], coords=dict(x=np.arange(9))
    )
    fit = series.polyfit("x", 3, full=False)
    reconstructed = xr.polyval(series.x, fit.polyfit_coefficients)
    # polyval introduces very small errors (1e-16 here)
    xr.testing.assert_allclose(reconstructed, series)

    # repeat with a datetime coordinate; a looser tolerance is used here
    series = series.assign_coords(
        x=xr.date_range("2001-01-01", periods=9, freq="YS")
    )
    fit = series.polyfit("x", 3, full=False)
    reconstructed = xr.polyval(series.x, fit.polyfit_coefficients)
    xr.testing.assert_allclose(reconstructed, series, rtol=1e-3)
@requires_cftime
def test_polyfit_polyval_cftime(self) -> None:
    """``polyfit``/``polyval`` round-trip on a ``noleap`` calendar coordinate."""
    times = xr.date_range("2001-01-01", periods=9, freq="YS", calendar="noleap")
    series = xr.DataArray(
        np.arange(1, 10).astype(np.float64),
        dims=["x"],
        coords=dict(x=times),
    )

    fit = series.polyfit("x", 3, full=False)
    reconstructed = xr.polyval(series.x, fit.polyfit_coefficients)

    np.testing.assert_allclose(reconstructed, series)
@staticmethod
def _test_data_var_interior(
    original_data_var, padded_data_var, padded_dim_name, expected_pad_values
):
    """Assert that a padded variable has the expected edges and interior.

    The first and last positions along ``padded_dim_name`` must consist of
    ``expected_pad_values`` (compared after ``np.unique``), and everything
    in between must equal ``original_data_var``.
    """
    edges = padded_data_var.isel({padded_dim_name: [0, -1]})
    np.testing.assert_equal(np.unique(edges), expected_pad_values)

    interior = padded_data_var.isel({padded_dim_name: slice(1, -1)})
    np.testing.assert_array_equal(interior, original_data_var)
@pytest.mark.parametrize("padded_dim_name", ["dim1", "dim2", "dim3", "time"])
@pytest.mark.parametrize(
    ["constant_values"],
    [
        pytest.param(None, id="default"),
        pytest.param(42, id="scalar"),
        pytest.param((42, 43), id="tuple"),
        pytest.param({"dim1": 42, "dim2": 43}, id="per dim scalar"),
        pytest.param({"dim1": (42, 43), "dim2": (43, 44)}, id="per dim tuple"),
        pytest.param({"var1": 42, "var2": (42, 43)}, id="per var"),
        pytest.param({"var1": 42, "dim1": (42, 43)}, id="mixed"),
    ],
)
def test_pad(self, padded_dim_name, constant_values) -> None:
    """Pad one dimension by one element per side and check sizes and values.

    ``constant_values`` is exercised as ``None``, a scalar, a tuple, and as
    dicts keyed by dimension name, by variable name, or by a mix of both.
    """
    ds = create_test_data(seed=1)
    padded = ds.pad({padded_dim_name: (1, 1)}, constant_values=constant_values)

    # test padded dim values and size
    for ds_dim_name, ds_dim in ds.sizes.items():
        if ds_dim_name == padded_dim_name:
            np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim + 2)
            # the new edge entries of a dimension coordinate must be null
            if ds_dim_name in padded.coords:
                assert padded[ds_dim_name][[0, -1]].isnull().all()
        else:
            np.testing.assert_equal(padded.sizes[ds_dim_name], ds_dim)

    # check if coord "numbers" with dimension dim3 is padded correctly
    if padded_dim_name == "dim3":
        assert padded["numbers"][[0, -1]].isnull().all()
        # warning: passes but dtype changes from int to float
        np.testing.assert_array_equal(padded["numbers"][1:-1], ds["numbers"])

    # test if data_vars are padded with correct values
    for data_var_name, data_var in padded.data_vars.items():
        if padded_dim_name in data_var.dims:
            if utils.is_dict_like(constant_values):
                # a per-variable entry is looked up before a per-dimension one
                if (
                    expected := constant_values.get(data_var_name, None)
                ) is not None:
                    self._test_data_var_interior(
                        ds[data_var_name], data_var, padded_dim_name, expected
                    )
                elif (
                    expected := constant_values.get(padded_dim_name, None)
                ) is not None:
                    self._test_data_var_interior(
                        ds[data_var_name], data_var, padded_dim_name, expected
                    )
                else:
                    # the dict names neither this variable nor this dimension:
                    # the pad value is expected to be 0
                    self._test_data_var_interior(
                        ds[data_var_name], data_var, padded_dim_name, 0
                    )
            elif constant_values:
                self._test_data_var_interior(
                    ds[data_var_name], data_var, padded_dim_name, constant_values
                )
            else:
                # constant_values is None here: the pad value is expected to be NaN
                self._test_data_var_interior(
                    ds[data_var_name], data_var, padded_dim_name, np.nan
                )
        else:
            # variables without the padded dimension must be unchanged
            assert_array_equal(data_var, ds[data_var_name])
@pytest.mark.parametrize(
    ["keep_attrs", "attrs", "expected"],
    [
        pytest.param(None, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="default"),
        pytest.param(False, {"a": 1, "b": 2}, {}, id="False"),
        pytest.param(True, {"a": 1, "b": 2}, {"a": 1, "b": 2}, id="True"),
    ],
)
def test_pad_keep_attrs(self, keep_attrs, attrs, expected) -> None:
    """Attrs after ``pad`` follow ``keep_attrs``, given globally or per call."""
    source = xr.Dataset(
        {"a": ("x", [1, 2], attrs), "b": ("y", [1, 2], attrs)},
        coords={"c": ("x", [-1, 1], attrs), "d": ("y", [-1, 1], attrs)},
        attrs=attrs,
    )
    # Only the dataset and the variables along the padded "x" dimension take
    # the ``expected`` attrs; "b" and "d" live on "y" and keep ``attrs``.
    wanted = xr.Dataset(
        {"a": ("x", [0, 1, 2, 0], expected), "b": ("y", [1, 2], attrs)},
        coords={
            "c": ("x", [np.nan, -1, 1, np.nan], expected),
            "d": ("y", [-1, 1], attrs),
        },
        attrs=expected,
    )

    # the global option takes the string "default" where the keyword takes None
    option_value = keep_attrs if keep_attrs is not None else "default"

    with set_options(keep_attrs=option_value):
        via_option = source.pad({"x": (1, 1)}, mode="constant", constant_values=0)
        xr.testing.assert_identical(via_option, wanted)

    via_keyword = source.pad(
        {"x": (1, 1)}, mode="constant", constant_values=0, keep_attrs=keep_attrs
    )
    xr.testing.assert_identical(via_keyword, wanted)
def test_astype_attrs(self) -> None:
    """``astype`` keeps attrs by default and drops them with ``keep_attrs=False``."""
    data = create_test_data(seed=123)
    data.attrs["foo"] = "bar"

    converted = data.astype(float)
    assert data.attrs == converted.attrs
    assert data.var1.attrs == converted.var1.attrs

    stripped = data.astype(float, keep_attrs=False)
    assert not stripped.attrs
    assert not stripped.var1.attrs
@pytest.mark.parametrize("parser", ["pandas", "python"])
@pytest.mark.parametrize(
    "engine", ["python", None, pytest.param("numexpr", marks=[requires_numexpr])]
)
@pytest.mark.parametrize(
    "backend", ["numpy", pytest.param("dask", marks=[requires_dask])]
)
def test_query(self, backend, engine, parser) -> None:
    """Test querying a dataset."""

    # setup test data
    np.random.seed(42)
    a = np.arange(0, 10, 1)
    b = np.random.randint(0, 100, size=10)
    c = np.linspace(0, 1, 20)
    d = np.random.choice(["foo", "bar", "baz"], size=30, replace=True).astype(
        object
    )
    e = np.arange(0, 10 * 20).reshape(10, 20)
    f = np.random.normal(0, 1, size=(10, 20, 30))
    # the data variables a..f are mirrored by the coordinates a2..f2, which
    # are the names used inside the query expressions below
    if backend == "numpy":
        ds = Dataset(
            {
                "a": ("x", a),
                "b": ("x", b),
                "c": ("y", c),
                "d": ("z", d),
                "e": (("x", "y"), e),
                "f": (("x", "y", "z"), f),
            },
            coords={
                "a2": ("x", a),
                "b2": ("x", b),
                "c2": ("y", c),
                "d2": ("z", d),
                "e2": (("x", "y"), e),
                "f2": (("x", "y", "z"), f),
            },
        )
    elif backend == "dask":
        # only the data variables are dask-backed; the coordinates stay numpy
        ds = Dataset(
            {
                "a": ("x", da.from_array(a, chunks=3)),
                "b": ("x", da.from_array(b, chunks=3)),
                "c": ("y", da.from_array(c, chunks=7)),
                "d": ("z", da.from_array(d, chunks=12)),
                "e": (("x", "y"), da.from_array(e, chunks=(3, 7))),
                "f": (("x", "y", "z"), da.from_array(f, chunks=(3, 7, 12))),
            },
            coords={
                "a2": ("x", a),
                "b2": ("x", b),
                "c2": ("y", c),
                "d2": ("z", d),
                "e2": (("x", "y"), e),
                "f2": (("x", "y", "z"), f),
            },
        )

    # Each query below runs inside raise_if_dask_computes(); the reference
    # result is built afterwards with isel and a boolean mask.

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = ds.query(x="a2 > 5", engine=engine, parser=parser)
    expect = ds.isel(x=(a > 5))
    assert_identical(expect, actual)

    # query single dim, single variable, via dict
    with raise_if_dask_computes():
        actual = ds.query(dict(x="a2 > 5"), engine=engine, parser=parser)
    expect = ds.isel(dict(x=(a > 5)))
    assert_identical(expect, actual)

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = ds.query(x="b2 > 50", engine=engine, parser=parser)
    expect = ds.isel(x=(b > 50))
    assert_identical(expect, actual)

    # query single dim, single variable
    with raise_if_dask_computes():
        actual = ds.query(y="c2 < .5", engine=engine, parser=parser)
    expect = ds.isel(y=(c < 0.5))
    assert_identical(expect, actual)

    # query single dim, single string variable
    if parser == "pandas":
        # N.B., this query currently only works with the pandas parser
        # xref https://github.com/pandas-dev/pandas/issues/40436
        with raise_if_dask_computes():
            actual = ds.query(z='d2 == "bar"', engine=engine, parser=parser)
        expect = ds.isel(z=(d == "bar"))
        assert_identical(expect, actual)

    # query single dim, multiple variables
    with raise_if_dask_computes():
        actual = ds.query(x="(a2 > 5) & (b2 > 50)", engine=engine, parser=parser)
    expect = ds.isel(x=((a > 5) & (b > 50)))
    assert_identical(expect, actual)

    # query single dim, multiple variables with computation
    with raise_if_dask_computes():
        actual = ds.query(x="(a2 * b2) > 250", engine=engine, parser=parser)
    expect = ds.isel(x=(a * b) > 250)
    assert_identical(expect, actual)

    # check pandas query syntax is supported
    if parser == "pandas":
        with raise_if_dask_computes():
            actual = ds.query(
                x="(a2 > 5) and (b2 > 50)", engine=engine, parser=parser
            )
        expect = ds.isel(x=((a > 5) & (b > 50)))
        assert_identical(expect, actual)

    # query multiple dims via kwargs
    with raise_if_dask_computes():
        actual = ds.query(x="a2 > 5", y="c2 < .5", engine=engine, parser=parser)
    expect = ds.isel(x=(a > 5), y=(c < 0.5))
    assert_identical(expect, actual)

    # query multiple dims via kwargs
    if parser == "pandas":
        with raise_if_dask_computes():
            actual = ds.query(
                x="a2 > 5",
                y="c2 < .5",
                z="d2 == 'bar'",
                engine=engine,
                parser=parser,
            )
        expect = ds.isel(x=(a > 5), y=(c < 0.5), z=(d == "bar"))
        assert_identical(expect, actual)

    # query multiple dims via dict
    with raise_if_dask_computes():
        actual = ds.query(
            dict(x="a2 > 5", y="c2 < .5"), engine=engine, parser=parser
        )
    expect = ds.isel(dict(x=(a > 5), y=(c < 0.5)))
    assert_identical(expect, actual)

    # query multiple dims via dict
    if parser == "pandas":
        with raise_if_dask_computes():
            actual = ds.query(
                dict(x="a2 > 5", y="c2 < .5", z="d2 == 'bar'"),
                engine=engine,
                parser=parser,
            )
        expect = ds.isel(dict(x=(a > 5), y=(c < 0.5), z=(d == "bar")))
        assert_identical(expect, actual)

    # test error handling
    with pytest.raises(ValueError):
        ds.query("a > 5")  # type: ignore[arg-type] # must be dict or kwargs
    # the query for a dimension must be a string, not a boolean array
    with pytest.raises(ValueError):
        ds.query(x=(a > 5))
    with pytest.raises(IndexError):
        ds.query(y="a > 5")  # wrong length dimension
    with pytest.raises(IndexError):
        ds.query(x="c < .5")  # wrong length dimension
    with pytest.raises(IndexError):
        ds.query(x="e > 100")  # wrong number of dimensions
    with pytest.raises(UndefinedVariableError):
        ds.query(x="spam > 50")  # name not present
# pytest tests — new tests should go here, rather than in the class.
@pytest.mark.parametrize("parser", ["pandas", "python"])
def test_eval(ds, parser) -> None:
    """Currently much more minimal testing than `query` above, and much of the setup
    isn't used. But the risks are fairly low — `query` shares much of the code, and
    the method is currently experimental."""

    shifted = ds.eval("z1 + 5", parser=parser)
    assert_identical(ds["z1"] + 5, shifted)

    # check pandas query syntax is supported
    if parser == "pandas":
        mask = ds.eval("(z1 > 5) and (z2 > 0)", parser=parser)
        assert_identical((ds["z1"] > 5) & (ds["z2"] > 0), mask)
@pytest.mark.parametrize("test_elements", ([1, 2], np.array([1, 2]), DataArray([1, 2])))
def test_isin(test_elements, backend) -> None:
    """``Dataset.isin`` flags, per variable, the entries found in ``test_elements``."""
    dims = ("dim1",)

    # 1 marks the positions whose value is 1 or 2 in ``source`` below
    expected = Dataset(
        data_vars={
            "var1": (dims, [0, 1]),
            "var2": (dims, [1, 1]),
            "var3": (dims, [0, 1]),
        }
    ).astype("bool")
    if backend == "dask":
        expected = expected.chunk()

    source = Dataset(
        data_vars={
            "var1": (dims, [0, 1]),
            "var2": (dims, [1, 2]),
            "var3": (dims, [0, 1]),
        }
    )
    assert_equal(source.isin(test_elements), expected)
def test_isin_dataset() -> None:
    """Passing a Dataset as the test elements of ``isin`` raises TypeError."""
    subject = Dataset({"x": [1, 2]})
    with pytest.raises(TypeError):
        subject.isin(subject)
@pytest.mark.parametrize(
    "unaligned_coords",
    (
        {"x": [2, 1, 0]},
        {"x": (["x"], np.asarray([2, 1, 0]))},
        {"x": (["x"], np.asarray([1, 2, 0]))},
        {"x": pd.Index([2, 1, 0])},
        {"x": Variable(dims="x", data=[0, 2, 1])},
        {"x": IndexVariable(dims="x", data=[0, 1, 2])},
        {"y": 42},
        {"y": ("x", [2, 1, 0])},
        {"y": ("x", np.asarray([2, 1, 0]))},
        {"y": (["x"], np.asarray([2, 1, 0]))},
    ),
)
@pytest.mark.parametrize("coords", ({"x": ("x", [0, 1, 2])}, {"x": [0, 1, 2]}))
def test_dataset_constructor_aligns_to_explicit_coords(
    unaligned_coords, coords
) -> None:
    """Building a Dataset from a DataArray equals assigning it afterwards."""
    arr = xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords)

    # reference: start from the explicit coords, then add the array by key
    via_setitem = xr.Dataset(coords=coords)
    via_setitem["a"] = arr

    via_constructor = xr.Dataset({"a": arr}, coords=coords)

    assert_equal(via_setitem, via_constructor)
def test_error_message_on_set_supplied() -> None:
    """A ``set`` given as variable data is rejected with a TypeError naming the type."""
    variables = dict(date=[1, 2, 3], sec={4})
    with pytest.raises(TypeError, match="has invalid type <class 'set'>"):
        xr.Dataset(variables)
@pytest.mark.parametrize("unaligned_coords", ({"y": ("b", np.asarray([2, 1, 0]))},))
def test_constructor_raises_with_invalid_coords(unaligned_coords) -> None:
    """A coordinate on a dimension the DataArray lacks raises ValueError."""
    expected_message = "not a subset of the DataArray dimensions"
    with pytest.raises(ValueError, match=expected_message):
        xr.DataArray([1, 2, 3], dims=["x"], coords=unaligned_coords)
@pytest.mark.parametrize("ds", [3], indirect=True)
def test_dir_expected_attrs(ds) -> None:
    """``dir(ds)`` lists methods as well as variable, dimension and coord names."""
    some_expected_attrs = {"pipe", "mean", "isnull", "var1", "dim2", "numbers"}
    missing = some_expected_attrs - set(dir(ds))
    assert not missing
def test_dir_non_string(ds) -> None:
    """Non-string keys must not break ``dir`` and are not listed by it."""
    # add a numbered key to ensure this doesn't break dir
    ds[5] = "foo"
    assert 5 not in dir(ds)

    # GH2172
    # the array's shape tuple doubles as its dimension names, so they are ints
    sample_data = np.random.uniform(size=[2, 2000, 10000])
    wrapper = xr.Dataset({"sample_data": (sample_data.shape, sample_data)})
    dir(wrapper["sample_data"])
def test_dir_unicode(ds) -> None:
    """A newly added string-named variable shows up in ``dir(ds)``."""
    ds["unicode"] = "uni"
    assert "unicode" in dir(ds)
def test_raise_no_warning_for_nan_in_binary_ops() -> None:
    """Comparing a dataset that contains NaN against a scalar emits no warning."""
    with_nan = Dataset(data_vars={"x": ("y", [1, 2, np.nan])})
    with assert_no_warnings():
        _ = with_nan > 0
@pytest.mark.filterwarnings("error")
@pytest.mark.parametrize("ds", (2,), indirect=True)
def test_raise_no_warning_assert_close(ds) -> None:
    """``assert_allclose`` on a dataset and itself must not emit any warning.

    Warnings are promoted to errors by the ``filterwarnings`` mark, so any
    warning raised during the comparison fails the test.
    """
    assert_allclose(ds, ds)
@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("edge_order", [1, 2])
def test_differentiate(dask, edge_order) -> None:
    """Compare ``differentiate`` along ``x`` and ``y`` against ``np.gradient``.

    Results are checked for the DataArray and for the same array wrapped in
    a Dataset, optionally chunked with dask when it is available.
    """
    rs = np.random.default_rng(42)
    # deliberately non-uniform spacing along x
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.random((8, 6)),
        dims=["x", "y"],
        coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))},
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.differentiate("x", edge_order)
    expected_x = xr.DataArray(
        np.gradient(da, da["x"], axis=0, edge_order=edge_order),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_x, actual)
    assert_equal(
        ds["var"].differentiate("x", edge_order=edge_order),
        ds.differentiate("x", edge_order=edge_order)["var"],
    )
    # coordinate should not change
    assert_equal(da["x"], actual["x"])

    # along y
    actual = da.differentiate("y", edge_order)
    expected_y = xr.DataArray(
        np.gradient(da, da["y"], axis=1, edge_order=edge_order),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_y, actual)
    assert_equal(actual, ds.differentiate("y", edge_order=edge_order)["var"])
    assert_equal(
        ds["var"].differentiate("y", edge_order=edge_order),
        ds.differentiate("y", edge_order=edge_order)["var"],
    )

    # differentiating with respect to the 2-D coordinate is rejected
    with pytest.raises(ValueError):
        da.differentiate("x2d")
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_datetime(dask) -> None:
    """Check ``differentiate`` along a ``datetime64`` coordinate with ``datetime_unit``."""
    rs = np.random.default_rng(42)
    # irregularly spaced dates
    coord = np.array(
        [
            "2004-07-13",
            "2006-01-13",
            "2010-08-13",
            "2010-09-13",
            "2010-10-11",
            "2010-12-13",
            "2011-02-13",
            "2012-08-13",
        ],
        dtype="datetime64",
    )

    da = xr.DataArray(
        rs.random((8, 6)),
        dims=["x", "y"],
        coords={"x": coord, "z": 3, "x2d": (("x", "y"), rs.random((8, 6)))},
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    # along x
    actual = da.differentiate("x", edge_order=1, datetime_unit="D")
    # the reference uses the coordinate converted to a numeric day count
    expected_x = xr.DataArray(
        np.gradient(
            da, da["x"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
        ),
        dims=da.dims,
        coords=da.coords,
    )
    assert_equal(expected_x, actual)

    # a per-hour derivative is 1/24 of the per-day derivative
    actual2 = da.differentiate("x", edge_order=1, datetime_unit="h")
    assert np.allclose(actual, actual2 * 24)

    # for datetime variable
    # differentiating the coordinate with respect to itself must give 1
    actual = da["x"].differentiate("x", edge_order=1, datetime_unit="D")
    assert np.allclose(actual, 1.0)

    # with different date unit
    da = xr.DataArray(coord.astype("datetime64[ms]"), dims=["x"], coords={"x": coord})
    actual = da.differentiate("x", edge_order=1)
    assert np.allclose(actual, 1.0)
@requires_cftime
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_cftime(dask) -> None:
    """Check ``differentiate`` along a cftime coordinate with ``datetime_unit``.

    The result is compared with ``np.gradient`` on the coordinate converted
    to numeric days, and the per-day and per-hour results are cross-checked.
    """
    rs = np.random.default_rng(42)
    # ``xr.cftime_range`` is deprecated in newer xarray releases; ``date_range``
    # with ``use_cftime=True`` builds the same cftime-based index.
    coord = xr.date_range("2000", periods=8, freq="2ME", use_cftime=True)

    da = xr.DataArray(
        rs.random((8, 6)),
        coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))},
        dims=["time", "y"],
    )

    if dask and has_dask:
        da = da.chunk({"time": 4})

    actual = da.differentiate("time", edge_order=1, datetime_unit="D")
    expected_data = np.gradient(
        da, da["time"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1
    )
    expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
    assert_equal(expected, actual)

    # a per-hour derivative is 1/24 of the per-day derivative
    actual2 = da.differentiate("time", edge_order=1, datetime_unit="h")
    assert_allclose(actual, actual2 * 24)

    # Test the differentiation of datetimes themselves
    actual = da["time"].differentiate("time", edge_order=1, datetime_unit="D")
    assert_allclose(actual, xr.ones_like(da["time"]).astype(float))
@pytest.mark.parametrize("dask", [True, False])
def test_integrate(dask) -> None:
    """Compare ``integrate`` along ``x``, ``y`` and both with ``trapezoid``.

    Also checks which coordinates are dropped, that the Dataset and
    DataArray results agree, and that the result keeps the input array type.
    """
    rs = np.random.default_rng(42)
    # deliberately non-uniform spacing along x
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.random((8, 6)),
        dims=["x", "y"],
        coords={
            "x": coord,
            "x2": (("x",), rs.random(8)),
            "z": 3,
            "x2d": (("x", "y"), rs.random((8, 6))),
        },
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.integrate("x")
    # coordinate that contains x should be dropped.
    expected_x = xr.DataArray(
        trapezoid(da.compute(), da["x"], axis=0),
        dims=["y"],
        coords={k: v for k, v in da.coords.items() if "x" not in v.dims},
    )
    assert_allclose(expected_x, actual.compute())
    assert_equal(ds["var"].integrate("x"), ds.integrate("x")["var"])

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # along y
    actual = da.integrate("y")
    expected_y = xr.DataArray(
        trapezoid(da, da["y"], axis=1),
        dims=["x"],
        coords={k: v for k, v in da.coords.items() if "y" not in v.dims},
    )
    assert_allclose(expected_y, actual.compute())
    assert_equal(actual, ds.integrate("y")["var"])
    assert_equal(ds["var"].integrate("y"), ds.integrate("y")["var"])

    # along x and y
    # integrating over both dimensions must leave a 0-d result
    actual = da.integrate(("y", "x"))
    assert actual.ndim == 0

    # integrating with respect to the 2-D coordinate is rejected
    with pytest.raises(ValueError):
        da.integrate("x2d")
@requires_scipy
@pytest.mark.parametrize("dask", [True, False])
def test_cumulative_integrate(dask) -> None:
    """Compare ``cumulative_integrate`` with scipy's ``cumulative_trapezoid``.

    Unlike ``integrate``, the expected results keep the integrated
    dimension and all coordinates of the input.
    """
    rs = np.random.default_rng(43)
    # deliberately non-uniform spacing along x
    coord = [0.2, 0.35, 0.4, 0.6, 0.7, 0.75, 0.76, 0.8]

    da = xr.DataArray(
        rs.random((8, 6)),
        dims=["x", "y"],
        coords={
            "x": coord,
            "x2": (("x",), rs.random(8)),
            "z": 3,
            "x2d": (("x", "y"), rs.random((8, 6))),
        },
    )
    if dask and has_dask:
        da = da.chunk({"x": 4})

    ds = xr.Dataset({"var": da})

    # along x
    actual = da.cumulative_integrate("x")

    # imported inside the function rather than at module level
    from scipy.integrate import cumulative_trapezoid

    # initial=0.0 makes the reference output the same length as the input
    expected_x = xr.DataArray(
        cumulative_trapezoid(da.compute(), da["x"], axis=0, initial=0.0),
        dims=["x", "y"],
        coords=da.coords,
    )
    assert_allclose(expected_x, actual.compute())
    assert_equal(
        ds["var"].cumulative_integrate("x"),
        ds.cumulative_integrate("x")["var"],
    )

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # along y
    actual = da.cumulative_integrate("y")
    expected_y = xr.DataArray(
        cumulative_trapezoid(da, da["y"], axis=1, initial=0.0),
        dims=["x", "y"],
        coords=da.coords,
    )
    assert_allclose(expected_y, actual.compute())
    assert_equal(actual, ds.cumulative_integrate("y")["var"])
    assert_equal(
        ds["var"].cumulative_integrate("y"),
        ds.cumulative_integrate("y")["var"],
    )

    # along x and y
    # cumulative integration over both dimensions keeps the result 2-D
    actual = da.cumulative_integrate(("y", "x"))
    assert actual.ndim == 2

    # integrating with respect to the 2-D coordinate is rejected
    with pytest.raises(ValueError):
        da.cumulative_integrate("x2d")
@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize("which_datetime", ["np", "cftime"])
def test_trapezoid_datetime(dask, which_datetime) -> None:
    """Check ``integrate`` along a datetime coordinate (numpy or cftime).

    The result is compared with ``trapezoid`` applied to the coordinate
    converted to numeric days, and the per-day and per-hour results are
    cross-checked. The cftime variant is skipped when cftime is missing.
    """
    rs = np.random.default_rng(42)
    coord: ArrayLike
    if which_datetime == "np":
        coord = np.array(
            [
                "2004-07-13",
                "2006-01-13",
                "2010-08-13",
                "2010-09-13",
                "2010-10-11",
                "2010-12-13",
                "2011-02-13",
                "2012-08-13",
            ],
            dtype="datetime64",
        )
    else:
        if not has_cftime:
            pytest.skip("Test requires cftime.")
        # ``xr.cftime_range`` is deprecated in newer xarray releases;
        # ``date_range`` with ``use_cftime=True`` builds the same index.
        coord = xr.date_range("2000", periods=8, freq="2D", use_cftime=True)

    da = xr.DataArray(
        rs.random((8, 6)),
        coords={"time": coord, "z": 3, "t2d": (("time", "y"), rs.random((8, 6)))},
        dims=["time", "y"],
    )

    if dask and has_dask:
        da = da.chunk({"time": 4})

    actual = da.integrate("time", datetime_unit="D")
    expected_data = trapezoid(
        da.compute().data,
        duck_array_ops.datetime_to_numeric(da["time"].data, datetime_unit="D"),
        axis=0,
    )
    # coordinates along "time" are not expected in the integrated result
    expected = xr.DataArray(
        expected_data,
        dims=["y"],
        coords={k: v for k, v in da.coords.items() if "time" not in v.dims},
    )
    assert_allclose(expected, actual.compute())

    # make sure result is also a dask array (if the source is dask array)
    assert isinstance(actual.data, type(da.data))

    # an integral measured in hours is 24 times the one measured in days
    actual2 = da.integrate("time", datetime_unit="h")
    assert_allclose(actual, actual2 / 24.0)
def test_no_dict() -> None:
    """A ``Dataset`` instance must not expose a ``__dict__`` attribute."""
    dataset = Dataset()
    with pytest.raises(AttributeError):
        _ = dataset.__dict__
def test_subclass_slots() -> None:
    """Defining a ``Dataset`` subclass without ``__slots__`` raises.

    .. note::
       As of 0.13.0, this is actually mitigated into a FutureWarning for any
       class defined outside of the xarray package.
    """
    with pytest.raises(AttributeError) as excinfo:

        class MyDS(Dataset):
            pass

    # The message is compared exactly, not as a regex.
    message = str(excinfo.value)
    assert message == "MyDS must explicitly define __slots__"
def test_weakref() -> None:
    """A ``Dataset`` can be the target of a weak reference.

    Classes using ``__slots__`` only support the weakref module when
    ``__weakref__`` is listed among their slots.
    """
    import weakref

    dataset = Dataset()
    weak = weakref.ref(dataset)
    # Dereferencing while the object is alive must return the same object.
    assert weak() is dataset
def test_deepcopy_obj_array() -> None:
    """Deep-copying a Dataset also copies objects held in an object array."""
    original = Dataset({"foo": DataArray(np.array([object()]))})
    duplicate = deepcopy(original)
    source_item = original["foo"].values[0]
    copied_item = duplicate["foo"].values[0]
    assert source_item is not copied_item
def test_deepcopy_recursive() -> None:
    """Deep copies of self-referencing Datasets must not recurse forever.

    Regression test for GH:issue:7111.
    """
    # Direct recursion: the dataset refers to itself through its attrs.
    first = xr.Dataset({"a": (["x"], [1, 2])})
    first.attrs["other"] = first

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    first.copy(deep=True)

    # Indirect recursion: two datasets refer to each other through attrs.
    second = xr.Dataset({"b": (["y"], [3, 4])})
    first.attrs["other"] = second
    second.attrs["other"] = first

    # TODO: cannot use assert_identical on recursive Vars yet...
    # lets just ensure that deep copy works without RecursionError
    first.copy(deep=True)
    second.copy(deep=True)
def test_clip(ds) -> None:
    """Exercise ``Dataset.clip`` with scalar and Dataset-valued bounds."""
    # Lower bound only.
    clipped_low = ds.clip(min=0.5)
    assert all((clipped_low.min(...) >= 0.5).values())

    # Upper bound only.
    clipped_high = ds.clip(max=0.5)
    assert all((clipped_high.max(...) <= 0.5).values())

    # Both bounds at once.
    clipped_both = ds.clip(min=0.25, max=0.75)
    assert all((clipped_both.min(...) >= 0.25).values())
    assert all((clipped_both.max(...) <= 0.75).values())

    # Bounds given as Datasets reduced over "y": sizes must be unchanged.
    clipped_to_mean = ds.clip(min=ds.mean("y"), max=ds.mean("y"))
    assert clipped_to_mean.sizes == ds.sizes
class TestDropDuplicates:
    """Tests for ``Dataset.drop_duplicates``."""

    @pytest.mark.parametrize("keep", ["first", "last", False])
    def test_drop_duplicates_1d(self, keep) -> None:
        """Drop duplicated ``time`` labels under each ``keep`` policy.

        Also checks that an unknown dimension name raises ``ValueError``.
        """
        ds = xr.Dataset(
            {"a": ("time", [0, 5, 6, 7]), "b": ("time", [9, 3, 8, 2])},
            coords={"time": [0, 0, 1, 2]},
        )

        # Expected (a, b, time) values for every parametrized ``keep`` value.
        expected_by_keep = {
            "first": ([0, 6, 7], [9, 8, 2], [0, 1, 2]),
            "last": ([5, 6, 7], [3, 8, 2], [0, 1, 2]),
            False: ([6, 7], [8, 2], [1, 2]),
        }
        a_values, b_values, time_values = expected_by_keep[keep]

        expected = xr.Dataset(
            {"a": ("time", a_values), "b": ("time", b_values)},
            coords={"time": time_values},
        )
        actual = ds.drop_duplicates("time", keep=keep)
        assert_equal(expected, actual)

        missing_dim_message = re.escape(
            "Dimensions ('space',) not found in data dimensions ('time',)"
        )
        with pytest.raises(ValueError, match=missing_dim_message):
            ds.drop_duplicates("space", keep=keep)
class TestNumpyCoercion:
    """Tests for ``Dataset.as_numpy`` with different backing array types."""

    def test_from_numpy(self) -> None:
        """A numpy-backed dataset is identical to its ``as_numpy`` result."""
        dataset = xr.Dataset(
            {"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}
        )

        assert_identical(dataset.as_numpy(), dataset)

    @requires_dask
    def test_from_dask(self) -> None:
        """A chunked dataset converts to the same thing as ``compute()``."""
        dataset = xr.Dataset(
            {"a": ("x", [1, 2, 3])}, coords={"lat": ("x", [4, 5, 6])}
        )
        chunked = dataset.chunk(1)

        assert_identical(chunked.as_numpy(), dataset.compute())

    @requires_pint
    def test_from_pint(self) -> None:
        """Pint quantities are converted to plain arrays of their values."""
        from pint import Quantity

        base = np.array([1, 2, 3])
        dataset = xr.Dataset(
            {"a": ("x", Quantity(base, units="Pa"))},
            coords={"lat": ("x", Quantity(base + 3, units="m"))},
        )

        expected = xr.Dataset(
            {"a": ("x", [1, 2, 3])}, coords={"lat": ("x", base + 3)}
        )
        assert_identical(dataset.as_numpy(), expected)

    @requires_sparse
    def test_from_sparse(self) -> None:
        """Sparse COO arrays are converted to their dense equivalents."""
        import sparse

        dense = np.diagflat([1, 2, 3])
        sparse_arr = sparse.COO.from_numpy(dense)
        dataset = xr.Dataset(
            {"a": (["x", "y"], sparse_arr)},
            coords={"elev": (("x", "y"), sparse_arr + 3)},
        )

        expected = xr.Dataset(
            {"a": (["x", "y"], dense)}, coords={"elev": (("x", "y"), dense + 3)}
        )
        assert_identical(dataset.as_numpy(), expected)

    @requires_cupy
    def test_from_cupy(self) -> None:
        """Cupy arrays are converted to numpy arrays with the same values."""
        import cupy as cp

        base = np.array([1, 2, 3])
        dataset = xr.Dataset(
            {"a": ("x", cp.array(base))},
            coords={"lat": ("x", cp.array(base + 3))},
        )

        expected = xr.Dataset(
            {"a": ("x", [1, 2, 3])}, coords={"lat": ("x", base + 3)}
        )
        assert_identical(dataset.as_numpy(), expected)

    @requires_dask
    @requires_pint
    def test_from_pint_wrapping_dask(self) -> None:
        """Pint quantities wrapping dask arrays convert to plain numpy."""
        import dask
        from pint import Quantity

        base = np.array([1, 2, 3])
        lazy = dask.array.from_array(base)
        dataset = xr.Dataset(
            {"a": ("x", Quantity(lazy, units="Pa"))},
            coords={"lat": ("x", Quantity(lazy, units="m") * 2)},
        )

        converted = dataset.as_numpy()
        expected = xr.Dataset(
            {"a": ("x", base)}, coords={"lat": ("x", base * 2)}
        )
        assert_identical(converted, expected)
def test_string_keys_typing() -> None:
    """Tests that string keys to `variables` are permitted by mypy"""
    array = xr.DataArray(np.arange(10), dims=["x"])
    dataset = xr.Dataset({"x": array})
    # Bound to its own name on purpose: the type checker infers the mapping's
    # type from this assignment before it is passed to ``assign``.
    new_variables = {"y": array}
    dataset.assign(variables=new_variables)
def test_transpose_error() -> None:
    """Passing a list of dims to ``Dataset.transpose`` raises ``TypeError``."""
    ds = xr.Dataset({"foo": (("x", "y"), [[21]]), "bar": (("x", "y"), [[12]])})

    # The message contains regex metacharacters, hence the escaping.
    expected_message = re.escape(
        "transpose requires dim to be passed as multiple arguments. Expected `'y', 'x'`. Received `['y', 'x']` instead"
    )
    with pytest.raises(TypeError, match=expected_message):
        ds.transpose(["y", "x"])  # type: ignore[arg-type]