from __future__ import annotations from typing import Any import numpy as np import pandas as pd import pytest import xarray as xr from xarray import DataArray, Dataset, set_options from xarray.tests import ( assert_allclose, assert_equal, assert_identical, has_dask, requires_dask, requires_dask_ge_2024_11_0, requires_numbagg, ) pytestmark = [ pytest.mark.filterwarnings("error:Mean of empty slice"), pytest.mark.filterwarnings("error:All-NaN (slice|axis) encountered"), ] @pytest.mark.parametrize("func", ["mean", "sum"]) @pytest.mark.parametrize("min_periods", [1, 10]) def test_cumulative(d, func, min_periods) -> None: # One dim result = getattr(d.cumulative("z", min_periods=min_periods), func)() expected = getattr(d.rolling(z=d["z"].size, min_periods=min_periods), func)() assert_identical(result, expected) # Multiple dim result = getattr(d.cumulative(["z", "x"], min_periods=min_periods), func)() expected = getattr( d.rolling(z=d["z"].size, x=d["x"].size, min_periods=min_periods), func, )() assert_identical(result, expected) def test_cumulative_vs_cum(d) -> None: result = d.cumulative("z").sum() expected = d.cumsum("z") # cumsum drops the coord of the dimension; cumulative doesn't expected = expected.assign_coords(z=result["z"]) assert_identical(result, expected) class TestDataArrayRolling: @pytest.mark.parametrize("da", (1, 2), indirect=True) @pytest.mark.parametrize("center", [True, False]) @pytest.mark.parametrize("size", [1, 2, 3, 7]) def test_rolling_iter(self, da: DataArray, center: bool, size: int) -> None: rolling_obj = da.rolling(time=size, center=center) rolling_obj_mean = rolling_obj.mean() assert len(rolling_obj.window_labels) == len(da["time"]) assert_identical(rolling_obj.window_labels, da["time"]) for i, (label, window_da) in enumerate(rolling_obj): assert label == da["time"].isel(time=i) actual = rolling_obj_mean.isel(time=i) expected = window_da.mean("time") np.testing.assert_allclose(actual.values, expected.values) @pytest.mark.parametrize("da", (1,), indirect=True) def test_rolling_repr(self, da) -> None: rolling_obj = da.rolling(time=7) assert repr(rolling_obj) == "DataArrayRolling [time->7]" rolling_obj = da.rolling(time=7, center=True) assert repr(rolling_obj) == "DataArrayRolling [time->7(center)]" rolling_obj = da.rolling(time=7, x=3, center=True) assert repr(rolling_obj) == "DataArrayRolling [time->7(center),x->3(center)]" @requires_dask def test_repeated_rolling_rechunks(self) -> None: # regression test for GH3277, GH2514 dat = DataArray(np.random.rand(7653, 300), dims=("day", "item")) dat_chunk = dat.chunk({"item": 20}) dat_chunk.rolling(day=10).mean().rolling(day=250).std() def test_rolling_doc(self, da) -> None: rolling_obj = da.rolling(time=7) # argument substitution worked assert "`mean`" in rolling_obj.mean.__doc__ def test_rolling_properties(self, da) -> None: rolling_obj = da.rolling(time=4) assert rolling_obj.obj.get_axis_num("time") == 1 # catching invalid args with pytest.raises(ValueError, match="window must be > 0"): da.rolling(time=-2) with pytest.raises(ValueError, match="min_periods must be greater than zero"): da.rolling(time=2, min_periods=0) with pytest.raises( KeyError, match=r"\('foo',\) not found in DataArray dimensions", ): da.rolling(foo=2) @requires_dask @pytest.mark.parametrize( "name", ("sum", "mean", "std", "min", "max", "median", "argmin", "argmax") ) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("backend", ["numpy", "dask"], indirect=True) def test_rolling_wrapped_bottleneck( self, da, name, center, min_periods, compute_backend ) -> None: bn = pytest.importorskip("bottleneck", minversion="1.1") # Test all bottleneck functions rolling_obj = da.rolling(time=7, min_periods=min_periods) func_name = f"move_{name}" actual = getattr(rolling_obj, name)() window = 7 expected = getattr(bn, func_name)( da.values, window=window, axis=1, min_count=min_periods ) # index 0 is at the rightmost edge of the window # need to reverse index here # see GH #8541 if func_name in ["move_argmin", "move_argmax"]: expected = window - 1 - expected # Using assert_allclose because we get tiny (1e-17) differences in numbagg. np.testing.assert_allclose(actual.values, expected) with pytest.warns(DeprecationWarning, match="Reductions are applied"): getattr(rolling_obj, name)(dim="time") # Test center rolling_obj = da.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] # Using assert_allclose because we get tiny (1e-17) differences in numbagg. assert_allclose(actual, da["time"]) @requires_dask @pytest.mark.parametrize("name", ("mean", "count")) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("window", (7, 8)) @pytest.mark.parametrize("backend", ["dask"], indirect=True) def test_rolling_wrapped_dask(self, da, name, center, min_periods, window) -> None: # dask version rolling_obj = da.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() if name != "count": with pytest.warns(DeprecationWarning, match="Reductions are applied"): getattr(rolling_obj, name)(dim="time") # numpy version rolling_obj = da.load().rolling( time=window, min_periods=min_periods, center=center ) expected = getattr(rolling_obj, name)() # using all-close because rolling over ghost cells introduces some # precision errors assert_allclose(actual, expected) # with zero chunked array GH:2113 rolling_obj = da.chunk().rolling( time=window, min_periods=min_periods, center=center ) actual = getattr(rolling_obj, name)().load() assert_allclose(actual, expected) @pytest.mark.parametrize("center", (True, None)) def test_rolling_wrapped_dask_nochunk(self, center) -> None: # GH:2113 pytest.importorskip("dask.array") da_day_clim = xr.DataArray( np.arange(1, 367), coords=[np.arange(1, 367)], dims="dayofyear" ) expected = da_day_clim.rolling(dayofyear=31, center=center).mean() actual = da_day_clim.chunk().rolling(dayofyear=31, center=center).mean() assert_allclose(actual, expected) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) def test_rolling_pandas_compat( self, center, window, min_periods, compute_backend ) -> None: s = pd.Series(np.arange(10)) da = DataArray.from_series(s) if min_periods is not None and window < min_periods: min_periods = window s_rolling = s.rolling(window, center=center, min_periods=min_periods).mean() da_rolling = da.rolling( index=window, center=center, min_periods=min_periods ).mean() da_rolling_np = da.rolling( index=window, center=center, min_periods=min_periods ).reduce(np.nanmean) np.testing.assert_allclose(np.asarray(s_rolling.values), da_rolling.values) np.testing.assert_allclose(s_rolling.index, da_rolling["index"]) np.testing.assert_allclose(np.asarray(s_rolling.values), da_rolling_np.values) np.testing.assert_allclose(s_rolling.index, da_rolling_np["index"]) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) def test_rolling_construct(self, center: bool, window: int) -> None: s = pd.Series(np.arange(10)) da = DataArray.from_series(s) s_rolling = s.rolling(window, center=center, min_periods=1).mean() da_rolling = da.rolling(index=window, center=center, min_periods=1) da_rolling_mean = da_rolling.construct("window").mean("window") np.testing.assert_allclose(np.asarray(s_rolling.values), da_rolling_mean.values) np.testing.assert_allclose(s_rolling.index, da_rolling_mean["index"]) # with stride da_rolling_mean = da_rolling.construct("window", stride=2).mean("window") np.testing.assert_allclose( np.asarray(s_rolling.values[::2]), da_rolling_mean.values ) np.testing.assert_allclose(s_rolling.index[::2], da_rolling_mean["index"]) # with fill_value da_rolling_mean = da_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" ) assert da_rolling_mean.isnull().sum() == 0 assert (da_rolling_mean == 0.0).sum() >= 0 @pytest.mark.parametrize("da", (1, 2), indirect=True) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) def test_rolling_reduce( self, da, center, min_periods, window, name, compute_backend ) -> None: if min_periods is not None and window < min_periods: min_periods = window if da.isnull().sum() > 1 and window == 1: # this causes all nan slices window = 2 rolling_obj = da.rolling(time=window, center=center, min_periods=min_periods) # add nan prefix to numpy methods to get similar # behavior as bottleneck actual = rolling_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "max")) def test_rolling_reduce_nonnumeric( self, center, min_periods, window, name, compute_backend ) -> None: da = DataArray( [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time" ).isnull() if min_periods is not None and window < min_periods: min_periods = window rolling_obj = da.rolling(time=window, center=center, min_periods=min_periods) # add nan prefix to numpy methods to get similar behavior as bottleneck actual = rolling_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) assert actual.sizes == expected.sizes def test_rolling_count_correct(self, compute_backend) -> None: da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") kwargs: list[dict[str, Any]] = [ {"time": 11, "min_periods": 1}, {"time": 11, "min_periods": None}, {"time": 7, "min_periods": 2}, ] expecteds = [ DataArray([1, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8], dims="time"), DataArray( [ np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, ], dims="time", ), DataArray([np.nan, np.nan, 2, 3, 3, 4, 5, 5, 5, 5, 5], dims="time"), ] for kwarg, expected in zip(kwargs, expecteds, strict=True): result = da.rolling(**kwarg).count() assert_equal(result, expected) result = da.to_dataset(name="var1").rolling(**kwarg).count()["var1"] assert_equal(result, expected) @pytest.mark.parametrize("da", (1,), indirect=True) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1)) @pytest.mark.parametrize("name", ("sum", "mean", "max")) def test_ndrolling_reduce( self, da, center, min_periods, name, compute_backend ) -> None: rolling_obj = da.rolling(time=3, x=2, center=center, min_periods=min_periods) actual = getattr(rolling_obj, name)() expected = getattr( getattr( da.rolling(time=3, center=center, min_periods=min_periods), name )().rolling(x=2, center=center, min_periods=min_periods), name, )() assert_allclose(actual, expected) assert actual.sizes == expected.sizes if name in ["mean"]: # test our reimplementation of nanmean using np.nanmean expected = getattr(rolling_obj.construct({"time": "tw", "x": "xw"}), name)( ["tw", "xw"] ) count = rolling_obj.count() if min_periods is None: min_periods = 1 assert_allclose(actual, expected.where(count >= min_periods)) @pytest.mark.parametrize("center", (True, False, (True, False))) @pytest.mark.parametrize("fill_value", (np.nan, 0.0)) def test_ndrolling_construct(self, center, fill_value) -> None: da = DataArray( np.arange(5 * 6 * 7).reshape(5, 6, 7).astype(float), dims=["x", "y", "z"], coords={"x": ["a", "b", "c", "d", "e"], "y": np.arange(6)}, ) actual = da.rolling(x=3, z=2, center=center).construct( x="x1", z="z1", fill_value=fill_value ) if not isinstance(center, tuple): center = (center, center) expected = ( da.rolling(x=3, center=center[0]) .construct(x="x1", fill_value=fill_value) .rolling(z=2, center=center[1]) .construct(z="z1", fill_value=fill_value) ) assert_allclose(actual, expected) @pytest.mark.parametrize( "funcname, argument", [ ("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",)), ("count", ()), ], ) def test_rolling_keep_attrs(self, funcname, argument) -> None: attrs_da = {"da_attr": "test"} data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) da = DataArray( data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da, name="name" ) # attrs are now kept per default func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da assert result.name == "name" # discard attrs func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.name == "name" # test discard attrs using global option func = getattr(da.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} assert result.name == "name" # keyword takes precedence over global option func = getattr(da.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument, keep_attrs=True) assert result.attrs == attrs_da assert result.name == "name" func = getattr(da.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.name == "name" @requires_dask @pytest.mark.parametrize("dtype", ["int", "float32", "float64"]) def test_rolling_dask_dtype(self, dtype) -> None: data = DataArray( np.array([1, 2, 3], dtype=dtype), dims="x", coords={"x": [1, 2, 3]} ) unchunked_result = data.rolling(x=3, min_periods=1).mean() chunked_result = data.chunk({"x": 1}).rolling(x=3, min_periods=1).mean() assert chunked_result.dtype == unchunked_result.dtype @requires_numbagg class TestDataArrayRollingExp: @pytest.mark.parametrize("dim", ["time", "x"]) @pytest.mark.parametrize( "window_type, window", [["span", 5], ["alpha", 0.5], ["com", 0.5], ["halflife", 5]], ) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) @pytest.mark.parametrize("func", ["mean", "sum", "var", "std"]) def test_rolling_exp_runs(self, da, dim, window_type, window, func) -> None: da = da.where(da > 0.2) rolling_exp = da.rolling_exp(window_type=window_type, **{dim: window}) result = getattr(rolling_exp, func)() assert isinstance(result, DataArray) @pytest.mark.parametrize("dim", ["time", "x"]) @pytest.mark.parametrize( "window_type, window", [["span", 5], ["alpha", 0.5], ["com", 0.5], ["halflife", 5]], ) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) def test_rolling_exp_mean_pandas(self, da, dim, window_type, window) -> None: da = da.isel(a=0).where(lambda x: x > 0.2) result = da.rolling_exp(window_type=window_type, **{dim: window}).mean() assert isinstance(result, DataArray) pandas_array = da.to_pandas() assert pandas_array.index.name == "time" if dim == "x": pandas_array = pandas_array.T expected = xr.DataArray( pandas_array.ewm(**{window_type: window}).mean() ).transpose(*da.dims) assert_allclose(expected.variable, result.variable) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) @pytest.mark.parametrize("func", ["mean", "sum"]) def test_rolling_exp_keep_attrs(self, da, func) -> None: attrs = {"attrs": "da"} da.attrs = attrs # Equivalent of `da.rolling_exp(time=10).mean` rolling_exp_func = getattr(da.rolling_exp(time=10), func) # attrs are kept per default result = rolling_exp_func() assert result.attrs == attrs # discard attrs result = rolling_exp_func(keep_attrs=False) assert result.attrs == {} # test discard attrs using global option with set_options(keep_attrs=False): result = rolling_exp_func() assert result.attrs == {} # keyword takes precedence over global option with set_options(keep_attrs=False): result = rolling_exp_func(keep_attrs=True) assert result.attrs == attrs with set_options(keep_attrs=True): result = rolling_exp_func(keep_attrs=False) assert result.attrs == {} with pytest.warns( UserWarning, match="Passing ``keep_attrs`` to ``rolling_exp`` has no effect.", ): da.rolling_exp(time=10, keep_attrs=True) class TestDatasetRolling: @pytest.mark.parametrize( "funcname, argument", [ ("reduce", (np.mean,)), ("mean", ()), ("construct", ("window_dim",)), ("count", ()), ], ) def test_rolling_keep_attrs(self, funcname, argument) -> None: global_attrs = {"units": "test", "long_name": "testing"} da_attrs = {"da_attr": "test"} da_not_rolled_attrs = {"da_not_rolled_attr": "test"} data = np.linspace(10, 15, 100) coords = np.linspace(1, 10, 100) ds = Dataset( data_vars={"da": ("coord", data), "da_not_rolled": ("no_coord", data)}, coords={"coord": coords}, attrs=global_attrs, ) ds.da.attrs = da_attrs ds.da_not_rolled.attrs = da_not_rolled_attrs # attrs are now kept per default func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == global_attrs assert result.da.attrs == da_attrs assert result.da_not_rolled.attrs == da_not_rolled_attrs assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" # discard attrs func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_rolled.attrs == {} assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" # test discard attrs using global option func = getattr(ds.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_rolled.attrs == {} assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" # keyword takes precedence over global option func = getattr(ds.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=False): result = func(*argument, keep_attrs=True) assert result.attrs == global_attrs assert result.da.attrs == da_attrs assert result.da_not_rolled.attrs == da_not_rolled_attrs assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" func = getattr(ds.rolling(dim={"coord": 5}), funcname) with set_options(keep_attrs=True): result = func(*argument, keep_attrs=False) assert result.attrs == {} assert result.da.attrs == {} assert result.da_not_rolled.attrs == {} assert result.da.name == "da" assert result.da_not_rolled.name == "da_not_rolled" def test_rolling_properties(self, ds) -> None: # catching invalid args with pytest.raises(ValueError, match="window must be > 0"): ds.rolling(time=-2) with pytest.raises(ValueError, match="min_periods must be greater than zero"): ds.rolling(time=2, min_periods=0) with pytest.raises(KeyError, match="time2"): ds.rolling(time2=2) with pytest.raises( KeyError, match=r"\('foo',\) not found in Dataset dimensions", ): ds.rolling(foo=2) @requires_dask_ge_2024_11_0 def test_rolling_construct_automatic_rechunk(self): import dask # Construct dataset with chunk size of (400, 400, 1) or 1.22 MiB da = DataArray( dims=["latitute", "longitude", "time"], data=dask.array.random.random((400, 400, 400), chunks=(-1, -1, 1)), name="foo", ) for obj in [da, da.to_dataset()]: # Dataset now has chunks of size (400, 400, 100 100) or 11.92 GiB rechunked = obj.rolling(time=100, center=True).construct( "window", sliding_window_view_kwargs=dict( automatic_rechunk=True, writeable=False ), ) not_rechunked = obj.rolling(time=100, center=True).construct( "window", sliding_window_view_kwargs=dict( automatic_rechunk=False, writeable=True ), ) assert rechunked.chunksizes != not_rechunked.chunksizes roller = obj.isel(time=slice(30)).rolling(time=10, center=True) one = roller.reduce( np.sum, sliding_window_view_kwargs=dict(automatic_rechunk=True) ) two = roller.reduce( np.sum, sliding_window_view_kwargs=dict(automatic_rechunk=False) ) assert_identical(one, two) @pytest.mark.parametrize( "name", ("sum", "mean", "std", "var", "min", "max", "median") ) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("key", ("z1", "z2")) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) def test_rolling_wrapped_bottleneck( self, ds, name, center, min_periods, key, compute_backend ) -> None: bn = pytest.importorskip("bottleneck", minversion="1.1") # Test all bottleneck functions rolling_obj = ds.rolling(time=7, min_periods=min_periods) func_name = f"move_{name}" actual = getattr(rolling_obj, name)() if key == "z1": # z1 does not depend on 'Time' axis. Stored as it is. expected = ds[key] elif key == "z2": expected = getattr(bn, func_name)( ds[key].values, window=7, axis=0, min_count=min_periods ) else: raise ValueError np.testing.assert_allclose(actual[key].values, expected) # Test center rolling_obj = ds.rolling(time=7, center=center) actual = getattr(rolling_obj, name)()["time"] assert_allclose(actual, ds["time"]) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) def test_rolling_pandas_compat(self, center, window, min_periods) -> None: df = pd.DataFrame( { "x": np.random.randn(20), "y": np.random.randn(20), "time": np.linspace(0, 1, 20), } ) ds = Dataset.from_dataframe(df) if min_periods is not None and window < min_periods: min_periods = window df_rolling = df.rolling(window, center=center, min_periods=min_periods).mean() ds_rolling = ds.rolling( index=window, center=center, min_periods=min_periods ).mean() np.testing.assert_allclose( np.asarray(df_rolling["x"].values), ds_rolling["x"].values ) np.testing.assert_allclose(df_rolling.index, ds_rolling["index"]) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) def test_rolling_construct(self, center: bool, window: int) -> None: df = pd.DataFrame( { "x": np.random.randn(20), "y": np.random.randn(20), "time": np.linspace(0, 1, 20), } ) ds = Dataset.from_dataframe(df) df_rolling = df.rolling(window, center=center, min_periods=1).mean() ds_rolling = ds.rolling(index=window, center=center) ds_rolling_mean = ds_rolling.construct("window").mean("window") np.testing.assert_allclose( np.asarray(df_rolling["x"].values), ds_rolling_mean["x"].values ) np.testing.assert_allclose(df_rolling.index, ds_rolling_mean["index"]) # with fill_value ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" ) assert (ds_rolling_mean.isnull().sum() == 0).to_dataarray(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) def test_rolling_construct_stride(self, center: bool, window: int) -> None: df = pd.DataFrame( { "x": np.random.randn(20), "y": np.random.randn(20), "time": np.linspace(0, 1, 20), } ) ds = Dataset.from_dataframe(df) df_rolling_mean = df.rolling(window, center=center, min_periods=1).mean() # With an index (dimension coordinate) ds_rolling = ds.rolling(index=window, center=center) ds_rolling_mean = ds_rolling.construct("w", stride=2).mean("w") np.testing.assert_allclose( np.asarray(df_rolling_mean["x"][::2].values), ds_rolling_mean["x"].values ) np.testing.assert_allclose(df_rolling_mean.index[::2], ds_rolling_mean["index"]) # Without index (https://github.com/pydata/xarray/issues/7021) ds2 = ds.drop_vars("index") ds2_rolling = ds2.rolling(index=window, center=center) ds2_rolling_mean = ds2_rolling.construct("w", stride=2).mean("w") np.testing.assert_allclose( np.asarray(df_rolling_mean["x"][::2].values), ds2_rolling_mean["x"].values ) # Mixed coordinates, indexes and 2D coordinates ds3 = xr.Dataset( {"x": ("t", range(20)), "x2": ("y", range(5))}, { "t": range(20), "y": ("y", range(5)), "t2": ("t", range(20)), "y2": ("y", range(5)), "yt": (["t", "y"], np.ones((20, 5))), }, ) ds3_rolling = ds3.rolling(t=window, center=center) ds3_rolling_mean = ds3_rolling.construct("w", stride=2).mean("w") for coord in ds3.coords: assert coord in ds3_rolling_mean.coords @pytest.mark.slow @pytest.mark.parametrize("ds", (1, 2), indirect=True) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize( "name", ("sum", "mean", "std", "var", "min", "max", "median") ) def test_rolling_reduce(self, ds, center, min_periods, window, name) -> None: if min_periods is not None and window < min_periods: min_periods = window if name == "std" and window == 1: pytest.skip("std with window == 1 is unstable in bottleneck") rolling_obj = ds.rolling(time=window, center=center, min_periods=min_periods) # add nan prefix to numpy methods to get similar behavior as bottleneck actual = rolling_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(rolling_obj, name)() assert_allclose(actual, expected) assert ds.sizes == actual.sizes # make sure the order of data_var are not changed. assert list(ds.data_vars.keys()) == list(actual.data_vars.keys()) # Make sure the dimension order is restored for key, src_var in ds.data_vars.items(): assert src_var.dims == actual[key].dims @pytest.mark.parametrize("ds", (2,), indirect=True) @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1)) @pytest.mark.parametrize("name", ("sum", "max")) @pytest.mark.parametrize("dask", (True, False)) def test_ndrolling_reduce(self, ds, center, min_periods, name, dask) -> None: if dask and has_dask: ds = ds.chunk({"x": 4}) rolling_obj = ds.rolling(time=4, x=3, center=center, min_periods=min_periods) actual = getattr(rolling_obj, name)() expected = getattr( getattr( ds.rolling(time=4, center=center, min_periods=min_periods), name )().rolling(x=3, center=center, min_periods=min_periods), name, )() assert_allclose(actual, expected) assert actual.sizes == expected.sizes # Do it in the opposite order expected = getattr( getattr( ds.rolling(x=3, center=center, min_periods=min_periods), name )().rolling(time=4, center=center, min_periods=min_periods), name, )() assert_allclose(actual, expected) assert actual.sizes == expected.sizes @pytest.mark.parametrize("center", (True, False, (True, False))) @pytest.mark.parametrize("fill_value", (np.nan, 0.0)) @pytest.mark.parametrize("dask", (True, False)) def test_ndrolling_construct(self, center, fill_value, dask) -> None: da = DataArray( np.arange(5 * 6 * 7).reshape(5, 6, 7).astype(float), dims=["x", "y", "z"], coords={"x": ["a", "b", "c", "d", "e"], "y": np.arange(6)}, ) ds = xr.Dataset({"da": da}) if dask and has_dask: ds = ds.chunk({"x": 4}) actual = ds.rolling(x=3, z=2, center=center).construct( x="x1", z="z1", fill_value=fill_value ) if not isinstance(center, tuple): center = (center, center) expected = ( ds.rolling(x=3, center=center[0]) .construct(x="x1", fill_value=fill_value) .rolling(z=2, center=center[1]) .construct(z="z1", fill_value=fill_value) ) assert_allclose(actual, expected) @requires_dask @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("ds", (2,), indirect=True) @pytest.mark.parametrize("name", ("mean", "max")) def test_raise_no_warning_dask_rolling_assert_close(self, ds, name) -> None: """ This is a puzzle — I can't easily find the source of the warning. It requires `assert_allclose` to be run, for the `ds` param to be 2, and is different for `mean` and `max`. `sum` raises no warning. """ ds = ds.chunk({"x": 4}) rolling_obj = ds.rolling(time=4, x=3) actual = getattr(rolling_obj, name)() expected = getattr(getattr(ds.rolling(time=4), name)().rolling(x=3), name)() assert_allclose(actual, expected) @requires_numbagg class TestDatasetRollingExp: @pytest.mark.parametrize( "backend", ["numpy", pytest.param("dask", marks=requires_dask)], indirect=True ) def test_rolling_exp(self, ds) -> None: result = ds.rolling_exp(time=10, window_type="span").mean() assert isinstance(result, Dataset) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) def test_rolling_exp_keep_attrs(self, ds) -> None: attrs_global = {"attrs": "global"} attrs_z1 = {"attr": "z1"} ds.attrs = attrs_global ds.z1.attrs = attrs_z1 # attrs are kept per default result = ds.rolling_exp(time=10).mean() assert result.attrs == attrs_global assert result.z1.attrs == attrs_z1 # discard attrs result = ds.rolling_exp(time=10).mean(keep_attrs=False) assert result.attrs == {} # TODO: from #8114 — this arguably should be empty, but `apply_ufunc` doesn't do # that at the moment. We should change in `apply_func` rather than # special-case it here. # # assert result.z1.attrs == {} # test discard attrs using global option with set_options(keep_attrs=False): result = ds.rolling_exp(time=10).mean() assert result.attrs == {} # See above # assert result.z1.attrs == {} # keyword takes precedence over global option with set_options(keep_attrs=False): result = ds.rolling_exp(time=10).mean(keep_attrs=True) assert result.attrs == attrs_global assert result.z1.attrs == attrs_z1 with set_options(keep_attrs=True): result = ds.rolling_exp(time=10).mean(keep_attrs=False) assert result.attrs == {} # See above # assert result.z1.attrs == {} with pytest.warns( UserWarning, match="Passing ``keep_attrs`` to ``rolling_exp`` has no effect.", ): ds.rolling_exp(time=10, keep_attrs=True)