# Source: CCR/.venv/lib/python3.12/site-packages/xarray/tests/test_indexing.py
# (1055 lines, 36 KiB, Python)

from __future__ import annotations
import itertools
from typing import Any
import numpy as np
import pandas as pd
import pytest
from xarray import DataArray, Dataset, Variable
from xarray.core import indexing, nputils
from xarray.core.indexes import PandasIndex, PandasMultiIndex
from xarray.core.types import T_Xarray
from xarray.tests import (
IndexerMaker,
ReturnItem,
assert_array_equal,
assert_identical,
raise_if_dask_computes,
requires_dask,
)
# Shorthand used by the wrapper-array tests below: ``B[key]`` is passed wherever
# an explicit indexer is required (e.g. ``wrapped[B[:]] = 0``).
# NOTE(review): presumably ``IndexerMaker.__getitem__`` wraps the key in the given
# indexer class (here ``indexing.BasicIndexer``) -- confirm in ``xarray.tests``.
B = IndexerMaker(indexing.BasicIndexer)
class TestIndexCallable:
    """Exercise item access on ``indexing.IndexCallable``."""

    def test_getitem(self):
        """Subscripting yields the value produced by the getter for that key."""

        def double(key):
            return key * 2

        doubled = indexing.IndexCallable(double)
        for key, want in ((3, 6), (0, 0), (-1, -2)):
            assert doubled[key] == want

    def test_setitem(self):
        """Item assignment surfaces the exception raised by the setter."""

        def double(key):
            return key * 2

        def refuse(key, value):
            raise NotImplementedError("Setter not implemented")

        target = indexing.IndexCallable(double, refuse)
        with pytest.raises(NotImplementedError):
            target[3] = 6
class TestIndexers:
    """Tests for key expansion, indexer grouping and label-based queries."""

    def set_to_zero(self, x, i):
        """Return a copy of ``x`` whose elements addressed by ``i`` are set to 0."""
        zeroed = x.copy()
        zeroed[i] = 0
        return zeroed

    def test_expanded_indexer(self) -> None:
        """The expanded key must read and write the same elements as the raw key."""
        x = np.random.randn(10, 11, 12, 13, 14)
        y = np.arange(5)
        arr = ReturnItem()
        keys = [
            arr[:],
            arr[...],
            arr[0, :, 10],
            arr[..., 10],
            arr[:5, ..., 0],
            arr[..., 0, :],
            arr[y],
            arr[y, y],
            arr[..., y, y],
            arr[..., 0, 1, 2, 3, 4],
        ]
        for key in keys:
            expanded = indexing.expanded_indexer(key, x.ndim)
            assert_array_equal(x[key], x[expanded])
            assert_array_equal(
                self.set_to_zero(x, key), self.set_to_zero(x, expanded)
            )

        # Three indices cannot be expanded into two dimensions.
        with pytest.raises(IndexError, match=r"too many indices"):
            indexing.expanded_indexer(arr[1, 2, 3], 2)

    def test_stacked_multiindex_min_max(self) -> None:
        """``.loc`` accepts the min/max of a stacked coordinate as a label."""
        values = np.random.randn(3, 23, 4)
        coords = {
            "replicate": [0, 1, 2],
            "exp": ["a", "b", "c", "d"],
            "rsample": list(range(23)),
        }
        unstacked = DataArray(
            values,
            name="value",
            dims=["replicate", "rsample", "exp"],
            coords=coords,
        )
        stacked = unstacked.stack(sample=("replicate", "rsample"))
        sample = stacked.sample
        assert_array_equal(stacked.loc["a", sample.max()], values[2, 22, 0])
        assert_array_equal(stacked.loc["b", sample.min()], values[0, 0, 1])

    def test_group_indexers_by_index(self) -> None:
        """Indexers are grouped per index; invalid queries raise."""
        mindex = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=("one", "two"))
        data = DataArray(
            np.zeros((4, 2, 2)), coords={"x": mindex, "y": [1, 2]}, dims=("x", "y", "z")
        )
        data.coords["y2"] = ("y", [2.0, 3.0])

        query = {"z": 0, "one": "a", "two": 1, "y": 0}
        groups = indexing.group_indexers_by_index(data, query, {})
        for index, labels in groups:
            if index is None:
                # Dimension "z" has no index attached.
                assert labels == {"z": 0}
            elif index.equals(data.xindexes["x"]):
                assert labels == {"one": "a", "two": 1}
            elif index.equals(data.xindexes["y"]):
                assert labels == {"y": 0}
        assert len(groups) == 3

        with pytest.raises(KeyError, match=r"no index found for coordinate 'y2'"):
            indexing.group_indexers_by_index(data, {"y2": 2.0}, {})

        with pytest.raises(
            KeyError, match=r"'w' is not a valid dimension or coordinate"
        ):
            indexing.group_indexers_by_index(data, {"w": "a"}, {})

        with pytest.raises(ValueError, match=r"cannot supply.*"):
            indexing.group_indexers_by_index(data, {"z": 1}, {"method": "nearest"})

    def test_map_index_queries(self) -> None:
        """``map_index_queries`` results are compared field by field with expectations."""

        def build_sel_result(
            x_indexer,
            x_index,
            other_vars,
            drop_coords,
            drop_indexes,
            rename_dims,
        ):
            # Every variable created by the index maps back to that same index.
            index_vars = x_index.create_variables()
            return indexing.IndexSelResult(
                dim_indexers={"x": x_indexer},
                indexes=dict.fromkeys(index_vars, x_index),
                variables={**index_vars, **other_vars},
                drop_coords=drop_coords,
                drop_indexes=drop_indexes,
                rename_dims=rename_dims,
            )

        def check_query(
            data: T_Xarray,
            x: Any,
            expected: indexing.IndexSelResult,
        ) -> None:
            got = indexing.map_index_queries(data, {"x": x})

            assert got.dim_indexers.keys() == expected.dim_indexers.keys()
            assert_array_equal(got.dim_indexers["x"], expected.dim_indexers["x"])

            assert got.indexes.keys() == expected.indexes.keys()
            for name in got.indexes:
                assert got.indexes[name].equals(expected.indexes[name])

            assert got.variables.keys() == expected.variables.keys()
            for name in got.variables:
                assert_array_equal(got.variables[name], expected.variables[name])

            assert set(got.drop_coords) == set(expected.drop_coords)
            assert set(got.drop_indexes) == set(expected.drop_indexes)
            assert got.rename_dims == expected.rename_dims

        data = Dataset({"x": ("x", [1, 2, 3])})
        mindex = pd.MultiIndex.from_product(
            [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three")
        )
        mdata = DataArray(range(8), [("x", mindex)])

        # Scalar labels on a plain index.
        check_query(data, 1, indexing.IndexSelResult({"x": 0}))
        check_query(data, np.int32(1), indexing.IndexSelResult({"x": 0}))
        check_query(data, Variable([], 1), indexing.IndexSelResult({"x": 0}))

        # Tuple / scalar labels on a multi-index.
        check_query(mdata, ("a", 1, -1), indexing.IndexSelResult({"x": 0}))

        expected = build_sel_result(
            [True, True, False, False, False, False, False, False],
            PandasIndex(pd.Index([-1, -2]), "three"),
            {"one": Variable((), "a"), "two": Variable((), 1)},
            ["x"],
            ["one", "two"],
            {"x": "three"},
        )
        check_query(mdata, ("a", 1), expected)

        expected = build_sel_result(
            slice(0, 4, None),
            PandasMultiIndex(
                pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")),
                "x",
            ),
            {"one": Variable((), "a")},
            [],
            ["one"],
            {},
        )
        check_query(mdata, "a", expected)

        expected = build_sel_result(
            [True, True, True, True, False, False, False, False],
            PandasMultiIndex(
                pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")),
                "x",
            ),
            {"one": Variable((), "a")},
            [],
            ["one"],
            {},
        )
        check_query(mdata, ("a",), expected)

        # Lists of tuples and slices on a multi-index.
        check_query(
            mdata, [("a", 1, -1), ("b", 2, -2)], indexing.IndexSelResult({"x": [0, 7]})
        )
        check_query(
            mdata, slice("a", "b"), indexing.IndexSelResult({"x": slice(0, 8, None)})
        )
        check_query(
            mdata,
            slice(("a", 1), ("b", 1)),
            indexing.IndexSelResult({"x": slice(0, 6, None)}),
        )

        # Dict-like labels keyed by level name.
        check_query(
            mdata,
            {"one": "a", "two": 1, "three": -1},
            indexing.IndexSelResult({"x": 0}),
        )

        expected = build_sel_result(
            [True, True, False, False, False, False, False, False],
            PandasIndex(pd.Index([-1, -2]), "three"),
            {"one": Variable((), "a"), "two": Variable((), 1)},
            ["x"],
            ["one", "two"],
            {"x": "three"},
        )
        check_query(mdata, {"one": "a", "two": 1}, expected)

        expected = build_sel_result(
            [True, False, True, False, False, False, False, False],
            PandasIndex(pd.Index([1, 2]), "two"),
            {"one": Variable((), "a"), "three": Variable((), -1)},
            ["x"],
            ["one", "three"],
            {"x": "two"},
        )
        check_query(mdata, {"one": "a", "three": -1}, expected)

        expected = build_sel_result(
            [True, True, True, True, False, False, False, False],
            PandasMultiIndex(
                pd.MultiIndex.from_product([[1, 2], [-1, -2]], names=("two", "three")),
                "x",
            ),
            {"one": Variable((), "a")},
            [],
            ["one"],
            {},
        )
        check_query(mdata, {"one": "a"}, expected)

    def test_read_only_view(self) -> None:
        """Assigning through ``.loc`` after ``expand_dims`` raises a ``ValueError``."""
        # Start from a 2D array ...
        base = DataArray(
            np.random.rand(3, 3),
            coords={"x": np.arange(3), "y": np.arange(3)},
            dims=("x", "y"),
        )
        # ... append a new trailing dimension "z" and give it coordinates.
        arr = base.expand_dims({"z": 3}, -1)
        arr["z"] = np.arange(3)
        with pytest.raises(ValueError, match="Do you want to .copy()"):
            arr.loc[0, 0, 0] = 999
class TestLazyArray:
    """Tests for ``slice_slice`` and the lazily indexed array wrappers."""

    def test_slice_slice(self) -> None:
        """Composing two slices must equal applying them one after the other."""
        arr = ReturnItem()
        # Cover both an even and an odd length.
        for size in [100, 99]:
            x = np.arange(size)
            slices = [
                arr[:3],
                arr[:4],
                arr[2:4],
                arr[:1],
                arr[:-1],
                arr[5:-1],
                arr[-5:-1],
                arr[::-1],
                arr[5::-1],
                arr[:3:-1],
                arr[:30:-1],
                arr[10:4:],
                arr[::4],
                arr[4:4:4],
                arr[:4:-4],
                arr[::-2],
            ]
            for first, second in itertools.product(slices, repeat=2):
                expected = x[first][second]
                combined = indexing.slice_slice(first, second, size=size)
                assert_array_equal(expected, x[combined])

    def test_lazily_indexed_array(self) -> None:
        """Lazy indexing must match eager indexing, however the key is split up."""
        original = np.random.rand(10, 20, 30)
        x = indexing.NumpyIndexingAdapter(original)
        v = Variable(["i", "j", "k"], original)
        lazy = indexing.LazilyIndexedArray(x)
        v_lazy = Variable(["i", "j", "k"], lazy)
        arr = ReturnItem()

        # --- orthogonally applied indexers ---
        indexers = [arr[:], 0, -2, arr[:3], [0, 1, 2, 3], [0], np.arange(10) < 5]
        for i, j, k in itertools.product(indexers, repeat=3):
            # A boolean mask must have the length of the axis it is applied to.
            if isinstance(j, np.ndarray) and j.dtype.kind == "b":
                j = np.arange(20) < 5
            if isinstance(k, np.ndarray) and k.dtype.kind == "b":
                k = np.arange(30) < 5
            expected = np.asarray(v[i, j, k])

            # The same selection, applied in one, two and three steps.
            candidates = [
                v_lazy[i, j, k],
                v_lazy[:, j, k][i],
                v_lazy[:, :, k][:, j][i],
            ]
            for actual in candidates:
                assert expected.shape == actual.shape
                assert_array_equal(expected, actual)
                assert isinstance(actual._data, indexing.LazilyIndexedArray)
                assert isinstance(v_lazy._data, indexing.LazilyIndexedArray)

                # The stored key must be a BasicIndexer when it only holds ints
                # and slices, and an OuterIndexer otherwise.
                # NOTE(review): this inspects ``v_lazy``'s key, not ``actual``'s
                # -- confirm that is the intended subject.
                if all(
                    isinstance(part, int | slice)
                    for part in v_lazy._data.key.tuple
                ):
                    assert isinstance(v_lazy._data.key, indexing.BasicIndexer)
                else:
                    assert isinstance(v_lazy._data.key, indexing.OuterIndexer)

        # --- sequentially applied indexers ---
        indexers = [
            (3, 2),
            (arr[:], 0),
            (arr[:2], -1),
            (arr[:4], [0]),
            ([4, 5], 0),
            ([0, 1, 2], [0, 1]),
            ([0, 3, 5], arr[:2]),
        ]
        for i, j in indexers:
            expected_b = v[i][j]
            actual = v_lazy[i][j]
            assert expected_b.shape == actual.shape
            assert_array_equal(expected_b, actual)

            # Transpose check. A randomly drawn ``order`` used to be computed
            # here and immediately overwritten; that dead store was removed.
            # The order actually used is the array's own dimension names.
            if actual.ndim > 1:
                order = np.array(actual.dims)
                transposed = actual.transpose(*order)
                assert_array_equal(expected_b.transpose(*order), transposed)
                assert isinstance(
                    actual._data,
                    indexing.LazilyVectorizedIndexedArray | indexing.LazilyIndexedArray,
                )

            assert isinstance(actual._data, indexing.LazilyIndexedArray)
            assert isinstance(actual._data.array, indexing.NumpyIndexingAdapter)

    def test_vectorized_lazily_indexed_array(self) -> None:
        """Chained orthogonal/vectorized indexing stays lazy and matches eager results."""
        original = np.random.rand(10, 20, 30)
        x = indexing.NumpyIndexingAdapter(original)
        v_eager = Variable(["i", "j", "k"], x)
        lazy = indexing.LazilyIndexedArray(x)
        v_lazy = Variable(["i", "j", "k"], lazy)
        arr = ReturnItem()

        def check_indexing(v_eager, v_lazy, indexers):
            # Each indexer is applied to the result of the previous one.
            for indexer in indexers:
                actual = v_lazy[indexer]
                expected = v_eager[indexer]
                assert expected.shape == actual.shape
                assert isinstance(
                    actual._data,
                    indexing.LazilyVectorizedIndexedArray | indexing.LazilyIndexedArray,
                )
                assert_array_equal(expected, actual)
                v_eager = expected
                v_lazy = actual

        indexer_chains = [
            # orthogonal indexing
            [(arr[:], 0, 1), (Variable("i", [0, 1]),)],
            # vectorized indexing
            [
                (Variable("i", [0, 1]), Variable("i", [0, 1]), slice(None)),
                (slice(1, 3, 2), 0),
            ],
            [
                (slice(None, None, 2), 0, slice(None, 10)),
                (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
                (Variable(["i", "j"], [[0, 1], [1, 2]]),),
            ],
            [
                (Variable("i", [3, 2, 4, 3]), Variable("i", [3, 2, 1, 0])),
                (Variable(["i", "j"], [[0, 1], [1, 2]]),),
            ],
        ]
        for chain in indexer_chains:
            check_indexing(v_eager, v_lazy, chain)

    def test_lazily_indexed_array_vindex_setitem(self) -> None:
        """Vectorized assignment on a lazy array raises ``NotImplementedError``."""
        lazy = indexing.LazilyIndexedArray(np.random.rand(10, 20, 30))

        # vectorized indexing
        indexer = indexing.VectorizedIndexer(
            (np.array([0, 1]), np.array([0, 1]), slice(None, None, None))
        )
        with pytest.raises(
            NotImplementedError,
            match=r"Lazy item assignment with the vectorized indexer is not yet",
        ):
            lazy.vindex[indexer] = 0

    @pytest.mark.parametrize(
        "indexer_class, key, value",
        [
            (indexing.OuterIndexer, (0, 1, slice(None, None, None)), 10),
            (indexing.BasicIndexer, (0, 1, slice(None, None, None)), 10),
        ],
    )
    def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None:
        """Basic/outer assignment on a lazy array writes through to the wrapped data."""
        original = np.random.rand(10, 20, 30)
        lazy = indexing.LazilyIndexedArray(indexing.NumpyIndexingAdapter(original))

        indexer = indexer_class(key)
        if indexer_class is indexing.BasicIndexer:
            lazy[indexer] = value
        elif indexer_class is indexing.OuterIndexer:
            lazy.oindex[indexer] = value

        assert_array_equal(original[key], value)
class TestCopyOnWriteArray:
    """Writes through ``indexing.CopyOnWriteArray`` must leave the source untouched."""

    def test_setitem(self) -> None:
        """Assigning to the wrapper changes the wrapper but not the wrapped array."""
        source = np.arange(10)
        cow = indexing.CopyOnWriteArray(source)
        cow[B[:]] = 0
        assert_array_equal(source, np.arange(10))
        assert_array_equal(cow, np.zeros(10))

    def test_sub_array(self) -> None:
        """Assigning to an indexed child affects neither the parent nor the source."""
        source = np.arange(10)
        parent = indexing.CopyOnWriteArray(source)
        child = parent[B[:5]]
        assert isinstance(child, indexing.CopyOnWriteArray)
        child[B[:]] = 0
        assert_array_equal(source, np.arange(10))
        assert_array_equal(parent, np.arange(10))
        assert_array_equal(child, np.zeros(5))

    def test_index_scalar(self) -> None:
        """Regression test for GH1374: scalar indexing followed by ``()`` indexing."""
        words = indexing.CopyOnWriteArray(np.array(["foo", "bar"]))
        first = words[B[0]][B[()]]
        assert np.array(first) == "foo"
class TestMemoryCachedArray:
    """Behaviour of ``indexing.MemoryCachedArray`` around loading and indexing."""

    def test_wrapper(self) -> None:
        """After being compared as an array, the wrapper holds a numpy adapter."""
        lazy = indexing.LazilyIndexedArray(np.arange(10))
        cached = indexing.MemoryCachedArray(lazy)
        assert_array_equal(cached, np.arange(10))
        assert isinstance(cached.array, indexing.NumpyIndexingAdapter)

    def test_sub_array(self) -> None:
        """Loading an indexed child does not load the parent."""
        lazy = indexing.LazilyIndexedArray(np.arange(10))
        parent = indexing.MemoryCachedArray(lazy)
        child = parent[B[:5]]
        assert isinstance(child, indexing.MemoryCachedArray)
        assert_array_equal(child, np.arange(5))
        assert isinstance(child.array, indexing.NumpyIndexingAdapter)
        assert isinstance(parent.array, indexing.LazilyIndexedArray)

    def test_setitem(self) -> None:
        """Assignment through the wrapper modifies the wrapped numpy array."""
        source = np.arange(10)
        cached = indexing.MemoryCachedArray(source)
        cached[B[:]] = 0
        assert_array_equal(source, np.zeros(10))

    def test_index_scalar(self) -> None:
        """Regression test for GH1374: scalar indexing followed by ``()`` indexing."""
        words = indexing.MemoryCachedArray(np.array(["foo", "bar"]))
        first = words[B[0]][B[()]]
        assert np.array(first) == "foo"
def test_base_explicit_indexer() -> None:
    """The base class cannot be instantiated directly, but a subclass can."""
    with pytest.raises(TypeError):
        indexing.ExplicitIndexer(())

    class Subclass(indexing.ExplicitIndexer):
        pass

    key = (1, 2, 3)
    instance = Subclass(key)
    assert instance.tuple == (1, 2, 3)
    assert repr(instance) == "Subclass((1, 2, 3))"
@pytest.mark.parametrize(
    "indexer_cls",
    [indexing.BasicIndexer, indexing.OuterIndexer, indexing.VectorizedIndexer],
)
def test_invalid_for_all(indexer_cls) -> None:
    """Every indexer class rejects these keys with ``TypeError``."""
    rejected_keys = [
        None,
        ([],),
        (None,),
        ("foo",),
        (1.0,),
        (slice("foo"),),
        (np.array(["foo"]),),
    ]
    for key in rejected_keys:
        with pytest.raises(TypeError):
            indexer_cls(key)
def check_integer(indexer_cls):
    """Assert that integer entries are stored as plain ``int`` values."""
    stored = indexer_cls((1, np.uint64(2))).tuple
    for entry in stored:
        assert isinstance(entry, int)
    assert stored == (1, 2)
def check_slice(indexer_cls):
    """Assert that a slice with a numpy integer step is stored with an ``int`` step."""
    key = (slice(1, None, np.int64(2)),)
    (stored,) = indexer_cls(key).tuple
    assert stored == slice(1, None, 2)
    assert isinstance(stored.step, int)
def check_array1d(indexer_cls):
    """Assert that a 1D int32 array key is stored as int64 with the same values."""
    key = (np.arange(3, dtype=np.int32),)
    (stored,) = indexer_cls(key).tuple
    assert stored.dtype == np.int64
    np.testing.assert_array_equal(stored, [0, 1, 2])
def check_array2d(indexer_cls):
    """Assert that a 2D int64 array key is stored with its dtype and values intact."""
    grid = np.array([[1, 2], [3, 4]], dtype=np.int64)
    (stored,) = indexer_cls((grid,)).tuple
    assert stored.dtype == np.int64
    np.testing.assert_array_equal(stored, grid)
def test_basic_indexer() -> None:
    """``BasicIndexer`` accepts integers and slices and rejects array keys."""
    cls = indexing.BasicIndexer
    check_integer(cls)
    check_slice(cls)
    for array_check in (check_array1d, check_array2d):
        with pytest.raises(TypeError):
            array_check(cls)
def test_outer_indexer() -> None:
    """``OuterIndexer`` accepts integers, slices and 1D arrays, but not 2D arrays."""
    cls = indexing.OuterIndexer
    for accepted in (check_integer, check_slice, check_array1d):
        accepted(cls)
    with pytest.raises(TypeError):
        check_array2d(cls)
def test_vectorized_indexer() -> None:
    """``VectorizedIndexer`` rejects bare integers and mismatched array ranks."""
    cls = indexing.VectorizedIndexer
    with pytest.raises(TypeError):
        check_integer(cls)
    for accepted in (check_slice, check_array1d, check_array2d):
        accepted(cls)

    # A 0-d array combined with a 1-d array is refused.
    mismatched = (np.array(1, dtype=np.int64), np.arange(5, dtype=np.int64))
    with pytest.raises(ValueError, match=r"numbers of dimensions"):
        indexing.VectorizedIndexer(mismatched)
class Test_vectorized_indexer:
    """Tests for ``indexing._arrayize_vectorized_indexer``."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Provide a random 3D adapter and a pool of array/slice keys per test."""
        self.data = indexing.NumpyIndexingAdapter(np.random.randn(10, 12, 13))
        self.indexers = [
            np.array([[0, 3, 2]]),
            np.array([[0, 3, 3], [4, 6, 7]]),
            slice(2, -2, 2),
            slice(2, -2, 3),
            slice(None),
        ]

    def test_arrayize_vectorized_indexer(self) -> None:
        """The arrayized indexer must select the same data as the original one."""
        arrayize = indexing._arrayize_vectorized_indexer

        # Every combination of the pooled keys over the three axes.
        for combo in itertools.product(self.indexers, repeat=3):
            vindex = indexing.VectorizedIndexer(combo)
            vindex_array = arrayize(vindex, self.data.shape)
            np.testing.assert_array_equal(
                self.data.vindex[vindex], self.data.vindex[vindex_array]
            )

        # A single full slice becomes an arange over the axis.
        result = arrayize(indexing.VectorizedIndexer((slice(None),)), shape=(5,))
        np.testing.assert_array_equal(result.tuple, [np.arange(5)])

        # All-array keys come back with the same values.
        result = arrayize(
            indexing.VectorizedIndexer((np.arange(5),) * 3), shape=(8, 10, 12)
        )
        expected = np.stack([np.arange(5)] * 3)
        np.testing.assert_array_equal(np.stack(result.tuple), expected)

        # Array followed by slice.
        result = arrayize(
            indexing.VectorizedIndexer((np.arange(5), slice(None))), shape=(8, 10)
        )
        first, second = result.tuple
        np.testing.assert_array_equal(first, np.arange(5)[:, np.newaxis])
        np.testing.assert_array_equal(second, np.arange(10)[np.newaxis, :])

        # Slice followed by array.
        result = arrayize(
            indexing.VectorizedIndexer((slice(None), np.arange(5))), shape=(8, 10)
        )
        first, second = result.tuple
        np.testing.assert_array_equal(first, np.arange(8)[np.newaxis, :])
        np.testing.assert_array_equal(second, np.arange(5)[:, np.newaxis])
def get_indexers(shape, mode):
    """Build a sample explicit indexer for an array of the given ``shape``.

    Parameters
    ----------
    shape : tuple of int
        Shape of the array the indexer will be applied to.
    mode : str
        Which kind of indexer to build:

        - ``"vectorized"``: ``VectorizedIndexer`` of random ``(3, 4)`` integer
          arrays, one per axis.
        - ``"outer"``: ``OuterIndexer`` of random 1D integer arrays of length
          ``s + 2``, one per axis of size ``s``.
        - ``"outer_scalar"`` / ``"outer_scalar2"``: ``OuterIndexer`` made of a
          random array, a scalar (``0`` or ``-2``) and a step-2 slice,
          truncated to ``len(shape)`` entries.
        - ``"outer1vec"``: ``OuterIndexer`` of ``slice(2, -3)`` on every axis
          except axis 1, which gets a random 1D integer array.
        - ``"basic"``: ``BasicIndexer`` of ``slice(2, -3)`` on every axis except
          axis 0, which gets the integer ``3``.
        - ``"basic1"``: ``BasicIndexer((3,))``.
        - ``"basic2"``: ``BasicIndexer`` of the integers ``0, 2, 4`` truncated
          to ``len(shape)`` entries.
        - ``"basic3"``: ``BasicIndexer`` of full slices with axis 0 replaced by
          ``slice(-2, 2, -2)`` and axis 1 by ``slice(1, -1, 2)``.

    Returns
    -------
    An ``indexing.VectorizedIndexer``, ``indexing.OuterIndexer`` or
    ``indexing.BasicIndexer`` instance, depending on ``mode``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the names listed above. (Previously an
        unknown mode silently returned ``None``.)
    """
    ndim = len(shape)

    if mode == "vectorized":
        indexed_shape = (3, 4)
        indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape)
        return indexing.VectorizedIndexer(indexer)

    if mode == "outer":
        indexer = tuple(np.random.randint(0, s, s + 2) for s in shape)
        return indexing.OuterIndexer(indexer)

    if mode == "outer_scalar":
        indexer = (np.random.randint(0, 3, 4), 0, slice(None, None, 2))
        return indexing.OuterIndexer(indexer[:ndim])

    if mode == "outer_scalar2":
        indexer = (np.random.randint(0, 3, 4), -2, slice(None, None, 2))
        return indexing.OuterIndexer(indexer[:ndim])

    if mode == "outer1vec":
        indexer = [slice(2, -3) for _ in shape]
        indexer[1] = np.random.randint(0, shape[1], shape[1] + 2)
        return indexing.OuterIndexer(tuple(indexer))

    if mode == "basic":
        indexer = [slice(2, -3) for _ in shape]
        indexer[0] = 3
        return indexing.BasicIndexer(tuple(indexer))

    if mode == "basic1":
        return indexing.BasicIndexer((3,))

    if mode == "basic2":
        indexer = [0, 2, 4]
        return indexing.BasicIndexer(tuple(indexer[:ndim]))

    if mode == "basic3":
        indexer = [slice(None) for _ in shape]
        indexer[0] = slice(-2, 2, -2)
        indexer[1] = slice(1, -1, 2)
        return indexing.BasicIndexer(tuple(indexer[:ndim]))

    # Fail loudly instead of handing ``None`` to the caller.
    raise ValueError(f"unknown indexer mode: {mode!r}")
@pytest.mark.parametrize("size", [100, 99])
@pytest.mark.parametrize(
    "sl", [slice(1, -1, 1), slice(None, -1, 2), slice(-1, 1, -1), slice(-1, 1, -2)]
)
def test_decompose_slice(size, sl) -> None:
    """Applying the two decomposed slices in turn must equal the original slice."""
    x = np.arange(size)
    first, second = indexing._decompose_slice(sl, size)
    assert_array_equal(x[sl], x[first][second])
@pytest.mark.parametrize("shape", [(10, 5, 8), (10, 3)])
@pytest.mark.parametrize(
    "indexer_mode",
    [
        "vectorized",
        "outer",
        "outer_scalar",
        "outer_scalar2",
        "outer1vec",
        "basic",
        "basic1",
        "basic2",
        "basic3",
    ],
)
@pytest.mark.parametrize(
    "indexing_support",
    [
        indexing.IndexingSupport.BASIC,
        indexing.IndexingSupport.OUTER,
        indexing.IndexingSupport.OUTER_1VECTOR,
        indexing.IndexingSupport.VECTORIZED,
    ],
)
def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None:
    """Backend + in-memory indexers together must reproduce the original selection."""

    def apply(adapter, key):
        # Route the key through the accessor matching its indexer type.
        if isinstance(key, indexing.VectorizedIndexer):
            return adapter.vindex[key]
        if isinstance(key, indexing.OuterIndexer):
            return adapter.oindex[key]
        return adapter[key]

    data = np.random.randn(*shape)
    indexer = get_indexers(shape, indexer_mode)
    backend_ind, np_ind = indexing.decompose_indexer(indexer, shape, indexing_support)
    indexing_adapter = indexing.NumpyIndexingAdapter(data)

    # The reference result is selected by mode name.
    if indexer_mode.startswith("vectorized"):
        expected = indexing_adapter.vindex[indexer]
    elif indexer_mode.startswith("outer"):
        expected = indexing_adapter.oindex[indexer]
    else:
        expected = indexing_adapter[indexer]  # Basic indexing

    # First the backend part, then (if non-empty) the in-memory part.
    array = apply(indexing_adapter, backend_ind)
    if len(np_ind.tuple) > 0:
        array = apply(indexing.NumpyIndexingAdapter(array), np_ind)
    np.testing.assert_array_equal(expected, array)

    # Unless the in-memory part is integers only, also check the combined indexer.
    integers_only = all(isinstance(k, indexing.integer_types) for k in np_ind.tuple)
    if not integers_only:
        combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind)
        assert isinstance(combined_ind, indexing.VectorizedIndexer)
        array = indexing_adapter.vindex[combined_ind]
        np.testing.assert_array_equal(expected, array)
def test_implicit_indexing_adapter() -> None:
    """The adapter converts to an array and supports plain ``[:]`` indexing."""
    values = np.arange(10, dtype=np.int64)
    wrapped = indexing.NumpyIndexingAdapter(values)
    implicit = indexing.ImplicitToExplicitIndexingAdapter(
        wrapped, indexing.BasicIndexer
    )
    np.testing.assert_array_equal(values, np.asarray(implicit))
    np.testing.assert_array_equal(values, implicit[:])
def test_implicit_indexing_adapter_copy_on_write() -> None:
    """Indexing an adapter around a ``CopyOnWriteArray`` yields another adapter."""
    values = np.arange(10, dtype=np.int64)
    cow = indexing.CopyOnWriteArray(values)
    implicit = indexing.ImplicitToExplicitIndexingAdapter(cow)
    indexed = implicit[:]
    assert isinstance(indexed, indexing.ImplicitToExplicitIndexingAdapter)
def test_outer_indexer_consistency_with_broadcast_indexes_vectorized() -> None:
    """Outer indexing via numpy must agree with the vectorized broadcast path."""

    def as_positions(key):
        # Boolean masks become the integer positions of their True entries.
        if isinstance(key, np.ndarray) and key.dtype.kind == "b":
            return key.nonzero()[0]
        return key

    original = np.random.rand(10, 20, 30)
    v = Variable(["i", "j", "k"], original)
    arr = ReturnItem()

    # test orthogonally applied indexers
    indexers = [
        arr[:],
        0,
        -2,
        arr[:3],
        np.array([0, 1, 2, 3]),
        np.array([0]),
        np.arange(10) < 5,
    ]
    for i, j, k in itertools.product(indexers, repeat=3):
        # Resize boolean masks to the axis they are applied to.
        if isinstance(j, np.ndarray) and j.dtype.kind == "b":
            j = np.arange(20) < 4
        if isinstance(k, np.ndarray) and k.dtype.kind == "b":
            k = np.arange(30) < 8

        _, vectorized_key, new_order = v._broadcast_indexes_vectorized((i, j, k))
        expected_data = nputils.NumpyVIndexAdapter(v.data)[vectorized_key.tuple]
        if new_order:
            old_order = range(len(new_order))
            expected_data = np.moveaxis(expected_data, old_order, new_order)

        outer_key = indexing.OuterIndexer(
            (as_positions(i), as_positions(j), as_positions(k))
        )
        numpy_key = indexing._outer_to_numpy_indexer(outer_key, v.shape)
        np.testing.assert_array_equal(v.data[numpy_key], expected_data)
def test_create_mask_outer_indexer() -> None:
    """``create_mask`` marks the positions where an outer indexer holds ``-1``."""
    # One axis.
    key = indexing.OuterIndexer((np.array([0, -1, 2]),))
    want = np.array([False, True, False])
    np.testing.assert_array_equal(want, indexing.create_mask(key, (5,)))

    # Scalar + slice + array over three axes.
    key = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2])))
    want = np.array(2 * [[False, True, False]])
    np.testing.assert_array_equal(want, indexing.create_mask(key, (5, 5, 5)))
def test_create_mask_vectorized_indexer() -> None:
    """A position is masked when any of the vectorized index arrays holds ``-1``."""
    key = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1])))
    want = np.array([False, True, True])
    np.testing.assert_array_equal(want, indexing.create_mask(key, (5,)))

    # Same arrays with a full slice in between.
    key = indexing.VectorizedIndexer(
        (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1]))
    )
    want = np.array([[False, True, True]] * 2).T
    np.testing.assert_array_equal(want, indexing.create_mask(key, (5, 2)))
def test_create_mask_basic_indexer() -> None:
    """For a scalar basic indexer the mask is ``True`` for ``-1`` and ``False`` for ``0``."""
    for position, want in ((-1, True), (0, False)):
        key = indexing.BasicIndexer((position,))
        got = indexing.create_mask(key, (3,))
        np.testing.assert_array_equal(want, got)
def test_create_mask_dask() -> None:
    """With a dask array as third argument, ``create_mask`` returns a matching dask mask."""
    da = pytest.importorskip("dask.array")

    # Outer indexer: the mask keeps the chunking of the supplied array.
    outer_key = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2])))
    want = np.array(2 * [[False, True, False]])
    template = da.empty((2, 3), chunks=((1, 1), (2, 1)))
    got = indexing.create_mask(outer_key, (5, 5, 5), template)
    assert got.chunks == ((1, 1), (2, 1))
    np.testing.assert_array_equal(want, got)

    # Vectorized indexer: the result is a dask array.
    indexer_vec = indexing.VectorizedIndexer(
        (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1]))
    )
    want = np.array([[False, True, True]] * 2).T
    template = da.empty((3, 2), chunks=((3,), (2,)))
    got = indexing.create_mask(indexer_vec, (5, 2), template)
    assert isinstance(got, da.Array)
    np.testing.assert_array_equal(want, got)

    # This 1D dask array is rejected for the 2D result.
    with pytest.raises(ValueError):
        indexing.create_mask(indexer_vec, (5, 2), da.empty((5,), chunks=(1,)))
def test_create_mask_error() -> None:
    """A plain tuple is not an accepted key type for ``create_mask``."""
    plain_tuple = (1, 2)
    with pytest.raises(TypeError, match=r"unexpected key type"):
        indexing.create_mask(plain_tuple, (3, 4))  # type: ignore[arg-type]
@pytest.mark.parametrize(
    "indices, expected",
    [
        (np.arange(5), np.arange(5)),
        (np.array([0, -1, -1]), np.array([0, 0, 0])),
        (np.array([-1, 1, -1]), np.array([1, 1, 1])),
        (np.array([-1, -1, 2]), np.array([2, 2, 2])),
        (np.array([-1]), np.array([0])),
        (np.array([0, -1, 1, -1, -1]), np.array([0, 0, 1, 1, 1])),
        (np.array([0, -1, -1, -1, 1]), np.array([0, 0, 0, 0, 1])),
    ],
)
def test_posify_mask_subindexer(indices, expected) -> None:
    """``_posify_mask_subindexer`` must map each input to its listed expected array."""
    posified = indexing._posify_mask_subindexer(indices)
    np.testing.assert_array_equal(expected, posified)
class ArrayWithNamespace:
    """Minimal stand-in object that defines only ``__array_namespace__``."""

    def __array_namespace__(self, version=None):
        """Do nothing; only the presence of this method matters."""
        return None
class ArrayWithArrayFunction:
    """Minimal stand-in object that defines only ``__array_function__``."""

    def __array_function__(self, func, types, args, kwargs):
        """Do nothing; only the presence of this method matters."""
        return None
class ArrayWithNamespaceAndArrayFunction:
    """Minimal stand-in defining both ``__array_namespace__`` and ``__array_function__``."""

    def __array_namespace__(self, version=None):
        """Do nothing; only the presence of this method matters."""
        return None

    def __array_function__(self, func, types, args, kwargs):
        """Do nothing; only the presence of this method matters."""
        return None
def as_dask_array(arr, chunks):
    """Wrap ``arr`` in a dask array with the given ``chunks``.

    Returns ``None`` when dask cannot be imported.
    """
    try:
        import dask.array as dask_array
    except ImportError:
        return None
    else:
        return dask_array.from_array(arr, chunks=chunks)
@pytest.mark.parametrize(
    "array, expected_type",
    [
        pytest.param(
            indexing.CopyOnWriteArray(np.array([1, 2])),
            indexing.CopyOnWriteArray,
            id="ExplicitlyIndexed",
        ),
        pytest.param(
            np.array([1, 2]), indexing.NumpyIndexingAdapter, id="numpy.ndarray"
        ),
        pytest.param(
            pd.Index([1, 2]), indexing.PandasIndexingAdapter, id="pandas.Index"
        ),
        pytest.param(
            as_dask_array(np.array([1, 2]), chunks=(1,)),
            indexing.DaskIndexingAdapter,
            id="dask.array",
            marks=requires_dask,
        ),
        pytest.param(
            ArrayWithNamespace(), indexing.ArrayApiIndexingAdapter, id="array_api"
        ),
        pytest.param(
            ArrayWithArrayFunction(),
            indexing.NdArrayLikeIndexingAdapter,
            id="array_like",
        ),
        pytest.param(
            ArrayWithNamespaceAndArrayFunction(),
            indexing.ArrayApiIndexingAdapter,
            id="array_api_with_fallback",
        ),
    ],
)
def test_as_indexable(array, expected_type):
    """``as_indexable`` must return an instance of the adapter listed for each input."""
    adapter = indexing.as_indexable(array)
    assert isinstance(adapter, expected_type)
def test_indexing_1d_object_array() -> None:
    """Indexing a 1D object array returns the stored object itself."""
    items = (np.arange(3), np.arange(6))
    arr = DataArray(np.array(items, dtype=object))

    picked = arr[0]

    # Build the expected 0-d object array holding the first item.
    holder = np.empty((), dtype=object)
    holder[()] = items[0]
    wanted = DataArray(holder)

    assert [picked.data.item()] == [wanted.data.item()]
@requires_dask
def test_indexing_dask_array() -> None:
    """``isel`` with a dask array indexer must not trigger a dask compute."""
    import dask.array

    chunked = DataArray(
        np.ones(10 * 3 * 3).reshape((10, 3, 3)),
        dims=("time", "x", "y"),
    ).chunk(dict(time=-1, x=1, y=1))

    lazy_key = dask.array.from_array([9], chunks=(1,))
    with raise_if_dask_computes():
        actual = chunked.isel(time=lazy_key)
    expected = chunked.isel(time=[9])
    assert_identical(actual, expected)
@requires_dask
def test_indexing_dask_array_scalar() -> None:
    """GH4276: ``isel`` with the result of ``argmax(dim=...)`` must stay lazy."""
    import dask.array

    lazy_values = dask.array.from_array(np.linspace(0.0, 1.0))
    darr = DataArray(lazy_values, dims="x")
    x_selector = darr.argmax(dim=...)
    assert not isinstance(x_selector, DataArray)

    with raise_if_dask_computes():
        actual = darr.isel(x_selector)
    expected = darr.isel(x=-1)
    assert_identical(actual, expected)
@requires_dask
def test_vectorized_indexing_dask_array() -> None:
    """Vectorized indexing with a chunked indexer.

    See https://github.com/pydata/xarray/issues/2511#issuecomment-563330352
    """
    darr = DataArray(data=[0.2, 0.4, 0.6], coords={"z": range(3)}, dims=("z",))
    indexer = DataArray(
        data=np.random.randint(0, 3, 8).reshape(4, 2).astype(int),
        coords={"y": range(4), "x": range(2)},
        dims=("y", "x"),
    )
    expected = darr[indexer]

    # fails because we can't index pd.Index lazily (yet).
    # We could make this succeed by auto-chunking the values
    # and constructing a lazy index variable, and not automatically
    # create an index for it.
    with pytest.raises(ValueError, match="Cannot index with"), raise_if_dask_computes():
        darr.chunk()[indexer.chunk({"y": 2})]

    with pytest.raises(ValueError, match="Cannot index with"), raise_if_dask_computes():
        actual = darr[indexer.chunk({"y": 2})]

    # Without the "z" coordinate the lazy selection works.
    with raise_if_dask_computes():
        actual = darr.drop_vars("z").chunk()[indexer.chunk({"y": 2})]
    assert_identical(actual, expected.drop_vars("z"))

    # The same holds at the Variable level.
    with raise_if_dask_computes():
        actual_variable = darr.variable.chunk()[indexer.variable.chunk({"y": 2})]
    assert_identical(actual_variable, expected.variable)
@requires_dask
def test_advanced_indexing_dask_array() -> None:
    """GH4663: ``sel`` with a dask-backed indexer must not trigger a compute."""
    import dask.array as da

    variables = {
        "a": ("x", da.from_array(np.random.randint(0, 100, 100))),
        "b": (("x", "y"), da.random.random((100, 10))),
    }
    ds = Dataset(variables)
    expected = ds.b.sel(x=ds.a.compute())

    # Indexer given as a dask-backed DataArray.
    with raise_if_dask_computes():
        actual = ds.b.sel(x=ds.a)
    assert_identical(expected, actual)

    # Indexer given as the raw dask array.
    with raise_if_dask_computes():
        actual = ds.b.sel(x=ds.a.data)
    assert_identical(expected, actual)