2382 lines
80 KiB
Python
2382 lines
80 KiB
Python
import re
|
|
import sys
|
|
import typing
|
|
from collections.abc import Mapping
|
|
from copy import copy, deepcopy
|
|
from textwrap import dedent
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import xarray as xr
|
|
from xarray import DataArray, Dataset
|
|
from xarray.core.coordinates import DataTreeCoordinates
|
|
from xarray.core.datatree import DataTree
|
|
from xarray.core.treenode import NotFoundInTreeError
|
|
from xarray.testing import assert_equal, assert_identical
|
|
from xarray.tests import (
|
|
assert_array_equal,
|
|
create_test_data,
|
|
requires_dask,
|
|
source_ndarray,
|
|
)
|
|
|
|
ON_WINDOWS = sys.platform == "win32"
|
|
|
|
|
|
class TestTreeCreation:
|
|
def test_empty(self) -> None:
|
|
dt = DataTree(name="root")
|
|
assert dt.name == "root"
|
|
assert dt.parent is None
|
|
assert dt.children == {}
|
|
assert_identical(dt.to_dataset(), xr.Dataset())
|
|
|
|
def test_name(self) -> None:
|
|
dt = DataTree()
|
|
assert dt.name is None
|
|
|
|
dt = DataTree(name="foo")
|
|
assert dt.name == "foo"
|
|
|
|
dt.name = "bar"
|
|
assert dt.name == "bar"
|
|
|
|
dt = DataTree(children={"foo": DataTree()})
|
|
assert dt["/foo"].name == "foo"
|
|
with pytest.raises(
|
|
ValueError, match="cannot set the name of a node which already has a parent"
|
|
):
|
|
dt["/foo"].name = "bar"
|
|
|
|
detached = dt["/foo"].copy()
|
|
assert detached.name == "foo"
|
|
detached.name = "bar"
|
|
assert detached.name == "bar"
|
|
|
|
def test_bad_names(self) -> None:
|
|
with pytest.raises(TypeError):
|
|
DataTree(name=5) # type: ignore[arg-type]
|
|
|
|
with pytest.raises(ValueError):
|
|
DataTree(name="folder/data")
|
|
|
|
def test_data_arg(self) -> None:
|
|
ds = xr.Dataset({"foo": 42})
|
|
tree: DataTree = DataTree(dataset=ds)
|
|
assert_identical(tree.to_dataset(), ds)
|
|
|
|
with pytest.raises(TypeError):
|
|
DataTree(dataset=xr.DataArray(42, name="foo")) # type: ignore[arg-type]
|
|
|
|
def test_child_data_not_copied(self) -> None:
|
|
# regression test for https://github.com/pydata/xarray/issues/9683
|
|
class NoDeepCopy:
|
|
def __deepcopy__(self, memo):
|
|
raise TypeError("class can't be deepcopied")
|
|
|
|
da = xr.DataArray(NoDeepCopy())
|
|
ds = xr.Dataset({"var": da})
|
|
dt1 = xr.DataTree(ds)
|
|
dt2 = xr.DataTree(ds, children={"child": dt1})
|
|
dt3 = xr.DataTree.from_dict({"/": ds, "child": ds})
|
|
assert_identical(dt2, dt3)
|
|
|
|
|
|
class TestFamilyTree:
|
|
def test_dont_modify_children_inplace(self) -> None:
|
|
# GH issue 9196
|
|
child = DataTree()
|
|
DataTree(children={"child": child})
|
|
assert child.parent is None
|
|
|
|
def test_create_two_children(self) -> None:
|
|
root_data = xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})
|
|
set1_data = xr.Dataset({"a": 0, "b": 1})
|
|
root = DataTree.from_dict(
|
|
{"/": root_data, "/set1": set1_data, "/set1/set2": None}
|
|
)
|
|
assert root["/set1"].name == "set1"
|
|
assert root["/set1/set2"].name == "set2"
|
|
|
|
def test_create_full_tree(self, simple_datatree) -> None:
|
|
d = simple_datatree.to_dict()
|
|
d_keys = list(d.keys())
|
|
|
|
expected_keys = [
|
|
"/",
|
|
"/set1",
|
|
"/set2",
|
|
"/set3",
|
|
"/set1/set1",
|
|
"/set1/set2",
|
|
"/set2/set1",
|
|
]
|
|
|
|
assert d_keys == expected_keys
|
|
|
|
|
|
class TestNames:
|
|
def test_child_gets_named_on_attach(self) -> None:
|
|
sue = DataTree()
|
|
mary = DataTree(children={"Sue": sue})
|
|
assert mary.children["Sue"].name == "Sue"
|
|
|
|
def test_dataset_containing_slashes(self) -> None:
|
|
xda: xr.DataArray = xr.DataArray(
|
|
[[1, 2]],
|
|
coords={"label": ["a"], "R30m/y": [30, 60]},
|
|
)
|
|
xds: xr.Dataset = xr.Dataset({"group/subgroup/my_variable": xda})
|
|
with pytest.raises(
|
|
ValueError,
|
|
match=re.escape(
|
|
"Given variables have names containing the '/' character: "
|
|
"['R30m/y', 'group/subgroup/my_variable']. "
|
|
"Variables stored in DataTree objects cannot have names containing '/' characters, "
|
|
"as this would make path-like access to variables ambiguous."
|
|
),
|
|
):
|
|
DataTree(xds)
|
|
|
|
|
|
class TestPaths:
|
|
def test_path_property(self) -> None:
|
|
john = DataTree.from_dict(
|
|
{
|
|
"/Mary/Sue": DataTree(),
|
|
}
|
|
)
|
|
assert john["/Mary/Sue"].path == "/Mary/Sue"
|
|
assert john.path == "/"
|
|
|
|
def test_path_roundtrip(self) -> None:
|
|
john = DataTree.from_dict(
|
|
{
|
|
"/Mary/Sue": DataTree(),
|
|
}
|
|
)
|
|
assert john["/Mary/Sue"].name == "Sue"
|
|
|
|
def test_same_tree(self) -> None:
|
|
john = DataTree.from_dict(
|
|
{
|
|
"/Mary": DataTree(),
|
|
"/Kate": DataTree(),
|
|
}
|
|
)
|
|
assert john["/Mary"].same_tree(john["/Kate"])
|
|
|
|
def test_relative_paths(self) -> None:
|
|
john = DataTree.from_dict(
|
|
{
|
|
"/Mary/Sue": DataTree(),
|
|
"/Annie": DataTree(),
|
|
}
|
|
)
|
|
sue_result = john["Mary/Sue"]
|
|
if isinstance(sue_result, DataTree):
|
|
sue: DataTree = sue_result
|
|
|
|
annie_result = john["Annie"]
|
|
if isinstance(annie_result, DataTree):
|
|
annie: DataTree = annie_result
|
|
|
|
assert sue.relative_to(john) == "Mary/Sue"
|
|
assert john.relative_to(sue) == "../.."
|
|
assert annie.relative_to(sue) == "../../Annie"
|
|
assert sue.relative_to(annie) == "../Mary/Sue"
|
|
assert sue.relative_to(sue) == "."
|
|
|
|
evil_kate = DataTree()
|
|
with pytest.raises(
|
|
NotFoundInTreeError, match="nodes do not lie within the same tree"
|
|
):
|
|
sue.relative_to(evil_kate)
|
|
|
|
|
|
class TestStoreDatasets:
|
|
def test_create_with_data(self) -> None:
|
|
dat = xr.Dataset({"a": 0})
|
|
john = DataTree(name="john", dataset=dat)
|
|
|
|
assert_identical(john.to_dataset(), dat)
|
|
|
|
with pytest.raises(TypeError):
|
|
DataTree(name="mary", dataset="junk") # type: ignore[arg-type]
|
|
|
|
def test_set_data(self) -> None:
|
|
john = DataTree(name="john")
|
|
dat = xr.Dataset({"a": 0})
|
|
john.dataset = dat # type: ignore[assignment]
|
|
|
|
assert_identical(john.to_dataset(), dat)
|
|
|
|
with pytest.raises(TypeError):
|
|
john.dataset = "junk" # type: ignore[assignment]
|
|
|
|
def test_has_data(self) -> None:
|
|
john = DataTree(name="john", dataset=xr.Dataset({"a": 0}))
|
|
assert john.has_data
|
|
|
|
john_no_data = DataTree(name="john", dataset=None)
|
|
assert not john_no_data.has_data
|
|
|
|
def test_is_hollow(self) -> None:
|
|
john = DataTree(dataset=xr.Dataset({"a": 0}))
|
|
assert john.is_hollow
|
|
|
|
eve = DataTree(children={"john": john})
|
|
assert eve.is_hollow
|
|
|
|
eve.dataset = xr.Dataset({"a": 1}) # type: ignore[assignment]
|
|
assert not eve.is_hollow
|
|
|
|
|
|
class TestToDataset:
|
|
def test_to_dataset_inherited(self) -> None:
|
|
base = xr.Dataset(coords={"a": [1], "b": 2})
|
|
sub = xr.Dataset(coords={"c": [3]})
|
|
tree = DataTree.from_dict({"/": base, "/sub": sub})
|
|
subtree = typing.cast(DataTree, tree["sub"])
|
|
|
|
assert_identical(tree.to_dataset(inherit=False), base)
|
|
assert_identical(subtree.to_dataset(inherit=False), sub)
|
|
|
|
sub_and_base = xr.Dataset(coords={"a": [1], "c": [3]}) # no "b"
|
|
assert_identical(tree.to_dataset(inherit=True), base)
|
|
assert_identical(subtree.to_dataset(inherit=True), sub_and_base)
|
|
|
|
|
|
class TestVariablesChildrenNameCollisions:
|
|
def test_parent_already_has_variable_with_childs_name(self) -> None:
|
|
with pytest.raises(KeyError, match="already contains a variable named a"):
|
|
DataTree.from_dict({"/": xr.Dataset({"a": [0], "b": 1}), "/a": None})
|
|
|
|
def test_parent_already_has_variable_with_childs_name_update(self) -> None:
|
|
dt = DataTree(dataset=xr.Dataset({"a": [0], "b": 1}))
|
|
with pytest.raises(ValueError, match="already contains a variable named a"):
|
|
dt.update({"a": DataTree()})
|
|
|
|
def test_assign_when_already_child_with_variables_name(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/a": DataTree(),
|
|
}
|
|
)
|
|
|
|
with pytest.raises(ValueError, match="node already contains a variable"):
|
|
dt.dataset = xr.Dataset({"a": 0}) # type: ignore[assignment]
|
|
|
|
dt.dataset = xr.Dataset() # type: ignore[assignment]
|
|
|
|
new_ds = dt.to_dataset().assign(a=xr.DataArray(0))
|
|
with pytest.raises(ValueError, match="node already contains a variable"):
|
|
dt.dataset = new_ds # type: ignore[assignment]
|
|
|
|
|
|
class TestGet: ...
|
|
|
|
|
|
class TestGetItem:
|
|
def test_getitem_node(self) -> None:
|
|
folder1 = DataTree.from_dict(
|
|
{
|
|
"/results/highres": DataTree(),
|
|
}
|
|
)
|
|
|
|
assert folder1["results"].name == "results"
|
|
assert folder1["results/highres"].name == "highres"
|
|
|
|
def test_getitem_self(self) -> None:
|
|
dt = DataTree()
|
|
assert dt["."] is dt
|
|
|
|
def test_getitem_single_data_variable(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
results = DataTree(name="results", dataset=data)
|
|
assert_identical(results["temp"], data["temp"])
|
|
|
|
def test_getitem_single_data_variable_from_node(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
folder1 = DataTree.from_dict(
|
|
{
|
|
"/results/highres": data,
|
|
}
|
|
)
|
|
assert_identical(folder1["results/highres/temp"], data["temp"])
|
|
|
|
def test_getitem_nonexistent_node(self) -> None:
|
|
folder1 = DataTree.from_dict({"/results": DataTree()}, name="folder1")
|
|
with pytest.raises(KeyError):
|
|
folder1["results/highres"]
|
|
|
|
def test_getitem_nonexistent_variable(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
results = DataTree(name="results", dataset=data)
|
|
with pytest.raises(KeyError):
|
|
results["pressure"]
|
|
|
|
@pytest.mark.xfail(reason="Should be deprecated in favour of .subset")
|
|
def test_getitem_multiple_data_variables(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50], "p": [5, 8, 7]})
|
|
results = DataTree(name="results", dataset=data)
|
|
assert_identical(results[["temp", "p"]], data[["temp", "p"]]) # type: ignore[index]
|
|
|
|
@pytest.mark.xfail(
|
|
reason="Indexing needs to return whole tree (GH https://github.com/xarray-contrib/datatree/issues/77)"
|
|
)
|
|
def test_getitem_dict_like_selection_access_to_dataset(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
results = DataTree(name="results", dataset=data)
|
|
assert_identical(results[{"temp": 1}], data[{"temp": 1}]) # type: ignore[index]
|
|
|
|
|
|
class TestUpdate:
|
|
def test_update(self) -> None:
|
|
dt = DataTree()
|
|
dt.update({"foo": xr.DataArray(0), "a": DataTree()})
|
|
expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "a": None})
|
|
assert_equal(dt, expected)
|
|
assert dt.groups == ("/", "/a")
|
|
|
|
def test_update_new_named_dataarray(self) -> None:
|
|
da = xr.DataArray(name="temp", data=[0, 50])
|
|
folder1 = DataTree(name="folder1")
|
|
folder1.update({"results": da})
|
|
expected = da.rename("results")
|
|
assert_equal(folder1["results"], expected)
|
|
|
|
def test_update_doesnt_alter_child_name(self) -> None:
|
|
dt = DataTree()
|
|
dt.update({"foo": xr.DataArray(0), "a": DataTree(name="b")})
|
|
assert "a" in dt.children
|
|
child = dt["a"]
|
|
assert child.name == "a"
|
|
|
|
def test_update_overwrite(self) -> None:
|
|
actual = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 1}))})
|
|
actual.update({"a": DataTree(xr.Dataset({"x": 2}))})
|
|
expected = DataTree.from_dict({"a": DataTree(xr.Dataset({"x": 2}))})
|
|
assert_equal(actual, expected)
|
|
|
|
def test_update_coordinates(self) -> None:
|
|
expected = DataTree.from_dict({"/": xr.Dataset(coords={"a": 1})})
|
|
actual = DataTree.from_dict({"/": xr.Dataset()})
|
|
actual.update(xr.Dataset(coords={"a": 1}))
|
|
assert_equal(actual, expected)
|
|
|
|
def test_update_inherited_coords(self) -> None:
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"a": 1}),
|
|
"/b": xr.Dataset(coords={"c": 1}),
|
|
}
|
|
)
|
|
actual = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"a": 1}),
|
|
"/b": xr.Dataset(),
|
|
}
|
|
)
|
|
actual["/b"].update(xr.Dataset(coords={"c": 1}))
|
|
assert_identical(actual, expected)
|
|
|
|
# DataTree.identical() currently does not require that non-inherited
|
|
# coordinates are defined identically, so we need to check this
|
|
# explicitly
|
|
actual_node = actual.children["b"].to_dataset(inherit=False)
|
|
expected_node = expected.children["b"].to_dataset(inherit=False)
|
|
assert_identical(actual_node, expected_node)
|
|
|
|
|
|
class TestCopy:
|
|
def test_copy(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
for node in dt.root.subtree:
|
|
node.attrs["Test"] = [1, 2, 3]
|
|
|
|
for copied in [dt.copy(deep=False), copy(dt)]:
|
|
assert_identical(dt, copied)
|
|
|
|
for node, copied_node in zip(
|
|
dt.root.subtree, copied.root.subtree, strict=True
|
|
):
|
|
assert node.encoding == copied_node.encoding
|
|
# Note: IndexVariable objects with string dtype are always
|
|
# copied because of xarray.core.util.safe_cast_to_index.
|
|
# Limiting the test to data variables.
|
|
for k in node.data_vars:
|
|
v0 = node.variables[k]
|
|
v1 = copied_node.variables[k]
|
|
assert source_ndarray(v0.data) is source_ndarray(v1.data)
|
|
copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z")
|
|
assert "foo" not in node
|
|
|
|
copied_node.attrs["foo"] = "bar"
|
|
assert "foo" not in node.attrs
|
|
assert node.attrs["Test"] is copied_node.attrs["Test"]
|
|
|
|
def test_copy_subtree(self) -> None:
|
|
dt = DataTree.from_dict({"/level1/level2/level3": xr.Dataset()})
|
|
|
|
actual = dt["/level1/level2"].copy()
|
|
expected = DataTree.from_dict({"/level3": xr.Dataset()}, name="level2")
|
|
|
|
assert_identical(actual, expected)
|
|
|
|
def test_copy_coord_inheritance(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{"/": xr.Dataset(coords={"x": [0, 1]}), "/c": DataTree()}
|
|
)
|
|
actual = tree.copy()
|
|
node_ds = actual.children["c"].to_dataset(inherit=False)
|
|
assert_identical(node_ds, xr.Dataset())
|
|
|
|
actual = tree.children["c"].copy()
|
|
expected = DataTree(Dataset(coords={"x": [0, 1]}), name="c")
|
|
assert_identical(expected, actual)
|
|
|
|
actual = tree.children["c"].copy(inherit=False)
|
|
expected = DataTree(name="c")
|
|
assert_identical(expected, actual)
|
|
|
|
def test_deepcopy(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
for node in dt.root.subtree:
|
|
node.attrs["Test"] = [1, 2, 3]
|
|
|
|
for copied in [dt.copy(deep=True), deepcopy(dt)]:
|
|
assert_identical(dt, copied)
|
|
|
|
for node, copied_node in zip(
|
|
dt.root.subtree, copied.root.subtree, strict=True
|
|
):
|
|
assert node.encoding == copied_node.encoding
|
|
# Note: IndexVariable objects with string dtype are always
|
|
# copied because of xarray.core.util.safe_cast_to_index.
|
|
# Limiting the test to data variables.
|
|
for k in node.data_vars:
|
|
v0 = node.variables[k]
|
|
v1 = copied_node.variables[k]
|
|
assert source_ndarray(v0.data) is not source_ndarray(v1.data)
|
|
copied_node["foo"] = xr.DataArray(data=np.arange(5), dims="z")
|
|
assert "foo" not in node
|
|
|
|
copied_node.attrs["foo"] = "bar"
|
|
assert "foo" not in node.attrs
|
|
assert node.attrs["Test"] is not copied_node.attrs["Test"]
|
|
|
|
@pytest.mark.xfail(reason="data argument not yet implemented")
|
|
def test_copy_with_data(self, create_test_datatree) -> None:
|
|
orig = create_test_datatree()
|
|
# TODO use .data_vars once that property is available
|
|
data_vars = {
|
|
k: v for k, v in orig.variables.items() if k not in orig._coord_names
|
|
}
|
|
new_data = {k: np.random.randn(*v.shape) for k, v in data_vars.items()}
|
|
actual = orig.copy(data=new_data)
|
|
|
|
expected = orig.copy()
|
|
for k, v in new_data.items():
|
|
expected[k].data = v
|
|
assert_identical(expected, actual)
|
|
|
|
# TODO test parents and children?
|
|
|
|
|
|
class TestSetItem:
|
|
def test_setitem_new_child_node(self) -> None:
|
|
john = DataTree(name="john")
|
|
mary = DataTree(name="mary")
|
|
john["mary"] = mary
|
|
|
|
grafted_mary = john["mary"]
|
|
assert grafted_mary.parent is john
|
|
assert grafted_mary.name == "mary"
|
|
|
|
def test_setitem_unnamed_child_node_becomes_named(self) -> None:
|
|
john2 = DataTree(name="john2")
|
|
john2["sonny"] = DataTree()
|
|
assert john2["sonny"].name == "sonny"
|
|
|
|
def test_setitem_new_grandchild_node(self) -> None:
|
|
john = DataTree.from_dict({"/Mary/Rose": DataTree()})
|
|
new_rose = DataTree(dataset=xr.Dataset({"x": 0}))
|
|
john["Mary/Rose"] = new_rose
|
|
|
|
grafted_rose = john["Mary/Rose"]
|
|
assert grafted_rose.parent is john["/Mary"]
|
|
assert grafted_rose.name == "Rose"
|
|
|
|
def test_grafted_subtree_retains_name(self) -> None:
|
|
subtree = DataTree(name="original_subtree_name")
|
|
root = DataTree(name="root")
|
|
root["new_subtree_name"] = subtree
|
|
assert subtree.name == "original_subtree_name"
|
|
|
|
def test_setitem_new_empty_node(self) -> None:
|
|
john = DataTree(name="john")
|
|
john["mary"] = DataTree()
|
|
mary = john["mary"]
|
|
assert isinstance(mary, DataTree)
|
|
assert_identical(mary.to_dataset(), xr.Dataset())
|
|
|
|
def test_setitem_overwrite_data_in_node_with_none(self) -> None:
|
|
john = DataTree.from_dict({"/mary": xr.Dataset()}, name="john")
|
|
|
|
john["mary"] = DataTree()
|
|
assert_identical(john["mary"].to_dataset(), xr.Dataset())
|
|
|
|
john.dataset = xr.Dataset() # type: ignore[assignment]
|
|
with pytest.raises(ValueError, match="has no name"):
|
|
john["."] = DataTree()
|
|
|
|
@pytest.mark.xfail(reason="assigning Datasets doesn't yet create new nodes")
|
|
def test_setitem_dataset_on_this_node(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
results = DataTree(name="results")
|
|
results["."] = data
|
|
assert_identical(results.to_dataset(), data)
|
|
|
|
def test_setitem_dataset_as_new_node(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results"] = data
|
|
assert_identical(folder1["results"].to_dataset(), data)
|
|
|
|
def test_setitem_dataset_as_new_node_requiring_intermediate_nodes(self) -> None:
|
|
data = xr.Dataset({"temp": [0, 50]})
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results/highres"] = data
|
|
assert_identical(folder1["results/highres"].to_dataset(), data)
|
|
|
|
def test_setitem_named_dataarray(self) -> None:
|
|
da = xr.DataArray(name="temp", data=[0, 50])
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results"] = da
|
|
expected = da.rename("results")
|
|
assert_equal(folder1["results"], expected)
|
|
|
|
def test_setitem_unnamed_dataarray(self) -> None:
|
|
data = xr.DataArray([0, 50])
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results"] = data
|
|
assert_equal(folder1["results"], data)
|
|
|
|
def test_setitem_variable(self) -> None:
|
|
var = xr.Variable(data=[0, 50], dims="x")
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results"] = var
|
|
assert_equal(folder1["results"], xr.DataArray(var))
|
|
|
|
def test_setitem_coerce_to_dataarray(self) -> None:
|
|
folder1 = DataTree(name="folder1")
|
|
folder1["results"] = 0
|
|
assert_equal(folder1["results"], xr.DataArray(0))
|
|
|
|
def test_setitem_add_new_variable_to_empty_node(self) -> None:
|
|
results = DataTree(name="results")
|
|
results["pressure"] = xr.DataArray(data=[2, 3])
|
|
assert "pressure" in results.dataset
|
|
results["temp"] = xr.Variable(data=[10, 11], dims=["x"])
|
|
assert "temp" in results.dataset
|
|
|
|
# What if there is a path to traverse first?
|
|
results_with_path = DataTree(name="results")
|
|
results_with_path["highres/pressure"] = xr.DataArray(data=[2, 3])
|
|
assert "pressure" in results_with_path["highres"].dataset
|
|
results_with_path["highres/temp"] = xr.Variable(data=[10, 11], dims=["x"])
|
|
assert "temp" in results_with_path["highres"].dataset
|
|
|
|
def test_setitem_dataarray_replace_existing_node(self) -> None:
|
|
t = xr.Dataset({"temp": [0, 50]})
|
|
results = DataTree(name="results", dataset=t)
|
|
p = xr.DataArray(data=[2, 3])
|
|
results["pressure"] = p
|
|
expected = t.assign(pressure=p)
|
|
assert_identical(results.to_dataset(), expected)
|
|
|
|
|
|
class TestCoords:
|
|
def test_properties(self) -> None:
|
|
# use int64 for repr consistency on windows
|
|
ds = Dataset(
|
|
data_vars={
|
|
"foo": (["x", "y"], np.random.randn(2, 3)),
|
|
},
|
|
coords={
|
|
"x": ("x", np.array([-1, -2], "int64")),
|
|
"y": ("y", np.array([0, 1, 2], "int64")),
|
|
"a": ("x", np.array([4, 5], "int64")),
|
|
"b": np.int64(-10),
|
|
},
|
|
)
|
|
dt = DataTree(dataset=ds)
|
|
dt["child"] = DataTree()
|
|
|
|
coords = dt.coords
|
|
assert isinstance(coords, DataTreeCoordinates)
|
|
|
|
# len
|
|
assert len(coords) == 4
|
|
|
|
# iter
|
|
assert list(coords) == ["x", "y", "a", "b"]
|
|
|
|
assert_identical(coords["x"].variable, dt["x"].variable)
|
|
assert_identical(coords["y"].variable, dt["y"].variable)
|
|
|
|
assert "x" in coords
|
|
assert "a" in coords
|
|
assert 0 not in coords
|
|
assert "foo" not in coords
|
|
assert "child" not in coords
|
|
|
|
with pytest.raises(KeyError):
|
|
coords["foo"]
|
|
|
|
# TODO this currently raises a ValueError instead of a KeyError
|
|
# with pytest.raises(KeyError):
|
|
# coords[0]
|
|
|
|
# repr
|
|
expected = dedent(
|
|
"""\
|
|
Coordinates:
|
|
* x (x) int64 16B -1 -2
|
|
* y (y) int64 24B 0 1 2
|
|
a (x) int64 16B 4 5
|
|
b int64 8B -10"""
|
|
)
|
|
actual = repr(coords)
|
|
assert expected == actual
|
|
|
|
# dims
|
|
assert coords.sizes == {"x": 2, "y": 3}
|
|
|
|
# dtypes
|
|
assert coords.dtypes == {
|
|
"x": np.dtype("int64"),
|
|
"y": np.dtype("int64"),
|
|
"a": np.dtype("int64"),
|
|
"b": np.dtype("int64"),
|
|
}
|
|
|
|
def test_modify(self) -> None:
|
|
ds = Dataset(
|
|
data_vars={
|
|
"foo": (["x", "y"], np.random.randn(2, 3)),
|
|
},
|
|
coords={
|
|
"x": ("x", np.array([-1, -2], "int64")),
|
|
"y": ("y", np.array([0, 1, 2], "int64")),
|
|
"a": ("x", np.array([4, 5], "int64")),
|
|
"b": np.int64(-10),
|
|
},
|
|
)
|
|
dt = DataTree(dataset=ds)
|
|
dt["child"] = DataTree()
|
|
|
|
actual = dt.copy(deep=True)
|
|
actual.coords["x"] = ("x", ["a", "b"])
|
|
assert_array_equal(actual["x"], ["a", "b"])
|
|
|
|
actual = dt.copy(deep=True)
|
|
actual.coords["z"] = ("z", ["a", "b"])
|
|
assert_array_equal(actual["z"], ["a", "b"])
|
|
|
|
actual = dt.copy(deep=True)
|
|
with pytest.raises(ValueError, match=r"conflicting dimension sizes"):
|
|
actual.coords["x"] = ("x", [-1])
|
|
assert_identical(actual, dt) # should not be modified
|
|
|
|
# TODO: re-enable after implementing reset_coords()
|
|
# actual = dt.copy()
|
|
# del actual.coords["b"]
|
|
# expected = dt.reset_coords("b", drop=True)
|
|
# assert_identical(expected, actual)
|
|
|
|
with pytest.raises(KeyError):
|
|
del dt.coords["not_found"]
|
|
|
|
with pytest.raises(KeyError):
|
|
del dt.coords["foo"]
|
|
|
|
# TODO: re-enable after implementing assign_coords()
|
|
# actual = dt.copy(deep=True)
|
|
# actual.coords.update({"c": 11})
|
|
# expected = dt.assign_coords({"c": 11})
|
|
# assert_identical(expected, actual)
|
|
|
|
# # regression test for GH3746
|
|
# del actual.coords["x"]
|
|
# assert "x" not in actual.xindexes
|
|
|
|
# test that constructors can also handle the `DataTreeCoordinates` object
|
|
ds2 = Dataset(coords=dt.coords)
|
|
assert_identical(ds2.coords, dt.coords)
|
|
da = DataArray(coords=dt.coords)
|
|
assert_identical(da.coords, dt.coords)
|
|
|
|
# DataTree constructor doesn't accept coords= but should still be able to handle DatasetCoordinates
|
|
dt2 = DataTree(dataset=dt.coords)
|
|
assert_identical(dt2.coords, dt.coords)
|
|
|
|
def test_inherited(self) -> None:
|
|
ds = Dataset(
|
|
data_vars={
|
|
"foo": (["x", "y"], np.random.randn(2, 3)),
|
|
},
|
|
coords={
|
|
"x": ("x", np.array([-1, -2], "int64")),
|
|
"y": ("y", np.array([0, 1, 2], "int64")),
|
|
"a": ("x", np.array([4, 5], "int64")),
|
|
"b": np.int64(-10),
|
|
},
|
|
)
|
|
dt = DataTree(dataset=ds)
|
|
dt["child"] = DataTree()
|
|
child = dt["child"]
|
|
|
|
assert set(dt.coords) == {"x", "y", "a", "b"}
|
|
assert set(child.coords) == {"x", "y"}
|
|
|
|
actual = child.copy(deep=True)
|
|
actual.coords["x"] = ("x", ["a", "b"])
|
|
assert_array_equal(actual["x"], ["a", "b"])
|
|
|
|
actual = child.copy(deep=True)
|
|
actual.coords.update({"c": 11})
|
|
expected = child.copy(deep=True)
|
|
expected.coords["c"] = 11
|
|
# check we have only altered the child node
|
|
assert_identical(expected.root, actual.root)
|
|
|
|
with pytest.raises(KeyError):
|
|
# cannot delete inherited coordinate from child node
|
|
del child["x"]
|
|
|
|
# TODO requires a fix for #9472
|
|
# actual = child.copy(deep=True)
|
|
# actual.coords.update({"c": 11})
|
|
# expected = child.assign_coords({"c": 11})
|
|
# assert_identical(expected, actual)
|
|
|
|
|
|
def test_delitem() -> None:
|
|
ds = Dataset({"a": 0}, coords={"x": ("x", [1, 2]), "z": "a"})
|
|
dt = DataTree(ds, children={"c": DataTree()})
|
|
|
|
with pytest.raises(KeyError):
|
|
del dt["foo"]
|
|
|
|
# test delete children
|
|
del dt["c"]
|
|
assert dt.children == {}
|
|
assert set(dt.variables) == {"x", "z", "a"}
|
|
with pytest.raises(KeyError):
|
|
del dt["c"]
|
|
|
|
# test delete variables
|
|
del dt["a"]
|
|
assert set(dt.coords) == {"x", "z"}
|
|
with pytest.raises(KeyError):
|
|
del dt["a"]
|
|
|
|
# test delete coordinates
|
|
del dt["z"]
|
|
assert set(dt.coords) == {"x"}
|
|
with pytest.raises(KeyError):
|
|
del dt["z"]
|
|
|
|
# test delete indexed coordinates
|
|
del dt["x"]
|
|
assert dt.variables == {}
|
|
assert dt.coords == {}
|
|
assert dt.indexes == {}
|
|
with pytest.raises(KeyError):
|
|
del dt["x"]
|
|
|
|
|
|
class TestTreeFromDict:
|
|
def test_data_in_root(self) -> None:
|
|
dat = xr.Dataset()
|
|
dt = DataTree.from_dict({"/": dat})
|
|
assert dt.name is None
|
|
assert dt.parent is None
|
|
assert dt.children == {}
|
|
assert_identical(dt.to_dataset(), dat)
|
|
|
|
def test_one_layer(self) -> None:
|
|
dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"b": 2})
|
|
dt = DataTree.from_dict({"run1": dat1, "run2": dat2})
|
|
assert_identical(dt.to_dataset(), xr.Dataset())
|
|
assert dt.name is None
|
|
assert_identical(dt["run1"].to_dataset(), dat1)
|
|
assert dt["run1"].children == {}
|
|
assert_identical(dt["run2"].to_dataset(), dat2)
|
|
assert dt["run2"].children == {}
|
|
|
|
def test_two_layers(self) -> None:
|
|
dat1, dat2 = xr.Dataset({"a": 1}), xr.Dataset({"a": [1, 2]})
|
|
dt = DataTree.from_dict({"highres/run": dat1, "lowres/run": dat2})
|
|
assert "highres" in dt.children
|
|
assert "lowres" in dt.children
|
|
highres_run = dt["highres/run"]
|
|
assert_identical(highres_run.to_dataset(), dat1)
|
|
|
|
def test_nones(self) -> None:
|
|
dt = DataTree.from_dict({"d": None, "d/e": None})
|
|
assert [node.name for node in dt.subtree] == [None, "d", "e"]
|
|
assert [node.path for node in dt.subtree] == ["/", "/d", "/d/e"]
|
|
assert_identical(dt["d/e"].to_dataset(), xr.Dataset())
|
|
|
|
def test_full(self, simple_datatree) -> None:
|
|
dt = simple_datatree
|
|
paths = list(node.path for node in dt.subtree)
|
|
assert paths == [
|
|
"/",
|
|
"/set1",
|
|
"/set2",
|
|
"/set3",
|
|
"/set1/set1",
|
|
"/set1/set2",
|
|
"/set2/set1",
|
|
]
|
|
|
|
def test_datatree_values(self) -> None:
|
|
dat1 = DataTree(dataset=xr.Dataset({"a": 1}))
|
|
expected = DataTree()
|
|
expected["a"] = dat1
|
|
|
|
actual = DataTree.from_dict({"a": dat1})
|
|
|
|
assert_identical(actual, expected)
|
|
|
|
def test_roundtrip_to_dict(self, simple_datatree) -> None:
|
|
tree = simple_datatree
|
|
roundtrip = DataTree.from_dict(tree.to_dict())
|
|
assert_identical(tree, roundtrip)
|
|
|
|
def test_to_dict(self):
|
|
tree = DataTree.from_dict({"/a/b/c": None})
|
|
roundtrip = DataTree.from_dict(tree.to_dict())
|
|
assert_identical(tree, roundtrip)
|
|
|
|
roundtrip = DataTree.from_dict(tree.to_dict(relative=True))
|
|
assert_identical(tree, roundtrip)
|
|
|
|
roundtrip = DataTree.from_dict(tree.children["a"].to_dict(relative=False))
|
|
assert_identical(tree, roundtrip)
|
|
|
|
expected = DataTree.from_dict({"b/c": None})
|
|
actual = DataTree.from_dict(tree.children["a"].to_dict(relative=True))
|
|
assert_identical(expected, actual)
|
|
|
|
def test_roundtrip_unnamed_root(self, simple_datatree) -> None:
|
|
# See GH81
|
|
|
|
dt = simple_datatree
|
|
dt.name = "root"
|
|
roundtrip = DataTree.from_dict(dt.to_dict())
|
|
assert roundtrip.equals(dt)
|
|
|
|
def test_insertion_order(self) -> None:
|
|
# regression test for GH issue #9276
|
|
reversed = DataTree.from_dict(
|
|
{
|
|
"/Homer/Lisa": xr.Dataset({"age": 8}),
|
|
"/Homer/Bart": xr.Dataset({"age": 10}),
|
|
"/Homer": xr.Dataset({"age": 39}),
|
|
"/": xr.Dataset({"age": 83}),
|
|
}
|
|
)
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"age": 83}),
|
|
"/Homer": xr.Dataset({"age": 39}),
|
|
"/Homer/Lisa": xr.Dataset({"age": 8}),
|
|
"/Homer/Bart": xr.Dataset({"age": 10}),
|
|
}
|
|
)
|
|
assert reversed.equals(expected)
|
|
|
|
# Check that Bart and Lisa's order is still preserved within the group,
|
|
# despite 'Bart' coming before 'Lisa' when sorted alphabetically
|
|
assert list(reversed["Homer"].children.keys()) == ["Lisa", "Bart"]
|
|
|
|
def test_array_values(self) -> None:
|
|
data = {"foo": xr.DataArray(1, name="bar")}
|
|
with pytest.raises(TypeError):
|
|
DataTree.from_dict(data) # type: ignore[arg-type]
|
|
|
|
def test_relative_paths(self) -> None:
|
|
tree = DataTree.from_dict({".": None, "foo": None, "./bar": None, "x/y": None})
|
|
paths = [node.path for node in tree.subtree]
|
|
assert paths == [
|
|
"/",
|
|
"/foo",
|
|
"/bar",
|
|
"/x",
|
|
"/x/y",
|
|
]
|
|
|
|
def test_root_keys(self):
|
|
ds = Dataset({"x": 1})
|
|
expected = DataTree(dataset=ds)
|
|
|
|
actual = DataTree.from_dict({"": ds})
|
|
assert_identical(actual, expected)
|
|
|
|
actual = DataTree.from_dict({".": ds})
|
|
assert_identical(actual, expected)
|
|
|
|
actual = DataTree.from_dict({"/": ds})
|
|
assert_identical(actual, expected)
|
|
|
|
actual = DataTree.from_dict({"./": ds})
|
|
assert_identical(actual, expected)
|
|
|
|
with pytest.raises(
|
|
ValueError, match="multiple entries found corresponding to the root node"
|
|
):
|
|
DataTree.from_dict({"": ds, "/": ds})
|
|
|
|
def test_name(self):
|
|
tree = DataTree.from_dict({"/": None}, name="foo")
|
|
assert tree.name == "foo"
|
|
|
|
tree = DataTree.from_dict({"/": DataTree()}, name="foo")
|
|
assert tree.name == "foo"
|
|
|
|
tree = DataTree.from_dict({"/": DataTree(name="bar")}, name="foo")
|
|
assert tree.name == "foo"
|
|
|
|
|
|
class TestDatasetView:
|
|
def test_view_contents(self) -> None:
|
|
ds = create_test_data()
|
|
dt = DataTree(dataset=ds)
|
|
assert ds.identical(
|
|
dt.dataset
|
|
) # this only works because Dataset.identical doesn't check types
|
|
assert isinstance(dt.dataset, xr.Dataset)
|
|
|
|
def test_immutability(self) -> None:
|
|
# See issue https://github.com/xarray-contrib/datatree/issues/38
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": None,
|
|
"/a": None,
|
|
},
|
|
name="root",
|
|
)
|
|
|
|
with pytest.raises(
|
|
AttributeError, match="Mutation of the DatasetView is not allowed"
|
|
):
|
|
dt.dataset["a"] = xr.DataArray(0)
|
|
|
|
with pytest.raises(
|
|
AttributeError, match="Mutation of the DatasetView is not allowed"
|
|
):
|
|
dt.dataset.update({"a": 0})
|
|
|
|
# TODO are there any other ways you can normally modify state (in-place)?
|
|
# (not attribute-like assignment because that doesn't work on Dataset anyway)
|
|
|
|
def test_methods(self) -> None:
|
|
ds = create_test_data()
|
|
dt = DataTree(dataset=ds)
|
|
assert ds.mean().identical(dt.dataset.mean())
|
|
assert isinstance(dt.dataset.mean(), xr.Dataset)
|
|
|
|
def test_arithmetic(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
expected = create_test_datatree(modify=lambda ds: 10.0 * ds)[
|
|
"set1"
|
|
].to_dataset()
|
|
result = 10.0 * dt["set1"].dataset
|
|
assert result.identical(expected)
|
|
|
|
def test_init_via_type(self) -> None:
|
|
# from datatree GH issue https://github.com/xarray-contrib/datatree/issues/188
|
|
# xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray
|
|
|
|
a = xr.DataArray(
|
|
np.random.rand(3, 4, 10),
|
|
dims=["x", "y", "time"],
|
|
coords={"area": (["x", "y"], np.random.rand(3, 4))},
|
|
).to_dataset(name="data")
|
|
dt = DataTree(dataset=a)
|
|
|
|
def weighted_mean(ds):
|
|
return ds.weighted(ds.area).mean(["x", "y"])
|
|
|
|
weighted_mean(dt.dataset)
|
|
|
|
|
|
class TestAccess:
|
|
def test_attribute_access(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
# vars / coords
|
|
for key in ["a", "set0"]:
|
|
assert_equal(dt[key], getattr(dt, key))
|
|
assert key in dir(dt)
|
|
|
|
# dims
|
|
assert_equal(dt["a"]["y"], dt.a.y)
|
|
assert "y" in dir(dt["a"])
|
|
|
|
# children
|
|
for key in ["set1", "set2", "set3"]:
|
|
assert_equal(dt[key], getattr(dt, key))
|
|
assert key in dir(dt)
|
|
|
|
# attrs
|
|
dt.attrs["meta"] = "NASA"
|
|
assert dt.attrs["meta"] == "NASA"
|
|
assert "meta" in dir(dt)
|
|
|
|
def test_ipython_key_completions_complex(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
key_completions = dt._ipython_key_completions_()
|
|
|
|
node_keys = [node.path[1:] for node in dt.descendants]
|
|
assert all(node_key in key_completions for node_key in node_keys)
|
|
|
|
var_keys = list(dt.variables.keys())
|
|
assert all(var_key in key_completions for var_key in var_keys)
|
|
|
|
def test_ipython_key_completitions_subnode(self) -> None:
|
|
tree = xr.DataTree.from_dict({"/": None, "/a": None, "/a/b/": None})
|
|
expected = ["b"]
|
|
actual = tree["a"]._ipython_key_completions_()
|
|
assert expected == actual
|
|
|
|
def test_operation_with_attrs_but_no_data(self) -> None:
|
|
# tests bug from xarray-datatree GH262
|
|
xs = xr.Dataset({"testvar": xr.DataArray(np.ones((2, 3)))})
|
|
dt = DataTree.from_dict({"node1": xs, "node2": xs})
|
|
dt.attrs["test_key"] = 1 # sel works fine without this line
|
|
dt.sel(dim_0=0)
|
|
|
|
|
|
class TestRepr:
|
|
def test_repr_four_nodes(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(
|
|
{"e": (("x",), [1.0, 2.0])},
|
|
coords={"x": [2.0, 3.0]},
|
|
),
|
|
"/b": xr.Dataset({"f": (("y",), [3.0])}),
|
|
"/b/c": xr.Dataset(),
|
|
"/b/d": xr.Dataset({"g": 4.0}),
|
|
}
|
|
)
|
|
|
|
result = repr(dt)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree>
|
|
Group: /
|
|
│ Dimensions: (x: 2)
|
|
│ Coordinates:
|
|
│ * x (x) float64 16B 2.0 3.0
|
|
│ Data variables:
|
|
│ e (x) float64 16B 1.0 2.0
|
|
└── Group: /b
|
|
│ Dimensions: (y: 1)
|
|
│ Dimensions without coordinates: y
|
|
│ Data variables:
|
|
│ f (y) float64 8B 3.0
|
|
├── Group: /b/c
|
|
└── Group: /b/d
|
|
Dimensions: ()
|
|
Data variables:
|
|
g float64 8B 4.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(dt.b)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'b'>
|
|
Group: /b
|
|
│ Dimensions: (x: 2, y: 1)
|
|
│ Inherited coordinates:
|
|
│ * x (x) float64 16B 2.0 3.0
|
|
│ Dimensions without coordinates: y
|
|
│ Data variables:
|
|
│ f (y) float64 8B 3.0
|
|
├── Group: /b/c
|
|
└── Group: /b/d
|
|
Dimensions: ()
|
|
Data variables:
|
|
g float64 8B 4.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(dt.b.d)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'd'>
|
|
Group: /b/d
|
|
Dimensions: (x: 2, y: 1)
|
|
Inherited coordinates:
|
|
* x (x) float64 16B 2.0 3.0
|
|
Dimensions without coordinates: y
|
|
Data variables:
|
|
g float64 8B 4.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
def test_repr_two_children(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/": Dataset(coords={"x": [1.0]}),
|
|
"/first_child": None,
|
|
"/second_child": Dataset({"foo": ("x", [0.0])}, coords={"z": 1.0}),
|
|
}
|
|
)
|
|
|
|
result = repr(tree)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree>
|
|
Group: /
|
|
│ Dimensions: (x: 1)
|
|
│ Coordinates:
|
|
│ * x (x) float64 8B 1.0
|
|
├── Group: /first_child
|
|
└── Group: /second_child
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
z float64 8B 1.0
|
|
Data variables:
|
|
foo (x) float64 8B 0.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(tree["first_child"])
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'first_child'>
|
|
Group: /first_child
|
|
Dimensions: (x: 1)
|
|
Inherited coordinates:
|
|
* x (x) float64 8B 1.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(tree["second_child"])
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'second_child'>
|
|
Group: /second_child
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
z float64 8B 1.0
|
|
Inherited coordinates:
|
|
* x (x) float64 8B 1.0
|
|
Data variables:
|
|
foo (x) float64 8B 0.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
def test_repr_inherited_dims(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"foo": ("x", [1.0])}),
|
|
"/child": Dataset({"bar": ("y", [2.0])}),
|
|
}
|
|
)
|
|
|
|
result = repr(tree)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree>
|
|
Group: /
|
|
│ Dimensions: (x: 1)
|
|
│ Dimensions without coordinates: x
|
|
│ Data variables:
|
|
│ foo (x) float64 8B 1.0
|
|
└── Group: /child
|
|
Dimensions: (y: 1)
|
|
Dimensions without coordinates: y
|
|
Data variables:
|
|
bar (y) float64 8B 2.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(tree["child"])
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'child'>
|
|
Group: /child
|
|
Dimensions: (x: 1, y: 1)
|
|
Dimensions without coordinates: x, y
|
|
Data variables:
|
|
bar (y) float64 8B 2.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
@pytest.mark.skipif(
|
|
ON_WINDOWS, reason="windows (pre NumPy2) uses int32 instead of int64"
|
|
)
|
|
def test_doc_example(self) -> None:
|
|
# regression test for https://github.com/pydata/xarray/issues/9499
|
|
time = xr.DataArray(
|
|
data=np.array(["2022-01", "2023-01"], dtype="<U7"), dims="time"
|
|
)
|
|
stations = xr.DataArray(
|
|
data=np.array(list("abcdef"), dtype="<U1"), dims="station"
|
|
)
|
|
lon = [-100, -80, -60]
|
|
lat = [10, 20, 30]
|
|
# Set up fake data
|
|
wind_speed = xr.DataArray(np.ones((2, 6)) * 2, dims=("time", "station"))
|
|
pressure = xr.DataArray(np.ones((2, 6)) * 3, dims=("time", "station"))
|
|
air_temperature = xr.DataArray(np.ones((2, 6)) * 4, dims=("time", "station"))
|
|
dewpoint = xr.DataArray(np.ones((2, 6)) * 5, dims=("time", "station"))
|
|
infrared = xr.DataArray(np.ones((2, 3, 3)) * 6, dims=("time", "lon", "lat"))
|
|
true_color = xr.DataArray(np.ones((2, 3, 3)) * 7, dims=("time", "lon", "lat"))
|
|
tree = xr.DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(
|
|
coords={"time": time},
|
|
),
|
|
"/weather": xr.Dataset(
|
|
coords={"station": stations},
|
|
data_vars={
|
|
"wind_speed": wind_speed,
|
|
"pressure": pressure,
|
|
},
|
|
),
|
|
"/weather/temperature": xr.Dataset(
|
|
data_vars={
|
|
"air_temperature": air_temperature,
|
|
"dewpoint": dewpoint,
|
|
},
|
|
),
|
|
"/satellite": xr.Dataset(
|
|
coords={"lat": lat, "lon": lon},
|
|
data_vars={
|
|
"infrared": infrared,
|
|
"true_color": true_color,
|
|
},
|
|
),
|
|
},
|
|
)
|
|
|
|
result = repr(tree)
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree>
|
|
Group: /
|
|
│ Dimensions: (time: 2)
|
|
│ Coordinates:
|
|
│ * time (time) <U7 56B '2022-01' '2023-01'
|
|
├── Group: /weather
|
|
│ │ Dimensions: (station: 6, time: 2)
|
|
│ │ Coordinates:
|
|
│ │ * station (station) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'
|
|
│ │ Data variables:
|
|
│ │ wind_speed (time, station) float64 96B 2.0 2.0 2.0 2.0 ... 2.0 2.0 2.0 2.0
|
|
│ │ pressure (time, station) float64 96B 3.0 3.0 3.0 3.0 ... 3.0 3.0 3.0 3.0
|
|
│ └── Group: /weather/temperature
|
|
│ Dimensions: (time: 2, station: 6)
|
|
│ Data variables:
|
|
│ air_temperature (time, station) float64 96B 4.0 4.0 4.0 4.0 ... 4.0 4.0 4.0
|
|
│ dewpoint (time, station) float64 96B 5.0 5.0 5.0 5.0 ... 5.0 5.0 5.0
|
|
└── Group: /satellite
|
|
Dimensions: (lat: 3, lon: 3, time: 2)
|
|
Coordinates:
|
|
* lat (lat) int64 24B 10 20 30
|
|
* lon (lon) int64 24B -100 -80 -60
|
|
Data variables:
|
|
infrared (time, lon, lat) float64 144B 6.0 6.0 6.0 6.0 ... 6.0 6.0 6.0
|
|
true_color (time, lon, lat) float64 144B 7.0 7.0 7.0 7.0 ... 7.0 7.0 7.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
result = repr(tree["weather"])
|
|
expected = dedent(
|
|
"""
|
|
<xarray.DataTree 'weather'>
|
|
Group: /weather
|
|
│ Dimensions: (time: 2, station: 6)
|
|
│ Coordinates:
|
|
│ * station (station) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'
|
|
│ Inherited coordinates:
|
|
│ * time (time) <U7 56B '2022-01' '2023-01'
|
|
│ Data variables:
|
|
│ wind_speed (time, station) float64 96B 2.0 2.0 2.0 2.0 ... 2.0 2.0 2.0 2.0
|
|
│ pressure (time, station) float64 96B 3.0 3.0 3.0 3.0 ... 3.0 3.0 3.0 3.0
|
|
└── Group: /weather/temperature
|
|
Dimensions: (time: 2, station: 6)
|
|
Data variables:
|
|
air_temperature (time, station) float64 96B 4.0 4.0 4.0 4.0 ... 4.0 4.0 4.0
|
|
dewpoint (time, station) float64 96B 5.0 5.0 5.0 5.0 ... 5.0 5.0 5.0
|
|
"""
|
|
).strip()
|
|
assert result == expected
|
|
|
|
|
|
def _exact_match(message: str) -> str:
|
|
return re.escape(dedent(message).strip())
|
|
return "^" + re.escape(dedent(message.rstrip())) + "$"
|
|
|
|
|
|
class TestInheritance:
|
|
def test_inherited_dims(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"d": (("x",), [1, 2])}),
|
|
"/b": xr.Dataset({"e": (("y",), [3])}),
|
|
"/c": xr.Dataset({"f": (("y",), [3, 4, 5])}),
|
|
}
|
|
)
|
|
assert dt.sizes == {"x": 2}
|
|
# nodes should include inherited dimensions
|
|
assert dt.b.sizes == {"x": 2, "y": 1}
|
|
assert dt.c.sizes == {"x": 2, "y": 3}
|
|
# dataset objects created from nodes should not
|
|
assert dt.b.dataset.sizes == {"y": 1}
|
|
assert dt.b.to_dataset(inherit=True).sizes == {"y": 1}
|
|
assert dt.b.to_dataset(inherit=False).sizes == {"y": 1}
|
|
|
|
def test_inherited_coords_index(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"d": (("x",), [1, 2])}, coords={"x": [2, 3]}),
|
|
"/b": xr.Dataset({"e": (("y",), [3])}),
|
|
}
|
|
)
|
|
assert "x" in dt["/b"].indexes
|
|
assert "x" in dt["/b"].coords
|
|
xr.testing.assert_identical(dt["/x"], dt["/b/x"])
|
|
|
|
def test_inherit_only_index_coords(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1], "y": 2}),
|
|
"/b": xr.Dataset(coords={"z": 3}),
|
|
}
|
|
)
|
|
assert dt.coords.keys() == {"x", "y"}
|
|
xr.testing.assert_equal(
|
|
dt["/x"], xr.DataArray([1], dims=["x"], coords={"x": [1], "y": 2})
|
|
)
|
|
xr.testing.assert_equal(dt["/y"], xr.DataArray(2, coords={"y": 2}))
|
|
assert dt["/b"].coords.keys() == {"x", "z"}
|
|
xr.testing.assert_equal(
|
|
dt["/b/x"], xr.DataArray([1], dims=["x"], coords={"x": [1], "z": 3})
|
|
)
|
|
xr.testing.assert_equal(dt["/b/z"], xr.DataArray(3, coords={"z": 3}))
|
|
|
|
def test_inherited_coords_with_index_are_deduplicated(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1, 2]}),
|
|
"/b": xr.Dataset(coords={"x": [1, 2]}),
|
|
}
|
|
)
|
|
child_dataset = dt.children["b"].to_dataset(inherit=False)
|
|
expected = xr.Dataset()
|
|
assert_identical(child_dataset, expected)
|
|
|
|
dt["/c"] = xr.Dataset({"foo": ("x", [4, 5])}, coords={"x": [1, 2]})
|
|
child_dataset = dt.children["c"].to_dataset(inherit=False)
|
|
expected = xr.Dataset({"foo": ("x", [4, 5])})
|
|
assert_identical(child_dataset, expected)
|
|
|
|
def test_deduplicated_after_setitem(self) -> None:
|
|
# regression test for GH #9601
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1, 2]}),
|
|
"/b": None,
|
|
}
|
|
)
|
|
dt["b/x"] = dt["x"]
|
|
child_dataset = dt.children["b"].to_dataset(inherit=False)
|
|
expected = xr.Dataset()
|
|
assert_identical(child_dataset, expected)
|
|
|
|
def test_inconsistent_dims(self) -> None:
|
|
expected_msg = _exact_match(
|
|
"""
|
|
group '/b' is not aligned with its parents:
|
|
Group:
|
|
Dimensions: (x: 1)
|
|
Dimensions without coordinates: x
|
|
Data variables:
|
|
c (x) float64 8B 3.0
|
|
From parents:
|
|
Dimensions: (x: 2)
|
|
Dimensions without coordinates: x
|
|
"""
|
|
)
|
|
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"a": (("x",), [1.0, 2.0])}),
|
|
"/b": xr.Dataset({"c": (("x",), [3.0])}),
|
|
}
|
|
)
|
|
|
|
dt = DataTree()
|
|
dt["/a"] = xr.DataArray([1.0, 2.0], dims=["x"])
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
dt["/b/c"] = xr.DataArray([3.0], dims=["x"])
|
|
|
|
b = DataTree(dataset=xr.Dataset({"c": (("x",), [3.0])}))
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree(
|
|
dataset=xr.Dataset({"a": (("x",), [1.0, 2.0])}),
|
|
children={"b": b},
|
|
)
|
|
|
|
def test_inconsistent_child_indexes(self) -> None:
|
|
expected_msg = _exact_match(
|
|
"""
|
|
group '/b' is not aligned with its parents:
|
|
Group:
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
* x (x) float64 8B 2.0
|
|
Data variables:
|
|
*empty*
|
|
From parents:
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
* x (x) float64 8B 1.0
|
|
"""
|
|
)
|
|
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1.0]}),
|
|
"/b": xr.Dataset(coords={"x": [2.0]}),
|
|
}
|
|
)
|
|
|
|
dt = DataTree()
|
|
dt.dataset = xr.Dataset(coords={"x": [1.0]}) # type: ignore[assignment]
|
|
dt["/b"] = DataTree()
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
dt["/b"].dataset = xr.Dataset(coords={"x": [2.0]})
|
|
|
|
b = DataTree(xr.Dataset(coords={"x": [2.0]}))
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree(dataset=xr.Dataset(coords={"x": [1.0]}), children={"b": b})
|
|
|
|
def test_inconsistent_grandchild_indexes(self) -> None:
|
|
expected_msg = _exact_match(
|
|
"""
|
|
group '/b/c' is not aligned with its parents:
|
|
Group:
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
* x (x) float64 8B 2.0
|
|
Data variables:
|
|
*empty*
|
|
From parents:
|
|
Dimensions: (x: 1)
|
|
Coordinates:
|
|
* x (x) float64 8B 1.0
|
|
"""
|
|
)
|
|
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1.0]}),
|
|
"/b/c": xr.Dataset(coords={"x": [2.0]}),
|
|
}
|
|
)
|
|
|
|
dt = DataTree()
|
|
dt.dataset = xr.Dataset(coords={"x": [1.0]}) # type: ignore[assignment]
|
|
dt["/b/c"] = DataTree()
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
dt["/b/c"].dataset = xr.Dataset(coords={"x": [2.0]})
|
|
|
|
c = DataTree(xr.Dataset(coords={"x": [2.0]}))
|
|
b = DataTree(children={"c": c})
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree(dataset=xr.Dataset(coords={"x": [1.0]}), children={"b": b})
|
|
|
|
def test_inconsistent_grandchild_dims(self) -> None:
|
|
expected_msg = _exact_match(
|
|
"""
|
|
group '/b/c' is not aligned with its parents:
|
|
Group:
|
|
Dimensions: (x: 1)
|
|
Dimensions without coordinates: x
|
|
Data variables:
|
|
d (x) float64 8B 3.0
|
|
From parents:
|
|
Dimensions: (x: 2)
|
|
Dimensions without coordinates: x
|
|
"""
|
|
)
|
|
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"a": (("x",), [1.0, 2.0])}),
|
|
"/b/c": xr.Dataset({"d": (("x",), [3.0])}),
|
|
}
|
|
)
|
|
|
|
dt = DataTree()
|
|
dt["/a"] = xr.DataArray([1.0, 2.0], dims=["x"])
|
|
with pytest.raises(ValueError, match=expected_msg):
|
|
dt["/b/c/d"] = xr.DataArray([3.0], dims=["x"])
|
|
|
|
|
|
class TestRestructuring:
|
|
def test_drop_nodes(self) -> None:
|
|
sue = DataTree.from_dict({"Mary": None, "Kate": None, "Ashley": None})
|
|
|
|
# test drop just one node
|
|
dropped_one = sue.drop_nodes(names="Mary")
|
|
assert "Mary" not in dropped_one.children
|
|
|
|
# test drop multiple nodes
|
|
dropped = sue.drop_nodes(names=["Mary", "Kate"])
|
|
assert not set(["Mary", "Kate"]).intersection(set(dropped.children))
|
|
assert "Ashley" in dropped.children
|
|
|
|
# test raise
|
|
with pytest.raises(KeyError, match="nodes {'Mary'} not present"):
|
|
dropped.drop_nodes(names=["Mary", "Ashley"])
|
|
|
|
# test ignore
|
|
childless = dropped.drop_nodes(names=["Mary", "Ashley"], errors="ignore")
|
|
assert childless.children == {}
|
|
|
|
def test_assign(self) -> None:
|
|
dt = DataTree()
|
|
expected = DataTree.from_dict({"/": xr.Dataset({"foo": 0}), "/a": None})
|
|
|
|
# kwargs form
|
|
result = dt.assign(foo=xr.DataArray(0), a=DataTree())
|
|
assert_equal(result, expected)
|
|
|
|
# dict form
|
|
result = dt.assign({"foo": xr.DataArray(0), "a": DataTree()})
|
|
assert_equal(result, expected)
|
|
|
|
|
|
class TestPipe:
|
|
def test_noop(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
actual = dt.pipe(lambda tree: tree)
|
|
assert actual.identical(dt)
|
|
|
|
def test_params(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
def f(tree, **attrs):
|
|
return tree.assign(arr_with_attrs=xr.Variable("dim0", [], attrs=attrs))
|
|
|
|
attrs = {"x": 1, "y": 2, "z": 3}
|
|
|
|
actual = dt.pipe(f, **attrs)
|
|
assert actual["arr_with_attrs"].attrs == attrs
|
|
|
|
def test_named_self(self, create_test_datatree) -> None:
|
|
dt = create_test_datatree()
|
|
|
|
def f(x, tree, y):
|
|
tree.attrs.update({"x": x, "y": y})
|
|
return tree
|
|
|
|
attrs = {"x": 1, "y": 2}
|
|
|
|
actual = dt.pipe((f, "tree"), **attrs)
|
|
|
|
assert actual is dt and actual.attrs == attrs
|
|
|
|
|
|
class TestIsomorphicEqualsAndIdentical:
|
|
def test_isomorphic(self):
|
|
tree = DataTree.from_dict({"/a": None, "/a/b": None, "/c": None})
|
|
|
|
diff_data = DataTree.from_dict(
|
|
{"/a": None, "/a/b": None, "/c": xr.Dataset({"foo": 1})}
|
|
)
|
|
assert tree.isomorphic(diff_data)
|
|
|
|
diff_order = DataTree.from_dict({"/c": None, "/a": None, "/a/b": None})
|
|
assert tree.isomorphic(diff_order)
|
|
|
|
diff_nodes = DataTree.from_dict({"/a": None, "/a/b": None, "/d": None})
|
|
assert not tree.isomorphic(diff_nodes)
|
|
|
|
more_nodes = DataTree.from_dict(
|
|
{"/a": None, "/a/b": None, "/c": None, "/d": None}
|
|
)
|
|
assert not tree.isomorphic(more_nodes)
|
|
|
|
def test_minimal_variations(self):
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 1}),
|
|
"/child": Dataset({"x": 2}),
|
|
}
|
|
)
|
|
assert tree.equals(tree)
|
|
assert tree.identical(tree)
|
|
|
|
child = tree.children["child"]
|
|
assert child.equals(child)
|
|
assert child.identical(child)
|
|
|
|
new_child = DataTree(dataset=Dataset({"x": 2}), name="child")
|
|
assert child.equals(new_child)
|
|
assert child.identical(new_child)
|
|
|
|
anonymous_child = DataTree(dataset=Dataset({"x": 2}))
|
|
# TODO: re-enable this after fixing .equals() not to require matching
|
|
# names on the root node (i.e., after switching to use zip_subtrees)
|
|
# assert child.equals(anonymous_child)
|
|
assert not child.identical(anonymous_child)
|
|
|
|
different_variables = DataTree.from_dict(
|
|
{
|
|
"/": Dataset(),
|
|
"/other": Dataset({"x": 2}),
|
|
}
|
|
)
|
|
assert not tree.equals(different_variables)
|
|
assert not tree.identical(different_variables)
|
|
|
|
different_root_data = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 4}),
|
|
"/child": Dataset({"x": 2}),
|
|
}
|
|
)
|
|
assert not tree.equals(different_root_data)
|
|
assert not tree.identical(different_root_data)
|
|
|
|
different_child_data = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 1}),
|
|
"/child": Dataset({"x": 3}),
|
|
}
|
|
)
|
|
assert not tree.equals(different_child_data)
|
|
assert not tree.identical(different_child_data)
|
|
|
|
different_child_node_attrs = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 1}),
|
|
"/child": Dataset({"x": 2}, attrs={"foo": "bar"}),
|
|
}
|
|
)
|
|
assert tree.equals(different_child_node_attrs)
|
|
assert not tree.identical(different_child_node_attrs)
|
|
|
|
different_child_variable_attrs = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 1}),
|
|
"/child": Dataset({"x": ((), 2, {"foo": "bar"})}),
|
|
}
|
|
)
|
|
assert tree.equals(different_child_variable_attrs)
|
|
assert not tree.identical(different_child_variable_attrs)
|
|
|
|
different_name = DataTree.from_dict(
|
|
{
|
|
"/": Dataset({"x": 1}),
|
|
"/child": Dataset({"x": 2}),
|
|
},
|
|
name="different",
|
|
)
|
|
# TODO: re-enable this after fixing .equals() not to require matching
|
|
# names on the root node (i.e., after switching to use zip_subtrees)
|
|
# assert tree.equals(different_name)
|
|
assert not tree.identical(different_name)
|
|
|
|
def test_differently_inherited_coordinates(self):
|
|
root = DataTree.from_dict(
|
|
{
|
|
"/": Dataset(coords={"x": [1, 2]}),
|
|
"/child": Dataset(),
|
|
}
|
|
)
|
|
child = root.children["child"]
|
|
assert child.equals(child)
|
|
assert child.identical(child)
|
|
|
|
new_child = DataTree(dataset=Dataset(coords={"x": [1, 2]}), name="child")
|
|
assert child.equals(new_child)
|
|
assert not child.identical(new_child)
|
|
|
|
deeper_root = DataTree(children={"root": root})
|
|
grandchild = deeper_root["/root/child"]
|
|
assert child.equals(grandchild)
|
|
assert child.identical(grandchild)
|
|
|
|
|
|
class TestSubset:
|
|
def test_match(self) -> None:
|
|
# TODO is this example going to cause problems with case sensitivity?
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/a/A": None,
|
|
"/a/B": None,
|
|
"/b/A": None,
|
|
"/b/B": None,
|
|
}
|
|
)
|
|
result = dt.match("*/B")
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/a/B": None,
|
|
"/b/B": None,
|
|
}
|
|
)
|
|
assert_identical(result, expected)
|
|
|
|
result = dt.children["a"].match("B")
|
|
expected = DataTree.from_dict({"/B": None}, name="a")
|
|
assert_identical(result, expected)
|
|
|
|
def test_filter(self) -> None:
|
|
simpsons = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"age": 83}),
|
|
"/Herbert": xr.Dataset({"age": 40}),
|
|
"/Homer": xr.Dataset({"age": 39}),
|
|
"/Homer/Bart": xr.Dataset({"age": 10}),
|
|
"/Homer/Lisa": xr.Dataset({"age": 8}),
|
|
"/Homer/Maggie": xr.Dataset({"age": 1}),
|
|
},
|
|
name="Abe",
|
|
)
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset({"age": 83}),
|
|
"/Herbert": xr.Dataset({"age": 40}),
|
|
"/Homer": xr.Dataset({"age": 39}),
|
|
},
|
|
name="Abe",
|
|
)
|
|
elders = simpsons.filter(lambda node: node["age"].item() > 18)
|
|
assert_identical(elders, expected)
|
|
|
|
expected = DataTree.from_dict({"/Bart": xr.Dataset({"age": 10})}, name="Homer")
|
|
actual = simpsons.children["Homer"].filter(
|
|
lambda node: node["age"].item() == 10
|
|
)
|
|
assert_identical(actual, expected)
|
|
|
|
|
|
class TestIndexing:
|
|
def test_isel_siblings(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/first": xr.Dataset({"a": ("x", [1, 2])}),
|
|
"/second": xr.Dataset({"b": ("x", [1, 2, 3])}),
|
|
}
|
|
)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/first": xr.Dataset({"a": 2}),
|
|
"/second": xr.Dataset({"b": 3}),
|
|
}
|
|
)
|
|
actual = tree.isel(x=-1)
|
|
assert_identical(actual, expected)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/first": xr.Dataset({"a": ("x", [1])}),
|
|
"/second": xr.Dataset({"b": ("x", [1])}),
|
|
}
|
|
)
|
|
actual = tree.isel(x=slice(1))
|
|
assert_identical(actual, expected)
|
|
|
|
actual = tree.isel(x=[0])
|
|
assert_identical(actual, expected)
|
|
|
|
actual = tree.isel(x=slice(None))
|
|
assert_identical(actual, tree)
|
|
|
|
def test_isel_inherited(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1, 2]}),
|
|
"/child": xr.Dataset({"foo": ("x", [3, 4])}),
|
|
}
|
|
)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": 2}),
|
|
"/child": xr.Dataset({"foo": 4}),
|
|
}
|
|
)
|
|
actual = tree.isel(x=-1)
|
|
assert_identical(actual, expected)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/child": xr.Dataset({"foo": 4}),
|
|
}
|
|
)
|
|
actual = tree.isel(x=-1, drop=True)
|
|
assert_identical(actual, expected)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": xr.Dataset(coords={"x": [1]}),
|
|
"/child": xr.Dataset({"foo": ("x", [3])}),
|
|
}
|
|
)
|
|
actual = tree.isel(x=[0])
|
|
assert_identical(actual, expected)
|
|
|
|
actual = tree.isel(x=slice(None))
|
|
|
|
# TODO: re-enable after the fix to copy() from #9628 is submitted
|
|
# actual = tree.children["child"].isel(x=slice(None))
|
|
# expected = tree.children["child"].copy()
|
|
# assert_identical(actual, expected)
|
|
|
|
actual = tree.children["child"].isel(x=0)
|
|
expected = DataTree(
|
|
dataset=xr.Dataset({"foo": 3}, coords={"x": 1}),
|
|
name="child",
|
|
)
|
|
assert_identical(actual, expected)
|
|
|
|
def test_sel(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/first": xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"x": [1, 2, 3]}),
|
|
"/second": xr.Dataset({"b": ("x", [4, 5])}, coords={"x": [2, 3]}),
|
|
}
|
|
)
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/first": xr.Dataset({"a": 2}, coords={"x": 2}),
|
|
"/second": xr.Dataset({"b": 4}, coords={"x": 2}),
|
|
}
|
|
)
|
|
actual = tree.sel(x=2)
|
|
assert_identical(actual, expected)
|
|
|
|
actual = tree.children["first"].sel(x=2)
|
|
expected = DataTree(
|
|
dataset=xr.Dataset({"a": 2}, coords={"x": 2}),
|
|
name="first",
|
|
)
|
|
assert_identical(actual, expected)
|
|
|
|
|
|
class TestAggregations:
|
|
def test_reduce_method(self) -> None:
|
|
ds = xr.Dataset({"a": ("x", [False, True, False])})
|
|
dt = DataTree.from_dict({"/": ds, "/results": ds})
|
|
|
|
expected = DataTree.from_dict({"/": ds.any(), "/results": ds.any()})
|
|
|
|
result = dt.any()
|
|
assert_equal(result, expected)
|
|
|
|
def test_nan_reduce_method(self) -> None:
|
|
ds = xr.Dataset({"a": ("x", [1, 2, 3])})
|
|
dt = DataTree.from_dict({"/": ds, "/results": ds})
|
|
|
|
expected = DataTree.from_dict({"/": ds.mean(), "/results": ds.mean()})
|
|
|
|
result = dt.mean()
|
|
assert_equal(result, expected)
|
|
|
|
def test_cum_method(self) -> None:
|
|
ds = xr.Dataset({"a": ("x", [1, 2, 3])})
|
|
dt = DataTree.from_dict({"/": ds, "/results": ds})
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": ds.cumsum(),
|
|
"/results": ds.cumsum(),
|
|
}
|
|
)
|
|
|
|
result = dt.cumsum()
|
|
assert_equal(result, expected)
|
|
|
|
def test_dim_argument(self) -> None:
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/a": xr.Dataset({"A": ("x", [1, 2])}),
|
|
"/b": xr.Dataset({"B": ("y", [1, 2])}),
|
|
}
|
|
)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/a": xr.Dataset({"A": 1.5}),
|
|
"/b": xr.Dataset({"B": 1.5}),
|
|
}
|
|
)
|
|
actual = dt.mean()
|
|
assert_equal(expected, actual)
|
|
|
|
actual = dt.mean(dim=...)
|
|
assert_equal(expected, actual)
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/a": xr.Dataset({"A": 1.5}),
|
|
"/b": xr.Dataset({"B": ("y", [1.0, 2.0])}),
|
|
}
|
|
)
|
|
actual = dt.mean("x")
|
|
assert_equal(expected, actual)
|
|
|
|
with pytest.raises(
|
|
ValueError,
|
|
match=re.escape("Dimension(s) 'invalid' do not exist."),
|
|
):
|
|
dt.mean("invalid")
|
|
|
|
def test_subtree(self) -> None:
|
|
tree = DataTree.from_dict(
|
|
{
|
|
"/child": Dataset({"a": ("x", [1, 2])}),
|
|
}
|
|
)
|
|
expected = DataTree(dataset=Dataset({"a": 1.5}), name="child")
|
|
actual = tree.children["child"].mean()
|
|
assert_identical(expected, actual)
|
|
|
|
|
|
class TestOps:
|
|
def test_unary_op(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict({"/": ds1, "/subnode": ds2})
|
|
|
|
expected = DataTree.from_dict({"/": (-ds1), "/subnode": (-ds2)})
|
|
|
|
result = -dt
|
|
assert_equal(result, expected)
|
|
|
|
def test_unary_op_inherited_coords(self) -> None:
|
|
tree = DataTree(xr.Dataset(coords={"x": [1, 2, 3]}))
|
|
tree["/foo"] = DataTree(xr.Dataset({"bar": ("x", [4, 5, 6])}))
|
|
actual = -tree
|
|
|
|
actual_dataset = actual.children["foo"].to_dataset(inherit=False)
|
|
assert "x" not in actual_dataset.coords
|
|
|
|
expected = tree.copy()
|
|
# unary ops are not applied to coordinate variables, only data variables
|
|
expected["/foo/bar"].data = np.array([-4, -5, -6])
|
|
assert_identical(actual, expected)
|
|
|
|
def test_binary_op_on_int(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict({"/": ds1, "/subnode": ds2})
|
|
|
|
expected = DataTree.from_dict({"/": ds1 * 5, "/subnode": ds2 * 5})
|
|
|
|
result = dt * 5
|
|
assert_equal(result, expected)
|
|
|
|
def test_binary_op_on_dataarray(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": ds1,
|
|
"/subnode": ds2,
|
|
}
|
|
)
|
|
|
|
other_da = xr.DataArray(name="z", data=[0.1, 0.2], dims="z")
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": ds1 * other_da,
|
|
"/subnode": ds2 * other_da,
|
|
}
|
|
)
|
|
|
|
result = dt * other_da
|
|
assert_equal(result, expected)
|
|
|
|
def test_binary_op_on_dataset(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": ds1,
|
|
"/subnode": ds2,
|
|
}
|
|
)
|
|
|
|
other_ds = xr.Dataset({"z": ("z", [0.1, 0.2])})
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": ds1 * other_ds,
|
|
"/subnode": ds2 * other_ds,
|
|
}
|
|
)
|
|
|
|
result = dt * other_ds
|
|
assert_equal(result, expected)
|
|
|
|
def test_binary_op_on_datatree(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
|
|
dt = DataTree.from_dict({"/": ds1, "/subnode": ds2})
|
|
|
|
expected = DataTree.from_dict({"/": ds1 * ds1, "/subnode": ds2 * ds2})
|
|
|
|
result = dt * dt
|
|
assert_equal(result, expected)
|
|
|
|
def test_binary_op_order_invariant(self) -> None:
|
|
tree_ab = DataTree.from_dict({"/a": Dataset({"a": 1}), "/b": Dataset({"b": 2})})
|
|
tree_ba = DataTree.from_dict({"/b": Dataset({"b": 2}), "/a": Dataset({"a": 1})})
|
|
expected = DataTree.from_dict(
|
|
{"/a": Dataset({"a": 2}), "/b": Dataset({"b": 4})}
|
|
)
|
|
actual = tree_ab + tree_ba
|
|
assert_identical(expected, actual)
|
|
|
|
def test_arithmetic_inherited_coords(self) -> None:
|
|
tree = DataTree(xr.Dataset(coords={"x": [1, 2, 3]}))
|
|
tree["/foo"] = DataTree(xr.Dataset({"bar": ("x", [4, 5, 6])}))
|
|
actual = 2 * tree
|
|
|
|
actual_dataset = actual.children["foo"].to_dataset(inherit=False)
|
|
assert "x" not in actual_dataset.coords
|
|
|
|
expected = tree.copy()
|
|
expected["/foo/bar"].data = np.array([8, 10, 12])
|
|
assert_identical(actual, expected)
|
|
|
|
def test_binary_op_commutativity_with_dataset(self) -> None:
|
|
# regression test for #9365
|
|
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict(
|
|
{
|
|
"/": ds1,
|
|
"/subnode": ds2,
|
|
}
|
|
)
|
|
|
|
other_ds = xr.Dataset({"z": ("z", [0.1, 0.2])})
|
|
|
|
expected = DataTree.from_dict(
|
|
{
|
|
"/": ds1 * other_ds,
|
|
"/subnode": ds2 * other_ds,
|
|
}
|
|
)
|
|
|
|
result = other_ds * dt
|
|
assert_equal(result, expected)
|
|
|
|
def test_inplace_binary_op(self) -> None:
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict({"/": ds1, "/subnode": ds2})
|
|
|
|
expected = DataTree.from_dict({"/": ds1 + 1, "/subnode": ds2 + 1})
|
|
|
|
dt += 1
|
|
assert_equal(dt, expected)
|
|
|
|
def test_dont_broadcast_single_node_tree(self) -> None:
|
|
# regression test for https://github.com/pydata/xarray/issues/9365#issuecomment-2291622577
|
|
ds1 = xr.Dataset({"a": [5], "b": [3]})
|
|
ds2 = xr.Dataset({"x": [0.1, 0.2], "y": [10, 20]})
|
|
dt = DataTree.from_dict({"/": ds1, "/subnode": ds2})
|
|
node = dt["/subnode"]
|
|
|
|
with pytest.raises(
|
|
xr.TreeIsomorphismError,
|
|
match=re.escape(r"children at root node do not match: ['subnode'] vs []"),
|
|
):
|
|
dt * node
|
|
|
|
|
|
class TestUFuncs:
|
|
@pytest.mark.xfail(reason="__array_ufunc__ not implemented yet")
|
|
def test_tree(self, create_test_datatree):
|
|
dt = create_test_datatree()
|
|
expected = create_test_datatree(modify=lambda ds: np.sin(ds))
|
|
result_tree = np.sin(dt)
|
|
assert_equal(result_tree, expected)
|
|
|
|
|
|
class Closer:
|
|
def __init__(self):
|
|
self.closed = False
|
|
|
|
def close(self):
|
|
if self.closed:
|
|
raise RuntimeError("already closed")
|
|
self.closed = True
|
|
|
|
|
|
@pytest.fixture()
|
|
def tree_and_closers():
|
|
tree = DataTree.from_dict({"/child/grandchild": None})
|
|
closers = {
|
|
"/": Closer(),
|
|
"/child": Closer(),
|
|
"/child/grandchild": Closer(),
|
|
}
|
|
for path, closer in closers.items():
|
|
tree[path].set_close(closer.close)
|
|
return tree, closers
|
|
|
|
|
|
class TestClose:
|
|
def test_close(self, tree_and_closers):
|
|
tree, closers = tree_and_closers
|
|
assert not any(closer.closed for closer in closers.values())
|
|
tree.close()
|
|
assert all(closer.closed for closer in closers.values())
|
|
tree.close() # should not error
|
|
|
|
def test_context_manager(self, tree_and_closers):
|
|
tree, closers = tree_and_closers
|
|
assert not any(closer.closed for closer in closers.values())
|
|
with tree:
|
|
pass
|
|
assert all(closer.closed for closer in closers.values())
|
|
|
|
def test_close_child(self, tree_and_closers):
|
|
tree, closers = tree_and_closers
|
|
assert not any(closer.closed for closer in closers.values())
|
|
tree["child"].close() # should only close descendants
|
|
assert not closers["/"].closed
|
|
assert closers["/child"].closed
|
|
assert closers["/child/grandchild"].closed
|
|
|
|
def test_close_datasetview(self, tree_and_closers):
|
|
tree, _ = tree_and_closers
|
|
|
|
with pytest.raises(
|
|
AttributeError,
|
|
match=re.escape(
|
|
r"cannot close a DatasetView(). Close the associated DataTree node instead"
|
|
),
|
|
):
|
|
tree.dataset.close()
|
|
|
|
with pytest.raises(
|
|
AttributeError, match=re.escape(r"cannot modify a DatasetView()")
|
|
):
|
|
tree.dataset.set_close(None)
|
|
|
|
def test_close_dataset(self, tree_and_closers):
|
|
tree, closers = tree_and_closers
|
|
ds = tree.to_dataset() # should discard closers
|
|
ds.close()
|
|
assert not closers["/"].closed
|
|
|
|
# with tree:
|
|
# pass
|
|
|
|
|
|
@requires_dask
|
|
class TestDask:
|
|
def test_chunksizes(self):
|
|
ds1 = xr.Dataset({"a": ("x", np.arange(10))})
|
|
ds2 = xr.Dataset({"b": ("y", np.arange(5))})
|
|
ds3 = xr.Dataset({"c": ("z", np.arange(4))})
|
|
ds4 = xr.Dataset({"d": ("x", np.arange(-5, 5))})
|
|
|
|
groups = {
|
|
"/": ds1.chunk({"x": 5}),
|
|
"/group1": ds2.chunk({"y": 3}),
|
|
"/group2": ds3.chunk({"z": 2}),
|
|
"/group1/subgroup1": ds4.chunk({"x": 5}),
|
|
}
|
|
|
|
tree = xr.DataTree.from_dict(groups)
|
|
|
|
expected_chunksizes = {path: node.chunksizes for path, node in groups.items()}
|
|
|
|
assert tree.chunksizes == expected_chunksizes
|
|
|
|
def test_load(self):
|
|
ds1 = xr.Dataset({"a": ("x", np.arange(10))})
|
|
ds2 = xr.Dataset({"b": ("y", np.arange(5))})
|
|
ds3 = xr.Dataset({"c": ("z", np.arange(4))})
|
|
ds4 = xr.Dataset({"d": ("x", np.arange(-5, 5))})
|
|
|
|
groups = {"/": ds1, "/group1": ds2, "/group2": ds3, "/group1/subgroup1": ds4}
|
|
|
|
expected = xr.DataTree.from_dict(groups)
|
|
tree = xr.DataTree.from_dict(
|
|
{
|
|
"/": ds1.chunk({"x": 5}),
|
|
"/group1": ds2.chunk({"y": 3}),
|
|
"/group2": ds3.chunk({"z": 2}),
|
|
"/group1/subgroup1": ds4.chunk({"x": 5}),
|
|
}
|
|
)
|
|
expected_chunksizes: Mapping[str, Mapping]
|
|
expected_chunksizes = {node.path: {} for node in tree.subtree}
|
|
actual = tree.load()
|
|
|
|
assert_identical(actual, expected)
|
|
assert tree.chunksizes == expected_chunksizes
|
|
assert actual.chunksizes == expected_chunksizes
|
|
|
|
tree = xr.DataTree.from_dict(groups)
|
|
actual = tree.load()
|
|
assert_identical(actual, expected)
|
|
assert actual.chunksizes == expected_chunksizes
|
|
|
|
def test_compute(self):
|
|
ds1 = xr.Dataset({"a": ("x", np.arange(10))})
|
|
ds2 = xr.Dataset({"b": ("y", np.arange(5))})
|
|
ds3 = xr.Dataset({"c": ("z", np.arange(4))})
|
|
ds4 = xr.Dataset({"d": ("x", np.arange(-5, 5))})
|
|
|
|
expected = xr.DataTree.from_dict(
|
|
{"/": ds1, "/group1": ds2, "/group2": ds3, "/group1/subgroup1": ds4}
|
|
)
|
|
tree = xr.DataTree.from_dict(
|
|
{
|
|
"/": ds1.chunk({"x": 5}),
|
|
"/group1": ds2.chunk({"y": 3}),
|
|
"/group2": ds3.chunk({"z": 2}),
|
|
"/group1/subgroup1": ds4.chunk({"x": 5}),
|
|
}
|
|
)
|
|
original_chunksizes = tree.chunksizes
|
|
expected_chunksizes: Mapping[str, Mapping]
|
|
expected_chunksizes = {node.path: {} for node in tree.subtree}
|
|
actual = tree.compute()
|
|
|
|
assert_identical(actual, expected)
|
|
|
|
assert actual.chunksizes == expected_chunksizes, "mismatching chunksizes"
|
|
assert tree.chunksizes == original_chunksizes, "original tree was modified"
|
|
|
|
def test_persist(self):
|
|
ds1 = xr.Dataset({"a": ("x", np.arange(10))})
|
|
ds2 = xr.Dataset({"b": ("y", np.arange(5))})
|
|
ds3 = xr.Dataset({"c": ("z", np.arange(4))})
|
|
ds4 = xr.Dataset({"d": ("x", np.arange(-5, 5))})
|
|
|
|
def fn(x):
|
|
return 2 * x
|
|
|
|
expected = xr.DataTree.from_dict(
|
|
{
|
|
"/": fn(ds1).chunk({"x": 5}),
|
|
"/group1": fn(ds2).chunk({"y": 3}),
|
|
"/group2": fn(ds3).chunk({"z": 2}),
|
|
"/group1/subgroup1": fn(ds4).chunk({"x": 5}),
|
|
}
|
|
)
|
|
# Add trivial second layer to the task graph, persist should reduce to one
|
|
tree = xr.DataTree.from_dict(
|
|
{
|
|
"/": fn(ds1.chunk({"x": 5})),
|
|
"/group1": fn(ds2.chunk({"y": 3})),
|
|
"/group2": fn(ds3.chunk({"z": 2})),
|
|
"/group1/subgroup1": fn(ds4.chunk({"x": 5})),
|
|
}
|
|
)
|
|
original_chunksizes = tree.chunksizes
|
|
original_hlg_depths = {
|
|
node.path: len(node.dataset.__dask_graph__().layers)
|
|
for node in tree.subtree
|
|
}
|
|
|
|
actual = tree.persist()
|
|
actual_hlg_depths = {
|
|
node.path: len(node.dataset.__dask_graph__().layers)
|
|
for node in actual.subtree
|
|
}
|
|
|
|
assert_identical(actual, expected)
|
|
|
|
assert actual.chunksizes == original_chunksizes, "chunksizes were modified"
|
|
assert (
|
|
tree.chunksizes == original_chunksizes
|
|
), "original chunksizes were modified"
|
|
assert all(
|
|
d == 1 for d in actual_hlg_depths.values()
|
|
), "unexpected dask graph depth"
|
|
assert all(
|
|
d == 2 for d in original_hlg_depths.values()
|
|
), "original dask graph was modified"
|
|
|
|
def test_chunk(self):
|
|
ds1 = xr.Dataset({"a": ("x", np.arange(10))})
|
|
ds2 = xr.Dataset({"b": ("y", np.arange(5))})
|
|
ds3 = xr.Dataset({"c": ("z", np.arange(4))})
|
|
ds4 = xr.Dataset({"d": ("x", np.arange(-5, 5))})
|
|
|
|
expected = xr.DataTree.from_dict(
|
|
{
|
|
"/": ds1.chunk({"x": 5}),
|
|
"/group1": ds2.chunk({"y": 3}),
|
|
"/group2": ds3.chunk({"z": 2}),
|
|
"/group1/subgroup1": ds4.chunk({"x": 5}),
|
|
}
|
|
)
|
|
|
|
tree = xr.DataTree.from_dict(
|
|
{"/": ds1, "/group1": ds2, "/group2": ds3, "/group1/subgroup1": ds4}
|
|
)
|
|
actual = tree.chunk({"x": 5, "y": 3, "z": 2})
|
|
|
|
assert_identical(actual, expected)
|
|
assert actual.chunksizes == expected.chunksizes
|
|
|
|
with pytest.raises(TypeError, match="invalid type"):
|
|
tree.chunk(None)
|
|
|
|
with pytest.raises(TypeError, match="invalid type"):
|
|
tree.chunk((1, 2))
|
|
|
|
with pytest.raises(ValueError, match="not found in data dimensions"):
|
|
tree.chunk({"u": 2})
|