Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Hazard classmethod for loading xarray Datasets #507

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4a9a520
Add Hazard classmethod for loading NetCDF file
peanutfun Jul 5, 2022
755f11e
Only display unknown coordinates in `from_raster_netcdf` error message
peanutfun Jul 6, 2022
525394b
Use CLIMADA utilities for converting datetime64
peanutfun Jul 6, 2022
17564d5
Read Hazard from datasets with varying dim/coord definitions
peanutfun Jul 6, 2022
cbc1dc5
Avoid redefining built-in `map`
peanutfun Jul 6, 2022
d90ca87
Add log messages to `Hazard.from_raster_netcdf`
peanutfun Jul 6, 2022
939968b
Extract dimension names from coordinates
peanutfun Jul 7, 2022
b110a94
Use lazy formatting for logger messages
peanutfun Jul 7, 2022
4283cf6
Merge branch 'develop' into 487-add-classmethod-to-hazard-for-reading…
peanutfun Jul 11, 2022
ab40d1a
Reorganize test_base_netcdf.py
peanutfun Jul 15, 2022
2a4283d
Consolidate tests for Hazard.intensity
peanutfun Jul 15, 2022
dac5dd5
Rename from_raster_netcdf to from_raster_xarray
peanutfun Jul 15, 2022
026a33f
Make `from_raster_xarray` read all Hazard data
peanutfun Jul 27, 2022
528b99e
Add more logger messages and update docstring
peanutfun Jul 27, 2022
d96a278
Remove 'fraction' parameter from 'from_raster_xarray'
peanutfun Jul 27, 2022
f817bca
Add examples for `from_raster_xarray`
peanutfun Jul 27, 2022
fb94fc6
Fix type hints in `from_raster_xarray`
peanutfun Jul 27, 2022
bda69a6
Apply suggestions from code review
peanutfun Jul 28, 2022
9c68a9e
Improve docstrings and simplify `from_raster_xarray`
peanutfun Jul 28, 2022
87033ea
Simplify `user_key` update
peanutfun Jul 28, 2022
abc9d32
Optimize storage in CSR matrix by setting NaNs to zero
peanutfun Jul 28, 2022
29b74f3
Make hazard type and unit required arguments
peanutfun Jul 29, 2022
7678d1f
Preprocess dict arguments to save indentation level
peanutfun Jul 29, 2022
c0fb24b
Let `to_csr_matrix` only take ndarrays as arguments
peanutfun Jul 29, 2022
b8c46a3
Do not hard-code coordinate keys
peanutfun Aug 3, 2022
7a9ea42
Remove superfluous newline in docstring
peanutfun Aug 3, 2022
b2eea56
Update docstring of `from_raster_xarray`
peanutfun Aug 4, 2022
1be9772
Update `Hazard.from_raster_xarray`
peanutfun Aug 5, 2022
ccec87f
Add CRS argument
peanutfun Aug 8, 2022
c19f9ea
Merge branch 'develop' into 487-add-classmethod-to-hazard-for-reading…
peanutfun Sep 29, 2022
07e8bf7
Fix bug where wrong Hazard attribute was set
peanutfun Oct 5, 2022
84b0795
Add test for crs parameter in from_raster_xarray
peanutfun Oct 6, 2022
a72112c
Update docstring of from_raster_xarray
peanutfun Oct 6, 2022
d777785
Add 'Test' prefix for test cases in test_base_xarray.py
peanutfun Oct 6, 2022
d36bcc2
Format test_base_xarray
peanutfun Oct 6, 2022
68c1459
Fix comment in from_raster_xarray
peanutfun Oct 6, 2022
2cffc9a
Merge branch 'develop' into 487-add-classmethod-to-hazard-for-reading…
emanuel-schmid Oct 7, 2022
1ff075d
Promote single-valued coordinates to dimensions
peanutfun Oct 14, 2022
1eea94c
Merge branch '487-add-classmethod-to-hazard-for-reading-raster-like-d…
peanutfun Oct 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions climada/hazard/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import logging
import pathlib
import warnings
from typing import Union, Optional, Callable, Dict

import geopandas as gpd
import h5py
Expand All @@ -38,6 +39,7 @@
from rasterio.features import rasterize
from rasterio.warp import reproject, Resampling, calculate_default_transform
from scipy import sparse
import xarray as xr

from climada.hazard.tag import Tag as TagHazard
from climada.hazard.centroids.centr import Centroids
Expand Down Expand Up @@ -369,6 +371,104 @@ def set_vector(self, *args, **kwargs):
"Use Hazard.from_vector instead.")
self.__dict__ = Hazard.from_vector(*args, **kwargs).__dict__

@classmethod
def from_raster_netcdf(
cls,
data: Union[xr.Dataset, str],
*,
intensity: str = "intensity",
fraction: Union[str, Callable[[np.ndarray], np.ndarray]] = "fraction",
coordinate_vars: Optional[Dict[str, str]] = None,
):
"""Read raster-like data from a NetCDF file

Parameters
----------
data : xarray.Dataset or str
The NetCDF data to read from. May be a opened dataset or a path to a NetCDF
file, in which case the file is opened first.
intensity : str
Identifier of the `xarray.DataArray` containing the hazard intensity data.
fraction : str or Callable
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
Identifier of the `xarray.DataArray` containing the hazard fraction data.
May be a callable, in which case the callable is applied on the respective
intensity value to yield a fraction.
coordinate_vars : dict(str, str)
Mapping from default coordinate names to coordinate names used in the data
to read. The default coordinates are `time`, `longitude`, and `latitude`.

Returns
-------
hazard : climada.Hazard
A hazard object created from the input data

"""
# If the data is a string, open the respective file
if not isinstance(data, xr.Dataset):
data = xr.open_dataset(data)
hazard = cls() # TODO: Hazard type

# Update coordinate identifiers
coords = {"time": "time", "longitude": "longitude", "latitude": "latitude"}
if coordinate_vars is not None:
unknown_coords = [co not in coords for co in coordinate_vars]
if any(unknown_coords):
raise ValueError(
f"Unknown coordinates passed: '{list(coordinate_vars.keys())}'. "
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
"Supported coordinates are 'time', 'longitude', 'latitude'."
)
coords.update(coordinate_vars)

# Transform coordinates into centroids
lat, lon = np.meshgrid(
data[coords["latitude"]], data[coords["longitude"]], indexing="ij"
)
hazard.centroids = Centroids.from_lat_lon(lat.flatten(), lon.flatten())

def to_hazard_csr_matrix(dataarray):
"""Create a CSR matrix from a 3D data array"""
arr = dataarray.transpose(
coords["time"], coords["latitude"], coords["longitude"]
)
return sparse.csr_matrix(
arr.values.reshape((arr.sizes[coords["time"]], -1))
)

# Read the intensity data and flatten it in spatial dimensions
hazard.intensity = to_hazard_csr_matrix(data[intensity])
hazard.intensity.eliminate_zeros()

# Use fraction data or apply callable
if isinstance(fraction, str):
fraction_arr = data[fraction]
elif isinstance(fraction, Callable):
fraction_arr = xr.apply_ufunc(fraction, data[intensity])
else:
raise TypeError("'fraction' parameter must be 'str' or Callable")
hazard.fraction = to_hazard_csr_matrix(fraction_arr)
hazard.fraction.eliminate_zeros()

# Fill hazard with required information
num_events = data.sizes[coords["time"]]
hazard.event_id = np.array(range(num_events)) + 1 # event_id starts at 1
hazard.frequency = np.ones(num_events) # TODO: Optional read from file
hazard.event_name = list(data[coords["time"]].values)
peanutfun marked this conversation as resolved.
Show resolved Hide resolved

def to_datetime(date: np.datetime64):
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
"""Convert a numpy.datetime64 into a datetime.datetime"""
timestamp = (date - np.datetime64("1970-01-01T00:00:00")) / np.timedelta64(
1, "s"
)
return dt.datetime.utcfromtimestamp(timestamp)

hazard.date = np.array(
[to_datetime(val).toordinal() for val in data[coords["time"]].values]
)
# TODO: hazard.unit

# Done!
return hazard

@classmethod
def from_vector(cls, files_intensity, files_fraction=None, attrs=None,
inten_name=None, frac_name=None, dst_crs=None, haz_type=None):
Expand Down
165 changes: 165 additions & 0 deletions climada/hazard/test/test_base_netcdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
"""
This file is part of CLIMADA.

Copyright (C) 2022 ETH Zurich, CLIMADA contributors listed in AUTHORS.

CLIMADA is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free
Software Foundation, version 3.

CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.

---

Test NetCDF reading capabilities of Hazard base class.
"""

import os
import unittest
import datetime as dt
import numpy as np
from scipy.sparse import csr_matrix

import xarray as xr

from climada.hazard.base import Hazard

THIS_DIR = os.path.dirname(__file__)


class ReadDefaultNetCDF(unittest.TestCase):
def setUp(self):
"""Write a simple NetCDF file to read"""
self.netcdf_path = os.path.join(THIS_DIR, "default.nc")
self.intensity = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]])
self.fraction = np.array([[[0, 0, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1]]])
self.time = np.array([dt.datetime(1999, 1, 1), dt.datetime(2000, 1, 1)])
self.latitude = np.array([0, 1])
self.longitude = np.array([0, 1, 2])
dset = xr.Dataset(
{
"intensity": (["time", "latitude", "longitude"], self.intensity),
"fraction": (["time", "latitude", "longitude"], self.fraction),
},
dict(time=self.time, latitude=self.latitude, longitude=self.longitude),
)
dset.to_netcdf(self.netcdf_path)

def tearDown(self):
"""Delete the NetCDF file"""
os.remove(self.netcdf_path)

def _assert_default(self, hazard):
"""Assertions for the default hazard to be loaded"""
# Hazard data
self.assertEqual(hazard.tag.haz_type, "")
self.assertIsInstance(hazard.event_id, np.ndarray)
np.testing.assert_array_equal(hazard.event_id, [1, 2])
self.assertIsInstance(hazard.event_name, list)
np.testing.assert_array_equal(
hazard.event_name, [np.datetime64(val) for val in self.time]
)
self.assertIsInstance(hazard.date, np.ndarray)
np.testing.assert_array_equal(
hazard.date, [val.toordinal() for val in self.time]
)

# Centroids
self.assertEqual(hazard.centroids.lat.size, 6)
self.assertEqual(hazard.centroids.lon.size, 6)
np.testing.assert_array_equal(hazard.centroids.lat, [0, 0, 0, 1, 1, 1])
np.testing.assert_array_equal(hazard.centroids.lon, [0, 1, 2, 0, 1, 2])

# Intensity data
self.assertIsInstance(hazard.intensity, csr_matrix)
np.testing.assert_array_equal(hazard.intensity.shape, [2, 6])
np.testing.assert_array_equal(hazard.intensity.toarray()[0], [0, 1, 2, 3, 4, 5])
np.testing.assert_array_equal(
hazard.intensity.toarray()[1], [6, 7, 8, 9, 10, 11]
)

# Fraction data
self.assertIsInstance(hazard.fraction, csr_matrix)
np.testing.assert_array_equal(hazard.fraction.shape, [2, 6])
np.testing.assert_array_equal(hazard.fraction.toarray()[0], [0, 0, 0, 0, 0, 0])
np.testing.assert_array_equal(hazard.fraction.toarray()[1], [1, 1, 1, 1, 1, 1])

def test_load_path(self):
"""Load the data with path as argument"""
hazard = Hazard.from_raster_netcdf(self.netcdf_path)
self._assert_default(hazard)

def test_load_dataset(self):
"""Load the data from an opened dataset as argument"""
dataset = xr.open_dataset(self.netcdf_path)
hazard = Hazard.from_raster_netcdf(dataset)
self._assert_default(hazard)

def test_fraction_callable(self):
"""Test creating a fraction from a callable"""
hazard = Hazard.from_raster_netcdf(
self.netcdf_path, fraction=lambda x: np.where(x > 1, 1, 0)
)
self.assertIsInstance(hazard.fraction, csr_matrix)
np.testing.assert_array_equal(hazard.fraction.shape, [2, 6])
np.testing.assert_array_equal(hazard.fraction.toarray()[0], [0, 0, 1, 1, 1, 1])
np.testing.assert_array_equal(hazard.fraction.toarray()[1], [1, 1, 1, 1, 1, 1])

def test_coordinate_vars(self):
"""Test if custom coodinate names can be used"""
ds_new = xr.Dataset(
{
"intensity": (["time", "y", "x"], self.intensity),
"fraction": (["time", "y", "x"], self.fraction),
},
dict(time=self.time, x=self.longitude, y=self.latitude),
)
hazard = Hazard.from_raster_netcdf(
ds_new, coordinate_vars=dict(latitude="y", longitude="x")
)
self._assert_default(hazard)

def test_errors(self):
"""Check the errors thrown"""
# TODO: Maybe move to 'test_load_path'
# Wrong paths
with self.assertRaises(FileNotFoundError):
Hazard.from_raster_netcdf("file-does-not-exist.nc")
with self.assertRaises(KeyError):
Hazard.from_raster_netcdf(
self.netcdf_path, intensity="wrong-intensity-path"
)
with self.assertRaises(KeyError):
Hazard.from_raster_netcdf(self.netcdf_path, fraction="wrong-fraction-path")

# TODO: Maybe move to 'test_fraction_callable'
# Wrong type passed as fraction
with self.assertRaises(TypeError) as cm:
Hazard.from_raster_netcdf(self.netcdf_path, fraction=3)
self.assertIn(
"'fraction' parameter must be 'str' or Callable", str(cm.exception)
)

# TODO: Maybe move to 'test_coordinate_vars'
# Correctly specified, but the custom coordinate does not exist
with self.assertRaises(ValueError):
Hazard.from_raster_netcdf(
self.netcdf_path, coordinate_vars=dict(time="year")
)
# Wrong coordinate key
with self.assertRaises(ValueError) as cm:
Hazard.from_raster_netcdf(
self.netcdf_path, coordinate_vars=dict(bla="latitude")
)
self.assertIn("Unknown coordinates passed: '['bla']'", str(cm.exception))


# Execute Tests
if __name__ == "__main__":
TESTS = unittest.TestLoader().loadTestsFromTestCase(ReadDefaultNetCDF)
unittest.TextTestRunner(verbosity=2).run(TESTS)