AlongTrack Data - SWOT

AlongTrack Data - SWOT#

In this notebook, we look at how to regrid along-track altimetry observations (the nadir altimeters and the SWOT swath) from the 2020a OSSE Data Challenge onto the regular NATL60 grid.

import autoroot
import typing as tp
from dataclasses import dataclass
import functools as ft
import numpy as np
import pandas as pd
import xarray as xr
import einops
from metpy.units import units
import pint_xarray
import xarray_dataclasses as xrdataclass
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.ticker as ticker
import seaborn as sns

sns.reset_defaults()
sns.set_context(context="talk", font_scale=0.7)

%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Data#

# List the raw observation files available in the dc20a `dc_obs` directory.
!ls "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/raw/dc_obs/"
2020a_SSH_mapping_NATL60_envisat.nc
2020a_SSH_mapping_NATL60_geosat2.nc
2020a_SSH_mapping_NATL60_jason1.nc
2020a_SSH_mapping_NATL60_karin_swot.nc
2020a_SSH_mapping_NATL60_nadir_swot.nc
2020a_SSH_mapping_NATL60_topex-poseidon_interleaved.nc
# Directory holding the raw dc20a observation files (cluster-specific path;
# change it here to relocate every file below at once).
DC20A_OBS_DIR = "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/raw/dc_obs"

# Nadir altimeter files, in the order they are later concatenated.
files_nadir_dc20a = [
    f"{DC20A_OBS_DIR}/2020a_SSH_mapping_NATL60_{mission}.nc"
    for mission in (
        "jason1",
        "envisat",
        "geosat2",
        "topex-poseidon_interleaved",
        "nadir_swot",
    )
]

# Wide-swath (KaRIn) SWOT file.
files_swot_dc20a = [
    f"{DC20A_OBS_DIR}/2020a_SSH_mapping_NATL60_karin_swot.nc",
]

# Peek at the raw, unprocessed SWOT file. It gets its own name so that the
# preprocessed `ds_swot` built later in the notebook does not silently
# overwrite it.
ds_swot_raw = xr.open_dataset(files_swot_dc20a[0])
ds_swot_raw
<xarray.Dataset>
Dimensions:      (nC: 52, time: 188121)
Coordinates:
  * nC           (nC) int64 0 1 2 3 4 5 6 7 8 9 ... 43 44 45 46 47 48 49 50 51
  * time         (time) datetime64[ns] 2012-10-02T18:03:42.401288 ... 2013-09...
Data variables: (12/14)
    lon          (nC, time) float64 ...
    lat          (nC, time) float64 ...
    x_al         (nC, time) float32 ...
    x_ac         (nC, time) float32 ...
    lon_nadir    (nC, time) float64 ...
    lat_nadir    (nC, time) float64 ...
    ...           ...
    ssh_obs      (nC, time) float64 ...
    roll_err     (nC, time) float64 ...
    phase_err    (nC, time) float64 ...
    ssh_model    (nC, time) float64 ...
    bd_err       (nC, time) float64 ...
    karin_err    (nC, time) float64 ...
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        /data/MSA_ETU/mballarotta/ETUDE_BOOST-SWOT/out...
    title:                     SWOT-like data simulated by SWOT simulator
    keywords:                  SWOT, altimetry, SSH, satellite, remote sensing
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-11-27T16:45:37Z
    date_modified:             2018-11-27T16:45:37Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
from oceanbench._src.geoprocessing.validation import validate_latlon, validate_time, decode_cf_time, validate_ssh
from oceanbench._src.preprocessing.alongtrack import alongtrack_ssh
from oceanbench._src.geoprocessing.subset import where_slice
from oceanbench._src.preprocessing.alongtrack import remove_swath_dimension
def _preprocess_alongtrack(
    da,
    swath_dim=None,
    time_period=("2012-10-22", "2012-12-03"),
    lon_bounds=(-64.975, -55.007),
    lat_bounds=(33.025, 42.9917),
):
    """Shared preprocessing pipeline for the along-track observation files.

    Steps, in order: validate the lat/lon and time coordinates, rename
    ``ssh_model`` to ``ssh`` and validate it, select the time period,
    optionally remove the swath dimension, subset the region, sort by time
    and mark ``time``/``lat``/``lon`` as coordinates.

    Args:
        da: dataset for a single observation file (as handed over by
            ``xr.open_mfdataset(preprocess=...)``).
        swath_dim: name of the across-track dimension to remove with
            ``remove_swath_dimension``; ``None`` skips that step.
        time_period: ``(start, stop)`` labels used to slice ``time``.
        lon_bounds: ``(min, max)`` passed to ``where_slice`` for ``lon``.
        lat_bounds: ``(min, max)`` passed to ``where_slice`` for ``lat``.

    Returns:
        The preprocessed dataset, with all of its variables kept.
    """
    # validate coordinates
    # NOTE(review): the exact normalisation (e.g. longitude convention) is
    # defined by the oceanbench validators -- confirm there.
    da = validate_latlon(da)
    da = validate_time(da)

    # validate variables
    da = da.rename({"ssh_model": "ssh"})
    da = validate_ssh(da)

    # slice time period
    da = da.sel(time=slice(*time_period))

    # remove the swath dimension (SWOT only); this must happen before the
    # regional subset, as in the original SWOT pipeline
    if swath_dim is not None:
        da = remove_swath_dimension(da, swath_dim)

    # slice region
    da = where_slice(da, "lon", *lon_bounds)
    da = where_slice(da, "lat", *lat_bounds)

    # reorganize
    da = da.sortby("time")

    # assign coordinates
    da = da.set_coords(["time", "lat", "lon"])

    return da


def preprocess_nadir(da):
    """Preprocess one nadir altimeter file (no swath dimension to remove)."""
    return _preprocess_alongtrack(da)


def preprocess_swot(da):
    """Preprocess one SWOT file, removing its ``nC`` swath dimension."""
    return _preprocess_alongtrack(da, swath_dim="nC")
# Open the nadir files, applying `preprocess_nadir` to each one before they
# are concatenated along `time`.
# The data is loaded into memory *before* the final sort: sorting the lazy,
# multi-file dataset reorders it with an out-of-order index, which is what
# xarray's "Slicing with an out-of-order index" PerformanceWarning is about.
# Written as a single expression so that re-running the cell is idempotent.
ds_nadir = (
    xr.open_mfdataset(
        files_nadir_dc20a,
        preprocess=preprocess_nadir,
        combine="nested",
        engine="netcdf4",
        concat_dim="time",
    )
    .compute()
    .sortby("time")
)

ds_nadir
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
/gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/indexing.py:1374: PerformanceWarning: Slicing with an out-of-order index is generating 139 times more chunks
  return self.array[key]
<xarray.Dataset>
Dimensions:      (time: 42468, cycle: 1)
Coordinates:
    lon          (time) float64 -55.03 -55.06 -55.1 ... -64.89 -64.93 -64.97
    lat          (time) float64 39.58 39.53 39.47 39.42 ... 42.12 42.17 42.22
  * time         (time) datetime64[ns] 2012-10-22T11:16:43.687588 ... 2012-12...
Dimensions without coordinates: cycle
Data variables:
    x_al         (time) float32 1.008e+06 1.008e+06 ... 5.373e+06 5.373e+06
    ncycle       (time, cycle) timedelta64[ns] 17 days 09:34:50.917929600 ......
    model_index  (time) float32 515.0 515.0 515.0 ... 1.533e+03 1.533e+03
    ssh_obs      (time) float64 0.975 1.008 1.009 ... -0.0774 -0.0679 -0.0703
    ssh          (time) float64 0.9958 1.014 1.027 ... -0.0576 -0.0568 -0.051
    nadir_err    (time) float64 -0.0208 -0.0061 -0.0182 ... -0.0111 -0.0193
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        
    title:                     Altimeter like data simulated by SWOT simulator
    keywords:                  check keywords
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-10-12T12:39:50Z
    date_modified:             2018-10-12T12:39:50Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
%matplotlib inline

fig, ax = plt.subplots()

sub_ds = ds_nadir.sel(time=slice("2012-10-26","2012-10-26"))
variable = "ssh"
pts = ax.scatter(sub_ds.lon, sub_ds.lat, c=sub_ds[variable], s=0.1)
ax.set(
    xlabel="Longitude",
    ylabel="Latitude",
    xlim=[-65., -55.],
    ylim=[33., 43.]
)

plt.colorbar(pts, label="Sea Surface Height [m]")
plt.tight_layout()
plt.show()
../../_images/995d24087bed4be638b2f8f3cc1a9bc09934428a66b72d9727485ae4216eb3cc.png
%%time

ds_swot = xr.open_mfdataset(
    files_swot_dc20a, 
    preprocess=preprocess_swot,
    combine="nested",
    engine="netcdf4",
    concat_dim="time"
)

ds_swot = ds_swot.sortby("time").compute()

ds_swot
CPU times: user 17.2 s, sys: 4.08 s, total: 21.3 s
Wall time: 18.1 s
<xarray.Dataset>
Dimensions:      (time: 955000)
Coordinates:
    lon          (time) float64 -55.4 -55.37 -55.39 ... -64.45 -64.47 -64.5
    lat          (time) float64 42.99 42.99 42.97 42.97 ... 42.98 42.99 42.99
  * time         (time) datetime64[ns] 2012-10-23T18:04:12.393963 ... 2012-11...
Data variables: (12/13)
    x_al         (time) float32 1.001e+06 1.001e+06 ... 1.01e+07 1.01e+07
    x_ac         (time) float32 -60.0 -58.0 -60.0 -58.0 ... -60.0 -58.0 -56.0
    lon_nadir    (time) float64 305.3 305.3 305.3 305.3 ... 294.8 294.8 294.8
    lat_nadir    (time) float64 43.12 43.12 43.1 43.1 ... 43.1 43.12 43.12 43.12
    model_index  (time) float32 546.0 546.0 546.0 ... 1.446e+03 1.446e+03
    timing_err   (time) float64 0.00496 0.00496 0.004528 ... -0.03043 -0.03043
    ...           ...
    roll_err     (time) float64 -0.1154 -0.1116 -0.1102 ... 0.07341 0.07088
    phase_err    (time) float64 0.06866 0.06637 0.06041 ... 0.04271 0.04124
    ssh          (time) float64 -0.0096 -0.0116 -0.0083 ... -0.0444 -0.0518
    bd_err       (time) float64 0.009219 0.008615 ... -0.004289 -0.003999
    karin_err    (time) float64 -0.0198 0.0012 0.0096 ... 0.0122 -0.0247 -0.0235
    nC           (time) float64 0.0 1.0 0.0 1.0 2.0 3.0 ... 5.0 6.0 0.0 1.0 2.0
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        /data/MSA_ETU/mballarotta/ETUDE_BOOST-SWOT/out...
    title:                     SWOT-like data simulated by SWOT simulator
    keywords:                  SWOT, altimetry, SSH, satellite, remote sensing
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-11-27T16:45:37Z
    date_modified:             2018-11-27T16:45:37Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
fig, ax = plt.subplots()

# One day of SWOT swath observations, coloured by sea surface height.
sub_ds = ds_swot.sel(time=slice("2012-10-26","2012-10-26"))
variable = "ssh"
pts = ax.scatter(sub_ds.lon, sub_ds.lat, c=sub_ds[variable], s=0.1)
ax.set(
    title="SWOT swath (2012-10-26)",
    xlabel="Longitude",
    ylabel="Latitude",
    xlim=[-65., -55.],
    ylim=[33., 43.]
)

# Use the explicit figure API rather than the pyplot state machine.
fig.colorbar(pts, ax=ax, label="Sea Surface Height [m]")
fig.tight_layout()
plt.show()
../../_images/82352ef950024474b75d97fd5efdcf54672d56b51b13d34c95c9db67e118b561.png

SWOT + NADIR#

ds_nadir
<xarray.Dataset>
Dimensions:      (time: 42468, cycle: 1)
Coordinates:
    lon          (time) float64 -55.03 -55.06 -55.1 ... -64.89 -64.93 -64.97
    lat          (time) float64 39.58 39.53 39.47 39.42 ... 42.12 42.17 42.22
  * time         (time) datetime64[ns] 2012-10-22T11:16:43.687588 ... 2012-12...
Dimensions without coordinates: cycle
Data variables:
    x_al         (time) float32 1.008e+06 1.008e+06 ... 5.373e+06 5.373e+06
    ncycle       (time, cycle) timedelta64[ns] 17 days 09:34:50.917929600 ......
    model_index  (time) float32 515.0 515.0 515.0 ... 1.533e+03 1.533e+03
    ssh_obs      (time) float64 0.975 1.008 1.009 ... -0.0774 -0.0679 -0.0703
    ssh          (time) float64 0.9958 1.014 1.027 ... -0.0576 -0.0568 -0.051
    nadir_err    (time) float64 -0.0208 -0.0061 -0.0182 ... -0.0111 -0.0193
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        
    title:                     Altimeter like data simulated by SWOT simulator
    keywords:                  check keywords
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-10-12T12:39:50Z
    date_modified:             2018-10-12T12:39:50Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
ds_swot
<xarray.Dataset>
Dimensions:      (time: 955000)
Coordinates:
    lon          (time) float64 -55.4 -55.37 -55.39 ... -64.45 -64.47 -64.5
    lat          (time) float64 42.99 42.99 42.97 42.97 ... 42.98 42.99 42.99
  * time         (time) datetime64[ns] 2012-10-23T18:04:12.393963 ... 2012-11...
Data variables: (12/13)
    x_al         (time) float32 1.001e+06 1.001e+06 ... 1.01e+07 1.01e+07
    x_ac         (time) float32 -60.0 -58.0 -60.0 -58.0 ... -60.0 -58.0 -56.0
    lon_nadir    (time) float64 305.3 305.3 305.3 305.3 ... 294.8 294.8 294.8
    lat_nadir    (time) float64 43.12 43.12 43.1 43.1 ... 43.1 43.12 43.12 43.12
    model_index  (time) float32 546.0 546.0 546.0 ... 1.446e+03 1.446e+03
    timing_err   (time) float64 0.00496 0.00496 0.004528 ... -0.03043 -0.03043
    ...           ...
    roll_err     (time) float64 -0.1154 -0.1116 -0.1102 ... 0.07341 0.07088
    phase_err    (time) float64 0.06866 0.06637 0.06041 ... 0.04271 0.04124
    ssh          (time) float64 -0.0096 -0.0116 -0.0083 ... -0.0444 -0.0518
    bd_err       (time) float64 0.009219 0.008615 ... -0.004289 -0.003999
    karin_err    (time) float64 -0.0198 0.0012 0.0096 ... 0.0122 -0.0247 -0.0235
    nC           (time) float64 0.0 1.0 0.0 1.0 2.0 3.0 ... 5.0 6.0 0.0 1.0 2.0
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        /data/MSA_ETU/mballarotta/ETUDE_BOOST-SWOT/out...
    title:                     SWOT-like data simulated by SWOT simulator
    keywords:                  SWOT, altimetry, SSH, satellite, remote sensing
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-11-27T16:45:37Z
    date_modified:             2018-11-27T16:45:37Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
builtins
# Naive attempt: concatenate the two datasets as they are.
# They do not carry the same data variables (e.g. `ncycle` only exists in the
# nadir dataset), for which xr.concat raises a ValueError here. The error is
# caught and printed so the demonstration does not stop a Run All.
try:
    ds_swotnadir = xr.concat(
        [ds_nadir, ds_swot],
        dim="time",
    ).sortby("time")
except ValueError as err:
    print(f"xr.concat failed: {err}")
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1340, in Dataset._construct_dataarray(self, name)
   1339 try:
-> 1340     variable = self._variables[name]
   1341 except KeyError:

KeyError: 'ncycle'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:556, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    555 try:
--> 556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:556, in <listcomp>(.0)
    555 try:
--> 556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1431, in Dataset.__getitem__(self, key)
   1430 if utils.hashable(key):
-> 1431     return self._construct_dataarray(key)
   1432 if utils.iterable_of_hashable(key):

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1342, in Dataset._construct_dataarray(self, name)
   1341 except KeyError:
-> 1342     _, name, variable = _get_virtual_variable(self._variables, name, self.dims)
   1344 needed_dims = set(variable.dims)

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:178, in _get_virtual_variable(variables, key, dim_sizes)
    177 if len(split_key) != 2:
--> 178     raise KeyError(key)
    180 ref_name, var_name = split_key

KeyError: 'ncycle'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[44], line 1
----> 1 ds_swotnadir = xr.concat(
      2     [ds_nadir, ds_swot],
      3     # compat="override",
      4     # data_vars=["ssh"],
      5     # coords="minimal",
      6     dim="time", 
      7 ).sortby("time")

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:243, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    231     return _dataarray_concat(
    232         objs,
    233         dim=dim,
   (...)
    240         combine_attrs=combine_attrs,
    241     )
    242 elif isinstance(first_obj, Dataset):
--> 243     return _dataset_concat(
    244         objs,
    245         dim=dim,
    246         data_vars=data_vars,
    247         coords=coords,
    248         compat=compat,
    249         positions=positions,
    250         fill_value=fill_value,
    251         join=join,
    252         combine_attrs=combine_attrs,
    253     )
    254 else:
    255     raise TypeError(
    256         "can only concatenate xarray Dataset and DataArray "
    257         f"objects, got {type(first_obj)}"
    258     )

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:558, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:
--> 558     raise ValueError(f"{name!r} is not present in all datasets.")
    560 # Try concatenate the indexes, concatenate the variables when no index
    561 # is found on all datasets.
    562 indexes: list[Index] = list(get_indexes(name))

ValueError: 'ncycle' is not present in all datasets.

Now, let’s combine the best of both worlds!

# Dropping `ncycle` alone is not enough: other variables (e.g. `nadir_err`)
# also exist in only one of the two datasets. Keep just the variable they
# share, `ssh`; selecting with a list returns a Dataset that still carries the
# time/lat/lon coordinates, so the concatenation along `time` is well defined.
ds_swotnadir = xr.concat(
    [ds_nadir[["ssh"]], ds_swot[["ssh"]]],
    dim="time",
).sortby("time")
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1340, in Dataset._construct_dataarray(self, name)
   1339 try:
-> 1340     variable = self._variables[name]
   1341 except KeyError:

KeyError: 'nadir_err'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:556, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    555 try:
--> 556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:556, in <listcomp>(.0)
    555 try:
--> 556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1431, in Dataset.__getitem__(self, key)
   1430 if utils.hashable(key):
-> 1431     return self._construct_dataarray(key)
   1432 if utils.iterable_of_hashable(key):

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:1342, in Dataset._construct_dataarray(self, name)
   1341 except KeyError:
-> 1342     _, name, variable = _get_virtual_variable(self._variables, name, self.dims)
   1344 needed_dims = set(variable.dims)

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/dataset.py:178, in _get_virtual_variable(variables, key, dim_sizes)
    177 if len(split_key) != 2:
--> 178     raise KeyError(key)
    180 ref_name, var_name = split_key

KeyError: 'nadir_err'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[35], line 1
----> 1 ds_swotnadir = xr.concat(
      2     [ds_nadir.drop("ncycle"), ds_swot],
      3     data_vars=["ssh"],
      4     coords="minimal",
      5     dim="time", 
      6 ).sortby("time")

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:243, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    231     return _dataarray_concat(
    232         objs,
    233         dim=dim,
   (...)
    240         combine_attrs=combine_attrs,
    241     )
    242 elif isinstance(first_obj, Dataset):
--> 243     return _dataset_concat(
    244         objs,
    245         dim=dim,
    246         data_vars=data_vars,
    247         coords=coords,
    248         compat=compat,
    249         positions=positions,
    250         fill_value=fill_value,
    251         join=join,
    252         combine_attrs=combine_attrs,
    253     )
    254 else:
    255     raise TypeError(
    256         "can only concatenate xarray Dataset and DataArray "
    257         f"objects, got {type(first_obj)}"
    258     )

File /gpfsscratch/rech/cli/uvo53rl/miniconda3/envs/jejeqx/lib/python3.10/site-packages/xarray/core/concat.py:558, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
    556     vars = ensure_common_dims([ds[name].variable for ds in datasets])
    557 except KeyError:
--> 558     raise ValueError(f"{name!r} is not present in all datasets.")
    560 # Try concatenate the indexes, concatenate the variables when no index
    561 # is found on all datasets.
    562 indexes: list[Index] = list(get_indexes(name))

ValueError: 'nadir_err' is not present in all datasets.
import xarray as xr
xr.concat(
ds_swotnadir
<xarray.Dataset>
Dimensions:  (time: 997468)
Coordinates:
    lon      (time) float64 -55.03 -55.06 -55.1 -55.13 ... -64.89 -64.93 -64.97
    lat      (time) float64 39.58 39.53 39.47 39.42 ... 42.06 42.12 42.17 42.22
  * time     (time) datetime64[ns] 2012-10-22T11:16:43.687588 ... 2012-12-03T...
Data variables:
    ssh      (time) float64 0.9958 1.014 1.027 1.032 ... -0.0576 -0.0568 -0.051
Attributes: (12/26)
    description:               SWOT fixed grid
    corresponding_grid:        
    title:                     Altimeter like data simulated by SWOT simulator
    keywords:                  check keywords
    Conventions:               CF-1.6
    summary:                   SWOT grid data produced
    ...                        ...
    geospatial_lon_units:      degrees_east
    project:                   SWOT
    date_created:              2018-10-12T12:39:50Z
    date_modified:             2018-10-12T12:39:50Z
    keywords_vocabulary:       NASA
    references:                Gaultier, L., C. Ubelmann, and L.-L. Fu, 2016:...
fig, ax = plt.subplots()

# One day of the combined SWOT + nadir observations, coloured by sea surface height.
sub_ds = ds_swotnadir.sel(time=slice("2012-10-26","2012-10-26"))
variable = "ssh"
pts = ax.scatter(sub_ds.lon, sub_ds.lat, c=sub_ds[variable], s=0.1)
ax.set(
    title="SWOT + nadir (2012-10-26)",
    xlabel="Longitude",
    ylabel="Latitude",
    xlim=[-65., -55.],
    ylim=[33., 43.]
)

# Use the explicit figure API rather than the pyplot state machine.
fig.colorbar(pts, ax=ax, label="Sea Surface Height [m]")
fig.tight_layout()
plt.show()
../../_images/28cf7ca55f0eb4d7a2ecd1158775b9f5d0a03add2a65b5667c416afce9e2bc23.png

Gridding#

# List the staged NATL60 files available for the dc20a data challenge.
!ls /gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/staging/natl60/
NATL60-CJM165_GULFSTREAM_ssh_y2013.1y.decoded.nc
NATL60-CJM165_GULFSTREAM_ssh_y2013.1y.nc
NATL60-CJM165_GULFSTREAM_sss_y2013.1y.nc
NATL60-CJM165_GULFSTREAM_sst_y2013.1y.nc
# Path to the NATL60 SSH file.
# NOTE(review): this name is not referenced again in the visible notebook; the
# loading cell below defines `files_natl60` with the same path.
file_natl60 = "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/staging/natl60/NATL60-CJM165_GULFSTREAM_ssh_y2013.1y.nc"
def preprocess_natl60(da):
    """Preprocess the NATL60 SSH field.

    Runs the lat/lon and time validators, decodes the time axis with the
    units ``"seconds since 2012-10-01"`` and finally validates the SSH
    variable.

    Args:
        da: dataset for the NATL60 file, opened with ``decode_times=False``
            so that the time axis can be decoded here.

    Returns:
        The validated dataset with a decoded time coordinate.
    """
    # coordinates first: lat/lon, then time
    validated = validate_time(validate_latlon(da))

    # decode the raw time axis using an explicit reference date
    decoded = decode_cf_time(validated, units="seconds since 2012-10-01")

    return validate_ssh(decoded)
%%time

files_natl60 = "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/staging/natl60/NATL60-CJM165_GULFSTREAM_ssh_y2013.1y.nc"

ds_natl60 = xr.open_mfdataset(
    files_natl60,
    decode_times=False,
    preprocess=preprocess_natl60,
    combine="nested",
    engine="netcdf4",
    concat_dim="time"
)

ds_natl60 = ds_natl60.sortby("time").compute()

ds_natl60
CPU times: user 36.5 ms, sys: 60.2 ms, total: 96.7 ms
Wall time: 96.8 ms
<xarray.Dataset>
Dimensions:  (time: 365, lat: 201, lon: 201)
Coordinates:
  * lon      (lon) float64 -65.0 -64.95 -64.9 -64.85 ... -55.1 -55.05 -55.0
  * lat      (lat) float64 33.0 33.05 33.1 33.15 33.2 ... 42.85 42.9 42.95 43.0
  * time     (time) datetime64[ns] 2012-10-01 2012-10-02 ... 2013-09-30
Data variables:
    ssh      (time, lat, lon) float64 0.5019 0.5019 0.5097 ... -0.135 -0.135

Data Structure#

from oceanbench._src.geoprocessing.gridding import coord_based_to_grid
%%time

# Grid the nadir along-track `ssh` with a 12-hour time resolution (`t_res`).
# NOTE(review): `ds_natl60` is passed as the second argument, presumably as the
# target grid; the binning/aggregation itself is defined by
# `coord_based_to_grid` -- confirm there.
ds_nadir_gridded = coord_based_to_grid(
    ds_nadir, 
    ds_natl60,
    data_vars=["ssh"], 
    t_res=pd.to_timedelta(12, unit="hour")
)
CPU times: user 2.38 s, sys: 29.7 ms, total: 2.41 s
Wall time: 2.42 s
%%time

# Grid the SWOT swath `ssh` with a 12-hour time resolution (`t_res`).
# NOTE(review): `ds_natl60` is passed as the second argument, presumably as the
# target grid; the binning/aggregation itself is defined by
# `coord_based_to_grid` -- confirm there.
ds_swot_gridded = coord_based_to_grid(
    ds_swot, 
    ds_natl60,
    data_vars=["ssh"], 
    t_res=pd.to_timedelta(12, unit="hour")
)
CPU times: user 4.25 s, sys: 20.8 ms, total: 4.27 s
Wall time: 4.29 s
%%time

# Grid the combined SWOT + nadir `ssh` with a 12-hour time resolution (`t_res`).
# NOTE(review): `ds_natl60` is passed as the second argument, presumably as the
# target grid; the binning/aggregation itself is defined by
# `coord_based_to_grid` -- confirm there.
ds_swotnadir_gridded = coord_based_to_grid(
    ds_swotnadir, 
    ds_natl60,
    data_vars=["ssh"], 
    t_res=pd.to_timedelta(12, unit="hour")
)
CPU times: user 4.36 s, sys: 30.9 ms, total: 4.39 s
Wall time: 4.41 s
import holoviews as hv
# Animate the NATL60 field next to the observation coverage of each gridded product.
hv.extension("matplotlib")
variable = "ssh" # "vort_r" # "ke" #  
cmap = "viridis" # "RdBu_r" # "YlGnBu_r" #
field_name = "NATL60"

# One dataset with four variables: the NATL60 field itself, plus a boolean mask
# per gridded product that is True where the gridded value is finite.
ssh_ds = xr.Dataset({
    field_name: ds_natl60[variable],
    "NADIR": np.isfinite(ds_nadir_gridded[variable]),
    "SWOT": np.isfinite(ds_swot_gridded[variable]),
    "SWOTNADIR": np.isfinite(ds_swotnadir_gridded[variable]),
})


# Fix the dimension order; the commented-out isel would restrict the frames to a time window.
to_plot_ds = ssh_ds.transpose("time", "lat", "lon")#.isel(time=slice(25, 55, 1))

# Shared colour limits: the 0.5% and 99.5% quantiles taken over all four
# variables stacked together with `to_array`.
# NOTE(review): this pools the boolean masks with the SSH field -- confirm that
# a single colour range for both is intended.
clim = (
    to_plot_ds[
        [field_name, "NADIR", "SWOT", "SWOTNADIR"]
    ].to_array().pipe(lambda da: (da.quantile(0.005).item(), da.quantile(0.995).item()))
)

# One QuadMesh panel per variable over (lon, lat), labelled with the variable
# name and laid out in two columns.
images = hv.Layout([
    hv.Dataset(to_plot_ds)
    .to(hv.QuadMesh, ["lon", "lat"], v).relabel(v)
    .options(cmap=cmap, clim=clim)
    for v in to_plot_ds]
).cols(2).opts(sublabel_format="")

# Render inline as a GIF at 2 frames per second.
hv.output(images, holomap="gif", fps=2, dpi=125)
# hv.save(images, filename="dc20a_natl60", fmt="gif", fps=2, dpi=125)