EOCIS data access over STAC - example¶
This example notebook demonstrates how to obtain EOCIS Sea-Surface Temperature
In [1]:
import datetime
from pystac_client import Client
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import aiohttp
import time
In [2]:
# open the STAC endpoint provided by CEDA
client = Client.open("https://api.stac.ceda.ac.uk")
In [5]:
# search for SST data from 2023
search = client.search(
collections=['eocis-sst-cdrv3'],
datetime=(datetime.datetime(2023,1,1,12,0,0),datetime.datetime(2023,12,31,12,0,0))
)
In [6]:
# retrieve the items as dictionaries, rather than Item objects
items = list(search.items_as_dicts())
print (len(items)) # we should get 365 items, one for each day of 2023
365
In [7]:
# define a helper function that will get the access url from each stac item
def get_data_url(from_item):
for (key,value) in from_item.assets.items():
if key == "reference_file":
return value.href
return None
In [8]:
# define a helper function that will load an xarray dataset, given its url
def load_dataset(url):
return xr.open_dataset("reference://", engine="zarr", backend_kwargs={
"consolidated": False,
"storage_options": {"fo": url, "remote_protocol": "https","remote_options": {}}
})
return ds
In [9]:
# plot the first item
first_item = search.item_collection().items[0]
first_url = get_data_url(first_item) # get the url for the item
ds = load_dataset(first_url)
ds["analysed_sst"].sel(lat=slice(48,61),lon=slice(-12,3)).plot()
Out[9]:
<matplotlib.collections.QuadMesh at 0x7f2f153ab0b0>
In [10]:
# lets obtain the time series of mean sea surface temperatures for this area for 2023
# go through the items returned from the search, open the dataset
# select the data we need covering the sea around the Bristish Isles and get the mean temperature on each day
# this will take a little time to run, but only the requested subset of the data will be downloaded from CEDA
data = [] # append the mean daily temperature to this list
for itemNo, item in enumerate(search.item_collection().items):
url = get_data_url(item)
attempts = 0
try:
ds = load_dataset(url)
sst = ds["analysed_sst"].sel(lat=slice(48,61),lon=slice(-12,3)).mean(dim=['lat','lon']).load()
data.append(sst)
except Exception as e:
# retrieval can hit occasional problems due to network errors, so use a limited number of retries
attempts += 1
if attempts >= 5:
raise
print ("Retrying:", attempts)
time.sleep(5)
da = xr.concat(data,dim="time")
print(da)
Retrying: 1 <xarray.DataArray 'analysed_sst' (time: 364)> Size: 3kB array([283.04910457, 283.11001034, 283.14907001, 283.19777793, 283.23826667, 283.2553193 , 283.21340045, 283.23088226, 283.20400916, 283.24439321, 283.39155382, 283.44478645, 283.42287385, 283.28176338, 283.20575022, 283.30504011, 283.38080916, 283.51706834, 283.56683083, 283.58905403, 283.60968063, 283.63482482, 283.66823645, 283.70450396, 283.74999401, 283.78201946, 283.82267071, 283.91396279, 283.99195244, 284.03446593, 284.06057754, 284.128088 , 284.19967992, 284.2213998 , 284.29200104, 284.27120135, 284.28121975, 284.3300558 , 284.39810119, 284.53070444, 284.55510485, 284.58747962, 284.61783936, 284.68879913, 284.73500935, 284.8017722 , 284.82263353, 284.84819267, 284.88087684, 284.95594012, 284.98542794, 285.007506 , 285.11578751, 285.21240226, 285.24588716, 285.27928074, 285.35957572, 285.41638857, 285.47973805, 285.61443934, 285.74310419, 285.78720701, 285.82690977, 285.88265347, 285.95608351, 286.01346948, 286.05705161, 286.07898783, 286.17740232, 286.19434383, 286.22809941, 286.29661721, 286.41639654, 286.53581074, 286.59242028, 286.66563937, 286.76659116, 286.86292569, 286.93337606, 287.02885846, ... 281.99196325, 281.93762778, 281.89872211, 281.86638414, 281.84819517, 281.81318484, 281.81815437, 281.76918238, 281.76206621, 281.78749218, 281.80396422, 281.83024458, 281.78815628, 281.85806634, 281.88890441, 281.88974386, 281.85212402, 281.88137659, 281.94405572, 282.00071127, 282.01527019, 282.04344284, 282.02119671, 282.03740276, 282.11079617, 282.15310306, 282.17544259, 282.16061526, 282.13985289, 281.95085815, 281.99224191, 281.98181758, 282.02702479, 282.03516359, 282.12628951, 282.18074877, 282.19307059, 282.1675835 , 282.04355222, 282.05891204, 282.09562524, 282.17830472, 282.2073064 , 282.21867922, 282.2605083 , 282.23095225, 282.20512382, 282.13030534, 282.1973564 , 282.23813492, 282.26372705, 282.25541255, 282.27118195, 282.31587056, 282.37847747, 282.39689197, 282.40711576, 282.31814863, 282.30750224, 282.35608533, 282.44128664, 282.47683727, 282.49746647, 282.58238548, 282.66029214, 282.71838127, 282.82946292, 282.91111353, 282.94301762, 282.98140243, 283.00423279, 282.99788801, 283.00774564, 283.02542816, 283.04016313, 282.96069805, 282.98516547, 283.06857884, 283.08741385, 283.10983412]) Coordinates: * time (time) datetime64[ns] 3kB 2023-12-31T12:00:00 ... 2023-01-01T12:...
In [11]:
# take the mean along the time dimension, and plot the time series
da = da.sortby('time', ascending=True)
da.plot()
plt.title('Mean Sea Surface Temperatures Around the British Isles')
plt.ylabel('Mean SST (K)')
plt.xlabel('Time')
plt.show()
In [ ]: