Under the hood: Get 3D Subset as Array

🚧

This is an advanced topic.

Understanding this content is not required for regular use of Eratos.

Below is the complete break-down of Get 3D Subset as Array (Area over time)

Where self is a gridded data object initialized through:

from eratos.creds import AccessTokenCreds
from eratos.adapter import Adapter

ecreds = AccessTokenCreds(
  "YOUR_KEY_ID_GOES_HERE",
  "YOUR_KEY_SECRET_GOES_HERE"
)

eadapter = Adapter(ecreds)

# Request acccess to a dataset resource in Eratos
max_temperature_data = eadapter.Resource(ern='ern:e-pn.io:resource:eratos.blocks.silo.maxtemperature')

# Access the gridded dataset via the Gridded API:
gridded_max_temperature_data = max_temperature_data.data().gapi()

Function break-down

def get_3d_subset_as_array(self,var,startDate, endDate,bottomLeftPoint,topRightPoint,time_stride = 1,lat_stride = 1, lon_stride = 1,verbose = True):
  
      if type(var) is not str:
          raise TypeError('invalid type for var: %s' % type(var))
      if type(startDate) is not str:
          raise TypeError('invalid type for endDate: %s' % type(startDate))
      if type(endDate) is not str:
          raise TypeError('invalid type for endDate: %s' % type(endDate))  
      #2 date: Create a Unix timestamp in UTC timezone from the DD-MM-YYYY-MM formatted date string - e.g. "01-01-2022"
      unix_ts_utc_start = datetime.datetime.strptime(startDate, '%Y-%m-%d').replace(tzinfo=timezone.utc).timestamp()
      unix_ts_utc_end = datetime.datetime.strptime(endDate, '%Y-%m-%d').replace(tzinfo=timezone.utc).timestamp()

      times = self.get_subset_as_array('time')
      startTime_idx = np.where(times == unix_ts_utc_start)[0][0]
      endTime_idx = (np.where(times == unix_ts_utc_end)[0][0])+1
      spacingTime = times[startTime_idx]-times[endTime_idx]

      # Load Point strings as Shapely Points   
      bottomLeftPoint_shape = wkt.loads(bottomLeftPoint)
      if type(bottomLeftPoint_shape) is not geometry.Point:
          raise ValueError('value inside bottomLeftPoint should be a WKT point')
      topRightPoint_shape = wkt.loads(topRightPoint)
      if type(topRightPoint_shape) is not geometry.Point:
          raise ValueError('value inside topRightPoint should be a WKT point')

      # Find closest index location in underlying gridded dataset of bottomLeft and topRight Points, Snapping often occurs
      lats = self.get_subset_as_array('lat')
      lons = self.get_subset_as_array('lon')
      spacingLat, spacingLon = lats[1]-lats[0], lons[1]-lons[0]
      lats = self.get_subset_as_array('lat')
      lons = self.get_subset_as_array('lon')
      spacingLat, spacingLon = lats[1]-lats[0], lons[1]-lons[0]
      minLatIdx, maxLatIdx = np.argmin(np.abs(lats-bottomLeftPoint_shape.y)), np.argmin(np.abs(lats-topRightPoint_shape.y))
      minLonIdx, maxLonIdx = np.argmin(np.abs(lons -bottomLeftPoint_shape.x)), np.argmin(np.abs(lons -topRightPoint_shape.x))

      data_query_array = self.get_subset_as_array(var, starts=[startTime_idx,minLatIdx,minLonIdx], ends=[endTime_idx,maxLatIdx,maxLonIdx], strides =  [time_stride,lat_stride,lon_stride])

     # Verbose output, explaining whats going on under the hood and how it affects the output
      if verbose:
          print(f""" 
          The following bottom left, {str(bottomLeftPoint)}, and top right {str(topRightPoint)} Points of the grid were provided.
          As the grid corners must correspond to the dataset's underlying grid these points were snapped to the following points: 
          bottom left, {str('Point('+str(lons[minLonIdx]) + " " + str(lats[minLatIdx]) +")")}, and top right {str('Point('+str(lons[maxLonIdx]) + " " + str(lats[maxLatIdx])+")")}""")
          print(f"""
          The returned numpy array will have the following shape: {str(data_query_array.shape)}
          The first array dimension is time with length {str(data_query_array.shape[0])}
          The second array dimension is a south-north vector with length {str(data_query_array.shape[1])} where the index = 0 is the southern most point of the grid {str(lats[minLatIdx])}
          Incrementing at {str(round(spacingLat,2))} degree per 1 increase in index ending at {str(lats[maxLatIdx])}.
          The third array dimension is a west-east vector with length {str(data_query_array.shape[2])} where the index = 0 is the eastern most point of the grid {str(lons[minLonIdx])}
          Incrementing at {str(round(spacingLon,2))} degree per 1 increase in index ending at {str(lons[maxLonIdx])}.
              """)
      return data_query_array