Skip to content

Cell & Coordinate Operations#

Cell coordinate retrieval, cell polygons/points, and map-to-array coordinate conversions.

pyramids.dataset.ops.cell.Cell #

Mixin providing cell coordinate and geometry utilities for Dataset.

Source code in src/pyramids/dataset/ops/cell.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
class Cell:
    """Mixin providing cell coordinate and geometry utilities for Dataset."""

    def get_cell_coords(
        self, location: str = "center", domain_only: bool = False
    ) -> np.ndarray:
        """Get coordinates for the center/corner of cells inside the dataset domain.

        Returns the coordinates of the cell centers inside the domain (only the cells that
        do not have nodata value)

        Args:
            location (str):
                Location of the coordinates. Use `center` for the center of a cell, `corner` for the corner of the
                cell (top-left corner).
            domain_only (bool):
                True to exclude the cells out of the domain. Default is False.

        Returns:
            np.ndarray:
                Array with a list of the coordinates to be interpolated, without the NaN.
            np.ndarray:
                Array with all the centers of cells in the domain of the DEM.

        Examples:
            - Create `Dataset` consists of 1 bands, 3 rows, 3 columns, at the point lon/lat (0, 0).

              ```python
              >>> import numpy as np
              >>> arr = np.random.randint(1,3, size=(3, 3))
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

              ```

            - Get the coordinates of the center of cells inside the domain.

              ```python
              >>> coords = dataset.get_cell_coords()
              >>> print(coords)
              [[ 0.025 -0.025]
               [ 0.075 -0.025]
               [ 0.125 -0.025]
               [ 0.025 -0.075]
               [ 0.075 -0.075]
               [ 0.125 -0.075]
               [ 0.025 -0.125]
               [ 0.075 -0.125]
               [ 0.125 -0.125]]

              ```

            - Get the coordinates of the top left corner of cells inside the domain.

              ```python
              >>> coords = dataset.get_cell_coords(location="corner")
              >>> print(coords)
              [[ 0.    0.  ]
               [ 0.05  0.  ]
               [ 0.1   0.  ]
               [ 0.   -0.05]
               [ 0.05 -0.05]
               [ 0.1  -0.05]
               [ 0.   -0.1 ]
               [ 0.05 -0.1 ]
               [ 0.1  -0.1 ]]

              ```
        """
        # check the location parameter
        location = location.lower()
        if location not in ["center", "corner"]:
            raise ValueError(
                "The location parameter can have one of these values: 'center', 'corner', "
                f"but the value: {location} is given."
            )

        if location == "center":
            # Adding 0.5*cell size to get the center
            add_value = 0.5
        else:
            add_value = 0
        # Getting data for the whole grid
        (
            x_init,
            cell_size_x,
            xy_span,
            y_init,
            yy_span,
            cell_size_y,
        ) = self.geotransform

        if cell_size_x != cell_size_y:
            if np.abs(cell_size_x) != np.abs(cell_size_y):
                self.logger.warning(
                    f"The given raster does not have a square cells, the cell size is {cell_size_x}*{cell_size_y} "
                )

        # data in the array
        no_val = self.no_data_value[0] if self.no_data_value[0] is not None else np.nan
        arr = self.read_array(band=0)
        if domain_only and no_val not in arr:
            self.logger.warning(
                "The no data value does not exist in the band, so all the cells will be considered, and the "
                "domain_only filter will not be applied."
            )

        mask_values: list[Any] | None = [no_val] if domain_only else None
        indices = get_indices2(arr, mask=mask_values)

        # exclude the no_data_values cells.
        f1 = [i[0] for i in indices]
        f2 = [i[1] for i in indices]
        x = [x_init + cell_size_x * (i + add_value) for i in f2]
        y = [y_init + cell_size_y * (i + add_value) for i in f1]
        coords = np.array(list(zip(x, y)))

        return coords

    def get_cell_polygons(self, domain_only: bool = False) -> GeoDataFrame:
        """Get a polygon shapely geometry for the raster cells.

        Args:
            domain_only (bool):
                True to get the polygons of the cells inside the domain.

        Returns:
            GeoDataFrame:
                With two columns, geometry, and id.

        Examples:
            - Create `Dataset` consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).

              ```python
              >>> import numpy as np
              >>> arr = np.random.randint(1,3, size=(3, 3))
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

              ```

            - Get the coordinates of the center of cells inside the domain.

              ```python
              >>> gdf = dataset.get_cell_polygons()
              >>> print(gdf)
                                                     geometry  id
              0  POLYGON ((0 0, 0.05 0, 0.05 -0.05, 0 -0.05, 0 0))   0
              1  POLYGON ((0.05 0, 0.1 0, 0.1 -0.05, 0.05 -0.05...   1
              2  POLYGON ((0.1 0, 0.15 0, 0.15 -0.05, 0.1 -0.05...   2
              3  POLYGON ((0 -0.05, 0.05 -0.05, 0.05 -0.1, 0 -0...   3
              4  POLYGON ((0.05 -0.05, 0.1 -0.05, 0.1 -0.1, 0.0...   4
              5  POLYGON ((0.1 -0.05, 0.15 -0.05, 0.15 -0.1, 0....   5
              6  POLYGON ((0 -0.1, 0.05 -0.1, 0.05 -0.15, 0 -0....   6
              7  POLYGON ((0.05 -0.1, 0.1 -0.1, 0.1 -0.15, 0.05...   7
              8  POLYGON ((0.1 -0.1, 0.15 -0.1, 0.15 -0.15, 0.1...   8
              >>> fig, ax = dataset.plot()
              >>> gdf.plot(ax=ax, facecolor='none', edgecolor="gray", linewidth=2)
              <Axes: >

              ```

        ![get_cell_polygons](./../../_images/dataset/get_cell_polygons.png)
        """
        coords = self.get_cell_coords(location="corner", domain_only=domain_only)
        cell_size = self.geotransform[1]
        epsg = self._get_epsg()
        x = np.zeros((coords.shape[0], 4))
        y = np.zeros((coords.shape[0], 4))
        # fill the top left corner point
        x[:, 0] = coords[:, 0]
        y[:, 0] = coords[:, 1]
        # fill the top right
        x[:, 1] = x[:, 0] + cell_size
        y[:, 1] = y[:, 0]
        # fill the bottom right
        x[:, 2] = x[:, 0] + cell_size
        y[:, 2] = y[:, 0] - cell_size

        # fill the bottom left
        x[:, 3] = x[:, 0]
        y[:, 3] = y[:, 0] - cell_size

        coords_tuples = [list(zip(x[:, i], y[:, i])) for i in range(4)]
        polys_coords = [
            (
                coords_tuples[0][i],
                coords_tuples[1][i],
                coords_tuples[2][i],
                coords_tuples[3][i],
            )
            for i in range(len(x))
        ]
        polygons = list(map(FeatureCollection.create_polygon, polys_coords))
        gdf = gpd.GeoDataFrame(geometry=polygons)
        gdf.set_crs(epsg=epsg, inplace=True)
        gdf["id"] = gdf.index
        return gdf

    def get_cell_points(self, location: str = "center", domain_only: bool = False) -> GeoDataFrame:
        """Get a point shapely geometry for the raster cells center point.

        Args:
            location (str):
                Location of the point, ["corner", "center"]. Default is "center".
            domain_only (bool):
                True to get the points of the cells inside the domain only.

        Returns:
            GeoDataFrame:
                With two columns, geometry, and id.

        Examples:
            - Create `Dataset` consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).

              ```python
              >>> import numpy as np
              >>> arr = np.random.randint(1,3, size=(3, 3))
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

              ```

            - Get the coordinates of the center of cells inside the domain.

              ```python
              >>> gdf = dataset.get_cell_points()
              >>> print(gdf)
                             geometry  id
              0  POINT (0.025 -0.025)   0
              1  POINT (0.075 -0.025)   1
              2  POINT (0.125 -0.025)   2
              3  POINT (0.025 -0.075)   3
              4  POINT (0.075 -0.075)   4
              5  POINT (0.125 -0.075)   5
              6  POINT (0.025 -0.125)   6
              7  POINT (0.075 -0.125)   7
              8  POINT (0.125 -0.125)   8
              >>> fig, ax = dataset.plot()
              >>> gdf.plot(ax=ax, facecolor='black', linewidth=2)
              <Axes: >

              ```

            ![get_cell_points](./../../_images/dataset/get_cell_points.png)

            - Get the coordinates of the top left corner of cells inside the domain.

              ```python
              >>> gdf = dataset.get_cell_points(location="corner")
              >>> print(gdf)
                          geometry  id
              0         POINT (0 0)   0
              1      POINT (0.05 0)   1
              2       POINT (0.1 0)   2
              3     POINT (0 -0.05)   3
              4  POINT (0.05 -0.05)   4
              5   POINT (0.1 -0.05)   5
              6      POINT (0 -0.1)   6
              7   POINT (0.05 -0.1)   7
              8    POINT (0.1 -0.1)   8
              >>> fig, ax = dataset.plot()
              >>> gdf.plot(ax=ax, facecolor='black', linewidth=4)
              <Axes: >

              ```

            ![get_cell_points-corner](./../../_images/dataset/get_cell_points-corner.png)
        """
        coords = self.get_cell_coords(location=location, domain_only=domain_only)
        epsg = self._get_epsg()

        coords_tuples = list(zip(coords[:, 0], coords[:, 1]))
        points = FeatureCollection.create_point(coords_tuples)
        gdf = gpd.GeoDataFrame(geometry=points)
        gdf.set_crs(epsg=epsg, inplace=True)
        gdf["id"] = gdf.index
        return gdf

    def map_to_array_coordinates(
        self: Dataset,
        points: GeoDataFrame | FeatureCollection | DataFrame,
    ) -> np.ndarray:
        """Convert coordinates of points to array indices.

        - map_to_array_coordinates locates a point with real coordinates (x, y) or (lon, lat) on the array by finding
            the cell indices (row, column) of the nearest cell in the raster.
        - The point coordinate system of the raster has to be projected to be able to calculate the distance.

        Args:
            points (GeoDataFrame | pandas.DataFrame | FeatureCollection):
                - GeoDataFrame: GeoDataFrame with POINT geometry.
                - DataFrame: DataFrame with x, y columns.

        Returns:
            np.ndarray:
                Array with shape (N, 2) containing the row and column indices in the array.

        Examples:
            - Create `Dataset` consisting of 2 bands, 10 rows, 10 columns, at the point lon/lat (0, 0).

              ```python
              >>> import numpy as np
              >>> import pandas as pd
              >>> arr = np.random.randint(1, 3, size=(2, 10, 10))
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

              ```
            - DataFrame with x, y columns:

              - We can give the function a DataFrame with x, y columns to array the coordinates of the points that are located within the dataset domain.

              ```python
              >>> points = pd.DataFrame({"x": [0.025, 0.175, 0.375], "y": [0.025, 0.225, 0.125]})
              >>> indices = dataset.map_to_array_coordinates(points)
              >>> print(indices)
              [[0 0]
               [0 3]
               [0 7]]

              ```
            - GeoDataFrame with POINT geometry:

              - We can give the function a GeoDataFrame with POINT geometry to array the coordinates of the points that locate within the dataset domain.

              ```python
              >>> from shapely.geometry import Point
              >>> from geopandas import GeoDataFrame
              >>> points = GeoDataFrame({"geometry": [Point(0.025, 0.025), Point(0.175, 0.225), Point(0.375, 0.125)]})
              >>> indices = dataset.map_to_array_coordinates(points)
              >>> print(indices)
              [[0 0]
               [0 3]
               [0 7]]

              ```
        """
        if isinstance(points, GeoDataFrame):
            points = FeatureCollection(points)
        elif isinstance(points, DataFrame):
            if all(elem not in points.columns for elem in ["x", "y"]):
                raise ValueError(
                    "If the input is a DataFrame, it should have two columns x, and y"
                )
        else:
            if not isinstance(points, FeatureCollection):
                raise TypeError(
                    "please check points input it should be GeoDataFrame/DataFrame/FeatureCollection - given"
                    f" {type(points)}"
                )
        if not isinstance(points, DataFrame):
            # get the x, y coordinates.
            points.xy()
            points = points.feature.loc[:, ["x", "y"]].values
        else:
            points = points.loc[:, ["x", "y"]].values

        # since the first row is x-coords so the first column in the indices is the column index
        indices = locate_values(points, self.x, self.y)
        # rearrange the columns to make the row index first
        indices = indices[:, [1, 0]]
        return np.asarray(indices)

    def array_to_map_coordinates(
        self: Dataset,
        rows_index: list[Number] | np.ndarray,
        column_index: list[Number] | np.ndarray,
        center: bool = False,
    ) -> tuple[list[Number], list[Number]]:
        """Convert array indices to map coordinates.

        array_to_map_coordinates converts the array indices (rows, cols) to real coordinates (x, y) or (lon, lat).

        Args:
            rows_index (List[Number] | np.ndarray):
                The row indices of the cells in the raster array.
            column_index (List[Number] | np.ndarray):
                The column indices of the cells in the raster array.
            center (bool):
                If True, the coordinates will be the center of the cell. Default is False.

        Returns:
            Tuple[List[Number], List[Number]]:
                A tuple of two lists: the x coordinates and the y coordinates of the cells.

        Examples:
            - Create `Dataset` consisting of 1 band, 10 rows, 10 columns, at the point lon/lat (0, 0):

              ```python
              >>> import numpy as np
              >>> import pandas as pd
              >>> arr = np.random.randint(1, 3, size=(10, 10))
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

              ```

            - Now call the function with two lists of row and column indices:

              ```python
              >>> rows_index = [1, 3, 5]
              >>> column_index = [2, 4, 6]
              >>> coords = dataset.array_to_map_coordinates(rows_index, column_index)
              >>> print(coords) # doctest: +SKIP
              ([0.1, 0.2, 0.3], [-0.05, -0.15, -0.25])

              ```
        """
        top_left_x, top_left_y = self.top_left_corner
        cell_size = self.cell_size
        if center:
            # for the top left corner of the cell
            top_left_x += cell_size / 2
            top_left_y -= cell_size / 2

        x_coord_fn = lambda x: top_left_x + x * cell_size
        y_coord_fn = lambda y: top_left_y - y * cell_size

        x_coords = list(map(x_coord_fn, column_index))
        y_coords = list(map(y_coord_fn, rows_index))

        return x_coords, y_coords

get_cell_coords(location='center', domain_only=False) #

Get coordinates for the center/corner of cells inside the dataset domain.

Returns the coordinates of the cell centers inside the domain (only the cells that do not have nodata value)

Parameters:

Name Type Description Default
location str

Location of the coordinates. Use center for the center of a cell, corner for the corner of the cell (top-left corner).

'center'
domain_only bool

True to exclude the cells out of the domain. Default is False.

False

Returns:

Type Description
ndarray

np.ndarray: Array with a list of the coordinates to be interpolated, without the NaN.

ndarray

np.ndarray: Array with all the centers of cells in the domain of the DEM.

Examples:

  • Create Dataset consists of 1 bands, 3 rows, 3 columns, at the point lon/lat (0, 0).
>>> import numpy as np
>>> arr = np.random.randint(1,3, size=(3, 3))
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
  • Get the coordinates of the center of cells inside the domain.
>>> coords = dataset.get_cell_coords()
>>> print(coords)
[[ 0.025 -0.025]
 [ 0.075 -0.025]
 [ 0.125 -0.025]
 [ 0.025 -0.075]
 [ 0.075 -0.075]
 [ 0.125 -0.075]
 [ 0.025 -0.125]
 [ 0.075 -0.125]
 [ 0.125 -0.125]]
  • Get the coordinates of the top left corner of cells inside the domain.
>>> coords = dataset.get_cell_coords(location="corner")
>>> print(coords)
[[ 0.    0.  ]
 [ 0.05  0.  ]
 [ 0.1   0.  ]
 [ 0.   -0.05]
 [ 0.05 -0.05]
 [ 0.1  -0.05]
 [ 0.   -0.1 ]
 [ 0.05 -0.1 ]
 [ 0.1  -0.1 ]]
Source code in src/pyramids/dataset/ops/cell.py
def get_cell_coords(
    self, location: str = "center", domain_only: bool = False
) -> np.ndarray:
    """Get coordinates for the center/corner of cells inside the dataset domain.

    Returns the coordinates of the cell centers inside the domain (only the cells that
    do not have nodata value)

    Args:
        location (str):
            Location of the coordinates. Use `center` for the center of a cell, `corner` for the corner of the
            cell (top-left corner).
        domain_only (bool):
            True to exclude the cells out of the domain. Default is False.

    Returns:
        np.ndarray:
            Array with a list of the coordinates to be interpolated, without the NaN.
        np.ndarray:
            Array with all the centers of cells in the domain of the DEM.

    Examples:
        - Create `Dataset` consists of 1 bands, 3 rows, 3 columns, at the point lon/lat (0, 0).

          ```python
          >>> import numpy as np
          >>> arr = np.random.randint(1,3, size=(3, 3))
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

          ```

        - Get the coordinates of the center of cells inside the domain.

          ```python
          >>> coords = dataset.get_cell_coords()
          >>> print(coords)
          [[ 0.025 -0.025]
           [ 0.075 -0.025]
           [ 0.125 -0.025]
           [ 0.025 -0.075]
           [ 0.075 -0.075]
           [ 0.125 -0.075]
           [ 0.025 -0.125]
           [ 0.075 -0.125]
           [ 0.125 -0.125]]

          ```

        - Get the coordinates of the top left corner of cells inside the domain.

          ```python
          >>> coords = dataset.get_cell_coords(location="corner")
          >>> print(coords)
          [[ 0.    0.  ]
           [ 0.05  0.  ]
           [ 0.1   0.  ]
           [ 0.   -0.05]
           [ 0.05 -0.05]
           [ 0.1  -0.05]
           [ 0.   -0.1 ]
           [ 0.05 -0.1 ]
           [ 0.1  -0.1 ]]

          ```
    """
    # check the location parameter
    location = location.lower()
    if location not in ["center", "corner"]:
        raise ValueError(
            "The location parameter can have one of these values: 'center', 'corner', "
            f"but the value: {location} is given."
        )

    if location == "center":
        # Adding 0.5*cell size to get the center
        add_value = 0.5
    else:
        add_value = 0
    # Getting data for the whole grid
    (
        x_init,
        cell_size_x,
        xy_span,
        y_init,
        yy_span,
        cell_size_y,
    ) = self.geotransform

    if cell_size_x != cell_size_y:
        if np.abs(cell_size_x) != np.abs(cell_size_y):
            self.logger.warning(
                f"The given raster does not have a square cells, the cell size is {cell_size_x}*{cell_size_y} "
            )

    # data in the array
    no_val = self.no_data_value[0] if self.no_data_value[0] is not None else np.nan
    arr = self.read_array(band=0)
    if domain_only and no_val not in arr:
        self.logger.warning(
            "The no data value does not exist in the band, so all the cells will be considered, and the "
            "domain_only filter will not be applied."
        )

    mask_values: list[Any] | None = [no_val] if domain_only else None
    indices = get_indices2(arr, mask=mask_values)

    # exclude the no_data_values cells.
    f1 = [i[0] for i in indices]
    f2 = [i[1] for i in indices]
    x = [x_init + cell_size_x * (i + add_value) for i in f2]
    y = [y_init + cell_size_y * (i + add_value) for i in f1]
    coords = np.array(list(zip(x, y)))

    return coords

get_cell_polygons(domain_only=False) #

Get a polygon shapely geometry for the raster cells.

Parameters:

Name Type Description Default
domain_only bool

True to get the polygons of the cells inside the domain.

False

Returns:

Name Type Description
GeoDataFrame GeoDataFrame

With two columns, geometry, and id.

Examples:

  • Create Dataset consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).
>>> import numpy as np
>>> arr = np.random.randint(1,3, size=(3, 3))
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
  • Get the coordinates of the center of cells inside the domain.
>>> gdf = dataset.get_cell_polygons()
>>> print(gdf)
                                       geometry  id
0  POLYGON ((0 0, 0.05 0, 0.05 -0.05, 0 -0.05, 0 0))   0
1  POLYGON ((0.05 0, 0.1 0, 0.1 -0.05, 0.05 -0.05...   1
2  POLYGON ((0.1 0, 0.15 0, 0.15 -0.05, 0.1 -0.05...   2
3  POLYGON ((0 -0.05, 0.05 -0.05, 0.05 -0.1, 0 -0...   3
4  POLYGON ((0.05 -0.05, 0.1 -0.05, 0.1 -0.1, 0.0...   4
5  POLYGON ((0.1 -0.05, 0.15 -0.05, 0.15 -0.1, 0....   5
6  POLYGON ((0 -0.1, 0.05 -0.1, 0.05 -0.15, 0 -0....   6
7  POLYGON ((0.05 -0.1, 0.1 -0.1, 0.1 -0.15, 0.05...   7
8  POLYGON ((0.1 -0.1, 0.15 -0.1, 0.15 -0.15, 0.1...   8
>>> fig, ax = dataset.plot()
>>> gdf.plot(ax=ax, facecolor='none', edgecolor="gray", linewidth=2)
<Axes: >

get_cell_polygons

Source code in src/pyramids/dataset/ops/cell.py
def get_cell_polygons(self, domain_only: bool = False) -> GeoDataFrame:
    """Get a polygon shapely geometry for the raster cells.

    Args:
        domain_only (bool):
            True to get the polygons of the cells inside the domain.

    Returns:
        GeoDataFrame:
            With two columns, geometry, and id.

    Examples:
        - Create `Dataset` consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).

          ```python
          >>> import numpy as np
          >>> arr = np.random.randint(1,3, size=(3, 3))
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

          ```

        - Get the coordinates of the center of cells inside the domain.

          ```python
          >>> gdf = dataset.get_cell_polygons()
          >>> print(gdf)
                                                 geometry  id
          0  POLYGON ((0 0, 0.05 0, 0.05 -0.05, 0 -0.05, 0 0))   0
          1  POLYGON ((0.05 0, 0.1 0, 0.1 -0.05, 0.05 -0.05...   1
          2  POLYGON ((0.1 0, 0.15 0, 0.15 -0.05, 0.1 -0.05...   2
          3  POLYGON ((0 -0.05, 0.05 -0.05, 0.05 -0.1, 0 -0...   3
          4  POLYGON ((0.05 -0.05, 0.1 -0.05, 0.1 -0.1, 0.0...   4
          5  POLYGON ((0.1 -0.05, 0.15 -0.05, 0.15 -0.1, 0....   5
          6  POLYGON ((0 -0.1, 0.05 -0.1, 0.05 -0.15, 0 -0....   6
          7  POLYGON ((0.05 -0.1, 0.1 -0.1, 0.1 -0.15, 0.05...   7
          8  POLYGON ((0.1 -0.1, 0.15 -0.1, 0.15 -0.15, 0.1...   8
          >>> fig, ax = dataset.plot()
          >>> gdf.plot(ax=ax, facecolor='none', edgecolor="gray", linewidth=2)
          <Axes: >

          ```

    ![get_cell_polygons](./../../_images/dataset/get_cell_polygons.png)
    """
    coords = self.get_cell_coords(location="corner", domain_only=domain_only)
    cell_size = self.geotransform[1]
    epsg = self._get_epsg()
    x = np.zeros((coords.shape[0], 4))
    y = np.zeros((coords.shape[0], 4))
    # fill the top left corner point
    x[:, 0] = coords[:, 0]
    y[:, 0] = coords[:, 1]
    # fill the top right
    x[:, 1] = x[:, 0] + cell_size
    y[:, 1] = y[:, 0]
    # fill the bottom right
    x[:, 2] = x[:, 0] + cell_size
    y[:, 2] = y[:, 0] - cell_size

    # fill the bottom left
    x[:, 3] = x[:, 0]
    y[:, 3] = y[:, 0] - cell_size

    coords_tuples = [list(zip(x[:, i], y[:, i])) for i in range(4)]
    polys_coords = [
        (
            coords_tuples[0][i],
            coords_tuples[1][i],
            coords_tuples[2][i],
            coords_tuples[3][i],
        )
        for i in range(len(x))
    ]
    polygons = list(map(FeatureCollection.create_polygon, polys_coords))
    gdf = gpd.GeoDataFrame(geometry=polygons)
    gdf.set_crs(epsg=epsg, inplace=True)
    gdf["id"] = gdf.index
    return gdf

get_cell_points(location='center', domain_only=False) #

Get a point shapely geometry for the raster cells center point.

Parameters:

Name Type Description Default
location str

Location of the point, ["corner", "center"]. Default is "center".

'center'
domain_only bool

True to get the points of the cells inside the domain only.

False

Returns:

Name Type Description
GeoDataFrame GeoDataFrame

With two columns, geometry, and id.

Examples:

  • Create Dataset consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).
>>> import numpy as np
>>> arr = np.random.randint(1,3, size=(3, 3))
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
  • Get the coordinates of the center of cells inside the domain.
>>> gdf = dataset.get_cell_points()
>>> print(gdf)
               geometry  id
0  POINT (0.025 -0.025)   0
1  POINT (0.075 -0.025)   1
2  POINT (0.125 -0.025)   2
3  POINT (0.025 -0.075)   3
4  POINT (0.075 -0.075)   4
5  POINT (0.125 -0.075)   5
6  POINT (0.025 -0.125)   6
7  POINT (0.075 -0.125)   7
8  POINT (0.125 -0.125)   8
>>> fig, ax = dataset.plot()
>>> gdf.plot(ax=ax, facecolor='black', linewidth=2)
<Axes: >

get_cell_points

  • Get the coordinates of the top left corner of cells inside the domain.
>>> gdf = dataset.get_cell_points(location="corner")
>>> print(gdf)
            geometry  id
0         POINT (0 0)   0
1      POINT (0.05 0)   1
2       POINT (0.1 0)   2
3     POINT (0 -0.05)   3
4  POINT (0.05 -0.05)   4
5   POINT (0.1 -0.05)   5
6      POINT (0 -0.1)   6
7   POINT (0.05 -0.1)   7
8    POINT (0.1 -0.1)   8
>>> fig, ax = dataset.plot()
>>> gdf.plot(ax=ax, facecolor='black', linewidth=4)
<Axes: >

get_cell_points-corner

Source code in src/pyramids/dataset/ops/cell.py
def get_cell_points(self, location: str = "center", domain_only: bool = False) -> GeoDataFrame:
    """Get a point shapely geometry for the raster cells center point.

    Args:
        location (str):
            Location of the point, ["corner", "center"]. Default is "center".
        domain_only (bool):
            True to get the points of the cells inside the domain only.

    Returns:
        GeoDataFrame:
            With two columns, geometry, and id.

    Examples:
        - Create `Dataset` consists of 1 band, 3 rows, 3 columns, at the point lon/lat (0, 0).

          ```python
          >>> import numpy as np
          >>> arr = np.random.randint(1,3, size=(3, 3))
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

          ```

        - Get the coordinates of the center of cells inside the domain.

          ```python
          >>> gdf = dataset.get_cell_points()
          >>> print(gdf)
                         geometry  id
          0  POINT (0.025 -0.025)   0
          1  POINT (0.075 -0.025)   1
          2  POINT (0.125 -0.025)   2
          3  POINT (0.025 -0.075)   3
          4  POINT (0.075 -0.075)   4
          5  POINT (0.125 -0.075)   5
          6  POINT (0.025 -0.125)   6
          7  POINT (0.075 -0.125)   7
          8  POINT (0.125 -0.125)   8
          >>> fig, ax = dataset.plot()
          >>> gdf.plot(ax=ax, facecolor='black', linewidth=2)
          <Axes: >

          ```

        ![get_cell_points](./../../_images/dataset/get_cell_points.png)

        - Get the coordinates of the top left corner of cells inside the domain.

          ```python
          >>> gdf = dataset.get_cell_points(location="corner")
          >>> print(gdf)
                      geometry  id
          0         POINT (0 0)   0
          1      POINT (0.05 0)   1
          2       POINT (0.1 0)   2
          3     POINT (0 -0.05)   3
          4  POINT (0.05 -0.05)   4
          5   POINT (0.1 -0.05)   5
          6      POINT (0 -0.1)   6
          7   POINT (0.05 -0.1)   7
          8    POINT (0.1 -0.1)   8
          >>> fig, ax = dataset.plot()
          >>> gdf.plot(ax=ax, facecolor='black', linewidth=4)
          <Axes: >

          ```

        ![get_cell_points-corner](./../../_images/dataset/get_cell_points-corner.png)
    """
    coords = self.get_cell_coords(location=location, domain_only=domain_only)
    epsg = self._get_epsg()

    coords_tuples = list(zip(coords[:, 0], coords[:, 1]))
    points = FeatureCollection.create_point(coords_tuples)
    gdf = gpd.GeoDataFrame(geometry=points)
    gdf.set_crs(epsg=epsg, inplace=True)
    gdf["id"] = gdf.index
    return gdf

map_to_array_coordinates(points) #

Convert coordinates of points to array indices.

  • map_to_array_coordinates locates a point with real coordinates (x, y) or (lon, lat) on the array by finding the cell indices (row, column) of the nearest cell in the raster.
  • The point coordinate system of the raster has to be projected to be able to calculate the distance.

Parameters:

Name Type Description Default
points GeoDataFrame | DataFrame | FeatureCollection
  • GeoDataFrame: GeoDataFrame with POINT geometry.
  • DataFrame: DataFrame with x, y columns.
required

Returns:

Type Description
ndarray

np.ndarray: Array with shape (N, 2) containing the row and column indices in the array.

Examples:

  • Create Dataset consisting of 2 bands, 10 rows, 10 columns, at the point lon/lat (0, 0).

>>> import numpy as np
>>> import pandas as pd
>>> arr = np.random.randint(1, 3, size=(2, 10, 10))
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
- DataFrame with x, y columns:

  • We can give the function a DataFrame with x, y columns to array the coordinates of the points that are located within the dataset domain.

>>> points = pd.DataFrame({"x": [0.025, 0.175, 0.375], "y": [0.025, 0.225, 0.125]})
>>> indices = dataset.map_to_array_coordinates(points)
>>> print(indices)
[[0 0]
 [0 3]
 [0 7]]
- GeoDataFrame with POINT geometry:

  • We can give the function a GeoDataFrame with POINT geometry to array the coordinates of the points that locate within the dataset domain.
>>> from shapely.geometry import Point
>>> from geopandas import GeoDataFrame
>>> points = GeoDataFrame({"geometry": [Point(0.025, 0.025), Point(0.175, 0.225), Point(0.375, 0.125)]})
>>> indices = dataset.map_to_array_coordinates(points)
>>> print(indices)
[[0 0]
 [0 3]
 [0 7]]
Source code in src/pyramids/dataset/ops/cell.py
def map_to_array_coordinates(
    self: Dataset,
    points: GeoDataFrame | FeatureCollection | DataFrame,
) -> np.ndarray:
    """Convert coordinates of points to array indices.

    - map_to_array_coordinates locates a point with real coordinates (x, y) or (lon, lat) on the array by finding
        the cell indices (row, column) of the nearest cell in the raster.
    - The point coordinate system of the raster has to be projected to be able to calculate the distance.

    Args:
        points (GeoDataFrame | pandas.DataFrame | FeatureCollection):
            - GeoDataFrame: GeoDataFrame with POINT geometry.
            - DataFrame: DataFrame with x, y columns.

    Returns:
        np.ndarray:
            Array with shape (N, 2) containing the row and column indices in the array.

    Examples:
        - Create `Dataset` consisting of 2 bands, 10 rows, 10 columns, at the point lon/lat (0, 0).

          ```python
          >>> import numpy as np
          >>> import pandas as pd
          >>> arr = np.random.randint(1, 3, size=(2, 10, 10))
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

          ```
        - DataFrame with x, y columns:

          - We can give the function a DataFrame with x, y columns to array the coordinates of the points that are located within the dataset domain.

          ```python
          >>> points = pd.DataFrame({"x": [0.025, 0.175, 0.375], "y": [0.025, 0.225, 0.125]})
          >>> indices = dataset.map_to_array_coordinates(points)
          >>> print(indices)
          [[0 0]
           [0 3]
           [0 7]]

          ```
        - GeoDataFrame with POINT geometry:

          - We can give the function a GeoDataFrame with POINT geometry to array the coordinates of the points that locate within the dataset domain.

          ```python
          >>> from shapely.geometry import Point
          >>> from geopandas import GeoDataFrame
          >>> points = GeoDataFrame({"geometry": [Point(0.025, 0.025), Point(0.175, 0.225), Point(0.375, 0.125)]})
          >>> indices = dataset.map_to_array_coordinates(points)
          >>> print(indices)
          [[0 0]
           [0 3]
           [0 7]]

          ```
    """
    if isinstance(points, GeoDataFrame):
        points = FeatureCollection(points)
    elif isinstance(points, DataFrame):
        if all(elem not in points.columns for elem in ["x", "y"]):
            raise ValueError(
                "If the input is a DataFrame, it should have two columns x, and y"
            )
    else:
        if not isinstance(points, FeatureCollection):
            raise TypeError(
                "please check points input it should be GeoDataFrame/DataFrame/FeatureCollection - given"
                f" {type(points)}"
            )
    if not isinstance(points, DataFrame):
        # get the x, y coordinates.
        points.xy()
        points = points.feature.loc[:, ["x", "y"]].values
    else:
        points = points.loc[:, ["x", "y"]].values

    # since the first row is x-coords so the first column in the indices is the column index
    indices = locate_values(points, self.x, self.y)
    # rearrange the columns to make the row index first
    indices = indices[:, [1, 0]]
    return np.asarray(indices)

array_to_map_coordinates(rows_index, column_index, center=False) #

Convert array indices to map coordinates.

array_to_map_coordinates converts the array indices (rows, cols) to real coordinates (x, y) or (lon, lat).

Parameters:

Name Type Description Default
rows_index List[Number] | ndarray

The row indices of the cells in the raster array.

required
column_index List[Number] | ndarray

The column indices of the cells in the raster array.

required
center bool

If True, the coordinates will be the center of the cell. Default is False.

False

Returns:

Type Description
tuple[list[Number], list[Number]]

Tuple[List[Number], List[Number]]: A tuple of two lists: the x coordinates and the y coordinates of the cells.

Examples:

  • Create Dataset consisting of 1 band, 10 rows, 10 columns, at the point lon/lat (0, 0):
>>> import numpy as np
>>> import pandas as pd
>>> arr = np.random.randint(1, 3, size=(10, 10))
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
  • Now call the function with two lists of row and column indices:
>>> rows_index = [1, 3, 5]
>>> column_index = [2, 4, 6]
>>> coords = dataset.array_to_map_coordinates(rows_index, column_index)
>>> print(coords) # doctest: +SKIP
([0.1, 0.2, 0.3], [-0.05, -0.15, -0.25])
Source code in src/pyramids/dataset/ops/cell.py
def array_to_map_coordinates(
    self: Dataset,
    rows_index: list[Number] | np.ndarray,
    column_index: list[Number] | np.ndarray,
    center: bool = False,
) -> tuple[list[Number], list[Number]]:
    """Convert array indices to map coordinates.

    array_to_map_coordinates converts the array indices (rows, cols) to real coordinates (x, y) or (lon, lat).

    Args:
        rows_index (List[Number] | np.ndarray):
            The row indices of the cells in the raster array.
        column_index (List[Number] | np.ndarray):
            The column indices of the cells in the raster array.
        center (bool):
            If True, the coordinates will be the center of the cell. Default is False.

    Returns:
        Tuple[List[Number], List[Number]]:
            A tuple of two lists: the x coordinates and the y coordinates of the cells.

    Examples:
        - Create `Dataset` consisting of 1 band, 10 rows, 10 columns, at the point lon/lat (0, 0):

          ```python
          >>> import numpy as np
          >>> import pandas as pd
          >>> arr = np.random.randint(1, 3, size=(10, 10))
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)

          ```

        - Now call the function with two lists of row and column indices:

          ```python
          >>> rows_index = [1, 3, 5]
          >>> column_index = [2, 4, 6]
          >>> coords = dataset.array_to_map_coordinates(rows_index, column_index)
          >>> print(coords) # doctest: +SKIP
          ([0.1, 0.2, 0.3], [-0.05, -0.15, -0.25])

          ```
    """
    top_left_x, top_left_y = self.top_left_corner
    cell_size = self.cell_size
    if center:
        # for the top left corner of the cell
        top_left_x += cell_size / 2
        top_left_y -= cell_size / 2

    x_coord_fn = lambda x: top_left_x + x * cell_size
    y_coord_fn = lambda y: top_left_y - y * cell_size

    x_coords = list(map(x_coord_fn, column_index))
    y_coords = list(map(y_coord_fn, rows_index))

    return x_coords, y_coords