Skip to content

Spatial Operations#

Crop, align, reproject, resample, CRS handling, and coordinate conversion.

pyramids.dataset.ops.spatial.Spatial #

Source code in src/pyramids/dataset/ops/spatial.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
class Spatial:

    def _get_crs(self) -> str:
        """Get coordinate reference system."""
        return str(self.raster.GetProjection())

    def set_crs(self, crs: str | None = None, epsg: int | None = None) -> None:
        """Set the Coordinate Reference System (CRS).

            Set the Coordinate Reference System (CRS) of a

        Args:
            crs (str):
                Optional if epsg is specified. WKT string. i.e.
                    ```
                    'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84", 6378137,298.257223563,AUTHORITY["EPSG","7030"],
                    AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",
                    0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],
                    AUTHORITY["EPSG","4326"]]'
                    ```
            epsg (int):
                Optional if crs is specified. EPSG code specifying the projection.
        """
        # first change the projection of the gdal dataset object
        # second change the epsg attribute of the Dataset object
        if self.driver_type == "ascii":
            raise TypeError(
                "Setting CRS for ASCII file is not possible, you can save the files to a geotiff and then "
                "reset the crs"
            )
        else:
            if crs is not None:
                self.raster.SetProjection(crs)
                self._epsg = FeatureCollection.get_epsg_from_prj(crs)
            elif epsg is not None:
                sr = type(self)._create_sr_from_epsg(epsg)
                self.raster.SetProjection(sr.ExportToWkt())
                self._epsg = epsg
            else:
                raise ValueError("Either crs or epsg must be provided.")

    def to_crs(
        self,
        to_epsg: int,
        method: str = "nearest neighbor",
        maintain_alignment: bool = False,
    ) -> Dataset:
        """Reproject the dataset to any projection.

            (default the WGS84 web mercator projection, without resampling)

        Args:
            to_epsg (int):
                reference number to the new projection (https://epsg.io/). Default 3857 is the reference number of WGS84
                web mercator.
            method (str):
                resampling method. Default is "nearest neighbor". See https://gisgeography.com/raster-resampling/.
                Allowed values: "nearest neighbor", "cubic", "bilinear".
            maintain_alignment (bool):
                True to maintain the number of rows and columns of the raster the same after reprojection.
                Default is False.

        Returns:
            Dataset:
                A new reprojected Dataset.

        Examples:
            - Create a dataset and reproject it:

              ```python
              >>> import numpy as np
              >>> arr = np.random.rand(4, 5, 5)
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
              >>> print(dataset)
              <BLANKLINE>
                          Cell size: 0.05
                          Dimension: 5 * 5
                          EPSG: 4326
                          Number of Bands: 4
                          Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>
              >>> print(dataset.crs)
              GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
              >>> print(dataset.epsg)
              4326
              >>> reprojected_dataset = dataset.to_crs(to_epsg=3857)
              >>> print(reprojected_dataset)
              <BLANKLINE>
                          Cell size: 5565.983370404396
                          Dimension: 5 * 5
                          EPSG: 3857
                          Number of Bands: 4
                          Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>
              >>> print(reprojected_dataset.crs)
              PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Mercator_1SP"],PARAMETER["central_meridian",0],PARAMETER["scale_factor",1],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"],AUTHORITY["EPSG","3857"]]
              >>> print(reprojected_dataset.epsg)
              3857

              ```

        """
        if not isinstance(to_epsg, int):
            raise TypeError(
                "please enter correct integer number for to_epsg more information "
                f"https://epsg.io/, given {type(to_epsg)}"
            )
        if not isinstance(method, str):
            raise TypeError(
                "Please enter a correct method, for more information, see documentation "
            )
        if method not in INTERPOLATION_METHODS.keys():
            raise ValueError(
                f"The given interpolation method: {method} does not exist, existing methods are "
                f"{INTERPOLATION_METHODS.keys()}"
            )

        resampling_method: Any = INTERPOLATION_METHODS.get(method)

        if maintain_alignment:
            dst_obj = self._reproject_with_ReprojectImage(to_epsg, resampling_method)
        else:
            dst = gdal.Warp("", self.raster, dstSRS=f"EPSG:{to_epsg}", format="VRT")
            dst_obj = type(self)(dst)

        return dst_obj

    def _get_epsg(self) -> int:
        """Get the EPSG number.

            This function reads the projection of a GEOGCS file or tiff file.

        Returns:
            int: EPSG number.
        """
        prj = self._get_crs()
        epsg = FeatureCollection.get_epsg_from_prj(prj)

        return epsg

    @staticmethod
    def _create_sr_from_epsg(epsg: int) -> SpatialReference:
        """Create a spatial reference object from EPSG number.

        https://gdal.org/tutorials/osr_api_tut.html

        Args:
            epsg (int):
                EPSG number.

        Returns:
            SpatialReference:
                SpatialReference object.
        """
        sr = osr.SpatialReference()
        sr.ImportFromEPSG(int(epsg))
        return sr

    def convert_longitude(self) -> Dataset:
        """Convert Longitude.

        - convert the longitude from 0-360 to -180 - 180.
        - currently the function works correctly if the raster covers the whole world, it means that the columns
            in the rasters covers from longitude 0 to 360.

        Returns:
            Dataset:
                A new Dataset with longitude converted to -180/180.
        """
        # dst = gdal.Warp(
        #     "",
        #     self.raster,
        #     dstSRS="+proj=longlat +ellps=WGS84 +datum=WGS84 +lon_0=0 +over",
        #     format="VRT",
        # )
        lon = self.lon
        src = self.raster
        # create a copy
        drv = gdal.GetDriverByName("MEM")
        dst = drv.CreateCopy("", src, 0)
        # convert the 0 to 360 to -180 to 180
        if lon[-1] <= 180:
            raise ValueError("The raster should cover the whole globe")

        first_to_translated = np.where(lon > 180)[0][0]

        ind = list(range(first_to_translated, len(lon)))
        ind_2 = list(range(0, first_to_translated))

        for band in range(self.band_count):
            arr = self.read_array(band=band)
            arr_rearranged = arr[:, ind + ind_2]
            dst.GetRasterBand(band + 1).WriteArray(arr_rearranged)

        # correct the geotransform
        top_left_corner = self.top_left_corner
        gt = list(self.geotransform)
        if lon[-1] > 180:
            new_gt = top_left_corner[0] - 180
            gt[0] = new_gt

        dst.SetGeoTransform(gt)
        return type(self)(dst)

    def resample(
        self, cell_size: int | float, method: str = "nearest neighbor"
    ) -> Dataset:
        """resample.

        resample method reprojects a raster to any projection (default the WGS84 web mercator projection,
        without resampling). The function returns a GDAL in-memory file object.

        Args:
            cell_size (int):
                New cell size to resample the raster. If None, raster will not be resampled.
            method (str):
                Resampling method: "nearest neighbor", "cubic", or "bilinear". Default is "nearest neighbor".

        Returns:
            Dataset:
                A new resampled Dataset.

        Examples:
            - Create a Dataset with 4 bands, 10 rows, 10 columns, at lon/lat (0, 0):

              ```python
              >>> import numpy as np
              >>> arr = np.random.rand(4, 10, 10)
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
              >>> print(dataset)
              <BLANKLINE>
                          Cell size: 0.05
                          Dimension: 10 * 10
                          EPSG: 4326
                          Number of Bands: 4
                          Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                          Mask: -9999.0
                          Data type: float64
                          File: ...
              <BLANKLINE>
              >>> dataset.plot(band=0)
              (<Figure size 800x800 with 2 Axes>, <Axes: >)

              ```
              ![resample-source](./../../_images/dataset/resample-source.png)

            - Resample the raster to a new cell size of 0.1:

              ```python
              >>> new_dataset = dataset.resample(cell_size=0.1)
              >>> print(new_dataset)
              <BLANKLINE>
                          Cell size: 0.1
                          Dimension: 5 * 5
                          EPSG: 4326
                          Number of Bands: 4
                          Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>
              >>> new_dataset.plot(band=0)
              (<Figure size 800x800 with 2 Axes>, <Axes: >)

              ```
              ![resample-new](./../../_images/dataset/resample-new.png)

            - Resampling the dataset from cell_size 0.05 to 0.1 degrees reduced the number of cells to 5 in each
              dimension instead of 10.
        """
        if not isinstance(method, str):
            raise TypeError(
                "Please enter a correct method, for more information, see documentation"
            )
        if method not in INTERPOLATION_METHODS.keys():
            raise ValueError(
                f"The given interpolation method does not exist, existing methods are "
                f"{INTERPOLATION_METHODS.keys()}"
            )

        resampling_method: Any = INTERPOLATION_METHODS.get(method)

        sr_src = osr.SpatialReference(wkt=self.crs)

        ulx = self.geotransform[0]
        uly = self.geotransform[3]
        # transform the right lower corner point
        lrx = self.geotransform[0] + self.geotransform[1] * self.columns
        lry = self.geotransform[3] + self.geotransform[5] * self.rows

        # new geotransform
        new_geo = (
            self.geotransform[0],
            cell_size,
            self.geotransform[2],
            self.geotransform[3],
            self.geotransform[4],
            -1 * cell_size,
        )
        # create a new raster
        cols = int(np.round(abs(lrx - ulx) / cell_size))
        rows = int(np.round(abs(uly - lry) / cell_size))
        dtype = self.gdal_dtype[0]
        bands = self.band_count

        dst_obj = type(self)._build_dataset(
            cols, rows, bands, dtype, new_geo, sr_src.ExportToWkt(),
            self.no_data_value,
        )
        gdal.ReprojectImage(
            self.raster,
            dst_obj.raster,
            sr_src.ExportToWkt(),
            sr_src.ExportToWkt(),
            resampling_method,
        )

        return dst_obj

    def _reproject_with_ReprojectImage(
        self, to_epsg: int, method: str = "nearest neighbor"
    ) -> Dataset:
        src_gt = self.geotransform
        src_x = self.columns
        src_y = self.rows

        src_sr = osr.SpatialReference(wkt=self.crs)
        src_epsg = self.epsg

        dst_sr = self._create_sr_from_epsg(to_epsg)

        # in case the source crs is GCS and longitude is in the west hemisphere, gdal
        # reads longitude from 0 to 360 and a transformation factor wont work with values
        # greater than 180
        if src_epsg != to_epsg:
            if src_epsg == "4326" and src_gt[0] > 180:
                lng_new = src_gt[0] - 360
                # transformation factors
                tx = osr.CoordinateTransformation(src_sr, dst_sr)
                # transform the right upper corner point
                ulx, uly, ulz = tx.TransformPoint(lng_new, src_gt[3])
                # transform the right lower corner point
                lrx, lry, lrz = tx.TransformPoint(
                    lng_new + src_gt[1] * src_x, src_gt[3] + src_gt[5] * src_y
                )
            else:
                xs = [src_gt[0], src_gt[0] + src_gt[1] * src_x]
                ys = [src_gt[3], src_gt[3] + src_gt[5] * src_y]

                [uly, lry], [ulx, lrx] = FeatureCollection.reproject_points(
                    ys, xs, from_epsg=src_epsg, to_epsg=to_epsg
                )
                # old transform
                # # transform the right upper corner point
                # (ulx, uly, ulz) = tx.TransformPoint(src_gt[0], src_gt[3])
                # # transform the right lower corner point
                # (lrx, lry, lrz) = tx.TransformPoint(
                #     src_gt[0] + src_gt[1] * src_x, src_gt[3] + src_gt[5] * src_y
                # )

        else:
            ulx = src_gt[0]
            uly = src_gt[3]
            lrx = src_gt[0] + src_gt[1] * src_x
            lry = src_gt[3] + src_gt[5] * src_y

        # get the cell size in the source raster and convert it to the new crs
        # x coordinates or longitudes
        xs = [src_gt[0], src_gt[0] + src_gt[1]]
        # y coordinates or latitudes
        ys = [src_gt[3], src_gt[3]]

        if src_epsg != to_epsg:
            # transform the two-point coordinates to the new crs to calculate the new cell size
            new_ys, new_xs = FeatureCollection.reproject_points(
                ys, xs, from_epsg=src_epsg, to_epsg=to_epsg, precision=6
            )
        else:
            new_xs = xs
            # new_ys = ys

        # TODO: the function does not always maintain alignment, based on the conversion of the cell_size and the
        # pivot point
        pixel_spacing = np.abs(new_xs[0] - new_xs[1])

        # create a new raster
        cols = int(np.round(abs(lrx - ulx) / pixel_spacing))
        rows = int(np.round(abs(uly - lry) / pixel_spacing))

        dtype = self.gdal_dtype[0]
        new_geo = (
            ulx,
            pixel_spacing,
            src_gt[2],
            uly,
            src_gt[4],
            np.sign(src_gt[-1]) * pixel_spacing,
        )
        dst_obj = type(self)._build_dataset(
            cols, rows, self.band_count, dtype, new_geo, dst_sr.ExportToWkt(),
            self.no_data_value,
        )
        gdal.ReprojectImage(
            self.raster,
            dst_obj.raster,
            src_sr.ExportToWkt(),
            dst_sr.ExportToWkt(),
            method,
        )
        return dst_obj

    def fill_gaps(self, mask, src_array: np.ndarray) -> np.ndarray:
        """Fill gaps in src_array using nearest neighbors where mask indicates valid cells.

        Args:
            mask (Dataset | np.ndarray):
                Mask dataset or array used to determine valid cells.
            src_array (np.ndarray):
                Source array whose gaps will be filled.

        Returns:
            np.ndarray: The source array with gaps filled where applicable.
        """
        # align function only equate the no of rows and columns only
        # match no_data_value inserts no_data_value in src raster to all places like mask
        # still places that has no_data_value in the src raster, but it is not no_data_value in the mask
        # and now has to be filled with values
        # compare no of element that is not no_data_value in both rasters to make sure they are matched
        # if both inputs are rasters
        mask_array = mask.read_array()
        row = mask.rows
        col = mask.columns
        mask_noval = mask.no_data_value[0]

        if isinstance(mask, AbstractDataset) and isinstance(self, AbstractDataset):
            # there might be cells that are out of domain in the src but not out of domain in the mask
            # so change all the src_noval to mask_noval in the src_array
            # src_array[np.isclose(src_array, self.no_data_value[0], rtol=0.001)] = mask_noval
            # then count them (out of domain cells) in the src_array
            elem_src = src_array.size - np.count_nonzero(
                src_array[np.isclose(src_array, self.no_data_value[0], rtol=0.001)]
            )
            # count the out of domain cells in the mask
            elem_mask = mask_array.size - np.count_nonzero(
                mask_array[np.isclose(mask_array, mask_noval, rtol=0.001)]
            )

            # if not equal, then store indices of those cells that don't match
            if elem_mask > elem_src:
                rows = [
                    i
                    for i in range(row)
                    for j in range(col)
                    if np.isclose(src_array[i, j], self.no_data_value[0], rtol=0.001)
                    and not np.isclose(mask_array[i, j], mask_noval, rtol=0.001)
                ]
                cols = [
                    j
                    for i in range(row)
                    for j in range(col)
                    if np.isclose(src_array[i, j], self.no_data_value[0], rtol=0.001)
                    and not np.isclose(mask_array[i, j], mask_noval, rtol=0.001)
                ]
            # interpolate those missing cells by the nearest neighbor
            if elem_mask > elem_src:
                src_array = type(self)._nearest_neighbour(
                    src_array, self.no_data_value[0], rows, cols
                )
        return src_array

    def _crop_aligned(
        self,
        mask: gdal.Dataset | np.ndarray,
        mask_noval: int | float | None = None,
        fill_gaps: bool = False,
    ) -> Dataset:
        """Clip/crop by matching the nodata layout from mask to the source raster.

        Both rasters must have the same dimensions (rows and columns). Use MatchRasterAlignment prior to this
        method to align both rasters.

        Args:
            mask (Dataset | np.ndarray):
                Mask raster to get the location of the NoDataValue and where it is in the array.
            mask_noval (int | float, optional):
                In case the mask is a numpy array, the mask_noval has to be given.
            fill_gaps (bool):
                Whether to fill gaps after cropping. Default is False.

        Returns:
            Dataset:
                The raster with NoDataValue stored in its cells exactly the same as the source raster.
        """
        if isinstance(mask, AbstractDataset):
            mask_gt = mask.geotransform
            mask_epsg = mask.epsg
            row = mask.rows
            col = mask.columns
            mask_noval = mask.no_data_value[0]
            mask_array = mask.read_array(band=0)
        elif isinstance(mask, np.ndarray):
            if mask_noval is None:
                raise ValueError(
                    "You have to enter the value of the no_val parameter when the mask is a numpy array"
                )
            mask_array = mask.copy()
            row, col = mask.shape
        else:
            raise TypeError(
                "The second parameter 'mask' has to be either gdal.Dataset or numpy array"
                f"given - {type(mask)}"
            )

        band_count = self.band_count
        src_sref = osr.SpatialReference(wkt=self.crs)
        src_array = self.read_array()

        if not row == self.rows or not col == self.columns:
            raise ValueError(
                "Two rasters have different number of columns or rows, please resample or match both rasters"
            )

        if isinstance(mask, AbstractDataset):
            if (
                not self.top_left_corner == mask.top_left_corner
                or not self.cell_size == mask.cell_size
            ):
                raise ValueError(
                    "the location of the upper left corner of both rasters is not the same or cell size is "
                    "different please match both rasters first "
                )

            if not mask_epsg == self.epsg:
                raise ValueError(
                    "Dataset A & B are using different coordinate systems please reproject one of them to "
                    "the other raster coordinate system"
                )

        if band_count > 1:
            # check if the no data value for the src complies with the dtype of the src as sometimes the band is full
            # of values and the no_data_value is not used at all in the band, and when we try to replace any value in
            # the array with the no_data_value it will raise an error.
            no_data_value = self._check_no_data_value(self.no_data_value)

            for band in range(self.band_count):
                if mask_noval is None:
                    src_array[band, np.isnan(mask_array)] = self.no_data_value[band]
                else:
                    src_array[band, np.isclose(mask_array, mask_noval, rtol=0.001)] = (
                        no_data_value[band]
                    )
        else:
            if mask_noval is None:
                src_array[np.isnan(mask_array)] = self.no_data_value[0]
            else:
                src_array[np.isclose(mask_array, mask_noval, rtol=0.001)] = (
                    self.no_data_value[0]
                )

        if fill_gaps:
            src_array = self.fill_gaps(mask, src_array)

        dst = type(self)._create_dataset(
            col, row, band_count, self.gdal_dtype[0], driver="MEM"
        )
        # but with a lot of computations,
        # if the mask is an array and the mask_gt is not defined, use the src_gt as both the mask and the src
        # are aligned, so they have the sam gt
        try:
            # set the geotransform
            dst.SetGeoTransform(mask_gt)
            # set the projection
            dst.SetProjection(mask.crs)
        except UnboundLocalError:
            dst.SetGeoTransform(self.geotransform)
            dst.SetProjection(src_sref.ExportToWkt())

        dst_obj = type(self)(dst)
        # set the no data value
        dst_obj._set_no_data_value(self.no_data_value)
        if band_count > 1:
            for band in range(band_count):
                dst_obj.raster.GetRasterBand(band + 1).WriteArray(src_array[band, :, :])
        else:
            dst_obj.raster.GetRasterBand(1).WriteArray(src_array)
        return dst_obj

    def _check_alignment(self, mask) -> bool:
        """Check if raster is aligned with a given mask raster."""
        if not isinstance(mask, AbstractDataset):
            raise TypeError("The second parameter should be a Dataset")

        return self.rows == mask.rows and self.columns == mask.columns

    def align(
        self,
        alignment_src: Dataset,
    ) -> Dataset:
        """Align the current dataset (rows and columns) to match a given dataset.

        Copies spatial properties from alignment_src to the current raster:
            - The coordinate system
            - The number of rows and columns
            - Cell size
        Then resamples values from the current dataset using the nearest neighbor interpolation.

        Args:
            alignment_src (Dataset):
                Spatial information source raster to get the spatial information (coordinate system, number of rows and
                columns). The data values of the current dataset are resampled to this alignment.

        Returns:
            Dataset: A new aligned Dataset.

        Examples:
            - The source dataset has a `top_left_corner` at (0, 0) with a 5*5 alignment, and a 0.05 degree cell size.

              ```python
              >>> import numpy as np
              >>> arr = np.random.rand(5, 5)
              >>> top_left_corner = (0, 0)
              >>> cell_size = 0.05
              >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
              >>> print(dataset)
              <BLANKLINE>
                          Cell size: 0.05
                          Dimension: 5 * 5
                          EPSG: 4326
                          Number of Bands: 1
                          Band names: ['Band_1']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>

              ```

            - The dataset to be aligned has a top_left_corner at (-0.1, 0.1) (i.e., it has two more rows on top of the
              dataset, and two columns on the left of the dataset).

              ```python
              >>> arr = np.random.rand(10, 10)
              >>> top_left_corner = (-0.1, 0.1)
              >>> cell_size = 0.07
              >>> dataset_target = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size,
              ... epsg=4326)
              >>> print(dataset_target)
              <BLANKLINE>
                          Cell size: 0.07
                          Dimension: 10 * 10
                          EPSG: 4326
                          Number of Bands: 1
                          Band names: ['Band_1']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>

              ```

            ![align-source-target](./../../_images/dataset/align-source-target.png)

            - Now call the `align` method and use the dataset as the alignment source.

              ```python
              >>> aligned_dataset = dataset_target.align(dataset)
              >>> print(aligned_dataset)
              <BLANKLINE>
                          Cell size: 0.05
                          Dimension: 5 * 5
                          EPSG: 4326
                          Number of Bands: 1
                          Band names: ['Band_1']
                          Mask: -9999.0
                          Data type: float64
                          File:...
              <BLANKLINE>

              ```

            ![align-result](./../../_images/dataset/align-result.png)
        """
        if isinstance(alignment_src, AbstractDataset):
            src = alignment_src
        else:
            raise TypeError(
                "First parameter should be a Dataset read using Dataset.openRaster or a path to the raster, "
                f"given {type(alignment_src)}"
            )

        # reproject the raster to match the projection of alignment_src
        reprojected_raster_b: Dataset = self
        if self.epsg != src.epsg:
            reprojected_raster_b = self.to_crs(src.epsg)  # type: ignore[assignment]
        dst_obj = type(self)._build_dataset(
            src.columns, src.rows, self.band_count, src.gdal_dtype[0],
            src.geotransform, src.crs, self.no_data_value,
        )
        method = gdal.GRA_NearestNeighbour
        # resample the reprojected_RasterB
        gdal.ReprojectImage(
            reprojected_raster_b.raster,
            dst_obj.raster,
            src.crs,
            src.crs,
            method,
        )

        return dst_obj

    def _crop_with_raster(
        self,
        mask: gdal.Dataset | str,
    ) -> Dataset:
        """Crop this raster using another raster as a mask.

        Args:
            mask (Dataset | str):
                The raster you want to use as a mask to crop this raster; it can be a path or a GDAL Dataset.

        Returns:
            Dataset:
                The cropped raster.
        """
        # get information from the mask raster
        if isinstance(mask, (str, Path)):
            mask = type(self).read_file(mask)
        elif isinstance(mask, AbstractDataset):
            mask = mask
        else:
            raise TypeError(
                "The second parameter has to be either path to the mask raster or a gdal.Dataset object"
            )
        if not self._check_alignment(mask):
            # first align the mask with the src raster
            mask = mask.align(self)
        # crop the src raster with the aligned mask
        dst_obj = self._crop_aligned(mask)

        dst_obj = type(self)._correct_wrap_cutline_error(dst_obj)
        return dst_obj

    def _crop_with_polygon_warp(
        self, feature: FeatureCollection | GeoDataFrame, touch: bool = True
    ) -> Dataset:
        """Crop raster with polygon.

            - Do not convert the polygon into a raster but rather use it directly to crop the raster using the
            gdal.warp function.

        Args:
            feature (FeatureCollection | GeoDataFrame):
                Vector mask.
            touch (bool):
                Include cells that touch the polygon, not only those entirely inside the polygon mask. Defaults to True.

        Returns:
            Dataset:
                Cropped dataset.
        """
        if isinstance(feature, GeoDataFrame):
            feature = FeatureCollection(feature)
        else:
            if not isinstance(feature, FeatureCollection):
                raise TypeError(
                    f"The function takes only a FeatureCollection or GeoDataFrame, given {type(feature)}"
                )

        feature = feature._gdf_to_ds()
        warp_options = gdal.WarpOptions(
            format="VRT",
            # outputBounds=feature.total_bounds,
            cropToCutline=not touch,
            cutlineDSName=feature.file_name,
            # cutlineLayer=feature.layer_names[0],
            multithread=True,
        )
        dst = gdal.Warp("", self.raster, options=warp_options)
        # Use the base Dataset class (not a subclass like NetCDF) for intermediate GDAL warp results
        # because _correct_wrap_cutline_error calls create_from_array which has different behavior in
        # subclasses.
        base_cls = next(
            c for c in type(self).__mro__
            if AbstractDataset in getattr(c, "__bases__", ())
        )
        dst_obj = base_cls(dst)

        if touch:
            dst_obj = base_cls._correct_wrap_cutline_error(dst_obj)

        return dst_obj

    @staticmethod
    def _correct_wrap_cutline_error(src: Dataset) -> Dataset:
        """Correct wrap cutline error.

        https://github.com/serapeum-org/pyramids/issues/74
        """
        big_array = src.read_array()
        value_to_remove = src.no_data_value[0]
        """Remove rows and columns that are all filled with a certain value from a 2D array."""
        # Find rows and columns to be removed
        if big_array.ndim == 2:
            rows_to_remove = np.all(big_array == value_to_remove, axis=1)
            cols_to_remove = np.all(big_array == value_to_remove, axis=0)
            # Use boolean indexing to remove rows and columns
            small_array = big_array[~rows_to_remove][:, ~cols_to_remove]
        elif big_array.ndim == 3:
            rows_to_remove = np.all(big_array == value_to_remove, axis=(0, 2))
            cols_to_remove = np.all(big_array == value_to_remove, axis=(0, 1))
            # Use boolean indexing to remove rows and columns
            # first remove the rows then the columns
            small_array = big_array[:, ~rows_to_remove, :]
            small_array = small_array[:, :, ~cols_to_remove]
            n_rows = np.count_nonzero(~rows_to_remove)
            n_cols = np.count_nonzero(~cols_to_remove)
            small_array = small_array.reshape((src.band_count, n_rows, n_cols))
        else:
            raise ValueError("Array must be 2D or 3D")

        x_ind = np.where(~rows_to_remove)[0][0]
        y_ind = np.where(~cols_to_remove)[0][0]
        new_x = src.x[y_ind] - src.cell_size / 2
        new_y = src.y[x_ind] + src.cell_size / 2
        new_gt = (new_x, src.cell_size, 0, new_y, 0, -src.cell_size)
        new_src = src.create_from_array(
            small_array, geo=new_gt, epsg=src.epsg, no_data_value=src.no_data_value
        )
        return new_src

    def crop(
        self,
        mask: GeoDataFrame | FeatureCollection,
        touch: bool = True,
    ) -> Dataset:
        """Crop dataset using dataset/feature collection.

            Crop/Clip the Dataset object using a polygon/raster.

        Args:
            mask (GeoDataFrame | Dataset):
                GeoDataFrame with a polygon geometry, or a Dataset object.
            touch (bool):
                Include the cells that touch the polygon, not only those that lie entirely inside the polygon mask.
                Default is True.

        Returns:
            Dataset:
                A new cropped Dataset.

        Hint:
            - If the mask is a dataset with multi-bands, the `crop` method will use the first band as the mask.

        Examples:
            - Crop the raster using a polygon mask.

              - The polygon covers 4 cells in the 3rd and 4th rows and 3rd and 4th column `arr[2:4, 2:4]`, so the result
                dataset will have the same number of bands `4`, 2 rows and 2 columns.
              - First, create the dataset to have 4 bands, 10 rows and 10 columns; the dataset has a cell size of 0.05
                degree, the top left corner of the dataset is (0, 0).

              ```python
              >>> import numpy as np
              >>> import geopandas as gpd
              >>> from shapely.geometry import Polygon
              >>> arr = np.random.rand(4, 10, 10)
              >>> cell_size = 0.05
              >>> top_left_corner = (0, 0)
              >>> dataset = Dataset.create_from_array(
              ...         arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326
              ... )

              ```
            - Second, create the polygon using shapely polygon, and use the xmin, ymin, xmax, ymax = [0.1, -0.2, 0.2 -0.1]
                to cover the 4 cells.

                ```python
                >>> mask = gpd.GeoDataFrame(geometry=[Polygon([(0.1, -0.1), (0.1, -0.2), (0.2, -0.2), (0.2, -0.1)])], crs=4326)

                ```
            - Pass the `geodataframe` to the crop method using the `mask` parameter.

              ```python
              >>> cropped_dataset = dataset.crop(mask=mask)

              ```
            - Check the cropped dataset:

              ```python
              >>> print(cropped_dataset.shape)
              (4, 2, 2)
              >>> print(cropped_dataset.geotransform)
              (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
              >>> print(cropped_dataset.read_array(band=0))# doctest: +SKIP
              [[0.00921161 0.90841171]
               [0.355636   0.18650262]]
              >>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
              [[0.00921161 0.90841171]
               [0.355636   0.18650262]]

              ```
            - Crop a raster using another raster mask:

              - Create a mask dataset with the same extent of the polygon we used in the previous example.

              ```python
              >>> geotransform = (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
              >>> mask_dataset = Dataset.create_from_array(np.random.rand(2, 2), geo=geotransform, epsg=4326)

              ```
            - Then use the mask dataset to crop the dataset.

              ```python
              >>> cropped_dataset_2 = dataset.crop(mask=mask_dataset)
              >>> print(cropped_dataset_2.shape)
              (4, 2, 2)

              ```
            - Check the cropped dataset:

              ```python
              >>> print(cropped_dataset_2.geotransform)
              (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
              >>> print(cropped_dataset_2.read_array(band=0))# doctest: +SKIP
              [[0.00921161 0.90841171]
               [0.355636   0.18650262]]
              >>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
               [[0.00921161 0.90841171]
               [0.355636   0.18650262]]

              ```

        """
        if isinstance(mask, GeoDataFrame):
            dst = self._crop_with_polygon_warp(mask, touch=touch)
        elif isinstance(mask, AbstractDataset):
            dst = self._crop_with_raster(mask)
        else:
            raise TypeError(
                "The second parameter: mask could be either GeoDataFrame or Dataset object"
            )

        return dst

set_crs(crs=None, epsg=None) #

Set the Coordinate Reference System (CRS).

Set the Coordinate Reference System (CRS) of a

Parameters:

Name Type Description Default
crs str

Optional if epsg is specified. WKT string. i.e.

'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84", 6378137,298.257223563,AUTHORITY["EPSG","7030"],
AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",
0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],
AUTHORITY["EPSG","4326"]]'

None
epsg int

Optional if crs is specified. EPSG code specifying the projection.

None
Source code in src/pyramids/dataset/ops/spatial.py
def set_crs(self, crs: str | None = None, epsg: int | None = None) -> None:
    """Set the Coordinate Reference System (CRS).

        Set the Coordinate Reference System (CRS) of a

    Args:
        crs (str):
            Optional if epsg is specified. WKT string. i.e.
                ```
                'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84", 6378137,298.257223563,AUTHORITY["EPSG","7030"],
                AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",
                0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],
                AUTHORITY["EPSG","4326"]]'
                ```
        epsg (int):
            Optional if crs is specified. EPSG code specifying the projection.
    """
    # first change the projection of the gdal dataset object
    # second change the epsg attribute of the Dataset object
    if self.driver_type == "ascii":
        raise TypeError(
            "Setting CRS for ASCII file is not possible, you can save the files to a geotiff and then "
            "reset the crs"
        )
    else:
        if crs is not None:
            self.raster.SetProjection(crs)
            self._epsg = FeatureCollection.get_epsg_from_prj(crs)
        elif epsg is not None:
            sr = type(self)._create_sr_from_epsg(epsg)
            self.raster.SetProjection(sr.ExportToWkt())
            self._epsg = epsg
        else:
            raise ValueError("Either crs or epsg must be provided.")

to_crs(to_epsg, method='nearest neighbor', maintain_alignment=False) #

Reproject the dataset to any projection.

(default the WGS84 web mercator projection, without resampling)

Parameters:

Name Type Description Default
to_epsg int

reference number to the new projection (https://epsg.io/). Default 3857 is the reference number of WGS84 web mercator.

required
method str

resampling method. Default is "nearest neighbor". See https://gisgeography.com/raster-resampling/. Allowed values: "nearest neighbor", "cubic", "bilinear".

'nearest neighbor'
maintain_alignment bool

True to maintain the number of rows and columns of the raster the same after reprojection. Default is False.

False

Returns:

Name Type Description
Dataset Dataset

A new reprojected Dataset.

Examples:

  • Create a dataset and reproject it:
>>> import numpy as np
>>> arr = np.random.rand(4, 5, 5)
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
>>> print(dataset)
<BLANKLINE>
            Cell size: 0.05
            Dimension: 5 * 5
            EPSG: 4326
            Number of Bands: 4
            Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>
>>> print(dataset.crs)
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
>>> print(dataset.epsg)
4326
>>> reprojected_dataset = dataset.to_crs(to_epsg=3857)
>>> print(reprojected_dataset)
<BLANKLINE>
            Cell size: 5565.983370404396
            Dimension: 5 * 5
            EPSG: 3857
            Number of Bands: 4
            Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>
>>> print(reprojected_dataset.crs)
PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Mercator_1SP"],PARAMETER["central_meridian",0],PARAMETER["scale_factor",1],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"],AUTHORITY["EPSG","3857"]]
>>> print(reprojected_dataset.epsg)
3857
Source code in src/pyramids/dataset/ops/spatial.py
def to_crs(
    self,
    to_epsg: int,
    method: str = "nearest neighbor",
    maintain_alignment: bool = False,
) -> Dataset:
    """Reproject the dataset to any projection.

        (default the WGS84 web mercator projection, without resampling)

    Args:
        to_epsg (int):
            reference number to the new projection (https://epsg.io/). Default 3857 is the reference number of WGS84
            web mercator.
        method (str):
            resampling method. Default is "nearest neighbor". See https://gisgeography.com/raster-resampling/.
            Allowed values: "nearest neighbor", "cubic", "bilinear".
        maintain_alignment (bool):
            True to maintain the number of rows and columns of the raster the same after reprojection.
            Default is False.

    Returns:
        Dataset:
            A new reprojected Dataset.

    Examples:
        - Create a dataset and reproject it:

          ```python
          >>> import numpy as np
          >>> arr = np.random.rand(4, 5, 5)
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
          >>> print(dataset)
          <BLANKLINE>
                      Cell size: 0.05
                      Dimension: 5 * 5
                      EPSG: 4326
                      Number of Bands: 4
                      Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>
          >>> print(dataset.crs)
          GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
          >>> print(dataset.epsg)
          4326
          >>> reprojected_dataset = dataset.to_crs(to_epsg=3857)
          >>> print(reprojected_dataset)
          <BLANKLINE>
                      Cell size: 5565.983370404396
                      Dimension: 5 * 5
                      EPSG: 3857
                      Number of Bands: 4
                      Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>
          >>> print(reprojected_dataset.crs)
          PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Mercator_1SP"],PARAMETER["central_meridian",0],PARAMETER["scale_factor",1],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs"],AUTHORITY["EPSG","3857"]]
          >>> print(reprojected_dataset.epsg)
          3857

          ```

    """
    if not isinstance(to_epsg, int):
        raise TypeError(
            "please enter correct integer number for to_epsg more information "
            f"https://epsg.io/, given {type(to_epsg)}"
        )
    if not isinstance(method, str):
        raise TypeError(
            "Please enter a correct method, for more information, see documentation "
        )
    if method not in INTERPOLATION_METHODS.keys():
        raise ValueError(
            f"The given interpolation method: {method} does not exist, existing methods are "
            f"{INTERPOLATION_METHODS.keys()}"
        )

    resampling_method: Any = INTERPOLATION_METHODS.get(method)

    if maintain_alignment:
        dst_obj = self._reproject_with_ReprojectImage(to_epsg, resampling_method)
    else:
        dst = gdal.Warp("", self.raster, dstSRS=f"EPSG:{to_epsg}", format="VRT")
        dst_obj = type(self)(dst)

    return dst_obj

convert_longitude() #

Convert Longitude.

  • convert the longitude from 0-360 to -180 - 180.
  • currently the function works correctly if the raster covers the whole world, it means that the columns in the rasters covers from longitude 0 to 360.

Returns:

Name Type Description
Dataset Dataset

A new Dataset with longitude converted to -180/180.

Source code in src/pyramids/dataset/ops/spatial.py
def convert_longitude(self) -> Dataset:
    """Convert Longitude.

    - convert the longitude from 0-360 to -180 - 180.
    - currently the function works correctly if the raster covers the whole world, it means that the columns
        in the rasters covers from longitude 0 to 360.

    Returns:
        Dataset:
            A new Dataset with longitude converted to -180/180.
    """
    # dst = gdal.Warp(
    #     "",
    #     self.raster,
    #     dstSRS="+proj=longlat +ellps=WGS84 +datum=WGS84 +lon_0=0 +over",
    #     format="VRT",
    # )
    lon = self.lon
    src = self.raster
    # create a copy
    drv = gdal.GetDriverByName("MEM")
    dst = drv.CreateCopy("", src, 0)
    # convert the 0 to 360 to -180 to 180
    if lon[-1] <= 180:
        raise ValueError("The raster should cover the whole globe")

    first_to_translated = np.where(lon > 180)[0][0]

    ind = list(range(first_to_translated, len(lon)))
    ind_2 = list(range(0, first_to_translated))

    for band in range(self.band_count):
        arr = self.read_array(band=band)
        arr_rearranged = arr[:, ind + ind_2]
        dst.GetRasterBand(band + 1).WriteArray(arr_rearranged)

    # correct the geotransform
    top_left_corner = self.top_left_corner
    gt = list(self.geotransform)
    if lon[-1] > 180:
        new_gt = top_left_corner[0] - 180
        gt[0] = new_gt

    dst.SetGeoTransform(gt)
    return type(self)(dst)

resample(cell_size, method='nearest neighbor') #

resample.

resample method reprojects a raster to any projection (default the WGS84 web mercator projection, without resampling). The function returns a GDAL in-memory file object.

Parameters:

Name Type Description Default
cell_size int

New cell size to resample the raster. If None, raster will not be resampled.

required
method str

Resampling method: "nearest neighbor", "cubic", or "bilinear". Default is "nearest neighbor".

'nearest neighbor'

Returns:

Name Type Description
Dataset Dataset

A new resampled Dataset.

Examples:

  • Create a Dataset with 4 bands, 10 rows, 10 columns, at lon/lat (0, 0):

>>> import numpy as np
>>> arr = np.random.rand(4, 10, 10)
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
>>> print(dataset)
<BLANKLINE>
            Cell size: 0.05
            Dimension: 10 * 10
            EPSG: 4326
            Number of Bands: 4
            Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
            Mask: -9999.0
            Data type: float64
            File: ...
<BLANKLINE>
>>> dataset.plot(band=0)
(<Figure size 800x800 with 2 Axes>, <Axes: >)
resample-source

  • Resample the raster to a new cell size of 0.1:

>>> new_dataset = dataset.resample(cell_size=0.1)
>>> print(new_dataset)
<BLANKLINE>
            Cell size: 0.1
            Dimension: 5 * 5
            EPSG: 4326
            Number of Bands: 4
            Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>
>>> new_dataset.plot(band=0)
(<Figure size 800x800 with 2 Axes>, <Axes: >)
resample-new

  • Resampling the dataset from cell_size 0.05 to 0.1 degrees reduced the number of cells to 5 in each dimension instead of 10.
Source code in src/pyramids/dataset/ops/spatial.py
def resample(
    self, cell_size: int | float, method: str = "nearest neighbor"
) -> Dataset:
    """resample.

    resample method reprojects a raster to any projection (default the WGS84 web mercator projection,
    without resampling). The function returns a GDAL in-memory file object.

    Args:
        cell_size (int):
            New cell size to resample the raster. If None, raster will not be resampled.
        method (str):
            Resampling method: "nearest neighbor", "cubic", or "bilinear". Default is "nearest neighbor".

    Returns:
        Dataset:
            A new resampled Dataset.

    Examples:
        - Create a Dataset with 4 bands, 10 rows, 10 columns, at lon/lat (0, 0):

          ```python
          >>> import numpy as np
          >>> arr = np.random.rand(4, 10, 10)
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
          >>> print(dataset)
          <BLANKLINE>
                      Cell size: 0.05
                      Dimension: 10 * 10
                      EPSG: 4326
                      Number of Bands: 4
                      Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                      Mask: -9999.0
                      Data type: float64
                      File: ...
          <BLANKLINE>
          >>> dataset.plot(band=0)
          (<Figure size 800x800 with 2 Axes>, <Axes: >)

          ```
          ![resample-source](./../../_images/dataset/resample-source.png)

        - Resample the raster to a new cell size of 0.1:

          ```python
          >>> new_dataset = dataset.resample(cell_size=0.1)
          >>> print(new_dataset)
          <BLANKLINE>
                      Cell size: 0.1
                      Dimension: 5 * 5
                      EPSG: 4326
                      Number of Bands: 4
                      Band names: ['Band_1', 'Band_2', 'Band_3', 'Band_4']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>
          >>> new_dataset.plot(band=0)
          (<Figure size 800x800 with 2 Axes>, <Axes: >)

          ```
          ![resample-new](./../../_images/dataset/resample-new.png)

        - Resampling the dataset from cell_size 0.05 to 0.1 degrees reduced the number of cells to 5 in each
          dimension instead of 10.
    """
    if not isinstance(method, str):
        raise TypeError(
            "Please enter a correct method, for more information, see documentation"
        )
    if method not in INTERPOLATION_METHODS.keys():
        raise ValueError(
            f"The given interpolation method does not exist, existing methods are "
            f"{INTERPOLATION_METHODS.keys()}"
        )

    resampling_method: Any = INTERPOLATION_METHODS.get(method)

    sr_src = osr.SpatialReference(wkt=self.crs)

    ulx = self.geotransform[0]
    uly = self.geotransform[3]
    # transform the right lower corner point
    lrx = self.geotransform[0] + self.geotransform[1] * self.columns
    lry = self.geotransform[3] + self.geotransform[5] * self.rows

    # new geotransform
    new_geo = (
        self.geotransform[0],
        cell_size,
        self.geotransform[2],
        self.geotransform[3],
        self.geotransform[4],
        -1 * cell_size,
    )
    # create a new raster
    cols = int(np.round(abs(lrx - ulx) / cell_size))
    rows = int(np.round(abs(uly - lry) / cell_size))
    dtype = self.gdal_dtype[0]
    bands = self.band_count

    dst_obj = type(self)._build_dataset(
        cols, rows, bands, dtype, new_geo, sr_src.ExportToWkt(),
        self.no_data_value,
    )
    gdal.ReprojectImage(
        self.raster,
        dst_obj.raster,
        sr_src.ExportToWkt(),
        sr_src.ExportToWkt(),
        resampling_method,
    )

    return dst_obj

fill_gaps(mask, src_array) #

Fill gaps in src_array using nearest neighbors where mask indicates valid cells.

Parameters:

Name Type Description Default
mask Dataset | ndarray

Mask dataset or array used to determine valid cells.

required
src_array ndarray

Source array whose gaps will be filled.

required

Returns:

Type Description
ndarray

np.ndarray: The source array with gaps filled where applicable.

Source code in src/pyramids/dataset/ops/spatial.py
def fill_gaps(self, mask, src_array: np.ndarray) -> np.ndarray:
    """Fill gaps in src_array using nearest neighbors where mask indicates valid cells.

    Args:
        mask (Dataset | np.ndarray):
            Mask dataset or array used to determine valid cells.
        src_array (np.ndarray):
            Source array whose gaps will be filled.

    Returns:
        np.ndarray: The source array with gaps filled where applicable.
    """
    # align function only equate the no of rows and columns only
    # match no_data_value inserts no_data_value in src raster to all places like mask
    # still places that has no_data_value in the src raster, but it is not no_data_value in the mask
    # and now has to be filled with values
    # compare no of element that is not no_data_value in both rasters to make sure they are matched
    # if both inputs are rasters
    mask_array = mask.read_array()
    row = mask.rows
    col = mask.columns
    mask_noval = mask.no_data_value[0]

    if isinstance(mask, AbstractDataset) and isinstance(self, AbstractDataset):
        # there might be cells that are out of domain in the src but not out of domain in the mask
        # so change all the src_noval to mask_noval in the src_array
        # src_array[np.isclose(src_array, self.no_data_value[0], rtol=0.001)] = mask_noval
        # then count them (out of domain cells) in the src_array
        elem_src = src_array.size - np.count_nonzero(
            src_array[np.isclose(src_array, self.no_data_value[0], rtol=0.001)]
        )
        # count the out of domain cells in the mask
        elem_mask = mask_array.size - np.count_nonzero(
            mask_array[np.isclose(mask_array, mask_noval, rtol=0.001)]
        )

        # if not equal, then store indices of those cells that don't match
        if elem_mask > elem_src:
            rows = [
                i
                for i in range(row)
                for j in range(col)
                if np.isclose(src_array[i, j], self.no_data_value[0], rtol=0.001)
                and not np.isclose(mask_array[i, j], mask_noval, rtol=0.001)
            ]
            cols = [
                j
                for i in range(row)
                for j in range(col)
                if np.isclose(src_array[i, j], self.no_data_value[0], rtol=0.001)
                and not np.isclose(mask_array[i, j], mask_noval, rtol=0.001)
            ]
        # interpolate those missing cells by the nearest neighbor
        if elem_mask > elem_src:
            src_array = type(self)._nearest_neighbour(
                src_array, self.no_data_value[0], rows, cols
            )
    return src_array

align(alignment_src) #

Align the current dataset (rows and columns) to match a given dataset.

Copies spatial properties from alignment_src to the current raster
  • The coordinate system
  • The number of rows and columns
  • Cell size

Then resamples values from the current dataset using the nearest neighbor interpolation.

Parameters:

Name Type Description Default
alignment_src Dataset

Spatial information source raster to get the spatial information (coordinate system, number of rows and columns). The data values of the current dataset are resampled to this alignment.

required

Returns:

Name Type Description
Dataset Dataset

A new aligned Dataset.

Examples:

  • The source dataset has a top_left_corner at (0, 0) with a 5*5 alignment, and a 0.05 degree cell size.
>>> import numpy as np
>>> arr = np.random.rand(5, 5)
>>> top_left_corner = (0, 0)
>>> cell_size = 0.05
>>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
>>> print(dataset)
<BLANKLINE>
            Cell size: 0.05
            Dimension: 5 * 5
            EPSG: 4326
            Number of Bands: 1
            Band names: ['Band_1']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>
  • The dataset to be aligned has a top_left_corner at (-0.1, 0.1) (i.e., it has two more rows on top of the dataset, and two columns on the left of the dataset).
>>> arr = np.random.rand(10, 10)
>>> top_left_corner = (-0.1, 0.1)
>>> cell_size = 0.07
>>> dataset_target = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size,
... epsg=4326)
>>> print(dataset_target)
<BLANKLINE>
            Cell size: 0.07
            Dimension: 10 * 10
            EPSG: 4326
            Number of Bands: 1
            Band names: ['Band_1']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>

align-source-target

  • Now call the align method and use the dataset as the alignment source.
>>> aligned_dataset = dataset_target.align(dataset)
>>> print(aligned_dataset)
<BLANKLINE>
            Cell size: 0.05
            Dimension: 5 * 5
            EPSG: 4326
            Number of Bands: 1
            Band names: ['Band_1']
            Mask: -9999.0
            Data type: float64
            File:...
<BLANKLINE>

align-result

Source code in src/pyramids/dataset/ops/spatial.py
def align(
    self,
    alignment_src: Dataset,
) -> Dataset:
    """Align the current dataset (rows and columns) to match a given dataset.

    Copies spatial properties from alignment_src to the current raster:
        - The coordinate system
        - The number of rows and columns
        - Cell size
    Then resamples values from the current dataset using the nearest neighbor interpolation.

    Args:
        alignment_src (Dataset):
            Spatial information source raster to get the spatial information (coordinate system, number of rows and
            columns). The data values of the current dataset are resampled to this alignment.

    Returns:
        Dataset: A new aligned Dataset.

    Examples:
        - The source dataset has a `top_left_corner` at (0, 0) with a 5*5 alignment, and a 0.05 degree cell size.

          ```python
          >>> import numpy as np
          >>> arr = np.random.rand(5, 5)
          >>> top_left_corner = (0, 0)
          >>> cell_size = 0.05
          >>> dataset = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326)
          >>> print(dataset)
          <BLANKLINE>
                      Cell size: 0.05
                      Dimension: 5 * 5
                      EPSG: 4326
                      Number of Bands: 1
                      Band names: ['Band_1']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>

          ```

        - The dataset to be aligned has a top_left_corner at (-0.1, 0.1) (i.e., it has two more rows on top of the
          dataset, and two columns on the left of the dataset).

          ```python
          >>> arr = np.random.rand(10, 10)
          >>> top_left_corner = (-0.1, 0.1)
          >>> cell_size = 0.07
          >>> dataset_target = Dataset.create_from_array(arr, top_left_corner=top_left_corner, cell_size=cell_size,
          ... epsg=4326)
          >>> print(dataset_target)
          <BLANKLINE>
                      Cell size: 0.07
                      Dimension: 10 * 10
                      EPSG: 4326
                      Number of Bands: 1
                      Band names: ['Band_1']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>

          ```

        ![align-source-target](./../../_images/dataset/align-source-target.png)

        - Now call the `align` method and use the dataset as the alignment source.

          ```python
          >>> aligned_dataset = dataset_target.align(dataset)
          >>> print(aligned_dataset)
          <BLANKLINE>
                      Cell size: 0.05
                      Dimension: 5 * 5
                      EPSG: 4326
                      Number of Bands: 1
                      Band names: ['Band_1']
                      Mask: -9999.0
                      Data type: float64
                      File:...
          <BLANKLINE>

          ```

        ![align-result](./../../_images/dataset/align-result.png)
    """
    if isinstance(alignment_src, AbstractDataset):
        src = alignment_src
    else:
        raise TypeError(
            "First parameter should be a Dataset read using Dataset.openRaster or a path to the raster, "
            f"given {type(alignment_src)}"
        )

    # reproject the raster to match the projection of alignment_src
    reprojected_raster_b: Dataset = self
    if self.epsg != src.epsg:
        reprojected_raster_b = self.to_crs(src.epsg)  # type: ignore[assignment]
    dst_obj = type(self)._build_dataset(
        src.columns, src.rows, self.band_count, src.gdal_dtype[0],
        src.geotransform, src.crs, self.no_data_value,
    )
    method = gdal.GRA_NearestNeighbour
    # resample the reprojected_RasterB
    gdal.ReprojectImage(
        reprojected_raster_b.raster,
        dst_obj.raster,
        src.crs,
        src.crs,
        method,
    )

    return dst_obj

crop(mask, touch=True) #

Crop dataset using dataset/feature collection.

Crop/Clip the Dataset object using a polygon/raster.

Parameters:

Name Type Description Default
mask GeoDataFrame | Dataset

GeoDataFrame with a polygon geometry, or a Dataset object.

required
touch bool

Include the cells that touch the polygon, not only those that lie entirely inside the polygon mask. Default is True.

True

Returns:

Name Type Description
Dataset Dataset

A new cropped Dataset.

Hint
  • If the mask is a dataset with multi-bands, the crop method will use the first band as the mask.

Examples:

  • Crop the raster using a polygon mask.

  • The polygon covers 4 cells in the 3rd and 4th rows and 3rd and 4th column arr[2:4, 2:4], so the result dataset will have the same number of bands 4, 2 rows and 2 columns.

  • First, create the dataset to have 4 bands, 10 rows and 10 columns; the dataset has a cell size of 0.05 degree, the top left corner of the dataset is (0, 0).

>>> import numpy as np
>>> import geopandas as gpd
>>> from shapely.geometry import Polygon
>>> arr = np.random.rand(4, 10, 10)
>>> cell_size = 0.05
>>> top_left_corner = (0, 0)
>>> dataset = Dataset.create_from_array(
...         arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326
... )
- Second, create the polygon using shapely polygon, and use the xmin, ymin, xmax, ymax = [0.1, -0.2, 0.2 -0.1] to cover the 4 cells.

```python
>>> mask = gpd.GeoDataFrame(geometry=[Polygon([(0.1, -0.1), (0.1, -0.2), (0.2, -0.2), (0.2, -0.1)])], crs=4326)

```
  • Pass the geodataframe to the crop method using the mask parameter.

>>> cropped_dataset = dataset.crop(mask=mask)
- Check the cropped dataset:

>>> print(cropped_dataset.shape)
(4, 2, 2)
>>> print(cropped_dataset.geotransform)
(0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
>>> print(cropped_dataset.read_array(band=0))# doctest: +SKIP
[[0.00921161 0.90841171]
 [0.355636   0.18650262]]
>>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
[[0.00921161 0.90841171]
 [0.355636   0.18650262]]
- Crop a raster using another raster mask:

  • Create a mask dataset with the same extent of the polygon we used in the previous example.

>>> geotransform = (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
>>> mask_dataset = Dataset.create_from_array(np.random.rand(2, 2), geo=geotransform, epsg=4326)
- Then use the mask dataset to crop the dataset.

>>> cropped_dataset_2 = dataset.crop(mask=mask_dataset)
>>> print(cropped_dataset_2.shape)
(4, 2, 2)
- Check the cropped dataset:

>>> print(cropped_dataset_2.geotransform)
(0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
>>> print(cropped_dataset_2.read_array(band=0))# doctest: +SKIP
[[0.00921161 0.90841171]
 [0.355636   0.18650262]]
>>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
 [[0.00921161 0.90841171]
 [0.355636   0.18650262]]
Source code in src/pyramids/dataset/ops/spatial.py
def crop(
    self,
    mask: GeoDataFrame | FeatureCollection,
    touch: bool = True,
) -> Dataset:
    """Crop dataset using dataset/feature collection.

        Crop/Clip the Dataset object using a polygon/raster.

    Args:
        mask (GeoDataFrame | Dataset):
            GeoDataFrame with a polygon geometry, or a Dataset object.
        touch (bool):
            Include the cells that touch the polygon, not only those that lie entirely inside the polygon mask.
            Default is True.

    Returns:
        Dataset:
            A new cropped Dataset.

    Hint:
        - If the mask is a dataset with multi-bands, the `crop` method will use the first band as the mask.

    Examples:
        - Crop the raster using a polygon mask.

          - The polygon covers 4 cells in the 3rd and 4th rows and 3rd and 4th column `arr[2:4, 2:4]`, so the result
            dataset will have the same number of bands `4`, 2 rows and 2 columns.
          - First, create the dataset to have 4 bands, 10 rows and 10 columns; the dataset has a cell size of 0.05
            degree, the top left corner of the dataset is (0, 0).

          ```python
          >>> import numpy as np
          >>> import geopandas as gpd
          >>> from shapely.geometry import Polygon
          >>> arr = np.random.rand(4, 10, 10)
          >>> cell_size = 0.05
          >>> top_left_corner = (0, 0)
          >>> dataset = Dataset.create_from_array(
          ...         arr, top_left_corner=top_left_corner, cell_size=cell_size, epsg=4326
          ... )

          ```
        - Second, create the polygon using shapely polygon, and use the xmin, ymin, xmax, ymax = [0.1, -0.2, 0.2 -0.1]
            to cover the 4 cells.

            ```python
            >>> mask = gpd.GeoDataFrame(geometry=[Polygon([(0.1, -0.1), (0.1, -0.2), (0.2, -0.2), (0.2, -0.1)])], crs=4326)

            ```
        - Pass the `geodataframe` to the crop method using the `mask` parameter.

          ```python
          >>> cropped_dataset = dataset.crop(mask=mask)

          ```
        - Check the cropped dataset:

          ```python
          >>> print(cropped_dataset.shape)
          (4, 2, 2)
          >>> print(cropped_dataset.geotransform)
          (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
          >>> print(cropped_dataset.read_array(band=0))# doctest: +SKIP
          [[0.00921161 0.90841171]
           [0.355636   0.18650262]]
          >>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
          [[0.00921161 0.90841171]
           [0.355636   0.18650262]]

          ```
        - Crop a raster using another raster mask:

          - Create a mask dataset with the same extent of the polygon we used in the previous example.

          ```python
          >>> geotransform = (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
          >>> mask_dataset = Dataset.create_from_array(np.random.rand(2, 2), geo=geotransform, epsg=4326)

          ```
        - Then use the mask dataset to crop the dataset.

          ```python
          >>> cropped_dataset_2 = dataset.crop(mask=mask_dataset)
          >>> print(cropped_dataset_2.shape)
          (4, 2, 2)

          ```
        - Check the cropped dataset:

          ```python
          >>> print(cropped_dataset_2.geotransform)
          (0.1, 0.05, 0.0, -0.1, 0.0, -0.05)
          >>> print(cropped_dataset_2.read_array(band=0))# doctest: +SKIP
          [[0.00921161 0.90841171]
           [0.355636   0.18650262]]
          >>> print(arr[0, 2:4, 2:4])# doctest: +SKIP
           [[0.00921161 0.90841171]
           [0.355636   0.18650262]]

          ```

    """
    if isinstance(mask, GeoDataFrame):
        dst = self._crop_with_polygon_warp(mask, touch=touch)
    elif isinstance(mask, AbstractDataset):
        dst = self._crop_with_raster(mask)
    else:
        raise TypeError(
            "The second parameter: mask could be either GeoDataFrame or Dataset object"
        )

    return dst