Skip to content

Dataset Class#

  • Detailed class diagram for the Dataset class and related components:
Hold "Ctrl" to enable pan & zoom
classDiagram
    %% configuration class
    class Config {
    }

    %% abstract base class for rasters
    class RasterBase {
        +__init__(src, access)
        +__str__()
        +__repr__()
        +access()
        +raster()
        +raster(value)
        +values()
        +rows()
        +columns()
        +shape()
        +geotransform()
        +top_left_corner()
        +epsg()
        +epsg(value)
        +crs()
        +crs(value)
        +cell_size()
        +no_data_value()
        +no_data_value(value)
        +meta_data()
        +meta_data(value)
        +block_size()
        +block_size(value)
        +file_name()
        +driver_type()
        +read_file(path, read_only)
        +read_array(band, window)
        +_read_block(band, window)
        +plot(band, exclude_value, rgb, surface_reflectance, cutoff, overview, overview_index, percentile, basemap, **kwargs)
    }

    %% concrete raster class
    class Dataset {
        +__init__(src, access)
        +__str__()
        +__repr__()
        +access()
        +raster()
        +raster(value)
        +values()
        +rows()
        +columns()
        +shape()
        +geotransform()
        +epsg()
        +epsg(value)
        +crs()
        +crs(value)
        +cell_size()
        +band_count()
        +band_names()
        +band_names(name_list)
        +band_units()
        +band_units(value)
        +no_data_value()
        +no_data_value(value)
        +meta_data()
        +meta_data(value)
        +block_size()
        +block_size(value)
        +file_name()
        +driver_type()
        +scale()
        +scale(value)
        +offset()
        +offset(value)
        +read_file(path, read_only)
        +create_from_array(arr, top_left_corner, cell_size, epsg)
        +read_array(band, window)
        +_read_block(band, window)
        +_resolve_plot_band(band, rgb)
        +plot(band, exclude_value, rgb, surface_reflectance, cutoff, overview, overview_index, percentile, basemap, rgb_options, **kwargs)
        +to_file(path, driver, band)
        +to_crs(to_epsg, method, maintain_alignment)
        +resample(cell_size, method)
        +align(alignment_src)
        +crop(mask, touch)
        +merge(src, dst, no_data_value, init, n)
        +apply(ufunc)
        +overlay(classes_map, exclude_value)
    }



    %% Driver catalog
    class _utils_Catalog {
    }

    %% NetCDF
    class NetCDF {
    }

    %% error classes
    class _errors_ReadOnlyError
    class _errors_DatasetNotFoundError
    class _errors_NoDataValueError
    class _errors_AlignmentError
    class _errors_DriverNotExistError
    class _errors_FileFormatNotSupportedError
    class _errors_OptionalPackageDoesNotExist
    class _errors_FailedToSaveError
    class _errors_OutOfBoundsError

    %% inheritance relations
    RasterBase <|-- Dataset
    Dataset <|-- NetCDF

    %% composition/usage relations
    RasterBase ..> _utils_Catalog : "uses Catalog constant"
    RasterBase ..> feature_FeatureCollection : "vector ops"
    Dataset ..> feature_FeatureCollection : "vector ops"
    Dataset ..> _errors_ReadOnlyError : "raises"
    Dataset ..> _errors_AlignmentError : "raises"
    Dataset ..> _errors_NoDataValueError : "raises"
    Dataset ..> _errors_FailedToSaveError : "raises"
    Dataset ..> _errors_OutOfBoundsError : "raises"
    NetCDF ..> _errors_OptionalPackageDoesNotExist : "raises"
    Config ..> Dataset : "initialises raster settings"
Hold "Ctrl" to enable pan & zoom
classDiagram

    %% Central dataset class with its main attributes
    class Dataset {
        +raster
        +cell_size
        +values
        +shape
        +rows
        +columns
        +pivot_point
        +geotransform
        +bounds
        +bbox
        +epsg
        +crs
        +lon
        +lat
        +x
        +y
        +band_count
        +band_names
        +variables
        +no_data_value
        +meta_data
        +dtype
        +gdal_dtype
        +numpy_dtype
        +file_name
        +time_stamp
        +driver_type
    }

    %% Group: visualisation functionality
    class Visualization {
        +plot()
        +overview_count()
        +read_overview_array()
        +create_overviews()
        +recreate_overviews()
        +get_overview()
    }
    Dataset --> Visualization : «visualisation»

    %% Group: data access methods
    class AccessData {
        +read_array()
        +get_variables()
        +count_domain_cells()
        +get_band_names()
        +extract()
        +stats()
    }
    Dataset --> AccessData : «data access»

    %% Group: mathematical operations on raster values
    class MathOperations {
        +apply()
        +fill()
        +normalize()
        +cluster()
        +cluster2()
        +get_tile()
        +groupNeighbours()
    }
    Dataset --> MathOperations : «math ops»

    %% Group: spatial operations and reprojection
    class SpatialOperations {
        +to_crs()
        +resample()
        +align()
        +crop()
        +locate_points()
        +overlay()
        +extract()
        +footprint()
    }
    Dataset --> SpatialOperations : «spatial ops»

    %% Group: conversion to other data types
    class Conversion {
        +to_feature_collection()
    }
    Dataset --> Conversion : «conversion»

    %% Group: coordinate system handling
    class OSR {
        +create_sr_from_epsg()
    }
    Dataset --> OSR : «osr»

    %% Group: bounding‐box and bounds calculations
    class BBoxBounds {
        +calculate_bbox()
        +calculate_bounds()
    }
    Dataset --> BBoxBounds : «bbox/bounds»

    %% Group: CRS/EPSG getters
    class CrsEpsg {
        +get_crs()
        +get_epsg()
    }
    Dataset --> CrsEpsg : «crs/epsg»

    %% Group: latitude/longitude getters
    class LatLon {
        +get_lat_lon()
    }
    Dataset --> LatLon : «lat/lon»

    %% Group: band names management
    class BandNames {
        +get_band_names_internal()
        +set_band_names()
    }
    Dataset --> BandNames : «band names»

    %% Group: timestamp handling
    class TimeStamp {
        +get_time_variable()
        +read_variable()
    }
    Dataset --> TimeStamp : «time»

    %% Group: handling of no‐data values
    class NoDataValue {
        +set_no_data_value()
        +set_no_data_value_backend()
        +change_no_data_value_attr()
    }
    Dataset --> NoDataValue : «no data value»

    %% Group: helpers for creating GDAL datasets
    class GdalDataset {
        +create_empty_driver()
        +create_driver_from_scratch()
        +create_mem_gtiff_dataset()
    }
    Dataset --> GdalDataset : «gdal creation»

    %% Group: factory methods for creating Dataset objects
    class CreateObject {
        +from_gdal_dataset()
        +read_file()
        +create_from_array()
        +dataset_like()
        +from_bytes()
        +from_band_files()
        +from_archive()
    }
    Dataset --> CreateObject : «object factory»

Factory methods at a glance#

Method Use when
read_file(path, vsi=…, file_i=…) Open a path, URL, or archive member (zip/tar/gzip). URLs auto-rewrite to /vsi*.
from_bytes(data, suffix=".tif") The caller already holds the bytes (HTTP body, DB blob, S3 get_object payload). Backed by /vsimem/.
from_band_files(paths) Stack N single-band rasters (one file per band) into one multi-band Dataset — the natural target for the <asset>.<band>.tif layout of GEE / Landsat / Sentinel downloads.
from_archive(url_or_path, member_glob=…) Merge every matching member of a local or remote archive into one multi-band Dataset (composes from_band_files over gdal.ReadDir). For one-Dataset-per-member use DatasetCollection.from_archive.
create_from_array(arr, …) Build a Dataset from a numpy array + geobox.
dataset_like(template, arr) Stamp a new Dataset that inherits its grid / CRS from template.

See the Recipes page for runnable examples of each.

pyramids.dataset.Dataset #

Bases: RasterBase

Single-band or multi-band raster dataset (GeoTIFF, etc.).

Wraps a GDAL dataset with spatial operations (crop, reproject, align, mosaic), band-level I/O, and no-data handling. For NetCDF files use the :class:~pyramids.netcdf.NetCDF subclass; for temporal stacks of rasters use :class:~pyramids.dataset.DatasetCollection.

The seven public-API families are exposed as collaborator instances (ds.io, ds.spatial, ds.bands, ds.analysis, ds.cell, ds.vectorize, ds.cog) and via thin facade methods on the Dataset itself, so ds.crop(mask) and ds.spatial.crop(mask) are equivalent. Each collaborator holds a weakref proxy back to the Dataset; the proxy keeps GDAL handle release deterministic on Windows.

Source code in src/pyramids/dataset/dataset.py
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
class Dataset(RasterBase):
    """Single-band or multi-band raster dataset (GeoTIFF, etc.).

    Wraps a GDAL dataset with spatial operations (crop, reproject, align,
    mosaic), band-level I/O, and no-data handling. For NetCDF files use
    the :class:`~pyramids.netcdf.NetCDF` subclass; for temporal stacks of
    rasters use :class:`~pyramids.dataset.DatasetCollection`.

    The seven public-API families are exposed as collaborator instances
    (`ds.io`, `ds.spatial`, `ds.bands`, `ds.analysis`,
    `ds.cell`, `ds.vectorize`, `ds.cog`) and via thin facade
    methods on the Dataset itself, so `ds.crop(mask)` and
    `ds.spatial.crop(mask)` are equivalent. Each collaborator holds a
    weakref proxy back to the Dataset; the proxy keeps GDAL handle
    release deterministic on Windows.
    """

    def __init__(self, src: gdal.Dataset, access: str = "read_only"):
        """__init__."""
        self.logger = logging.getLogger(__name__)
        super().__init__(src, access=access)

        self._no_data_value = [
            src.GetRasterBand(i).GetNoDataValue() for i in range(1, self.band_count + 1)
        ]
        self._band_names = self._get_band_names()
        self._band_units = [
            src.GetRasterBand(i).GetUnitType() for i in range(1, self.band_count + 1)
        ]

        # Each collaborator owns the bodies of one public-API family
        # (io, spatial, bands, analysis, cell, vectorize, cog) and
        # holds a `weakref.proxy(self)` back-reference. Dataset
        # exposes facade methods that delegate to the collaborator,
        # so both `ds.crop(mask)` and `ds.spatial.crop(mask)` are
        # equivalent.
        self.io = IO(self)
        self.spatial = Spatial(self)
        self.bands = Bands(self)
        self.analysis = Analysis(self)
        self.cell = Cell(self)
        self.vectorize = Vectorize(self)
        self.cog = COG(self)

    def _update_inplace(self, src: gdal.Dataset, access: str | None = None) -> None:
        """Swap internal state from a new GDAL dataset.

        Creates a fresh instance of `type(self)` and copies its
        internal state into `self`. Using `type(self)` rather
        than the literal `Dataset` is what keeps a NetCDF instance
        a NetCDF after any in-place op (set_crs, change_no_data_value,
        apply(inplace=True), to_file). Subclasses that carry extra
        state across the swap (e.g. NetCDF's variable-subset
        attributes) override this method.

        after `__dict__.update`, the collaborators on
        `self` came from `new.__dict__` and point at the temporary
        `new` instance, not at `self`. Re-bind every collaborator's
        `_ds` to `self` so subsequent `self.spatial.crop(...)`
        calls reach back into `self`, not the discarded `new`.

        Why ``collab._ds = self_proxy`` works despite the slot:
            ``_Engine`` declares ``__slots__ = ("_ds",)`` (see
            :mod:`pyramids.dataset.engines._base`). Slots prevent
            adding *new* attributes to an instance, not reassigning
            existing ones, so direct rebinding of the single
            declared slot stays legal. The proxy is freshly built
            from ``self`` (not pulled from ``new``) so the engines
            point at the live Dataset after the swap.
        """
        new = type(self)(src, access=access or self._access)
        self.__dict__.update(new.__dict__)
        # Re-bind via `weakref.proxy` so the back-reference stays
        # weak after the dict swap (matches `_Engine.__init__`).
        # Direct slot reassignment is allowed because `_Engine`
        # declares `_ds` as its only slot — see method docstring.
        self_proxy = weakref.proxy(self)
        for attr in _COLLABORATOR_ATTRS:
            collab = self.__dict__.get(attr)
            if collab is not None:
                collab._ds = self_proxy

    def focal_mean(self, radius: int = 1, *, chunks=None, band: int = 0):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_mean`."""
        return focal_mean(self, radius=radius, chunks=chunks, band=band)

    def focal_std(self, radius: int = 1, *, chunks=None, band: int = 0):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_std`."""
        return focal_std(self, radius=radius, chunks=chunks, band=band)

    def focal_apply(self, func, radius: int = 1, *, chunks=None, band: int = 0):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_apply`."""
        return focal_apply(self, func, radius=radius, chunks=chunks, band=band)

    def slope(self, *, chunks=None, band: int = 0, units: str = "degrees"):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.slope`."""
        return slope(self, chunks=chunks, band=band, units=units)

    def aspect(self, *, chunks=None, band: int = 0):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.aspect`."""
        return aspect(self, chunks=chunks, band=band)

    def hillshade(
        self,
        *,
        azimuth: float = 315.0,
        altitude: float = 45.0,
        chunks=None,
        band: int = 0,
    ):
        """Thin forwarder to :func:`pyramids.dataset.ops._focal.hillshade`."""
        return hillshade(
            self,
            azimuth=azimuth,
            altitude=altitude,
            chunks=chunks,
            band=band,
        )

    def get_cell_coords(self, *args, **kwargs):
        """Facade — delegates to :meth:`Cell.get_cell_coords <pyramids.dataset.engines.Cell.get_cell_coords>`."""
        return self.cell.get_cell_coords(*args, **kwargs)

    def get_cell_polygons(self, *args, **kwargs):
        """Facade — delegates to :meth:`Cell.get_cell_polygons <pyramids.dataset.engines.Cell.get_cell_polygons>`."""
        return self.cell.get_cell_polygons(*args, **kwargs)

    def get_cell_points(self, *args, **kwargs):
        """Facade — delegates to :meth:`Cell.get_cell_points <pyramids.dataset.engines.Cell.get_cell_points>`."""
        return self.cell.get_cell_points(*args, **kwargs)

    def map_to_array_coordinates(self, *args, **kwargs):
        """Facade — delegates to :meth:`Cell.map_to_array_coordinates <pyramids.dataset.engines.Cell.map_to_array_coordinates>`."""
        return self.cell.map_to_array_coordinates(*args, **kwargs)

    def array_to_map_coordinates(self, *args, **kwargs):
        """Facade — delegates to :meth:`Cell.array_to_map_coordinates <pyramids.dataset.engines.Cell.array_to_map_coordinates>`."""
        return self.cell.array_to_map_coordinates(*args, **kwargs)

    def to_cog(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.to_cog <pyramids.dataset.engines.COG.to_cog>`."""
        return self.cog.to_cog(*args, **kwargs)

    @property
    def is_cog(self) -> bool:
        """Facade — delegates to :attr:`COG.is_cog <pyramids.dataset.engines.COG.is_cog>`."""
        return self.cog.is_cog

    def validate_cog(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.validate_cog <pyramids.dataset.engines.COG.validate_cog>`."""
        return self.cog.validate_cog(*args, **kwargs)

    def cog_info(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.info <pyramids.dataset.engines.COG.info>`."""
        return self.cog.info(*args, **kwargs)

    def to_cog_bytes(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.to_cog_bytes <pyramids.dataset.engines.COG.to_cog_bytes>`."""
        return self.cog.to_cog_bytes(*args, **kwargs)

    def read_part(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.read_part <pyramids.dataset.engines.COG.read_part>`."""
        return self.cog.read_part(*args, **kwargs)

    def preview(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.preview <pyramids.dataset.engines.COG.preview>`."""
        return self.cog.preview(*args, **kwargs)

    def point(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.point <pyramids.dataset.engines.COG.point>`."""
        return self.cog.point(*args, **kwargs)

    def read_tile(self, *args, **kwargs):
        """Facade — delegates to :meth:`COG.read_tile <pyramids.dataset.engines.COG.read_tile>`."""
        return self.cog.read_tile(*args, **kwargs)

    def to_feature_collection(self, *args, **kwargs):
        """Facade — delegates to :meth:`Vectorize.to_feature_collection <pyramids.dataset.engines.Vectorize.to_feature_collection>`."""
        return self.vectorize.to_feature_collection(*args, **kwargs)

    def contour(self, *args, **kwargs):
        """Facade — delegates to :meth:`Vectorize.contour <pyramids.dataset.engines.Vectorize.contour>`."""
        return self.vectorize.contour(*args, **kwargs)

    def translate(self, *args, **kwargs):
        """Facade — delegates to :meth:`Vectorize.translate <pyramids.dataset.engines.Vectorize.translate>`."""
        return self.vectorize.translate(*args, **kwargs)

    def cluster(self, *args, **kwargs):
        """Facade — delegates to :meth:`Vectorize.cluster <pyramids.dataset.engines.Vectorize.cluster>`."""
        return self.vectorize.cluster(*args, **kwargs)

    def cluster2(self, *args, **kwargs):
        """Facade — delegates to :meth:`Vectorize.cluster2 <pyramids.dataset.engines.Vectorize.cluster2>`."""
        return self.vectorize.cluster2(*args, **kwargs)

    def stats(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.stats <pyramids.dataset.engines.Analysis.stats>`."""
        return self.analysis.stats(*args, **kwargs)

    def count_domain_cells(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.count_domain_cells <pyramids.dataset.engines.Analysis.count_domain_cells>`."""
        return self.analysis.count_domain_cells(*args, **kwargs)

    def apply(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.apply <pyramids.dataset.engines.Analysis.apply>`.

        The collaborator returns `None` for `inplace=True` so the facade
        can substitute the actual `self` (preserving identity); the proxy
        used by the collaborator's back-reference would otherwise fail
        `result is ds` checks.
        """
        result = self.analysis.apply(*args, **kwargs)
        return self if result is None else result

    def fill(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.fill <pyramids.dataset.engines.Analysis.fill>`.

        The collaborator returns `None` for `inplace=True`; see
        :meth:`apply` for the rationale.
        """
        result = self.analysis.fill(*args, **kwargs)
        return self if result is None else result

    def extract(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.extract <pyramids.dataset.engines.Analysis.extract>`."""
        return self.analysis.extract(*args, **kwargs)

    def sample(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.sample <pyramids.dataset.engines.Analysis.sample>`."""
        return self.analysis.sample(*args, **kwargs)

    def sieve(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.sieve <pyramids.dataset.engines.Analysis.sieve>`."""
        return self.analysis.sieve(*args, **kwargs)

    def proximity(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.proximity <pyramids.dataset.engines.Analysis.proximity>`."""
        return self.analysis.proximity(*args, **kwargs)

    def overlay(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.overlay <pyramids.dataset.engines.Analysis.overlay>`."""
        return self.analysis.overlay(*args, **kwargs)

    def get_mask(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.get_mask <pyramids.dataset.engines.Analysis.get_mask>`."""
        return self.analysis.get_mask(*args, **kwargs)

    def footprint(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.footprint <pyramids.dataset.engines.Analysis.footprint>`."""
        return self.analysis.footprint(*args, **kwargs)

    def get_histogram(self, *args, **kwargs):
        """Facade — delegates to :meth:`Analysis.get_histogram <pyramids.dataset.engines.Analysis.get_histogram>`."""
        return self.analysis.get_histogram(*args, **kwargs)

    def _resolve_plot_band(
        self, band: int | None, rgb: list[int] | None
    ) -> tuple[int, list[int] | None]:
        """Resolve which band index (and effective ``rgb`` list) to render for :meth:`plot`.

        Applies the GeoTIFF / Sentinel-imagery band-resolution policy that used to live
        inside :meth:`Analysis.plot`. The rules, in order, are:

        1. If ``band`` is explicitly provided, it is returned as-is (and ``rgb`` passes
           through untouched).
        2. If the dataset has fewer than 3 bands, return ``(0, rgb)``.
        3. If the dataset has 3+ bands but **no** band has a GDAL ``ColorInterpretation``
           set (i.e. every band reports ``undefined``), return ``(0, rgb)``. This is the
           D-1 fix: ``band_count >= 3`` alone is not a sufficient signal that the data
           is an RGB image — multi-band scalar cubes (e.g. time series stacked into one
           GeoTIFF) also have ``band_count >= 3`` and must not be misinterpreted as RGB.
        4. Otherwise, treat the dataset as RGB imagery. If ``rgb`` was supplied, its
           first entry is the red band. If it was not supplied, resolve red/green/blue
           via :meth:`get_band_by_color`; fall back to ``[2, 1, 0]`` (the default
           Sentinel-2 band order) only when one or more colour channels can't be
           identified.

        Args:
            band: User-supplied band index, or ``None`` to trigger the heuristic.
            rgb: User-supplied ``[r, g, b]`` band index list, or ``None``.

        Returns:
            tuple[int, list[int] | None]: The resolved single-band index and the
                effective ``rgb`` list to forward to :meth:`Analysis.plot`. The ``rgb``
                element is ``None`` when no RGB rendering should happen.

        Examples:
            - Explicit ``band`` is always returned untouched (rule 1):

              ```python
              >>> import numpy as np
              >>> from pyramids.dataset import Dataset
              >>> arr = np.random.rand(4, 8, 8).astype(np.float32)
              >>> ds = Dataset.create_from_array(
              ...     arr, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
              ... )
              >>> ds._resolve_plot_band(band=2, rgb=None)
              (2, None)

              ```

            - Single-band raster falls back to band ``0`` (rule 2):

              ```python
              >>> single = np.random.rand(6, 6).astype(np.float32)
              >>> ds_1band = Dataset.create_from_array(
              ...     single, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
              ... )
              >>> ds_1band._resolve_plot_band(band=None, rgb=None)
              (0, None)

              ```

            - Multi-band dataset with no ``ColorInterpretation`` defaults to band ``0``
              (rule 3, the D-1 fix). ``Dataset.create_from_array`` produces a multi-band
              MEM raster whose bands all report ``undefined`` colour interpretation —
              asserted explicitly here so this doctest fails loudly if that ever changes:

              ```python
              >>> list(ds.band_color.values())
              ['undefined', 'undefined', 'undefined', 'undefined']
              >>> ds._resolve_plot_band(band=None, rgb=None)
              (0, None)

              ```

            - Explicit ``rgb`` passes through alongside an explicit ``band``:

              ```python
              >>> ds._resolve_plot_band(band=1, rgb=[2, 1, 0])
              (1, [2, 1, 0])

              ```
        """
        if band is not None:
            # Coerce to a plain ``int`` here too (the RGB branch already
            # does) so the return type matches the ``tuple[int, ...]``
            # docstring even when the caller passed e.g. a ``numpy.int64``.
            resolved_band = int(band)
            resolved_rgb = rgb
        elif self.band_count < 3:
            resolved_band = 0
            resolved_rgb = rgb
        else:
            band_colors = list(self.band_color.values())
            has_color_interp = any(c != UNDEFINED_COLOR_INTERP for c in band_colors)
            if not has_color_interp:
                resolved_band = 0
                resolved_rgb = rgb
            else:
                if rgb is None:
                    candidate: list[int | None] = [
                        self.get_band_by_color("red"),
                        self.get_band_by_color("green"),
                        self.get_band_by_color("blue"),
                    ]
                    if None in candidate:
                        resolved_rgb = [2, 1, 0]
                    else:
                        resolved_rgb = [int(v) for v in candidate]
                else:
                    resolved_rgb = rgb
                resolved_band = int(resolved_rgb[0])
        return resolved_band, resolved_rgb

    def plot(
        self,
        band: int | None = None,
        exclude_value: Any | None = None,
        rgb: list[int] | None = None,
        surface_reflectance: int | None = None,
        cutoff: list | None = None,
        overview: bool | None = False,
        overview_index: int | None = 0,
        percentile: int | None = None,
        basemap: bool | str | None = None,
        rgb_options: dict | None = None,
        **kwargs: Any,
    ):
        """Plot the values/overviews of a band.

        Facade for :meth:`Analysis.plot <pyramids.dataset.engines.Analysis.plot>`. Resolves
        the band index via :meth:`_resolve_plot_band` (GeoTIFF/Sentinel semantics) and then
        forwards the call to the generic rendering engine.

        When ``band`` is ``None`` and the dataset looks like an RGB image — i.e. it has
        at least 3 bands **and** at least one band has a GDAL ``ColorInterpretation`` set —
        the red band is auto-selected (either from ``rgb[0]`` or by resolving the colour
        tags). Otherwise the facade defaults to band ``0``. See
        :meth:`Analysis.plot` for the full kwargs surface.

        The four satellite-imagery kwargs ``rgb``, ``surface_reflectance``, ``cutoff``,
        and ``percentile`` may be grouped under a single ``rgb_options=`` dict
        (recommended) or passed loose at the top level (deprecated; emits
        :class:`DeprecationWarning`). When both forms are mixed, the values inside
        ``rgb_options`` win.

        Args:
            band (int, optional):
                Band index to render. When ``None``, the index is resolved by
                :meth:`_resolve_plot_band`.
            exclude_value (Any, optional):
                Pixel value to mask out before plotting. Default is ``None``.
            rgb (list[int], optional):
                **Deprecated**; pass via ``rgb_options={"rgb": [...]}`` instead.
                Three- or four-element list of band indices ``[r, g, b]`` (optionally
                ``[r, g, b, a]``) to render the dataset as a true-colour composite.
                Only honoured when the dataset has at least 3 bands and at least one
                band reports a colour interpretation. Default is ``None``.
            surface_reflectance (int, optional):
                **Deprecated**; pass via ``rgb_options={"surface_reflectance": ...}``.
                Surface-reflectance scale factor used to normalise satellite reflectance
                bands (typically ``10000`` for Sentinel-2). Default is ``None``.
            cutoff (list, optional):
                **Deprecated**; pass via ``rgb_options={"cutoff": ...}``.
                Per-band clip values used when rendering RGB composites. Default is
                ``None``.
            overview (bool, optional):
                If ``True``, plot the overview pyramid level instead of the full-resolution
                array. Default is ``False``.
            overview_index (int, optional):
                Index of the overview level to plot when ``overview=True``. Default is ``0``.
            percentile (int, optional):
                **Deprecated**; pass via ``rgb_options={"percentile": ...}``.
                Percentile used when computing colour-scale limits. Default is ``None``.
            basemap (bool or str, optional):
                If ``True``, overlay an OpenStreetMap basemap. If a string, use it as the
                contextily/xyzservices tile-provider name (e.g. ``"CartoDB.Positron"``).
                Default is ``None``. Requires the ``[viz]`` extra.
            rgb_options (dict, optional):
                Grouped Sentinel-imagery kwargs. Accepted keys:
                ``"rgb"``, ``"surface_reflectance"``, ``"cutoff"``,
                ``"percentile"``. Recommended over the loose top-level
                kwargs (which emit :class:`DeprecationWarning`). Default
                is ``None``.
            **kwargs:
                Additional keyword arguments forwarded verbatim to
                :meth:`Analysis.plot`. See that method for the full kwargs surface
                (figure size, color scale, color bar, basemap, etc.).

        Returns:
            ArrayGlyph: A cleopatra ``ArrayGlyph`` wrapping the rendered figure.
                Use ``cleo.fig`` (the :class:`matplotlib.figure.Figure`) and ``cleo.ax``
                (the :class:`matplotlib.axes.Axes`) to drop down to raw matplotlib.

        Examples:
            - Render the first band of a single-band MEM raster. Tagged ``+SKIP`` because
              the call requires the optional ``[viz]`` extra (cleopatra + matplotlib):

              ```python
              >>> import numpy as np
              >>> from pyramids.dataset import Dataset
              >>> arr = np.random.rand(8, 8).astype(np.float32)
              >>> ds = Dataset.create_from_array(
              ...     arr, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
              ... )
              >>> cleo = ds.plot()  # doctest: +SKIP
              >>> cleo.fig          # doctest: +SKIP
              <Figure size 800x800 with 2 Axes>

              ```

            - Override the resolved band index. The facade forwards ``band=1`` straight
              to the engine without consulting the heuristic:

              ```python
              >>> cleo = ds.plot(band=1)  # doctest: +SKIP

              ```

            - Render a multi-band raster as a true-colour composite via the
              recommended ``rgb_options=`` group:

              ```python
              >>> arr3 = np.random.rand(3, 8, 8).astype(np.float32)
              >>> rgb_ds = Dataset.create_from_array(
              ...     arr3, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
              ... )
              >>> cleo = rgb_ds.plot(  # doctest: +SKIP
              ...     rgb_options={"rgb": [0, 1, 2], "surface_reflectance": 255},
              ... )

              ```

            - The deprecated loose-kwarg form still works but emits a
              :class:`DeprecationWarning`. New code should prefer the
              grouped ``rgb_options=`` form shown above:

              ```python
              >>> cleo = rgb_ds.plot(  # doctest: +SKIP
              ...     rgb=[0, 1, 2], surface_reflectance=255,
              ... )
              DeprecationWarning: Passing `rgb=`, `surface_reflectance=`...

              ```
        """
        rgb, surface_reflectance, cutoff, percentile = self._merge_rgb_options(
            rgb_options=rgb_options,
            rgb=rgb,
            surface_reflectance=surface_reflectance,
            cutoff=cutoff,
            percentile=percentile,
        )
        resolved_band, resolved_rgb = self._resolve_plot_band(band, rgb)
        return self.analysis.plot(
            band=resolved_band,
            exclude_value=exclude_value,
            rgb=resolved_rgb,
            surface_reflectance=surface_reflectance,
            cutoff=cutoff,
            overview=overview,
            overview_index=overview_index,
            percentile=percentile,
            basemap=basemap,
            **kwargs,
        )

    @staticmethod
    def _merge_rgb_options(
        *,
        rgb_options: dict | None,
        rgb: list[int] | None,
        surface_reflectance: int | None,
        cutoff: list | None,
        percentile: int | None,
    ) -> tuple[list[int] | None, int | None, list | None, int | None]:
        """Merge `rgb_options=` with the deprecated loose top-level kwargs.

        Returns the resolved four-tuple ``(rgb, surface_reflectance,
        cutoff, percentile)`` passed forward to the renderer. Values in
        ``rgb_options`` win over the loose kwargs on collision; using
        any of the four loose kwargs emits a :class:`DeprecationWarning`
        recommending the grouped form.

        Args:
            rgb_options: Recommended grouped form (``{"rgb": ..., ...}``).
                Accepted keys: ``"rgb"``, ``"surface_reflectance"``,
                ``"cutoff"``, ``"percentile"``. ``None`` means
                no grouped options were supplied.
            rgb: Deprecated top-level kwarg.
            surface_reflectance: Deprecated top-level kwarg.
            cutoff: Deprecated top-level kwarg.
            percentile: Deprecated top-level kwarg.

        Returns:
            tuple: ``(rgb, surface_reflectance, cutoff, percentile)``
                resolved values, with the grouped form taking precedence.

        Raises:
            ValueError: If ``rgb_options`` contains a key outside the
                accepted set ``{"rgb", "surface_reflectance", "cutoff",
                "percentile"}``.

        Examples:
            - Pass everything through the grouped form (recommended).
              No warnings are emitted and the returned four-tuple
              mirrors the inputs in order:

                ```python
                >>> import warnings
                >>> from pyramids.dataset import Dataset
                >>> with warnings.catch_warnings(record=True) as caught:
                ...     warnings.simplefilter("always")
                ...     result = Dataset._merge_rgb_options(
                ...         rgb_options={
                ...             "rgb": [2, 1, 0],
                ...             "surface_reflectance": 10000,
                ...         },
                ...         rgb=None,
                ...         surface_reflectance=None,
                ...         cutoff=None,
                ...         percentile=None,
                ...     )
                >>> result
                ([2, 1, 0], 10000, None, None)
                >>> [w.category.__name__ for w in caught]
                []

                ```

            - Passing a value via the loose top-level kwarg path emits
              a :class:`DeprecationWarning` that names the kwarg(s)
              used and points to the grouped replacement:

                ```python
                >>> import warnings
                >>> from pyramids.dataset import Dataset
                >>> with warnings.catch_warnings(record=True) as caught:
                ...     warnings.simplefilter("always")
                ...     result = Dataset._merge_rgb_options(
                ...         rgb_options=None,
                ...         rgb=[2, 1, 0],
                ...         surface_reflectance=None,
                ...         cutoff=None,
                ...         percentile=None,
                ...     )
                >>> result
                ([2, 1, 0], None, None, None)
                >>> caught[0].category.__name__
                'DeprecationWarning'

                ```

            - When both forms collide, ``rgb_options`` wins. A
              :class:`DeprecationWarning` is still emitted for the loose
              kwarg:

                ```python
                >>> import warnings
                >>> from pyramids.dataset import Dataset
                >>> with warnings.catch_warnings(record=True) as caught:
                ...     warnings.simplefilter("always")
                ...     result = Dataset._merge_rgb_options(
                ...         rgb_options={"rgb": [0, 1, 2]},
                ...         rgb=[3, 4, 5],
                ...         surface_reflectance=None,
                ...         cutoff=None,
                ...         percentile=None,
                ...     )
                >>> result
                ([0, 1, 2], None, None, None)

                ```

            - An unknown key in ``rgb_options`` raises
              :class:`ValueError`:

                ```python
                >>> from pyramids.dataset import Dataset
                >>> Dataset._merge_rgb_options(  # doctest: +IGNORE_EXCEPTION_DETAIL
                ...     rgb_options={"unknown": 1},
                ...     rgb=None,
                ...     surface_reflectance=None,
                ...     cutoff=None,
                ...     percentile=None,
                ... )
                Traceback (most recent call last):
                    ...
                ValueError: Unknown keys in `rgb_options`: ['unknown']...

                ```
        """
        loose_pairs = {
            "rgb": rgb,
            "surface_reflectance": surface_reflectance,
            "cutoff": cutoff,
            "percentile": percentile,
        }
        opts = rgb_options or {}
        unknown = set(opts) - set(loose_pairs)
        if unknown:
            raise ValueError(
                f"Unknown keys in `rgb_options`: {sorted(unknown)}. "
                f"Accepted: {sorted(loose_pairs)}."
            )
        used_loose = [name for name, value in loose_pairs.items() if value is not None]
        # Split the deprecated loose kwargs into those overridden by a
        # matching `rgb_options` key (a collision — the loose value is
        # discarded) and those used on their own. They get distinct
        # warning text so the message isn't misleading: a user who *did*
        # use the grouped form shouldn't be told "group them" again.
        overridden = [name for name in used_loose if name in opts]
        pure_loose = [name for name in used_loose if name not in opts]
        if pure_loose:
            warnings.warn(
                "Passing "
                + ", ".join(f"`{name}=`" for name in pure_loose)
                + " as top-level kwargs to `Dataset.plot` is deprecated. "
                "Group them under `rgb_options={...}` instead.",
                DeprecationWarning,
                stacklevel=3,
            )
        if overridden:
            warnings.warn(
                "Both the loose "
                + ", ".join(f"`{name}=`" for name in overridden)
                + " kwarg(s) and `rgb_options` were given for the same key(s); "
                "`rgb_options` wins — drop the loose form.",
                DeprecationWarning,
                stacklevel=3,
            )
        rgb = opts.get("rgb", rgb)
        surface_reflectance = opts.get("surface_reflectance", surface_reflectance)
        cutoff = opts.get("cutoff", cutoff)
        percentile = opts.get("percentile", percentile)
        return rgb, surface_reflectance, cutoff, percentile

    def crop(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.crop <pyramids.dataset.engines.Spatial.crop>`."""
        return self.spatial.crop(*args, **kwargs)

    def to_crs(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.to_crs <pyramids.dataset.engines.Spatial.to_crs>`."""
        return self.spatial.to_crs(*args, **kwargs)

    def set_crs(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.set_crs <pyramids.dataset.engines.Spatial.set_crs>`."""
        return self.spatial.set_crs(*args, **kwargs)

    def convert_longitude(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.convert_longitude <pyramids.dataset.engines.Spatial.convert_longitude>`."""
        return self.spatial.convert_longitude(*args, **kwargs)

    def resample(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.resample <pyramids.dataset.engines.Spatial.resample>`."""
        return self.spatial.resample(*args, **kwargs)

    def align(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.align <pyramids.dataset.engines.Spatial.align>`."""
        return self.spatial.align(*args, **kwargs)

    def fill_gaps(self, *args, **kwargs):
        """Facade — delegates to :meth:`Spatial.fill_gaps <pyramids.dataset.engines.Spatial.fill_gaps>`."""
        return self.spatial.fill_gaps(*args, **kwargs)

    def read_array(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.read_array <pyramids.dataset.engines.IO.read_array>`."""
        return self.io.read_array(*args, **kwargs)

    def write_array(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.write_array <pyramids.dataset.engines.IO.write_array>`."""
        return self.io.write_array(*args, **kwargs)

    def to_file(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.to_file <pyramids.dataset.engines.IO.to_file>`."""
        return self.io.to_file(*args, **kwargs)

    def to_raster(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.to_raster <pyramids.dataset.engines.IO.to_raster>`."""
        return self.io.to_raster(*args, **kwargs)

    def get_block_arrangement(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.get_block_arrangement <pyramids.dataset.engines.IO.get_block_arrangement>`."""
        return self.io.get_block_arrangement(*args, **kwargs)

    def get_tile(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.get_tile <pyramids.dataset.engines.IO.get_tile>`."""
        return self.io.get_tile(*args, **kwargs)

    def map_blocks(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.map_blocks <pyramids.dataset.engines.IO.map_blocks>`."""
        return self.io.map_blocks(*args, **kwargs)

    def to_xyz(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.to_xyz <pyramids.dataset.engines.IO.to_xyz>`."""
        return self.io.to_xyz(*args, **kwargs)

    @property
    def overview_count(self):
        """Facade — delegates to :attr:`IO.overview_count <pyramids.dataset.engines.IO.overview_count>`."""
        return self.io.overview_count

    def create_overviews(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.create_overviews <pyramids.dataset.engines.IO.create_overviews>`."""
        return self.io.create_overviews(*args, **kwargs)

    def recreate_overviews(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.recreate_overviews <pyramids.dataset.engines.IO.recreate_overviews>`."""
        return self.io.recreate_overviews(*args, **kwargs)

    def get_overview(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.get_overview <pyramids.dataset.engines.IO.get_overview>`."""
        return self.io.get_overview(*args, **kwargs)

    def read_overview_array(self, *args, **kwargs):
        """Facade — delegates to :meth:`IO.read_overview_array <pyramids.dataset.engines.IO.read_overview_array>`."""
        return self.io.read_overview_array(*args, **kwargs)

    def _read_block(self, *args, **kwargs):
        """Facade — concrete override of the abstract :meth:`RasterBase._read_block`."""
        return self.io._read_block(*args, **kwargs)

    def get_attribute_table(self, *args, **kwargs):
        """Facade — delegates to :meth:`Bands.get_attribute_table <pyramids.dataset.engines.Bands.get_attribute_table>`."""
        return self.bands.get_attribute_table(*args, **kwargs)

    def set_attribute_table(self, *args, **kwargs):
        """Facade — delegates to :meth:`Bands.set_attribute_table <pyramids.dataset.engines.Bands.set_attribute_table>`."""
        return self.bands.set_attribute_table(*args, **kwargs)

    def add_band(self, *args, **kwargs):
        """Facade — delegates to :meth:`Bands.add_band <pyramids.dataset.engines.Bands.add_band>`."""
        return self.bands.add_band(*args, **kwargs)

    def get_band_by_color(self, *args, **kwargs):
        """Facade — delegates to :meth:`Bands.get_band_by_color <pyramids.dataset.engines.Bands.get_band_by_color>`."""
        return self.bands.get_band_by_color(*args, **kwargs)

    def change_no_data_value(self, *args, **kwargs):
        """Facade — concrete override of the abstract :meth:`RasterBase.change_no_data_value`.

        The collaborator returns `None` for the `inplace=True` path; the
        facade substitutes `self` for identity preservation, matching
        :meth:`apply` and :meth:`fill`.
        """
        result = self.bands.change_no_data_value(*args, **kwargs)
        return self if result is None else result

    @property
    def band_color(self):
        """Facade — delegates to :attr:`Bands.band_color <pyramids.dataset.engines.Bands.band_color>`."""
        return self.bands.band_color

    @band_color.setter
    def band_color(self, values):
        self.bands.band_color = values

    @property
    def color_table(self):
        """Facade — delegates to :attr:`Bands.color_table <pyramids.dataset.engines.Bands.color_table>`."""
        return self.bands.color_table

    @color_table.setter
    def color_table(self, df):
        self.bands.color_table = df

    def _check_no_data_value(self, *args, **kwargs):
        """Facade — concrete override of the abstract :meth:`RasterBase._check_no_data_value`."""
        return self.bands._check_no_data_value(*args, **kwargs)

    def _set_no_data_value(self, *args, **kwargs):
        """Facade — concrete override of the abstract :meth:`RasterBase._set_no_data_value`."""
        return self.bands._set_no_data_value(*args, **kwargs)

    def _calculate_bbox(self) -> list:
        """Concrete override of :meth:`RasterBase._calculate_bbox`.

        Direct on Dataset (not via the Bands collaborator) because the
        `bbox` / `bounds` properties are reachable before the
        collaborator is wired during `Dataset.__init__`.
        """
        x_min, y_max = self.top_left_corner
        y_min = y_max - self.rows * self.cell_size
        x_max = x_min + self.columns * self.cell_size
        return [x_min, y_min, x_max, y_max]

    def _calculate_bounds(self):
        """Concrete override of :meth:`RasterBase._calculate_bounds`."""
        x_min, y_min, x_max, y_max = self._calculate_bbox()
        coords = [(x_min, y_max), (x_min, y_min), (x_max, y_min), (x_max, y_max)]
        poly = create_polygon(coords)
        gdf = gpd.GeoDataFrame(geometry=[poly])
        gdf.set_crs(epsg=self.epsg, inplace=True)
        return gdf

    def _get_band_names(self):
        """Concrete override of :meth:`RasterBase._get_band_names`.

        Defined directly on Dataset (not via the bands collaborator)
        because `Dataset.__init__` calls `self._get_band_names()`
        before the `Bands` collaborator is wired up. Mirrors
        :meth:`Bands._get_band_names`.
        """
        names = []
        for i in range(1, self.band_count + 1):
            band = self.raster.GetRasterBand(i)
            if band.GetDescription():
                names.append(band.GetDescription())
            else:
                band_name = f"Band_{band.GetBand()}"
                metadata = band.GetDataset().GetMetadata_Dict()
                if band_name in metadata and metadata[band_name]:
                    names.append(metadata[band_name])
                else:
                    names.append(band_name)
        return names

    def _get_crs(self) -> str:
        """Concrete override of :meth:`RasterBase._get_crs`.

        Defined directly on Dataset rather than as a facade because
        `RasterBase.__init__` calls `_get_epsg()` (which calls
        `_get_crs()`) before `Dataset.__init__` has a chance to wire
        up the Spatial collaborator. The Spatial collaborator's
        `_get_crs` body is the same one-liner.
        """
        return str(self.raster.GetProjection())

    def _get_epsg(self) -> int:
        """Concrete override of :meth:`RasterBase._get_epsg`.

        Defined directly on Dataset for the same reason as
        :meth:`_get_crs`.
        """
        return epsg_from_wkt(self._get_crs())

    def zonal_stats(
        self,
        fc,
        *,
        stats=("mean",),
        method: str = "rasterize",
        band: int = 0,
    ):
        """Compute zonal statistics of this dataset over a polygon FeatureCollection.

        Thin forwarder to
        :func:`pyramids.dataset.ops._zonal.zonal_stats`; see that
        function for the full argument contract.

        Args:
            fc: A :class:`pyramids.feature.FeatureCollection` of
                polygons sharing this dataset's CRS.
            stats: Sequence of stat names (`"mean"`, `"sum"`,
                `"min"`, `"max"`, `"std"`, `"var"`,
                `"count"`).
            method: `"rasterize"` is the only supported value today;
                an area-weighted `"fractional"` method is planned.
            band: Zero-based band index.

        Returns:
            pandas.DataFrame: Indexed by `fc.index`; one column per stat.
        """
        return _zonal_stats(self, fc, stats=stats, method=method, band=band)

    def to_zarr(
        self,
        store,
        *,
        compute: bool = True,
        mode: str = "w",
        chunks=None,
        storage_options: dict | None = None,
    ):
        """Serialise this Dataset to a Zarr store (parallel writes per chunk).

        Thin forwarder to
        :func:`pyramids.dataset.ops._zarr.write_dataset_to_zarr`; see
        that function for the full argument contract. Zarr is the
        only raster output format where pyramids can write in true
        parallel — each dask chunk becomes an independent Zarr chunk
        file. Requires the `[lazy]` optional extra.

        Args:
            store: Target store (path / fsspec URL / zarr.Store).
            compute: `True` writes immediately; `False` returns a
                :class:`dask.delayed.Delayed`.
            mode: Zarr open mode, usually `"w"` or `"a"`.
            chunks: Chunk spec forwarded to :meth:`read_array`.
                `None` defaults to `"auto"` via the zarr helper.
            storage_options: fsspec options for cloud stores.
        """
        resolved_chunks = chunks if chunks is not None else "auto"
        return write_dataset_to_zarr(
            self,
            store,
            compute=compute,
            mode=mode,
            chunks=resolved_chunks,
            storage_options=storage_options,
        )

    @classmethod
    def from_zarr(
        cls,
        store,
        *,
        chunks=None,
        storage_options: dict | None = None,
    ) -> Dataset:
        """Load a pyramids-written Zarr store into a new :class:`Dataset`.

        Thin forwarder to
        :func:`pyramids.dataset.ops._zarr.read_dataset_from_zarr`.

        Args:
            store: Input store (path / fsspec URL / zarr.Store).
            chunks: If non-None, the loaded Dataset is flagged as
                dask-backed so downstream `read_array` calls return
                lazy arrays.
            storage_options: fsspec options for cloud stores.
        """
        return read_dataset_from_zarr(
            store,
            chunks=chunks,
            storage_options=storage_options,
        )

    def __str__(self) -> str:
        """__str__."""
        message = f"""
            Top Left Corner: {self.top_left_corner}
            Cell size: {self.cell_size}
            Dimension: {self.rows} * {self.columns}
            EPSG: {self.epsg}
            Number of Bands: {self.band_count}
            Band names: {self.band_names}
            Band colors: {self.band_color}
            Band units: {self.band_units}
            Scale: {self.scale}
            Offset: {self.offset}
            Mask: {self.no_data_value[0]}
            Data type: {self.dtype[0]}
            File: {self.file_name}
        """
        return message

    def __repr__(self) -> str:
        """__repr__."""
        return str(gdal.Info(self.raster))

    @property
    def access(self) -> str:
        """
        Access mode.

        Returns:
            str:
                The access mode of the dataset (read_only/write).
        """
        return str(super().access)

    @property
    def raster(self) -> gdal.Dataset:
        """Base GDAL Dataset (read-only)."""
        return super().raster

    @property
    def rows(self) -> int:
        """Number of rows in the raster array."""
        return int(self._rows)

    @property
    def columns(self) -> int:
        """Number of columns in the raster array."""
        return int(self._columns)

    @property
    def shape(self) -> tuple[int, int, int]:
        """Shape (bands, rows, columns)."""
        return self.band_count, self.rows, self.columns

    @property
    def geotransform(self) -> tuple[float, float, float, float, float, float]:
        """WKT projection.

        (top left corner X/lon coordinate, cell_size, 0, top left corner y/lat coordinate, 0, -cell_size).

        See Also:
            - Dataset.top_left_corner: Coordinate of the top left corner of the dataset.
            - Dataset.epsg: EPSG number of the dataset coordinate reference system.
        """
        gt: tuple[float, float, float, float, float, float] = self._geotransform
        return gt

    @property
    def epsg(self) -> int:
        """EPSG number."""
        return self._epsg

    @epsg.setter
    def epsg(self, value: int):
        """EPSG number."""
        sr = sr_from_epsg(value)
        self.raster.SetProjection(sr.ExportToWkt())
        self._update_inplace(self._raster)

    @property
    def crs(self) -> str:
        """Coordinate reference system.

        Returns:
            str:
                the coordinate reference system of the dataset.

        See Also:
            Dataset.set_crs : Set the Coordinate Reference System (CRS).
            Dataset.to_crs : Reproject the dataset to any projection.
            Dataset.epsg : epsg number of the dataset coordinate reference system.
        """
        return self._get_crs()

    @crs.setter
    def crs(self, value: str):
        """Coordinate reference system.

        Args:
            value (str):
                WellKnownText (WKT) string.

        See Also:
            - Dataset.set_crs: Set the Coordinate Reference System (CRS).
            - Dataset.to_crs: Reproject the dataset to any projection.
            - Dataset.epsg: EPSG number of the dataset coordinate reference system.
        """
        self.set_crs(value)

    @property
    def cell_size(self) -> float:
        """Cell size."""
        return float(self._cell_size)

    @property
    def band_count(self) -> int:
        """Number of bands in the raster."""
        return int(self._band_count)

    @property
    def band_names(self) -> list[str]:
        """Band names."""
        return self._get_band_names()

    @band_names.setter
    def band_names(self, name_list: list):
        """Band names."""
        self.bands._set_band_names(name_list)

    @property
    def band_units(self) -> list[str]:
        """Band units."""
        return self._band_units

    @band_units.setter
    def band_units(self, value: list[str]):
        """Band units setter."""
        self._band_units = value
        for i, val in enumerate(value):
            self._iloc(i).SetUnitType(val)

    def convert_units(self, target: str, band: int | None = None) -> Dataset:
        """Convert band values to ``target`` units, returning a new Dataset.

        Unlike the :attr:`band_units` setter — which only relabels bands — this
        actually transforms the stored values using a small affine conversion table
        (see :func:`pyramids.dataset.ops.units.convert_array`) and records the new
        unit on the result. No-data cells are preserved unchanged. The output is a
        new in-memory ``float64`` Dataset; the source is left untouched.

        Args:
            target: Target unit label (e.g. ``"celsius"``, ``"hPa"``, ``"knots"``).
            band: Zero-based band index to convert. ``None`` (default) converts every
                band; bands already in ``target`` units are passed through unchanged.

        Returns:
            A new :class:`Dataset` with converted values and updated
            :attr:`band_units`.

        Raises:
            ValueError: ``band`` is out of range, a converted band has no source unit
                set, or the ``(source, target)`` pair is unsupported.

        Examples:
            - Convert a Kelvin raster to Celsius and read the new values:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.array([[273.15, 283.15], [293.15, 303.15]]),
                ...     top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
                ... )
                >>> ds.band_units = ["K"]
                >>> converted = ds.convert_units("celsius")
                >>> converted.read_array().tolist()
                [[0.0, 10.0], [20.0, 30.0]]
                >>> converted.band_units
                ['celsius']

                ```
            - An unsupported target raises a clear error:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.array([[273.15]]), top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
                ... )
                >>> ds.band_units = ["K"]
                >>> try:
                ...     ds.convert_units("furlongs")
                ... except ValueError as exc:
                ...     print("No unit conversion" in str(exc))
                True

                ```
        """
        if band is not None and not 0 <= band < self.band_count:
            raise ValueError(
                f"band {band} is out of range for a {self.band_count}-band dataset."
            )

        band_indices = range(self.band_count) if band is None else [band]
        source_units = list(self.band_units)
        new_units = list(self.band_units)

        full = self.read_array()
        single_band = self.band_count == 1
        stack = full[np.newaxis, ...] if single_band else full
        out = stack.astype("float64").copy()
        no_data = self.no_data_value

        for index in band_indices:
            layer = out[index]
            nodata_value = no_data[index]
            mask = layer == nodata_value if nodata_value is not None else None
            converted = convert_array(layer, source_units[index], target)
            if mask is not None:
                converted[mask] = nodata_value
            out[index] = converted
            new_units[index] = target

        result_array = out[0] if single_band else out
        result = self.create_from_array(
            result_array,
            geo=self.geotransform,
            epsg=self.epsg,
            no_data_value=list(no_data),
        )
        result.band_units = new_units
        return result

    @property
    def no_data_value(self) -> tuple:
        """Per-band nodata markers as an immutable tuple.

        Returns a `tuple` (not a `list`) to make the read-only
        contract explicit — assign through the setter to change
        values; mutating the returned object never propagates to
        the underlying state.
        """
        return tuple(self._no_data_value)

    @no_data_value.setter
    def no_data_value(self, value: list | tuple | np.ndarray | Number):
        """Set the no_data_value marker on every band.

        Args:
            value: Either a scalar (broadcast to all bands) or a
                sequence (`list`, `tuple`, or 1-D :class:`numpy.ndarray`)
                with `len == band_count` providing one value per band.
                A 0-D ndarray is treated as a scalar.

        Raises:
            ValueError: When `value` is a sequence whose length
                differs from `band_count`, or a multi-dimensional
                ndarray (only 0-D scalars and 1-D sequences are
                accepted).

        Notes:
            - The setter does not change the values of the cells to the new no_data_value, it only changes the
            `no_data_value` attribute.
            - Use this method to change the `no_data_value` attribute to match the value that is stored in the cells.
            - To change the values of the cells, to the new no_data_value, use the `change_no_data_value` method.

        See Also:
            - Dataset.change_no_data_value: Change the No Data Value.
        """
        if isinstance(value, np.ndarray):
            if value.ndim == 0:
                value = value.item()
            elif value.ndim == 1:
                value = value.tolist()
            else:
                raise ValueError(
                    f"no_data_value ndarray must be 0-D (scalar) or 1-D "
                    f"(per-band sequence); got ndim={value.ndim}"
                )
        if isinstance(value, (list, tuple)):
            if len(value) != self.band_count:
                raise ValueError(
                    f"no_data_value sequence length {len(value)} does "
                    f"not match band_count {self.band_count}"
                )
            for i, val in enumerate(value):
                self.bands._change_no_data_value_attr(i, val)
        else:
            for i in range(self.band_count):
                self.bands._change_no_data_value_attr(i, value)

    @property
    def meta_data(self):
        """Meta-data."""
        return super().meta_data

    @meta_data.setter
    def meta_data(self, value: dict[str, str]):
        """Meta-data."""
        for key, val in value.items():
            self._raster.SetMetadataItem(key, val)

    @property
    def block_size(self) -> list[tuple[int, int]]:
        """Block Size.

        The block size is the size of the block that the raster is divided into, the block size is used to
        read and write the raster data in blocks.

        See Also:
            - Dataset.get_block_arrangement: Get block arrangement to read the dataset in chunks.
            - Dataset.get_tile: Get tiles.
            - Dataset.read_array: Read the data stored in the dataset bands.
        """
        return self._block_size

    @block_size.setter
    def block_size(self, value: list[tuple[int, int]]):
        """Block Size.

        Args:
            value (List[Tuple[int, int]]):
                block size for each band in the raster(512, 512).
        """
        if len(value[0]) != 2:
            raise ValueError("block size should be a tuple of 2 integers")

        self._block_size = value

    @property
    def file_name(self):
        """File name."""
        return super().file_name

    @property
    def driver_type(self):
        """Driver Type."""
        return super().driver_type

    @property
    def scale(self) -> list[float]:
        """Scale.

        The value of the scale is used to convert the pixel values to the real-world values.
        """
        scale_list = []
        for i in range(self.band_count):
            band_scale = self._iloc(i).GetScale()
            scale_list.append(band_scale if band_scale is not None else 1.0)
        return scale_list

    @scale.setter
    def scale(self, value: list[float]):
        """Scale."""
        for i, val in enumerate(value):
            self._iloc(i).SetScale(val)

    @property
    def offset(self):
        """Offset.

        The value of the offset is used to convert the pixel values to the real-world values.
        """
        offset_list = []
        for i in range(self.band_count):
            band_offset = self._iloc(i).GetOffset()
            offset_list.append(band_offset if band_offset is not None else 0)
        return offset_list

    @offset.setter
    def offset(self, value: list[float]):
        """Offset."""
        for i, val in enumerate(value):
            self._iloc(i).SetOffset(val)

    @property
    def top_left_corner(self):
        """Top left corner coordinates.

        See Also:
            - Dataset.geotransform: Dataset geotransform.
        """
        return super().top_left_corner

    @property
    def bounds(self) -> GeoDataFrame:
        """Bounds - the bbox as a geodataframe with a polygon geometry.

        See Also:
            - Dataset.bbox: Dataset bounding box.
        """
        return self._calculate_bounds()

    @property
    def bbox(self) -> list:
        """Bound box [xmin, ymin, xmax, ymax].

        See Also:
            - Dataset.bounds: Dataset bounding polygon.
        """
        return self._calculate_bbox()

    def to_stac_item(
        self,
        item_id: str,
        *,
        asset_href: str,
        datetime=None,
        start_datetime=None,
        end_datetime=None,
        asset_key: str = "data",
        asset_media_type: str | None = None,
        with_proj: bool = True,
        with_raster: bool = True,
        precision: int = 6,
    ) -> dict:
        """Describe this raster as a STAC Item dict (proj + raster extensions).

        Thin forwarder to :func:`pyramids.dataset._stac.to_stac_item` — the
        inverse of :meth:`DatasetCollection.from_stac`. Returns a plain
        STAC-JSON dict (pystac not required); the footprint is this dataset's
        bounding rectangle reprojected to EPSG:4326.

        Args:
            item_id: The STAC Item id.
            asset_href: Href to record for the single data asset.
            datetime: Item datetime (`datetime.datetime` or RFC 3339 string).
                `None` with no range defaults to the current UTC time; `None`
                with `start_datetime`/`end_datetime` writes a null `datetime`
                plus the range (the STAC-valid null-datetime form).
            start_datetime: Optional range start, written to
                `properties.start_datetime`.
            end_datetime: Optional range end, written to
                `properties.end_datetime`.
            asset_key: Key for the data asset (default `"data"`).
            asset_media_type: Optional media type for the asset.
            with_proj: Populate the `proj` extension from the grid.
            with_raster: Populate `raster:bands` (data_type + nodata).
            precision: Decimal places for the reprojected footprint.

        Returns:
            dict: The STAC Item (a GeoJSON Feature).
        """
        # Imported here to avoid the dataset <-> stac import cycle at load time.
        from pyramids.dataset._stac import to_stac_item

        return to_stac_item(
            self,
            item_id,
            asset_href=asset_href,
            datetime=datetime,
            start_datetime=start_datetime,
            end_datetime=end_datetime,
            asset_key=asset_key,
            asset_media_type=asset_media_type,
            with_proj=with_proj,
            with_raster=with_raster,
            precision=precision,
        )

    @property
    def total_bounds(self) -> np.ndarray:
        """Bounding box `[minx, miny, maxx, maxy]` as a NumPy array.

        introduced this property so that `Dataset` and
        :class:`pyramids.feature.FeatureCollection` expose the same
        shape (`GeoDataFrame.total_bounds` is the geopandas name
        for exactly this array), letting both classes satisfy the
        :class:`pyramids.base.protocols.SpatialObject` protocol.
        """
        return np.asarray(self._calculate_bbox())

    @property
    def lon(self) -> np.ndarray:
        """Longitude / x cell-centre coordinates.

        Uses the geotransform's pixel width (``geotransform[1]``) so the axis is
        correct even when cells are not square (pixel width != pixel height). Reads the
        cached ``_geotransform`` (like :attr:`top_left_corner`) rather than the
        ``geotransform`` property, so subclasses that derive ``geotransform`` from
        ``lon``/``lat`` (e.g. :class:`~pyramids.netcdf.NetCDF`) do not recurse.

        Examples:
            - Read the column-centre longitudes of a small raster:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
                ... )
                >>> ds.lon.tolist()
                [0.25, 0.75, 1.25]

                ```

        See Also:
            - Dataset.x: Dataset x coordinates.
            - Dataset.lat: Dataset latitude.
        """
        pixel_width = self._geotransform[1]
        x_coords = self.get_x_lon_dimension_array(
            self.top_left_corner[0], pixel_width, self.columns
        )
        return x_coords

    @property
    def lat(self) -> np.ndarray:
        """Latitude / y cell-centre coordinates.

        Uses the geotransform's pixel height (``abs(geotransform[5])``) rather than
        :attr:`cell_size` (which only tracks pixel width), so the axis is correct for
        non-square cells. Reads the cached ``_geotransform`` (like
        :attr:`top_left_corner`) rather than the ``geotransform`` property, so
        subclasses that derive ``geotransform`` from ``lon``/``lat`` (e.g.
        :class:`~pyramids.netcdf.NetCDF`) do not recurse.

        Examples:
            - Row-centre latitudes decrease from north to south:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
                ... )
                >>> ds.lat.tolist()
                [-0.25, -0.75]

                ```
            - With non-square cells the latitude axis uses the pixel height, not the
              pixel width:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.zeros((2, 3)), geo=(10.0, 2.0, 0.0, 50.0, 0.0, -1.0), epsg=4326,
                ... )
                >>> ds.lat.tolist()
                [49.5, 48.5]

                ```

        See Also:
            - Dataset.x: Dataset x coordinates.
            - Dataset.y: Dataset y coordinates.
            - Dataset.lon: Dataset longitude.
        """
        pixel_height = abs(self._geotransform[5])
        y_coords = self.get_y_lat_dimension_array(
            self.top_left_corner[1], pixel_height, self.rows
        )
        return y_coords

    @property
    def x(self) -> np.ndarray:
        """X cell-centre coordinates (alias of :attr:`lon`).

        Examples:
            - x mirrors lon for the same raster:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
                ... )
                >>> ds.x.tolist()
                [0.25, 0.75, 1.25]

                ```

        See Also:
            - Dataset.lon: the longitude axis this property aliases.
            - Dataset.y: Dataset y coordinates.
        """
        return self.lon

    @property
    def y(self) -> np.ndarray:
        """Y cell-centre coordinates (alias of :attr:`lat`).

        Examples:
            - y mirrors lat for the same raster:
                ```python
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> ds = Dataset.create_from_array(
                ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
                ... )
                >>> ds.y.tolist()
                [-0.25, -0.75]

                ```

        See Also:
            - Dataset.lat: the latitude axis this property aliases.
            - Dataset.x: Dataset x coordinates.
        """
        return self.lat

    @property
    def gdal_dtype(self):
        """Data Type."""
        return [
            self.raster.GetRasterBand(i).DataType for i in range(1, self.band_count + 1)
        ]

    @property
    def numpy_dtype(self) -> list[type]:
        """List of the numpy data Type of each band, the data type is a numpy function."""
        return [
            DTYPE_CONVERSION_DF.loc[DTYPE_CONVERSION_DF["gdal"] == i, "numpy"].values[0]
            for i in self.gdal_dtype
        ]

    @property
    def dtype(self) -> list[str]:
        """List of the data Type of each band as strings."""
        return [
            DTYPE_CONVERSION_DF.loc[DTYPE_CONVERSION_DF["gdal"] == i, "name"].values[0]
            for i in self.gdal_dtype
        ]

    @classmethod
    def read_file(
        cls,
        path: str | Path,
        read_only=True,
        file_i: int = 0,
        *,
        vsi: str | None = None,
    ) -> Dataset:
        """Open a raster from a path, URL, or archive member.

        Plain local paths, ``/vsi*`` paths, and URL schemes
        (``http(s)://``, ``s3://``, ``gs://``, ``az://`` / ``abfs://``,
        ``file://``) are all accepted — URLs are transparently rewritten to
        GDAL's virtual filesystem (GDAL fetches via HTTP range requests for
        ``http(s)``). Compressed archives are detected from the extension; pass
        ``vsi=`` to be explicit about it (e.g. an archive with an unusual
        extension, or to open a specific member by index).

        Args:
            path (str | Path):
                Path or URL of the file to open.
            read_only (bool):
                File mode; set to ``False`` to open in update mode.
            file_i (int):
                Which member to open when ``path`` is (or is forced to be) a
                multi-file archive. Default ``0``.
            vsi (str | None):
                Treat ``path`` as an archive of this kind and open member
                ``file_i`` from inside it: ``"zip"``, ``"tar"`` (also
                ``"tar.gz"`` / ``"tgz"``), ``"gzip"`` (also ``"gz"``), or
                ``"auto"`` (infer from the extension). Default ``None`` —
                ``path`` is opened directly / extension-sniffed as before.
                Works for archives reachable locally or over the network
                (``/vsizip//vsicurl/…`` is built automatically) **provided the
                file name carries a recognised archive extension** — GDAL's
                archive handlers key off the extension, so an extension-less
                download URL must first be fetched and saved with a ``.zip``
                name (or written to ``/vsimem/<name>.zip`` via
                :func:`osgeo.gdal.FileFromMemBuffer`).

        Returns:
            Dataset:
                Opened dataset instance.

        See Also:
            - :meth:`read_array`: read the values stored in a dataset band.
            - :meth:`from_bytes`: open a raster held in memory.
            - :meth:`pyramids.dataset.DatasetCollection.from_archive`: open
              *every* member of an archive as a temporal stack.
        """
        src = _io.read_file(path, read_only=read_only, file_i=file_i, vsi=vsi)
        return cls(src, access="read_only" if read_only else "write")

    @classmethod
    def from_bytes(
        cls,
        data: bytes | bytearray | memoryview,
        *,
        suffix: str = ".tif",
        name: str | None = None,
        read_only: bool = True,
    ) -> Dataset:
        """Open a raster held in memory as a byte string.

        Writes ``data`` to a temporary GDAL ``/vsimem/`` path and opens
        it — no on-disk temp file needed. Useful for HTTP response
        bodies (``requests.get(url).content``), object-store
        ``get_object`` payloads, database blobs, and test fixtures.

        This is **not** a URL helper. Reading from a URL is already
        supported by :meth:`read_file`, which rewrites ``http(s)://``,
        ``s3://``, ``gs://``, ``az://`` / ``abfs://`` and ``file://``
        to GDAL ``/vsi*`` paths. Use ``from_bytes`` only when you
        already hold the bytes.

        The ``/vsimem/`` entry is removed automatically when the
        returned :class:`Dataset` is garbage-collected
        (:func:`weakref.finalize`); :meth:`close` does not need to be
        called for cleanup. Note that an in-memory dataset is **not
        picklable** — :meth:`__reduce__` raises ``TypeError`` for
        ``/vsimem/`` paths; call :meth:`to_file` first to anchor it to
        disk before sending it to another process.

        Args:
            data: Raw bytes of a raster (GeoTIFF, ASCII grid, ...). For
                NetCDF bytes use :meth:`pyramids.netcdf.NetCDF.from_bytes`.
            suffix: Extension hint for GDAL's driver detection. Needed
                only for headerless formats (e.g. ESRI ASCII grid:
                ``suffix=".asc"``); GDAL sniffs anything with a magic
                header regardless. Defaults to ``".tif"``.
            name: Optional label recorded as the dataset's
                :attr:`file_name` (cosmetic only — it is still an
                in-memory dataset). Defaults to ``None``.
            read_only: Open the dataset read-only. Defaults to ``True``.

        Returns:
            Dataset: The opened in-memory dataset.

        Raises:
            TypeError: ``data`` is not a bytes-like object.
            ValueError: GDAL could not open the bytes (corrupt /
                truncated payload, or a headerless format without a
                ``suffix`` hint).

        Examples:
            - Open the bytes of a downloaded GeoTIFF and inspect it (the
              bytes here come from a file, but they could just as well be
              ``requests.get(url).content``):
                ```python
                >>> from pathlib import Path
                >>> from pyramids.dataset import Dataset
                >>> data = Path("tests/data/acc4000.tif").read_bytes()
                >>> ds = Dataset.from_bytes(data, name="downloaded-scene")
                >>> ds.band_count
                1
                >>> ds.shape
                (1, 13, 14)
                >>> ds.epsg
                32618
                >>> ds.file_name
                'downloaded-scene'
                >>> ds.close()

                ```
            - The bytes path yields the same data as opening the file directly:
                ```python
                >>> from pathlib import Path
                >>> from pyramids.dataset import Dataset
                >>> data = Path("tests/data/acc4000.tif").read_bytes()
                >>> from_bytes = Dataset.from_bytes(data)
                >>> from_file = Dataset.read_file("tests/data/acc4000.tif")
                >>> from_bytes.shape == from_file.shape
                True
                >>> from_bytes.epsg == from_file.epsg
                True

                ```
            - An in-memory dataset cannot be pickled — anchor it to disk first:
                ```python
                >>> import pickle
                >>> from pathlib import Path
                >>> from pyramids.dataset import Dataset
                >>> data = Path("tests/data/acc4000.tif").read_bytes()
                >>> try:
                ...     pickle.dumps(Dataset.from_bytes(data))
                ... except TypeError as exc:
                ...     print("to_file" in str(exc))
                True

                ```

        See Also:
            - :meth:`read_file`: open a raster from a path or URL.
            - :meth:`to_file`: write an in-memory dataset to disk.
            - :meth:`pyramids.netcdf.NetCDF.from_bytes`: the NetCDF variant.
        """
        src, vsi_path = _io.bytes_to_gdal(data, suffix=suffix, read_only=read_only)
        try:
            obj = cls(src, access="read_only" if read_only else "write")
        except Exception as e:
            src = None
            _io.silent_unlink(vsi_path)
            raise ValueError(
                "could not open the supplied bytes as a raster dataset "
                f"(the data may be corrupt or truncated): {e}"
            ) from e
        obj._vsimem_path = vsi_path
        weakref.finalize(obj, _io.silent_unlink, vsi_path)
        if name is not None:
            obj._file_name = str(name)
        return obj

    def copy(self, path: str | Path | None = None) -> Dataset:
        """Deep copy.

        Args:
            path (str, optional):
                Destination path to save the copied dataset. If None
                is passed, the copied dataset is created in memory.

        Returns:
            Dataset: An independent copy. Access mode of the returned
            Dataset:

            * `path is None` (in-memory copy) → access mode of the
              source is preserved. A `copy()` of a read-only source
              stays read-only at the pyramids level (the underlying
              MEM driver is always writable; pyramids enforces the
              flag itself).
            * `path is not None` (on-disk copy) → `"write"`,
              because the caller has just created a new file they
              presumably want to populate.
        """
        if path is None:
            path = ""
            driver = "MEM"
            new_access = self._access
        else:
            driver = "GTiff"
            new_access = "write"

        src = gdal.GetDriverByName(driver).CreateCopy(str(path), self._raster)
        return Dataset(src, access=new_access)

    def close(self) -> None:
        """Close the dataset.

        Safe to call multiple times — subsequent calls after the first are no-ops.
        """
        if self._raster is not None:
            self._raster.FlushCache()
            self._raster = None

    @staticmethod
    def _create_dataset(
        cols: int,
        rows: int,
        bands: int,
        dtype: int,
        driver: str = "MEM",
        path: str | Path | None = None,
    ) -> gdal.Dataset:
        """Create a GDAL driver.

            creates a driver and save it to disk and in memory if the path is not given.

        Args:
            cols (int):
                Number of columns.
            rows (int):
                Number of rows.
            bands (int):
                Number of bands.
            dtype:
                GDAL data type.
            driver (str):
                Driver type ["GTiff", "MEM"].
            path (str):
                Path to save the GTiff driver.

        Returns:
            gdal driver
        """
        if path:
            driver = "GTiff" if driver == "MEM" else driver
            if not isinstance(path, (str, Path)):
                raise TypeError(
                    f"The path input should be string or Path type, given: {type(path)}"
                )
            path = Path(path)
            if driver == "GTiff" and path.suffix != ".tif":
                raise TypeError(
                    "The path to save the created raster should end with .tif"
                )
            # LZW is a lossless compression method achieve the highest compression but with a lot
            # of computations.
            src = gdal.GetDriverByName(driver).Create(
                str(path), cols, rows, bands, dtype, ["COMPRESS=LZW"]
            )
        else:
            # for memory drivers
            driver = "MEM"
            src = gdal.GetDriverByName(driver).Create("", cols, rows, bands, dtype)
        return src

    @classmethod
    def _build_dataset(
        cls,
        cols: int,
        rows: int,
        bands: int,
        dtype: int,
        geo: tuple,
        crs: str,
        no_data_value: Any | None = DEFAULT_NO_DATA_VALUE,
        driver: str = "MEM",
        path: str | Path | None = None,
        access: str = "write",
        array: np.ndarray | None = None,
    ) -> Dataset:
        """Build a Dataset: allocate, set geo/CRS, optionally fill no-data, optionally write.

        Single canonical factory for raster construction. Consolidates the
        ``_create_dataset + SetGeoTransform + SetProjection + wrap +
        _set_no_data_value (+ WriteArray)` pattern that `create``,
        `create_from_array`, `dataset_like`, and the per-op factories
        across `Spatial` / `Analysis` all need.

        Args:
            cols: Number of columns.
            rows: Number of rows.
            bands: Number of bands.
            dtype: GDAL data type code.
            geo: Geotransform tuple
                `(top_left_x, pixel_w, row_skew, top_left_y, col_skew,
                pixel_h)`.
            crs: Projection as WKT string.
            no_data_value: No-data value. Scalar (broadcast to all bands)
                or list (one per band). Pass `None` to skip the
                `_set_no_data_value` call so bands have no no-data
                sentinel — the same behaviour the public `create`
                factory exposes.
            driver: GDAL driver type. Default `"MEM"`.
            path: Path for disk-based drivers. `None` keeps the
                dataset in memory.
            access: Access mode for the Dataset wrapper. Default `"write"`.
                Note: MEM driver datasets can be written to regardless
                of access mode since the access flag is enforced at the
                pyramids level, not by GDAL.
            array: Optional numpy array to write into the bands after
                construction. When the array is 2-D it goes to band 1;
                when 3-D, `array[i, :, :]` goes to band `i+1`. The
                caller is responsible for matching `array.shape` to
                `bands x rows x cols` (or `rows x cols` for a
                single-band array). Default `None` (allocate but
                don't write).

        Returns:
            Dataset: A fully configured Dataset object.
        """
        dst = cls._create_dataset(cols, rows, bands, dtype, driver=driver, path=path)
        dst.SetGeoTransform(geo)
        dst.SetProjection(crs)
        dst_obj = cls(dst, access=access)
        if no_data_value is not None:
            dst_obj._set_no_data_value(no_data_value=no_data_value)
        if array is not None:
            if array.ndim == 2:
                dst_obj.raster.GetRasterBand(1).WriteArray(array)
            else:
                for i in range(bands):
                    dst_obj.raster.GetRasterBand(i + 1).WriteArray(array[i, :, :])
            dst_obj._raster.FlushCache()
        return dst_obj

    @classmethod
    def create(
        cls,
        cell_size: int | float,
        rows: int,
        columns: int,
        dtype: str,
        bands: int,
        top_left_corner: tuple,
        epsg: int,
        no_data_value: Any | None = None,
        path: str | Path | None = None,
    ) -> Dataset:
        """Create a new dataset and fill it with the no_data_value.

        The new dataset will have an array filled with the no_data_value.

        Args:
            cell_size (int|float):
                Cell size.
            rows (int):
                Number of rows.
            columns (int):
                Number of columns.
            dtype (str):
                Data type.
            bands (int|None):
                Number of bands to create in the output raster.
            top_left_corner (Tuple):
                Coordinates of the top left corner point.
            epsg (int):
                EPSG number to identify the projection of the coordinates in the created raster.
            no_data_value (float|None):
                No data value.
            path (str, optional):
                Path on disk; if None, the dataset is created in memory. Default is None.

        Returns:
            Dataset: A new dataset
        """
        gdal_dtype = numpy_to_gdal_dtype(dtype)
        crs_wkt = sr_from_epsg(epsg).ExportToWkt()
        geotransform = (
            top_left_corner[0],
            cell_size,
            0,
            top_left_corner[1],
            0,
            -1 * cell_size,
        )
        return cls._build_dataset(
            columns,
            rows,
            bands,
            gdal_dtype,
            geotransform,
            crs_wkt,
            no_data_value,
            path=path,
        )

    @classmethod
    def from_features(
        cls,
        features: FeatureCollection,
        *,
        cell_size: Any | None = None,
        template: Dataset | None = None,
        column_name: str | list[str] | None = None,
    ) -> Dataset:
        """Rasterize a :class:`FeatureCollection` into a new :class:`Dataset`.

        Burns the values from `column_name` (or every attribute
        column if `None`) into a single-band or multi-band raster.
        When a `template` Dataset is given, the output adopts its
        geotransform, cell size, row/column count, and no-data value.
        Otherwise `cell_size` controls the resolution and the extent
        is derived from :attr:`FeatureCollection.total_bounds`.

        Args:
            features (FeatureCollection):
                The vector to rasterize.
            cell_size (int | float | None):
                Cell size for the new raster. Required unless
                `template` is given.
            template (Dataset | None):
                Optional template raster. When supplied, the output
                inherits its geotransform and no-data value.
            column_name (str | list[str] | None):
                Attribute column(s) to burn as band values. `None`
                burns every non-geometry column as a separate band.
                Mixed-dtype column lists are promoted to the smallest
                numpy dtype that holds every selected column without
                lossy cast (numpy result-type rules).

        Returns:
            Dataset: The burned raster.

        Raises:
            ValueError: `cell_size` missing or non-positive,
                `column_name` empty or referencing missing columns.
            TypeError: `template` is not a Dataset, or
                `column_name` is not `str` / `list` / `None`.
            CRSError: `features.epsg` is `None`, or
                `template.epsg!= features.epsg`.
        """
        return rasterize_features(
            features,
            cls,
            cell_size=cell_size,
            template=template,
            column_name=column_name,
        )

    @classmethod
    def from_points(
        cls,
        points: FeatureCollection,
        value_column: str,
        *,
        algorithm: str = "invdist:power=2.0:smoothing=0.0",
        cell_size: float | None = None,
        width: int | None = None,
        height: int | None = None,
        bbox: tuple[float, float, float, float] | None = None,
        epsg: Any | None = None,
    ) -> Dataset:
        """Interpolate scattered point samples onto a regular grid (``gdal.Grid``).

        The GDAL-native equivalent of ``gdal_grid`` — turns an irregular point
        layer (gauge readings, soundings, station observations) into a
        continuous single-band raster. The output extent defaults to the points'
        bounding box and the resolution is set by ``cell_size`` (or an explicit
        ``width``/``height``).

        Args:
            points (FeatureCollection):
                A point :class:`FeatureCollection` carrying ``value_column``.
            value_column (str):
                Numeric attribute column to interpolate (the Z field).
            algorithm (str):
                A ``gdal.Grid`` algorithm string. Defaults to inverse-distance
                weighting (``"invdist:power=2.0:smoothing=0.0"``). Other options
                include ``"invdistnn"``, ``"nearest"``, ``"linear"``, and
                ``"average"``.
            cell_size (float | None):
                Output pixel size in the points' CRS units. Required unless both
                ``width`` and ``height`` are given.
            width (int | None):
                Output width in pixels. Overrides ``cell_size`` on the x axis.
            height (int | None):
                Output height in pixels. Overrides ``cell_size`` on the y axis.
            bbox (tuple[float, float, float, float] | None):
                ``(minx, miny, maxx, maxy)`` output extent. Defaults to the
                points' total bounds.
            epsg (int | None):
                Output EPSG code. Defaults to the points' CRS.

        Returns:
            Dataset: A single-band raster of the interpolated surface.

        Raises:
            ValueError: ``value_column`` missing, output bounds degenerate, or
                neither ``cell_size`` nor ``width``+``height`` provided.
            FailedToSaveError: ``gdal.Grid`` produced no dataset.

        Examples:
            - Inverse-distance interpolate four corner readings onto a 1-degree
              grid and read back the surface shape:
                ```python
                >>> from shapely.geometry import Point
                >>> from geopandas import GeoDataFrame
                >>> from pyramids.feature import FeatureCollection
                >>> from pyramids.dataset import Dataset
                >>> gdf = GeoDataFrame(
                ...     {"rain": [10.0, 20.0, 30.0, 40.0]},
                ...     geometry=[Point(0, 0), Point(10, 0), Point(0, 10), Point(10, 10)],
                ...     crs="EPSG:4326",
                ... )
                >>> ds = Dataset.from_points(FeatureCollection(gdf), "rain", cell_size=1.0)
                >>> (ds.rows, ds.columns, ds.band_count)
                (10, 10, 1)

                ```
            - Use nearest-neighbour with an explicit output size:
                ```python
                >>> from shapely.geometry import Point
                >>> from geopandas import GeoDataFrame
                >>> from pyramids.feature import FeatureCollection
                >>> from pyramids.dataset import Dataset
                >>> gdf = GeoDataFrame(
                ...     {"z": [1.0, 2.0, 3.0, 4.0]},
                ...     geometry=[Point(0, 0), Point(5, 0), Point(0, 5), Point(5, 5)],
                ...     crs="EPSG:4326",
                ... )
                >>> ds = Dataset.from_points(
                ...     FeatureCollection(gdf), "z", algorithm="nearest", width=5, height=5
                ... )
                >>> ds.columns
                5

                ```
        """
        return grid_points(
            points,
            value_column,
            cls,
            algorithm=algorithm,
            cell_size=cell_size,
            width=width,
            height=height,
            bbox=bbox,
            epsg=epsg,
        )

    @classmethod
    def create_from_array(  # type: ignore[override]
        cls,
        arr: np.ndarray,
        top_left_corner: tuple[float, float] | None = None,
        cell_size: int | float | None = None,
        geo: tuple[float, float, float, float, float, float] | None = None,
        epsg: str | int = 4326,
        no_data_value: Any | list = DEFAULT_NO_DATA_VALUE,
        driver_type: str = "MEM",
        path: str | Path | None = None,
    ) -> Dataset:
        """Create a new dataset from an array.

        Args:
            arr (np.ndarray):
                Numpy array.
            top_left_corner (Tuple[float, float], optional):
                The coordinates of the top left corner of the dataset.
            cell_size (int|float, optional):
                Cell size in the same units of the coordinate reference system defined by the `epsg`
                parameter.
            geo (Tuple[float, float, float, float, float, float], optional):
                Geotransform tuple (minimum lon/x, pixel-size, rotation, maximum lat/y, rotation,
                pixel-size).
            epsg (int):
                Integer reference number to the projection (https://epsg.io/).
            no_data_value (Any, optional):
                No data value to mask the cells out of the domain. The default is -9999.
            driver_type (str, optional):
                Driver type ["GTiff", "MEM", "netcdf"]. Default is "MEM".
            path (str, optional):
                Path to save the driver.

        Returns:
            Dataset:
                Dataset object will be returned.
        """
        if geo is None:
            if top_left_corner is None or cell_size is None:
                raise ValueError(
                    "Either top_left_corner and cell_size or geo should be provided."
                )
            geo = (
                top_left_corner[0],
                cell_size,
                0,
                top_left_corner[1],
                0,
                -1 * cell_size,
            )

        if arr.ndim == 2:
            bands = 1
            rows = int(arr.shape[0])
            cols = int(arr.shape[1])
        else:
            bands = arr.shape[0]
            rows = int(arr.shape[1])
            cols = int(arr.shape[2])

        return cls._build_dataset(
            cols,
            rows,
            bands,
            numpy_to_gdal_dtype(arr),
            geo,
            sr_from_epsg(int(epsg)).ExportToWkt(),
            no_data_value,
            driver=driver_type,
            path=path,
            array=arr,
        )

    @classmethod
    def dataset_like(
        cls,
        src: Dataset,
        array: np.ndarray,
        path: str | Path | None = None,
    ) -> Dataset:
        """Create a new dataset like another dataset.

        dataset_like method creates a Dataset from an array like another source dataset. The new dataset
        will have the same `projection`, `coordinates` or the `top left corner` of the original dataset,
        `cell size`, `no_data_velue`, and number of `rows` and `columns`.
        the array and the source dataset should have the same number of columns and rows

        Args:
            src (Dataset):
                source raster to get the spatial information
            array (ndarray):
                data to store in the new dataset.
            path (str, optional):
                path to save the new dataset, if not given, the method will return in-memory dataset.

        Returns:
            Dataset:
                if the `path` is given, the method will save the new raster to the given path, else the
                method will return an in-memory dataset.
        """
        if not isinstance(array, np.ndarray):
            raise TypeError("array should be of type numpy array")

        bands = 1 if array.ndim == 2 else array.shape[0]
        return cls._build_dataset(
            src.columns,
            src.rows,
            bands,
            numpy_to_gdal_dtype(array),
            src.geotransform,
            src.crs,
            src.no_data_value[0],
            path=path,
            array=array,
        )

    @classmethod
    def from_band_files(
        cls,
        files: list[str | Path],
        *,
        band_names: list[str] | None = None,
        align: bool = False,
        no_data_value: Any = _INHERIT_NO_DATA,
        path: str | Path | None = None,
    ) -> Dataset:
        """Stack N single-band rasters into one multi-band :class:`Dataset`.

        Each input file becomes one band, in order, with its name preserved.
        This is the natural target for an Earth Engine default download
        (``<assetSlug>.<bandName>.tif`` — one file per band), a Landsat
        Collection-2 scene (per-band ``.TIF``), or a Sentinel-2 SAFE
        (per-band JP2s).

        By default all inputs must already share the same grid and CRS;
        pass ``align=True`` to resample mismatched rasters onto the first
        file's grid (nearest-neighbour, via :meth:`align`). When the inputs
        have different numpy dtypes the output dtype is the smallest type
        that holds every input without a lossy cast.

        Args:
            files: Paths (or URLs / ``/vsi*`` strings) of the single-band
                rasters to stack. Order is preserved as band order.
            band_names: Explicit band names, one per file. When ``None``
                (default) names are derived from the file names
                (``<slug>.<band>.tif`` → ``<band>``; dotless stems are kept
                whole; duplicates get a ``_<n>`` suffix).
            align: When ``False`` (default), a grid/CRS mismatch among the
                inputs raises :class:`AlignmentError`. When ``True``, every
                input is resampled onto ``files[0]``'s grid first.
            no_data_value: No-data value stamped on the output bands. When
                omitted, it is inherited from the source rasters (a warning
                is issued if they disagree, and the first file's value
                wins; if no source declares one, the output has none). Pass
                an explicit value (including ``None`` for "no no-data
                sentinel") to override.
            path: Output ``.tif`` path. When ``None`` (default) the result
                is an in-memory dataset.

        Returns:
            Dataset: A multi-band dataset with ``band_count == len(files)``
            and ``band_names`` set.

        Raises:
            ValueError: ``files`` is empty, ``band_names`` length does not
                match ``files``, an input has more than one band, or ``path``
                does not end in ``.tif``.
            AlignmentError: ``align=False`` and the inputs do not share a
                grid/CRS.
            CRSError: An input raster has no CRS.

        Examples:
            - Stack three per-band GeoTIFFs into one 3-band dataset; band
              names come from the file names:
                ```python
                >>> import numpy as np
                >>> import tempfile, os
                >>> from pyramids.dataset import Dataset
                >>> d = tempfile.mkdtemp()
                >>> paths = []
                >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3), ("scene.B4.tif", 4)]:
                ...     p = os.path.join(d, name)
                ...     _ = Dataset.create_from_array(
                ...         np.full((4, 5), val, dtype="int16"),
                ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
                ...     ).close()
                ...     paths.append(p)
                >>> ds = Dataset.from_band_files(paths)
                >>> ds.band_count
                3
                >>> ds.band_names
                ['B2', 'B3', 'B4']
                >>> [int(ds.read_array(band=i).flat[0]) for i in range(3)]
                [2, 3, 4]

                ```
            - Override the band names explicitly:
                ```python
                >>> ds = Dataset.from_band_files(paths, band_names=["blue", "green", "red"])
                >>> ds.band_names
                ['blue', 'green', 'red']

                ```
            - Mismatched grids are rejected unless ``align=True``:
                ```python
                >>> odd = os.path.join(d, "odd.tif")
                >>> _ = Dataset.create_from_array(
                ...     np.zeros((8, 9), dtype="int16"),
                ...     top_left_corner=(0, 0), cell_size=0.5, epsg=4326, path=odd,
                ... ).close()
                >>> try:
                ...     Dataset.from_band_files([paths[0], odd])
                ... except AlignmentError as exc:
                ...     print("align=True" in str(exc))
                True
                >>> aligned = Dataset.from_band_files([paths[0], odd], align=True)
                >>> aligned.band_count
                2
                >>> (aligned.rows, aligned.columns) == (
                ...     Dataset.read_file(paths[0]).rows,
                ...     Dataset.read_file(paths[0]).columns,
                ... )
                True

                ```

        See Also:
            - :meth:`align`: resample one dataset onto another's grid.
            - :meth:`create_from_array`: build a dataset from a numpy array.
            - :meth:`pyramids.dataset.DatasetCollection.from_files`: stack
              rasters along *time* instead of along *bands*.
        """
        resolved_paths = [str(_io._parse_path(str(p))) for p in files]
        if not resolved_paths:
            raise ValueError("from_band_files requires at least one file")

        datasets = [cls.read_file(p) for p in resolved_paths]
        for p, ds in zip(resolved_paths, datasets):
            if ds.band_count != 1:
                raise ValueError(
                    f"{p!r} has {ds.band_count} bands; from_band_files expects exactly "
                    "one band per file"
                )
            if not ds.crs:
                raise CRSError(f"{p!r} has no CRS; cannot stack rasters without a CRS")

        template = datasets[0]

        if band_names is not None:
            out_names = list(band_names)
            if len(out_names) != len(resolved_paths):
                raise ValueError(
                    f"band_names has {len(out_names)} entries but {len(resolved_paths)} "
                    "files were given"
                )
        else:
            out_names = _derive_band_names(resolved_paths)

        if no_data_value is _INHERIT_NO_DATA:
            source_nd = [ds.no_data_value[0] for ds in datasets]
            present = [v for v in source_nd if v is not None]
            if not present:
                resolved_nd: Any | None = None
            else:
                resolved_nd = source_nd[0] if source_nd[0] is not None else present[0]
                # NaN != NaN, so plain set() over-reports disagreement for
                # float-NaN sentinels (the GeoTIFF default for float rasters).
                # Normalise NaN to a single key so we only warn when distinct
                # *real* values are present.
                distinct = {
                    "__nan__" if isinstance(v, float) and np.isnan(v) else v
                    for v in present
                }
                if len(distinct) > 1:
                    warnings.warn(
                        f"source rasters disagree on no-data value ({sorted(set(present))}); "
                        f"using {resolved_nd!r}",
                        stacklevel=2,
                    )
        else:
            resolved_nd = no_data_value

        if path is not None and not str(path).lower().endswith(".tif"):
            # TypeError to match ``_create_dataset`` (used by every other
            # factory: ``create_from_array``, ``dataset_like`` etc.) — keeping
            # one convention across the public surface.
            raise TypeError("the path to save the stacked raster should end with .tif")

        if not align:
            for p, ds in zip(resolved_paths[1:], datasets[1:]):
                if not _same_grid(template, ds):
                    raise AlignmentError(
                        f"{p!r} does not share the grid/CRS of {resolved_paths[0]!r}; "
                        "pass align=True to resample mismatched rasters onto the first "
                        "file's grid"
                    )

        # gdal.BuildVRT(separate=True) does not promote dtypes (it truncates the
        # wider bands) — take that low-memory band-by-band path only when the
        # grids already match and every input shares one dtype. Otherwise read
        # the (possibly resampled) band arrays and let numpy pick the common dtype.
        uniform_dtype = len({ds.gdal_dtype[0] for ds in datasets}) == 1

        if align or not uniform_dtype:
            if align:
                # Resample every input onto the first file's grid in the
                # promoted dtype. Dataset.align adopts the alignment source's
                # dtype, so cast the template first to avoid truncating wider
                # inputs (e.g. a float band onto an int template).
                target_np_dtype = np.result_type(
                    *(ds.numpy_dtype[0] for ds in datasets)
                )
                grid_template = cls.create_from_array(
                    template.read_array(band=0).astype(target_np_dtype, copy=False),
                    geo=template.geotransform,
                    epsg=template.epsg,
                    no_data_value=resolved_nd,
                )
                # Dataset.align uses the source's no_data_value to fill the warp
                # destination, so the aligned fringe carries the SOURCE's sentinel.
                # When sources disagree on nodata (resolved_nd is the first one
                # by "first-wins" policy + a UserWarning), bands whose source's
                # sentinel != resolved_nd would still have that sentinel in the
                # fringe, which would no longer match the output band's declared
                # nodata. Remap so what's in the array matches what's declared.
                # Sources that already match the template grid skip the full
                # gdal.Warp round-trip and just astype, which is lossless.
                band_arrays = []
                for ds_i in datasets:
                    if _same_grid(template, ds_i):
                        arr = ds_i.read_array(band=0).astype(
                            target_np_dtype, copy=False
                        )
                    else:
                        arr = ds_i.align(grid_template).read_array(band=0)
                    band_arrays.append(
                        _remap_nodata_to(arr, ds_i.no_data_value[0], resolved_nd)
                    )
            else:
                band_arrays = [ds.read_array(band=0) for ds in datasets]
            stacked = np.stack(band_arrays, axis=0)
            obj = cls._build_dataset(
                template.columns,
                template.rows,
                len(resolved_paths),
                numpy_to_gdal_dtype(stacked),
                template.geotransform,
                template.crs,
                resolved_nd,
                path=path,
                array=stacked,
            )
        else:
            vrt = gdal.BuildVRT("", resolved_paths, separate=True)
            if (
                vrt is None
            ):  # pragma: no cover - BuildVRT returns None only on bad input
                raise AlignmentError(
                    f"gdal.BuildVRT could not stack {resolved_paths!r}"
                )
            if path is not None:
                dst = gdal.GetDriverByName("GTiff").CreateCopy(
                    str(path), vrt, strict=1, options=["COMPRESS=LZW"]
                )
            else:
                dst = gdal.GetDriverByName("MEM").CreateCopy("", vrt, strict=1)
            vrt = None
            # BuildVRT(separate=True) carries each source band's no-data through;
            # honour an explicit override (including ``None`` = drop it).
            for i in range(dst.RasterCount):
                band = dst.GetRasterBand(i + 1)
                if resolved_nd is None:
                    band.DeleteNoDataValue()
                else:
                    band.SetNoDataValue(float(resolved_nd))
            obj = cls(dst, access="write")

        obj.band_names = out_names
        obj._raster.FlushCache()
        return obj

    @classmethod
    def from_archive(
        cls,
        url_or_path: str | Path,
        *,
        kind: str = "auto",
        member_glob: str = "*",
        band_names: list[str] | None = None,
        align: bool = False,
        no_data_value: Any = _INHERIT_NO_DATA,
        path: str | Path | None = None,
    ) -> Dataset:
        """Open every raster in an archive and merge them into one multi-band Dataset.

        Lists the archive's members (locally or over the network — a remote ZIP
        is read via the chained ``/vsizip//vsicurl/…`` path) and hands them to
        :meth:`from_band_files`. For "one Dataset per member" (a temporal stack)
        use :meth:`pyramids.dataset.DatasetCollection.from_archive` instead.

        The archive's file name must carry a recognised extension (``.zip`` /
        ``.tar`` / ``.tar.gz`` / ``.gz``) — GDAL's archive handlers key off the
        extension. An extension-less download URL (e.g. an Earth Engine
        ``getDownloadURL`` ending in ``:getPixels``) must first be fetched and
        saved with a ``.zip`` name (or written to ``/vsimem/<name>.zip`` via
        :func:`osgeo.gdal.FileFromMemBuffer`) before calling this.

        Args:
            url_or_path: Path or URL of the archive (``.zip`` / ``.tar`` /
                ``.tar.gz`` / ``.gz``).
            kind: Archive kind — ``"zip"``, ``"tar"`` (also ``"tar.gz"`` /
                ``"tgz"``), ``"gzip"`` (also ``"gz"``), or ``"auto"`` (default,
                infer from the extension).
            member_glob: :mod:`fnmatch` pattern selecting which members to stack.
                Default ``"*"`` (all top-level members, sorted by name). Pass e.g.
                ``"*.tif"`` for an archive that also ships sidecar files.
            band_names: Explicit per-band names; ``None`` derives them from the
                member names (see :meth:`from_band_files`).
            align: When ``True``, resample mismatched members onto the first
                member's grid instead of raising :class:`AlignmentError`.
            no_data_value: No-data value for the output bands; omitted means
                "inherit from the members".
            path: Output ``.tif`` path; ``None`` keeps the result in memory.

        Returns:
            Dataset: A multi-band dataset, one band per matching archive member.

        Raises:
            FileFormatNotSupportedError: ``kind="auto"`` and the extension is
                not recognised, or the archive could not be listed.
            FileNotFoundError: No member matched ``member_glob``.
            ValueError / AlignmentError / CRSError: As for :meth:`from_band_files`.

        Examples:
            - Stack the raster members of a local ZIP into one multi-band dataset
              (band names come from the member names):
                ```python
                >>> import os, tempfile, zipfile
                >>> import numpy as np
                >>> from pyramids.dataset import Dataset
                >>> d = tempfile.mkdtemp()
                >>> members = []
                >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3)]:
                ...     p = os.path.join(d, name)
                ...     _ = Dataset.create_from_array(
                ...         np.full((4, 5), val, dtype="int16"),
                ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
                ...     ).close()
                ...     members.append(p)
                >>> zip_path = os.path.join(d, "download.zip")
                >>> with zipfile.ZipFile(zip_path, "w") as zf:
                ...     for m in members:
                ...         zf.write(m, arcname=os.path.basename(m))
                >>> ds = Dataset.from_archive(zip_path, member_glob="*.tif")
                >>> ds.band_count
                2
                >>> ds.band_names
                ['B2', 'B3']
                >>> [int(ds.read_array(band=i).flat[0]) for i in range(2)]
                [2, 3]

                ```

        See Also:
            - :meth:`from_band_files`: stack a known list of single-band rasters.
            - :meth:`pyramids.dataset.DatasetCollection.from_archive`: open each
              member as a separate timestep instead of merging them into bands.
        """
        dir_vsi = _io._archive_dir_vsi(url_or_path, kind)
        members = _io._archive_members(dir_vsi, member_glob)
        member_paths = [f"{dir_vsi}/{m}" for m in members]
        return cls.from_band_files(
            member_paths,
            band_names=band_names,
            align=align,
            no_data_value=no_data_value,
            path=path,
        )

is_cog property #

Facade — delegates to :attr:COG.is_cog <pyramids.dataset.engines.COG.is_cog>.

overview_count property #

Facade — delegates to :attr:IO.overview_count <pyramids.dataset.engines.IO.overview_count>.

band_color property writable #

Facade — delegates to :attr:Bands.band_color <pyramids.dataset.engines.Bands.band_color>.

color_table property writable #

Facade — delegates to :attr:Bands.color_table <pyramids.dataset.engines.Bands.color_table>.

access property #

Access mode.

Returns:

Name Type Description
str str

The access mode of the dataset (read_only/write).

raster property #

Base GDAL Dataset (read-only).

rows property #

Number of rows in the raster array.

columns property #

Number of columns in the raster array.

shape property #

Shape (bands, rows, columns).

geotransform property #

WKT projection.

(top left corner X/lon coordinate, cell_size, 0, top left corner y/lat coordinate, 0, -cell_size).

See Also
  • Dataset.top_left_corner: Coordinate of the top left corner of the dataset.
  • Dataset.epsg: EPSG number of the dataset coordinate reference system.

crs property writable #

Coordinate reference system.

Returns:

Name Type Description
str str

the coordinate reference system of the dataset.

See Also

Dataset.set_crs : Set the Coordinate Reference System (CRS). Dataset.to_crs : Reproject the dataset to any projection. Dataset.epsg : epsg number of the dataset coordinate reference system.

band_count property #

Number of bands in the raster.

no_data_value property writable #

Per-band nodata markers as an immutable tuple.

Returns a tuple (not a list) to make the read-only contract explicit — assign through the setter to change values; mutating the returned object never propagates to the underlying state.

block_size property writable #

Block Size.

The block size is the size of the block that the raster is divided into, the block size is used to read and write the raster data in blocks.

See Also
  • Dataset.get_block_arrangement: Get block arrangement to read the dataset in chunks.
  • Dataset.get_tile: Get tiles.
  • Dataset.read_array: Read the data stored in the dataset bands.

scale property writable #

Scale.

The value of the scale is used to convert the pixel values to the real-world values.

offset property writable #

Offset.

The value of the offset is used to convert the pixel values to the real-world values.

top_left_corner property #

Top left corner coordinates.

See Also
  • Dataset.geotransform: Dataset geotransform.

bounds property #

Bounds - the bbox as a geodataframe with a polygon geometry.

See Also
  • Dataset.bbox: Dataset bounding box.

bbox property #

Bound box [xmin, ymin, xmax, ymax].

See Also
  • Dataset.bounds: Dataset bounding polygon.

total_bounds property #

Bounding box [minx, miny, maxx, maxy] as a NumPy array.

introduced this property so that Dataset and :class:pyramids.feature.FeatureCollection expose the same shape (GeoDataFrame.total_bounds is the geopandas name for exactly this array), letting both classes satisfy the :class:pyramids.base.protocols.SpatialObject protocol.

lon property #

Longitude / x cell-centre coordinates.

Uses the geotransform's pixel width (geotransform[1]) so the axis is correct even when cells are not square (pixel width != pixel height). Reads the cached _geotransform (like :attr:top_left_corner) rather than the geotransform property, so subclasses that derive geotransform from lon/lat (e.g. :class:~pyramids.netcdf.NetCDF) do not recurse.

Examples:

  • Read the column-centre longitudes of a small raster:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
    ... )
    >>> ds.lon.tolist()
    [0.25, 0.75, 1.25]
    
See Also
  • Dataset.x: Dataset x coordinates.
  • Dataset.lat: Dataset latitude.

lat property #

Latitude / y cell-centre coordinates.

Uses the geotransform's pixel height (abs(geotransform[5])) rather than :attr:cell_size (which only tracks pixel width), so the axis is correct for non-square cells. Reads the cached _geotransform (like :attr:top_left_corner) rather than the geotransform property, so subclasses that derive geotransform from lon/lat (e.g. :class:~pyramids.netcdf.NetCDF) do not recurse.

Examples:

  • Row-centre latitudes decrease from north to south:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
    ... )
    >>> ds.lat.tolist()
    [-0.25, -0.75]
    
  • With non-square cells the latitude axis uses the pixel height, not the pixel width:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.zeros((2, 3)), geo=(10.0, 2.0, 0.0, 50.0, 0.0, -1.0), epsg=4326,
    ... )
    >>> ds.lat.tolist()
    [49.5, 48.5]
    
See Also
  • Dataset.x: Dataset x coordinates.
  • Dataset.y: Dataset y coordinates.
  • Dataset.lon: Dataset longitude.

x property #

X cell-centre coordinates (alias of :attr:lon).

Examples:

  • x mirrors lon for the same raster:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
    ... )
    >>> ds.x.tolist()
    [0.25, 0.75, 1.25]
    
See Also
  • Dataset.lon: the longitude axis this property aliases.
  • Dataset.y: Dataset y coordinates.

y property #

Y cell-centre coordinates (alias of :attr:lat).

Examples:

  • y mirrors lat for the same raster:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.zeros((2, 3)), top_left_corner=(0.0, 0.0), cell_size=0.5, epsg=4326,
    ... )
    >>> ds.y.tolist()
    [-0.25, -0.75]
    
See Also
  • Dataset.lat: the latitude axis this property aliases.
  • Dataset.x: Dataset x coordinates.

numpy_dtype property #

List of the numpy data Type of each band, the data type is a numpy function.

dtype property #

List of the data Type of each band as strings.

__init__(src, access='read_only') #

init.

Source code in src/pyramids/dataset/dataset.py
def __init__(self, src: gdal.Dataset, access: str = "read_only"):
    """__init__."""
    self.logger = logging.getLogger(__name__)
    super().__init__(src, access=access)

    self._no_data_value = [
        src.GetRasterBand(i).GetNoDataValue() for i in range(1, self.band_count + 1)
    ]
    self._band_names = self._get_band_names()
    self._band_units = [
        src.GetRasterBand(i).GetUnitType() for i in range(1, self.band_count + 1)
    ]

    # Each collaborator owns the bodies of one public-API family
    # (io, spatial, bands, analysis, cell, vectorize, cog) and
    # holds a `weakref.proxy(self)` back-reference. Dataset
    # exposes facade methods that delegate to the collaborator,
    # so both `ds.crop(mask)` and `ds.spatial.crop(mask)` are
    # equivalent.
    self.io = IO(self)
    self.spatial = Spatial(self)
    self.bands = Bands(self)
    self.analysis = Analysis(self)
    self.cell = Cell(self)
    self.vectorize = Vectorize(self)
    self.cog = COG(self)

focal_mean(radius=1, *, chunks=None, band=0) #

Thin forwarder to :func:pyramids.dataset.ops._focal.focal_mean.

Source code in src/pyramids/dataset/dataset.py
def focal_mean(self, radius: int = 1, *, chunks=None, band: int = 0):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_mean`."""
    return focal_mean(self, radius=radius, chunks=chunks, band=band)

focal_std(radius=1, *, chunks=None, band=0) #

Thin forwarder to :func:pyramids.dataset.ops._focal.focal_std.

Source code in src/pyramids/dataset/dataset.py
def focal_std(self, radius: int = 1, *, chunks=None, band: int = 0):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_std`."""
    return focal_std(self, radius=radius, chunks=chunks, band=band)

focal_apply(func, radius=1, *, chunks=None, band=0) #

Thin forwarder to :func:pyramids.dataset.ops._focal.focal_apply.

Source code in src/pyramids/dataset/dataset.py
def focal_apply(self, func, radius: int = 1, *, chunks=None, band: int = 0):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.focal_apply`."""
    return focal_apply(self, func, radius=radius, chunks=chunks, band=band)

slope(*, chunks=None, band=0, units='degrees') #

Thin forwarder to :func:pyramids.dataset.ops._focal.slope.

Source code in src/pyramids/dataset/dataset.py
def slope(self, *, chunks=None, band: int = 0, units: str = "degrees"):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.slope`."""
    return slope(self, chunks=chunks, band=band, units=units)

aspect(*, chunks=None, band=0) #

Thin forwarder to :func:pyramids.dataset.ops._focal.aspect.

Source code in src/pyramids/dataset/dataset.py
def aspect(self, *, chunks=None, band: int = 0):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.aspect`."""
    return aspect(self, chunks=chunks, band=band)

hillshade(*, azimuth=315.0, altitude=45.0, chunks=None, band=0) #

Thin forwarder to :func:pyramids.dataset.ops._focal.hillshade.

Source code in src/pyramids/dataset/dataset.py
def hillshade(
    self,
    *,
    azimuth: float = 315.0,
    altitude: float = 45.0,
    chunks=None,
    band: int = 0,
):
    """Thin forwarder to :func:`pyramids.dataset.ops._focal.hillshade`."""
    return hillshade(
        self,
        azimuth=azimuth,
        altitude=altitude,
        chunks=chunks,
        band=band,
    )

get_cell_coords(*args, **kwargs) #

Facade — delegates to :meth:Cell.get_cell_coords <pyramids.dataset.engines.Cell.get_cell_coords>.

Source code in src/pyramids/dataset/dataset.py
def get_cell_coords(self, *args, **kwargs):
    """Facade — delegates to :meth:`Cell.get_cell_coords <pyramids.dataset.engines.Cell.get_cell_coords>`."""
    return self.cell.get_cell_coords(*args, **kwargs)

get_cell_polygons(*args, **kwargs) #

Facade — delegates to :meth:Cell.get_cell_polygons <pyramids.dataset.engines.Cell.get_cell_polygons>.

Source code in src/pyramids/dataset/dataset.py
def get_cell_polygons(self, *args, **kwargs):
    """Facade — delegates to :meth:`Cell.get_cell_polygons <pyramids.dataset.engines.Cell.get_cell_polygons>`."""
    return self.cell.get_cell_polygons(*args, **kwargs)

get_cell_points(*args, **kwargs) #

Facade — delegates to :meth:Cell.get_cell_points <pyramids.dataset.engines.Cell.get_cell_points>.

Source code in src/pyramids/dataset/dataset.py
def get_cell_points(self, *args, **kwargs):
    """Facade — delegates to :meth:`Cell.get_cell_points <pyramids.dataset.engines.Cell.get_cell_points>`."""
    return self.cell.get_cell_points(*args, **kwargs)

map_to_array_coordinates(*args, **kwargs) #

Facade — delegates to :meth:Cell.map_to_array_coordinates <pyramids.dataset.engines.Cell.map_to_array_coordinates>.

Source code in src/pyramids/dataset/dataset.py
def map_to_array_coordinates(self, *args, **kwargs):
    """Facade — delegates to :meth:`Cell.map_to_array_coordinates <pyramids.dataset.engines.Cell.map_to_array_coordinates>`."""
    return self.cell.map_to_array_coordinates(*args, **kwargs)

array_to_map_coordinates(*args, **kwargs) #

Facade — delegates to :meth:Cell.array_to_map_coordinates <pyramids.dataset.engines.Cell.array_to_map_coordinates>.

Source code in src/pyramids/dataset/dataset.py
def array_to_map_coordinates(self, *args, **kwargs):
    """Facade — delegates to :meth:`Cell.array_to_map_coordinates <pyramids.dataset.engines.Cell.array_to_map_coordinates>`."""
    return self.cell.array_to_map_coordinates(*args, **kwargs)

to_cog(*args, **kwargs) #

Facade — delegates to :meth:COG.to_cog <pyramids.dataset.engines.COG.to_cog>.

Source code in src/pyramids/dataset/dataset.py
def to_cog(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.to_cog <pyramids.dataset.engines.COG.to_cog>`."""
    return self.cog.to_cog(*args, **kwargs)

validate_cog(*args, **kwargs) #

Facade — delegates to :meth:COG.validate_cog <pyramids.dataset.engines.COG.validate_cog>.

Source code in src/pyramids/dataset/dataset.py
def validate_cog(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.validate_cog <pyramids.dataset.engines.COG.validate_cog>`."""
    return self.cog.validate_cog(*args, **kwargs)

cog_info(*args, **kwargs) #

Facade — delegates to :meth:COG.info <pyramids.dataset.engines.COG.info>.

Source code in src/pyramids/dataset/dataset.py
def cog_info(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.info <pyramids.dataset.engines.COG.info>`."""
    return self.cog.info(*args, **kwargs)

to_cog_bytes(*args, **kwargs) #

Facade — delegates to :meth:COG.to_cog_bytes <pyramids.dataset.engines.COG.to_cog_bytes>.

Source code in src/pyramids/dataset/dataset.py
def to_cog_bytes(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.to_cog_bytes <pyramids.dataset.engines.COG.to_cog_bytes>`."""
    return self.cog.to_cog_bytes(*args, **kwargs)

read_part(*args, **kwargs) #

Facade — delegates to :meth:COG.read_part <pyramids.dataset.engines.COG.read_part>.

Source code in src/pyramids/dataset/dataset.py
def read_part(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.read_part <pyramids.dataset.engines.COG.read_part>`."""
    return self.cog.read_part(*args, **kwargs)

preview(*args, **kwargs) #

Facade — delegates to :meth:COG.preview <pyramids.dataset.engines.COG.preview>.

Source code in src/pyramids/dataset/dataset.py
def preview(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.preview <pyramids.dataset.engines.COG.preview>`."""
    return self.cog.preview(*args, **kwargs)

point(*args, **kwargs) #

Facade — delegates to :meth:COG.point <pyramids.dataset.engines.COG.point>.

Source code in src/pyramids/dataset/dataset.py
def point(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.point <pyramids.dataset.engines.COG.point>`."""
    return self.cog.point(*args, **kwargs)

read_tile(*args, **kwargs) #

Facade — delegates to :meth:COG.read_tile <pyramids.dataset.engines.COG.read_tile>.

Source code in src/pyramids/dataset/dataset.py
def read_tile(self, *args, **kwargs):
    """Facade — delegates to :meth:`COG.read_tile <pyramids.dataset.engines.COG.read_tile>`."""
    return self.cog.read_tile(*args, **kwargs)

to_feature_collection(*args, **kwargs) #

Facade — delegates to :meth:Vectorize.to_feature_collection <pyramids.dataset.engines.Vectorize.to_feature_collection>.

Source code in src/pyramids/dataset/dataset.py
def to_feature_collection(self, *args, **kwargs):
    """Facade — delegates to :meth:`Vectorize.to_feature_collection <pyramids.dataset.engines.Vectorize.to_feature_collection>`."""
    return self.vectorize.to_feature_collection(*args, **kwargs)

contour(*args, **kwargs) #

Facade — delegates to :meth:Vectorize.contour <pyramids.dataset.engines.Vectorize.contour>.

Source code in src/pyramids/dataset/dataset.py
def contour(self, *args, **kwargs):
    """Facade — delegates to :meth:`Vectorize.contour <pyramids.dataset.engines.Vectorize.contour>`."""
    return self.vectorize.contour(*args, **kwargs)

translate(*args, **kwargs) #

Facade — delegates to :meth:Vectorize.translate <pyramids.dataset.engines.Vectorize.translate>.

Source code in src/pyramids/dataset/dataset.py
def translate(self, *args, **kwargs):
    """Facade — delegates to :meth:`Vectorize.translate <pyramids.dataset.engines.Vectorize.translate>`."""
    return self.vectorize.translate(*args, **kwargs)

cluster(*args, **kwargs) #

Facade — delegates to :meth:Vectorize.cluster <pyramids.dataset.engines.Vectorize.cluster>.

Source code in src/pyramids/dataset/dataset.py
def cluster(self, *args, **kwargs):
    """Facade — delegates to :meth:`Vectorize.cluster <pyramids.dataset.engines.Vectorize.cluster>`."""
    return self.vectorize.cluster(*args, **kwargs)

cluster2(*args, **kwargs) #

Facade — delegates to :meth:Vectorize.cluster2 <pyramids.dataset.engines.Vectorize.cluster2>.

Source code in src/pyramids/dataset/dataset.py
def cluster2(self, *args, **kwargs):
    """Facade — delegates to :meth:`Vectorize.cluster2 <pyramids.dataset.engines.Vectorize.cluster2>`."""
    return self.vectorize.cluster2(*args, **kwargs)

stats(*args, **kwargs) #

Facade — delegates to :meth:Analysis.stats <pyramids.dataset.engines.Analysis.stats>.

Source code in src/pyramids/dataset/dataset.py
def stats(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.stats <pyramids.dataset.engines.Analysis.stats>`."""
    return self.analysis.stats(*args, **kwargs)

count_domain_cells(*args, **kwargs) #

Facade — delegates to :meth:Analysis.count_domain_cells <pyramids.dataset.engines.Analysis.count_domain_cells>.

Source code in src/pyramids/dataset/dataset.py
def count_domain_cells(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.count_domain_cells <pyramids.dataset.engines.Analysis.count_domain_cells>`."""
    return self.analysis.count_domain_cells(*args, **kwargs)

apply(*args, **kwargs) #

Facade — delegates to :meth:Analysis.apply <pyramids.dataset.engines.Analysis.apply>.

The collaborator returns None for inplace=True so the facade can substitute the actual self (preserving identity); the proxy used by the collaborator's back-reference would otherwise fail result is ds checks.

Source code in src/pyramids/dataset/dataset.py
def apply(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.apply <pyramids.dataset.engines.Analysis.apply>`.

    The collaborator returns `None` for `inplace=True` so the facade
    can substitute the actual `self` (preserving identity); the proxy
    used by the collaborator's back-reference would otherwise fail
    `result is ds` checks.
    """
    result = self.analysis.apply(*args, **kwargs)
    return self if result is None else result

fill(*args, **kwargs) #

Facade — delegates to :meth:Analysis.fill <pyramids.dataset.engines.Analysis.fill>.

The collaborator returns None for inplace=True; see :meth:apply for the rationale.

Source code in src/pyramids/dataset/dataset.py
def fill(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.fill <pyramids.dataset.engines.Analysis.fill>`.

    The collaborator returns `None` for `inplace=True`; see
    :meth:`apply` for the rationale.
    """
    result = self.analysis.fill(*args, **kwargs)
    return self if result is None else result

extract(*args, **kwargs) #

Facade — delegates to :meth:Analysis.extract <pyramids.dataset.engines.Analysis.extract>.

Source code in src/pyramids/dataset/dataset.py
def extract(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.extract <pyramids.dataset.engines.Analysis.extract>`."""
    return self.analysis.extract(*args, **kwargs)

sample(*args, **kwargs) #

Facade — delegates to :meth:Analysis.sample <pyramids.dataset.engines.Analysis.sample>.

Source code in src/pyramids/dataset/dataset.py
def sample(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.sample <pyramids.dataset.engines.Analysis.sample>`."""
    return self.analysis.sample(*args, **kwargs)

sieve(*args, **kwargs) #

Facade — delegates to :meth:Analysis.sieve <pyramids.dataset.engines.Analysis.sieve>.

Source code in src/pyramids/dataset/dataset.py
def sieve(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.sieve <pyramids.dataset.engines.Analysis.sieve>`."""
    return self.analysis.sieve(*args, **kwargs)

proximity(*args, **kwargs) #

Facade — delegates to :meth:Analysis.proximity <pyramids.dataset.engines.Analysis.proximity>.

Source code in src/pyramids/dataset/dataset.py
def proximity(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.proximity <pyramids.dataset.engines.Analysis.proximity>`."""
    return self.analysis.proximity(*args, **kwargs)

overlay(*args, **kwargs) #

Facade — delegates to :meth:Analysis.overlay <pyramids.dataset.engines.Analysis.overlay>.

Source code in src/pyramids/dataset/dataset.py
def overlay(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.overlay <pyramids.dataset.engines.Analysis.overlay>`."""
    return self.analysis.overlay(*args, **kwargs)

get_mask(*args, **kwargs) #

Facade — delegates to :meth:Analysis.get_mask <pyramids.dataset.engines.Analysis.get_mask>.

Source code in src/pyramids/dataset/dataset.py
def get_mask(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.get_mask <pyramids.dataset.engines.Analysis.get_mask>`."""
    return self.analysis.get_mask(*args, **kwargs)

footprint(*args, **kwargs) #

Facade — delegates to :meth:Analysis.footprint <pyramids.dataset.engines.Analysis.footprint>.

Source code in src/pyramids/dataset/dataset.py
def footprint(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.footprint <pyramids.dataset.engines.Analysis.footprint>`."""
    return self.analysis.footprint(*args, **kwargs)

get_histogram(*args, **kwargs) #

Facade — delegates to :meth:Analysis.get_histogram <pyramids.dataset.engines.Analysis.get_histogram>.

Source code in src/pyramids/dataset/dataset.py
def get_histogram(self, *args, **kwargs):
    """Facade — delegates to :meth:`Analysis.get_histogram <pyramids.dataset.engines.Analysis.get_histogram>`."""
    return self.analysis.get_histogram(*args, **kwargs)

plot(band=None, exclude_value=None, rgb=None, surface_reflectance=None, cutoff=None, overview=False, overview_index=0, percentile=None, basemap=None, rgb_options=None, **kwargs) #

Plot the values/overviews of a band.

Facade for :meth:Analysis.plot <pyramids.dataset.engines.Analysis.plot>. Resolves the band index via :meth:_resolve_plot_band (GeoTIFF/Sentinel semantics) and then forwards the call to the generic rendering engine.

When band is None and the dataset looks like an RGB image — i.e. it has at least 3 bands and at least one band has a GDAL ColorInterpretation set — the red band is auto-selected (either from rgb[0] or by resolving the colour tags). Otherwise the facade defaults to band 0. See :meth:Analysis.plot for the full kwargs surface.

The four satellite-imagery kwargs rgb, surface_reflectance, cutoff, and percentile may be grouped under a single rgb_options= dict (recommended) or passed loose at the top level (deprecated; emits :class:DeprecationWarning). When both forms are mixed, the values inside rgb_options win.

Parameters:

Name Type Description Default
band int

Band index to render. When None, the index is resolved by :meth:_resolve_plot_band.

None
exclude_value Any

Pixel value to mask out before plotting. Default is None.

None
rgb list[int]

Deprecated; pass via rgb_options={"rgb": [...]} instead. Three- or four-element list of band indices [r, g, b] (optionally [r, g, b, a]) to render the dataset as a true-colour composite. Only honoured when the dataset has at least 3 bands and at least one band reports a colour interpretation. Default is None.

None
surface_reflectance int

Deprecated; pass via rgb_options={"surface_reflectance": ...}. Surface-reflectance scale factor used to normalise satellite reflectance bands (typically 10000 for Sentinel-2). Default is None.

None
cutoff list

Deprecated; pass via rgb_options={"cutoff": ...}. Per-band clip values used when rendering RGB composites. Default is None.

None
overview bool

If True, plot the overview pyramid level instead of the full-resolution array. Default is False.

False
overview_index int

Index of the overview level to plot when overview=True. Default is 0.

0
percentile int

Deprecated; pass via rgb_options={"percentile": ...}. Percentile used when computing colour-scale limits. Default is None.

None
basemap bool or str

If True, overlay an OpenStreetMap basemap. If a string, use it as the contextily/xyzservices tile-provider name (e.g. "CartoDB.Positron"). Default is None. Requires the [viz] extra.

None
rgb_options dict

Grouped Sentinel-imagery kwargs. Accepted keys: "rgb", "surface_reflectance", "cutoff", "percentile". Recommended over the loose top-level kwargs (which emit :class:DeprecationWarning). Default is None.

None
**kwargs Any

Additional keyword arguments forwarded verbatim to :meth:Analysis.plot. See that method for the full kwargs surface (figure size, color scale, color bar, basemap, etc.).

{}

Returns:

Name Type Description
ArrayGlyph

A cleopatra ArrayGlyph wrapping the rendered figure. Use cleo.fig (the :class:matplotlib.figure.Figure) and cleo.ax (the :class:matplotlib.axes.Axes) to drop down to raw matplotlib.

Examples:

  • Render the first band of a single-band MEM raster. Tagged +SKIP because the call requires the optional [viz] extra (cleopatra + matplotlib):
>>> import numpy as np
>>> from pyramids.dataset import Dataset
>>> arr = np.random.rand(8, 8).astype(np.float32)
>>> ds = Dataset.create_from_array(
...     arr, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
... )
>>> cleo = ds.plot()  # doctest: +SKIP
>>> cleo.fig          # doctest: +SKIP
<Figure size 800x800 with 2 Axes>
  • Override the resolved band index. The facade forwards band=1 straight to the engine without consulting the heuristic:
>>> cleo = ds.plot(band=1)  # doctest: +SKIP
  • Render a multi-band raster as a true-colour composite via the recommended rgb_options= group:
>>> arr3 = np.random.rand(3, 8, 8).astype(np.float32)
>>> rgb_ds = Dataset.create_from_array(
...     arr3, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
... )
>>> cleo = rgb_ds.plot(  # doctest: +SKIP
...     rgb_options={"rgb": [0, 1, 2], "surface_reflectance": 255},
... )
  • The deprecated loose-kwarg form still works but emits a :class:DeprecationWarning. New code should prefer the grouped rgb_options= form shown above:
>>> cleo = rgb_ds.plot(  # doctest: +SKIP
...     rgb=[0, 1, 2], surface_reflectance=255,
... )
DeprecationWarning: Passing `rgb=`, `surface_reflectance=`...
Source code in src/pyramids/dataset/dataset.py
def plot(
    self,
    band: int | None = None,
    exclude_value: Any | None = None,
    rgb: list[int] | None = None,
    surface_reflectance: int | None = None,
    cutoff: list | None = None,
    overview: bool | None = False,
    overview_index: int | None = 0,
    percentile: int | None = None,
    basemap: bool | str | None = None,
    rgb_options: dict | None = None,
    **kwargs: Any,
):
    """Plot the values/overviews of a band.

    Facade for :meth:`Analysis.plot <pyramids.dataset.engines.Analysis.plot>`. Resolves
    the band index via :meth:`_resolve_plot_band` (GeoTIFF/Sentinel semantics) and then
    forwards the call to the generic rendering engine.

    When ``band`` is ``None`` and the dataset looks like an RGB image — i.e. it has
    at least 3 bands **and** at least one band has a GDAL ``ColorInterpretation`` set —
    the red band is auto-selected (either from ``rgb[0]`` or by resolving the colour
    tags). Otherwise the facade defaults to band ``0``. See
    :meth:`Analysis.plot` for the full kwargs surface.

    The four satellite-imagery kwargs ``rgb``, ``surface_reflectance``, ``cutoff``,
    and ``percentile`` may be grouped under a single ``rgb_options=`` dict
    (recommended) or passed loose at the top level (deprecated; emits
    :class:`DeprecationWarning`). When both forms are mixed, the values inside
    ``rgb_options`` win.

    Args:
        band (int, optional):
            Band index to render. When ``None``, the index is resolved by
            :meth:`_resolve_plot_band`.
        exclude_value (Any, optional):
            Pixel value to mask out before plotting. Default is ``None``.
        rgb (list[int], optional):
            **Deprecated**; pass via ``rgb_options={"rgb": [...]}`` instead.
            Three- or four-element list of band indices ``[r, g, b]`` (optionally
            ``[r, g, b, a]``) to render the dataset as a true-colour composite.
            Only honoured when the dataset has at least 3 bands and at least one
            band reports a colour interpretation. Default is ``None``.
        surface_reflectance (int, optional):
            **Deprecated**; pass via ``rgb_options={"surface_reflectance": ...}``.
            Surface-reflectance scale factor used to normalise satellite reflectance
            bands (typically ``10000`` for Sentinel-2). Default is ``None``.
        cutoff (list, optional):
            **Deprecated**; pass via ``rgb_options={"cutoff": ...}``.
            Per-band clip values used when rendering RGB composites. Default is
            ``None``.
        overview (bool, optional):
            If ``True``, plot the overview pyramid level instead of the full-resolution
            array. Default is ``False``.
        overview_index (int, optional):
            Index of the overview level to plot when ``overview=True``. Default is ``0``.
        percentile (int, optional):
            **Deprecated**; pass via ``rgb_options={"percentile": ...}``.
            Percentile used when computing colour-scale limits. Default is ``None``.
        basemap (bool or str, optional):
            If ``True``, overlay an OpenStreetMap basemap. If a string, use it as the
            contextily/xyzservices tile-provider name (e.g. ``"CartoDB.Positron"``).
            Default is ``None``. Requires the ``[viz]`` extra.
        rgb_options (dict, optional):
            Grouped Sentinel-imagery kwargs. Accepted keys:
            ``"rgb"``, ``"surface_reflectance"``, ``"cutoff"``,
            ``"percentile"``. Recommended over the loose top-level
            kwargs (which emit :class:`DeprecationWarning`). Default
            is ``None``.
        **kwargs:
            Additional keyword arguments forwarded verbatim to
            :meth:`Analysis.plot`. See that method for the full kwargs surface
            (figure size, color scale, color bar, basemap, etc.).

    Returns:
        ArrayGlyph: A cleopatra ``ArrayGlyph`` wrapping the rendered figure.
            Use ``cleo.fig`` (the :class:`matplotlib.figure.Figure`) and ``cleo.ax``
            (the :class:`matplotlib.axes.Axes`) to drop down to raw matplotlib.

    Examples:
        - Render the first band of a single-band MEM raster. Tagged ``+SKIP`` because
          the call requires the optional ``[viz]`` extra (cleopatra + matplotlib):

          ```python
          >>> import numpy as np
          >>> from pyramids.dataset import Dataset
          >>> arr = np.random.rand(8, 8).astype(np.float32)
          >>> ds = Dataset.create_from_array(
          ...     arr, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
          ... )
          >>> cleo = ds.plot()  # doctest: +SKIP
          >>> cleo.fig          # doctest: +SKIP
          <Figure size 800x800 with 2 Axes>

          ```

        - Override the resolved band index. The facade forwards ``band=1`` straight
          to the engine without consulting the heuristic:

          ```python
          >>> cleo = ds.plot(band=1)  # doctest: +SKIP

          ```

        - Render a multi-band raster as a true-colour composite via the
          recommended ``rgb_options=`` group:

          ```python
          >>> arr3 = np.random.rand(3, 8, 8).astype(np.float32)
          >>> rgb_ds = Dataset.create_from_array(
          ...     arr3, top_left_corner=(0, 0), cell_size=0.1, epsg=4326,
          ... )
          >>> cleo = rgb_ds.plot(  # doctest: +SKIP
          ...     rgb_options={"rgb": [0, 1, 2], "surface_reflectance": 255},
          ... )

          ```

        - The deprecated loose-kwarg form still works but emits a
          :class:`DeprecationWarning`. New code should prefer the
          grouped ``rgb_options=`` form shown above:

          ```python
          >>> cleo = rgb_ds.plot(  # doctest: +SKIP
          ...     rgb=[0, 1, 2], surface_reflectance=255,
          ... )
          DeprecationWarning: Passing `rgb=`, `surface_reflectance=`...

          ```
    """
    rgb, surface_reflectance, cutoff, percentile = self._merge_rgb_options(
        rgb_options=rgb_options,
        rgb=rgb,
        surface_reflectance=surface_reflectance,
        cutoff=cutoff,
        percentile=percentile,
    )
    resolved_band, resolved_rgb = self._resolve_plot_band(band, rgb)
    return self.analysis.plot(
        band=resolved_band,
        exclude_value=exclude_value,
        rgb=resolved_rgb,
        surface_reflectance=surface_reflectance,
        cutoff=cutoff,
        overview=overview,
        overview_index=overview_index,
        percentile=percentile,
        basemap=basemap,
        **kwargs,
    )

crop(*args, **kwargs) #

Facade — delegates to :meth:Spatial.crop <pyramids.dataset.engines.Spatial.crop>.

Source code in src/pyramids/dataset/dataset.py
def crop(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.crop <pyramids.dataset.engines.Spatial.crop>`."""
    return self.spatial.crop(*args, **kwargs)

to_crs(*args, **kwargs) #

Facade — delegates to :meth:Spatial.to_crs <pyramids.dataset.engines.Spatial.to_crs>.

Source code in src/pyramids/dataset/dataset.py
def to_crs(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.to_crs <pyramids.dataset.engines.Spatial.to_crs>`."""
    return self.spatial.to_crs(*args, **kwargs)

set_crs(*args, **kwargs) #

Facade — delegates to :meth:Spatial.set_crs <pyramids.dataset.engines.Spatial.set_crs>.

Source code in src/pyramids/dataset/dataset.py
def set_crs(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.set_crs <pyramids.dataset.engines.Spatial.set_crs>`."""
    return self.spatial.set_crs(*args, **kwargs)

convert_longitude(*args, **kwargs) #

Facade — delegates to :meth:Spatial.convert_longitude <pyramids.dataset.engines.Spatial.convert_longitude>.

Source code in src/pyramids/dataset/dataset.py
def convert_longitude(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.convert_longitude <pyramids.dataset.engines.Spatial.convert_longitude>`."""
    return self.spatial.convert_longitude(*args, **kwargs)

resample(*args, **kwargs) #

Facade — delegates to :meth:Spatial.resample <pyramids.dataset.engines.Spatial.resample>.

Source code in src/pyramids/dataset/dataset.py
def resample(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.resample <pyramids.dataset.engines.Spatial.resample>`."""
    return self.spatial.resample(*args, **kwargs)

align(*args, **kwargs) #

Facade — delegates to :meth:Spatial.align <pyramids.dataset.engines.Spatial.align>.

Source code in src/pyramids/dataset/dataset.py
def align(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.align <pyramids.dataset.engines.Spatial.align>`."""
    return self.spatial.align(*args, **kwargs)

fill_gaps(*args, **kwargs) #

Facade — delegates to :meth:Spatial.fill_gaps <pyramids.dataset.engines.Spatial.fill_gaps>.

Source code in src/pyramids/dataset/dataset.py
def fill_gaps(self, *args, **kwargs):
    """Facade — delegates to :meth:`Spatial.fill_gaps <pyramids.dataset.engines.Spatial.fill_gaps>`."""
    return self.spatial.fill_gaps(*args, **kwargs)

read_array(*args, **kwargs) #

Facade — delegates to :meth:IO.read_array <pyramids.dataset.engines.IO.read_array>.

Source code in src/pyramids/dataset/dataset.py
def read_array(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.read_array <pyramids.dataset.engines.IO.read_array>`."""
    return self.io.read_array(*args, **kwargs)

write_array(*args, **kwargs) #

Facade — delegates to :meth:IO.write_array <pyramids.dataset.engines.IO.write_array>.

Source code in src/pyramids/dataset/dataset.py
def write_array(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.write_array <pyramids.dataset.engines.IO.write_array>`."""
    return self.io.write_array(*args, **kwargs)

to_file(*args, **kwargs) #

Facade — delegates to :meth:IO.to_file <pyramids.dataset.engines.IO.to_file>.

Source code in src/pyramids/dataset/dataset.py
def to_file(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.to_file <pyramids.dataset.engines.IO.to_file>`."""
    return self.io.to_file(*args, **kwargs)

to_raster(*args, **kwargs) #

Facade — delegates to :meth:IO.to_raster <pyramids.dataset.engines.IO.to_raster>.

Source code in src/pyramids/dataset/dataset.py
def to_raster(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.to_raster <pyramids.dataset.engines.IO.to_raster>`."""
    return self.io.to_raster(*args, **kwargs)

get_block_arrangement(*args, **kwargs) #

Facade — delegates to :meth:IO.get_block_arrangement <pyramids.dataset.engines.IO.get_block_arrangement>.

Source code in src/pyramids/dataset/dataset.py
def get_block_arrangement(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.get_block_arrangement <pyramids.dataset.engines.IO.get_block_arrangement>`."""
    return self.io.get_block_arrangement(*args, **kwargs)

get_tile(*args, **kwargs) #

Facade — delegates to :meth:IO.get_tile <pyramids.dataset.engines.IO.get_tile>.

Source code in src/pyramids/dataset/dataset.py
def get_tile(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.get_tile <pyramids.dataset.engines.IO.get_tile>`."""
    return self.io.get_tile(*args, **kwargs)

map_blocks(*args, **kwargs) #

Facade — delegates to :meth:IO.map_blocks <pyramids.dataset.engines.IO.map_blocks>.

Source code in src/pyramids/dataset/dataset.py
def map_blocks(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.map_blocks <pyramids.dataset.engines.IO.map_blocks>`."""
    return self.io.map_blocks(*args, **kwargs)

to_xyz(*args, **kwargs) #

Facade — delegates to :meth:IO.to_xyz <pyramids.dataset.engines.IO.to_xyz>.

Source code in src/pyramids/dataset/dataset.py
def to_xyz(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.to_xyz <pyramids.dataset.engines.IO.to_xyz>`."""
    return self.io.to_xyz(*args, **kwargs)

create_overviews(*args, **kwargs) #

Facade — delegates to :meth:IO.create_overviews <pyramids.dataset.engines.IO.create_overviews>.

Source code in src/pyramids/dataset/dataset.py
def create_overviews(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.create_overviews <pyramids.dataset.engines.IO.create_overviews>`."""
    return self.io.create_overviews(*args, **kwargs)

recreate_overviews(*args, **kwargs) #

Facade — delegates to :meth:IO.recreate_overviews <pyramids.dataset.engines.IO.recreate_overviews>.

Source code in src/pyramids/dataset/dataset.py
def recreate_overviews(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.recreate_overviews <pyramids.dataset.engines.IO.recreate_overviews>`."""
    return self.io.recreate_overviews(*args, **kwargs)

get_overview(*args, **kwargs) #

Facade — delegates to :meth:IO.get_overview <pyramids.dataset.engines.IO.get_overview>.

Source code in src/pyramids/dataset/dataset.py
def get_overview(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.get_overview <pyramids.dataset.engines.IO.get_overview>`."""
    return self.io.get_overview(*args, **kwargs)

read_overview_array(*args, **kwargs) #

Facade — delegates to :meth:IO.read_overview_array <pyramids.dataset.engines.IO.read_overview_array>.

Source code in src/pyramids/dataset/dataset.py
def read_overview_array(self, *args, **kwargs):
    """Facade — delegates to :meth:`IO.read_overview_array <pyramids.dataset.engines.IO.read_overview_array>`."""
    return self.io.read_overview_array(*args, **kwargs)

get_attribute_table(*args, **kwargs) #

Facade — delegates to :meth:Bands.get_attribute_table <pyramids.dataset.engines.Bands.get_attribute_table>.

Source code in src/pyramids/dataset/dataset.py
def get_attribute_table(self, *args, **kwargs):
    """Facade — delegates to :meth:`Bands.get_attribute_table <pyramids.dataset.engines.Bands.get_attribute_table>`."""
    return self.bands.get_attribute_table(*args, **kwargs)

set_attribute_table(*args, **kwargs) #

Facade — delegates to :meth:Bands.set_attribute_table <pyramids.dataset.engines.Bands.set_attribute_table>.

Source code in src/pyramids/dataset/dataset.py
def set_attribute_table(self, *args, **kwargs):
    """Facade — delegates to :meth:`Bands.set_attribute_table <pyramids.dataset.engines.Bands.set_attribute_table>`."""
    return self.bands.set_attribute_table(*args, **kwargs)

add_band(*args, **kwargs) #

Facade — delegates to :meth:Bands.add_band <pyramids.dataset.engines.Bands.add_band>.

Source code in src/pyramids/dataset/dataset.py
def add_band(self, *args, **kwargs):
    """Facade — delegates to :meth:`Bands.add_band <pyramids.dataset.engines.Bands.add_band>`."""
    return self.bands.add_band(*args, **kwargs)

get_band_by_color(*args, **kwargs) #

Facade — delegates to :meth:Bands.get_band_by_color <pyramids.dataset.engines.Bands.get_band_by_color>.

Source code in src/pyramids/dataset/dataset.py
def get_band_by_color(self, *args, **kwargs):
    """Facade — delegates to :meth:`Bands.get_band_by_color <pyramids.dataset.engines.Bands.get_band_by_color>`."""
    return self.bands.get_band_by_color(*args, **kwargs)

change_no_data_value(*args, **kwargs) #

Facade — concrete override of the abstract :meth:RasterBase.change_no_data_value.

The collaborator returns None for the inplace=True path; the facade substitutes self for identity preservation, matching :meth:apply and :meth:fill.

Source code in src/pyramids/dataset/dataset.py
def change_no_data_value(self, *args, **kwargs):
    """Facade — concrete override of the abstract :meth:`RasterBase.change_no_data_value`.

    The collaborator returns `None` for the `inplace=True` path; the
    facade substitutes `self` for identity preservation, matching
    :meth:`apply` and :meth:`fill`.
    """
    result = self.bands.change_no_data_value(*args, **kwargs)
    return self if result is None else result

zonal_stats(fc, *, stats=('mean',), method='rasterize', band=0) #

Compute zonal statistics of this dataset over a polygon FeatureCollection.

Thin forwarder to :func:pyramids.dataset.ops._zonal.zonal_stats; see that function for the full argument contract.

Parameters:

Name Type Description Default
fc

A :class:pyramids.feature.FeatureCollection of polygons sharing this dataset's CRS.

required
stats

Sequence of stat names ("mean", "sum", "min", "max", "std", "var", "count").

('mean',)
method str

"rasterize" is the only supported value today; an area-weighted "fractional" method is planned.

'rasterize'
band int

Zero-based band index.

0

Returns:

Type Description

pandas.DataFrame: Indexed by fc.index; one column per stat.

Source code in src/pyramids/dataset/dataset.py
def zonal_stats(
    self,
    fc,
    *,
    stats=("mean",),
    method: str = "rasterize",
    band: int = 0,
):
    """Compute zonal statistics of this dataset over a polygon FeatureCollection.

    Thin forwarder to
    :func:`pyramids.dataset.ops._zonal.zonal_stats`; see that
    function for the full argument contract.

    Args:
        fc: A :class:`pyramids.feature.FeatureCollection` of
            polygons sharing this dataset's CRS.
        stats: Sequence of stat names (`"mean"`, `"sum"`,
            `"min"`, `"max"`, `"std"`, `"var"`,
            `"count"`).
        method: `"rasterize"` is the only supported value today;
            an area-weighted `"fractional"` method is planned.
        band: Zero-based band index.

    Returns:
        pandas.DataFrame: Indexed by `fc.index`; one column per stat.
    """
    return _zonal_stats(self, fc, stats=stats, method=method, band=band)

to_zarr(store, *, compute=True, mode='w', chunks=None, storage_options=None) #

Serialise this Dataset to a Zarr store (parallel writes per chunk).

Thin forwarder to :func:pyramids.dataset.ops._zarr.write_dataset_to_zarr; see that function for the full argument contract. Zarr is the only raster output format where pyramids can write in true parallel — each dask chunk becomes an independent Zarr chunk file. Requires the [lazy] optional extra.

Parameters:

Name Type Description Default
store

Target store (path / fsspec URL / zarr.Store).

required
compute bool

True writes immediately; False returns a :class:dask.delayed.Delayed.

True
mode str

Zarr open mode, usually "w" or "a".

'w'
chunks

Chunk spec forwarded to :meth:read_array. None defaults to "auto" via the zarr helper.

None
storage_options dict | None

fsspec options for cloud stores.

None
Source code in src/pyramids/dataset/dataset.py
def to_zarr(
    self,
    store,
    *,
    compute: bool = True,
    mode: str = "w",
    chunks=None,
    storage_options: dict | None = None,
):
    """Serialise this Dataset to a Zarr store (parallel writes per chunk).

    Thin forwarder to
    :func:`pyramids.dataset.ops._zarr.write_dataset_to_zarr`; see
    that function for the full argument contract. Zarr is the
    only raster output format where pyramids can write in true
    parallel — each dask chunk becomes an independent Zarr chunk
    file. Requires the `[lazy]` optional extra.

    Args:
        store: Target store (path / fsspec URL / zarr.Store).
        compute: `True` writes immediately; `False` returns a
            :class:`dask.delayed.Delayed`.
        mode: Zarr open mode, usually `"w"` or `"a"`.
        chunks: Chunk spec forwarded to :meth:`read_array`.
            `None` defaults to `"auto"` via the zarr helper.
        storage_options: fsspec options for cloud stores.
    """
    resolved_chunks = chunks if chunks is not None else "auto"
    return write_dataset_to_zarr(
        self,
        store,
        compute=compute,
        mode=mode,
        chunks=resolved_chunks,
        storage_options=storage_options,
    )

from_zarr(store, *, chunks=None, storage_options=None) classmethod #

Load a pyramids-written Zarr store into a new :class:Dataset.

Thin forwarder to :func:pyramids.dataset.ops._zarr.read_dataset_from_zarr.

Parameters:

Name Type Description Default
store

Input store (path / fsspec URL / zarr.Store).

required
chunks

If non-None, the loaded Dataset is flagged as dask-backed so downstream read_array calls return lazy arrays.

None
storage_options dict | None

fsspec options for cloud stores.

None
Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_zarr(
    cls,
    store,
    *,
    chunks=None,
    storage_options: dict | None = None,
) -> Dataset:
    """Load a pyramids-written Zarr store into a new :class:`Dataset`.

    Thin forwarder to
    :func:`pyramids.dataset.ops._zarr.read_dataset_from_zarr`.

    Args:
        store: Input store (path / fsspec URL / zarr.Store).
        chunks: If non-None, the loaded Dataset is flagged as
            dask-backed so downstream `read_array` calls return
            lazy arrays.
        storage_options: fsspec options for cloud stores.
    """
    return read_dataset_from_zarr(
        store,
        chunks=chunks,
        storage_options=storage_options,
    )

__str__() #

str.

Source code in src/pyramids/dataset/dataset.py
def __str__(self) -> str:
    """__str__."""
    message = f"""
        Top Left Corner: {self.top_left_corner}
        Cell size: {self.cell_size}
        Dimension: {self.rows} * {self.columns}
        EPSG: {self.epsg}
        Number of Bands: {self.band_count}
        Band names: {self.band_names}
        Band colors: {self.band_color}
        Band units: {self.band_units}
        Scale: {self.scale}
        Offset: {self.offset}
        Mask: {self.no_data_value[0]}
        Data type: {self.dtype[0]}
        File: {self.file_name}
    """
    return message

__repr__() #

repr.

Source code in src/pyramids/dataset/dataset.py
def __repr__(self) -> str:
    """__repr__."""
    return str(gdal.Info(self.raster))

convert_units(target, band=None) #

Convert band values to target units, returning a new Dataset.

Unlike the :attr:band_units setter — which only relabels bands — this actually transforms the stored values using a small affine conversion table (see :func:pyramids.dataset.ops.units.convert_array) and records the new unit on the result. No-data cells are preserved unchanged. The output is a new in-memory float64 Dataset; the source is left untouched.

Parameters:

Name Type Description Default
target str

Target unit label (e.g. "celsius", "hPa", "knots").

required
band int | None

Zero-based band index to convert. None (default) converts every band; bands already in target units are passed through unchanged.

None

Returns:

Type Description
Dataset

A new :class:Dataset with converted values and updated

Dataset

attr:band_units.

Raises:

Type Description
ValueError

band is out of range, a converted band has no source unit set, or the (source, target) pair is unsupported.

Examples:

  • Convert a Kelvin raster to Celsius and read the new values:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.array([[273.15, 283.15], [293.15, 303.15]]),
    ...     top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
    ... )
    >>> ds.band_units = ["K"]
    >>> converted = ds.convert_units("celsius")
    >>> converted.read_array().tolist()
    [[0.0, 10.0], [20.0, 30.0]]
    >>> converted.band_units
    ['celsius']
    
  • An unsupported target raises a clear error:
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> ds = Dataset.create_from_array(
    ...     np.array([[273.15]]), top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
    ... )
    >>> ds.band_units = ["K"]
    >>> try:
    ...     ds.convert_units("furlongs")
    ... except ValueError as exc:
    ...     print("No unit conversion" in str(exc))
    True
    
Source code in src/pyramids/dataset/dataset.py
def convert_units(self, target: str, band: int | None = None) -> Dataset:
    """Convert band values to ``target`` units, returning a new Dataset.

    Unlike the :attr:`band_units` setter — which only relabels bands — this
    actually transforms the stored values using a small affine conversion table
    (see :func:`pyramids.dataset.ops.units.convert_array`) and records the new
    unit on the result. No-data cells are preserved unchanged. The output is a
    new in-memory ``float64`` Dataset; the source is left untouched.

    Args:
        target: Target unit label (e.g. ``"celsius"``, ``"hPa"``, ``"knots"``).
        band: Zero-based band index to convert. ``None`` (default) converts every
            band; bands already in ``target`` units are passed through unchanged.

    Returns:
        A new :class:`Dataset` with converted values and updated
        :attr:`band_units`.

    Raises:
        ValueError: ``band`` is out of range, a converted band has no source unit
            set, or the ``(source, target)`` pair is unsupported.

    Examples:
        - Convert a Kelvin raster to Celsius and read the new values:
            ```python
            >>> import numpy as np
            >>> from pyramids.dataset import Dataset
            >>> ds = Dataset.create_from_array(
            ...     np.array([[273.15, 283.15], [293.15, 303.15]]),
            ...     top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
            ... )
            >>> ds.band_units = ["K"]
            >>> converted = ds.convert_units("celsius")
            >>> converted.read_array().tolist()
            [[0.0, 10.0], [20.0, 30.0]]
            >>> converted.band_units
            ['celsius']

            ```
        - An unsupported target raises a clear error:
            ```python
            >>> import numpy as np
            >>> from pyramids.dataset import Dataset
            >>> ds = Dataset.create_from_array(
            ...     np.array([[273.15]]), top_left_corner=(0, 0), cell_size=1.0, epsg=4326,
            ... )
            >>> ds.band_units = ["K"]
            >>> try:
            ...     ds.convert_units("furlongs")
            ... except ValueError as exc:
            ...     print("No unit conversion" in str(exc))
            True

            ```
    """
    if band is not None and not 0 <= band < self.band_count:
        raise ValueError(
            f"band {band} is out of range for a {self.band_count}-band dataset."
        )

    band_indices = range(self.band_count) if band is None else [band]
    source_units = list(self.band_units)
    new_units = list(self.band_units)

    full = self.read_array()
    single_band = self.band_count == 1
    stack = full[np.newaxis, ...] if single_band else full
    out = stack.astype("float64").copy()
    no_data = self.no_data_value

    for index in band_indices:
        layer = out[index]
        nodata_value = no_data[index]
        mask = layer == nodata_value if nodata_value is not None else None
        converted = convert_array(layer, source_units[index], target)
        if mask is not None:
            converted[mask] = nodata_value
        out[index] = converted
        new_units[index] = target

    result_array = out[0] if single_band else out
    result = self.create_from_array(
        result_array,
        geo=self.geotransform,
        epsg=self.epsg,
        no_data_value=list(no_data),
    )
    result.band_units = new_units
    return result

to_stac_item(item_id, *, asset_href, datetime=None, start_datetime=None, end_datetime=None, asset_key='data', asset_media_type=None, with_proj=True, with_raster=True, precision=6) #

Describe this raster as a STAC Item dict (proj + raster extensions).

Thin forwarder to :func:pyramids.dataset._stac.to_stac_item — the inverse of :meth:DatasetCollection.from_stac. Returns a plain STAC-JSON dict (pystac not required); the footprint is this dataset's bounding rectangle reprojected to EPSG:4326.

Parameters:

Name Type Description Default
item_id str

The STAC Item id.

required
asset_href str

Href to record for the single data asset.

required
datetime

Item datetime (datetime.datetime or RFC 3339 string). None with no range defaults to the current UTC time; None with start_datetime/end_datetime writes a null datetime plus the range (the STAC-valid null-datetime form).

None
start_datetime

Optional range start, written to properties.start_datetime.

None
end_datetime

Optional range end, written to properties.end_datetime.

None
asset_key str

Key for the data asset (default "data").

'data'
asset_media_type str | None

Optional media type for the asset.

None
with_proj bool

Populate the proj extension from the grid.

True
with_raster bool

Populate raster:bands (data_type + nodata).

True
precision int

Decimal places for the reprojected footprint.

6

Returns:

Name Type Description
dict dict

The STAC Item (a GeoJSON Feature).

Source code in src/pyramids/dataset/dataset.py
def to_stac_item(
    self,
    item_id: str,
    *,
    asset_href: str,
    datetime=None,
    start_datetime=None,
    end_datetime=None,
    asset_key: str = "data",
    asset_media_type: str | None = None,
    with_proj: bool = True,
    with_raster: bool = True,
    precision: int = 6,
) -> dict:
    """Describe this raster as a STAC Item dict (proj + raster extensions).

    Thin forwarder to :func:`pyramids.dataset._stac.to_stac_item` — the
    inverse of :meth:`DatasetCollection.from_stac`. Returns a plain
    STAC-JSON dict (pystac not required); the footprint is this dataset's
    bounding rectangle reprojected to EPSG:4326.

    Args:
        item_id: The STAC Item id.
        asset_href: Href to record for the single data asset.
        datetime: Item datetime (`datetime.datetime` or RFC 3339 string).
            `None` with no range defaults to the current UTC time; `None`
            with `start_datetime`/`end_datetime` writes a null `datetime`
            plus the range (the STAC-valid null-datetime form).
        start_datetime: Optional range start, written to
            `properties.start_datetime`.
        end_datetime: Optional range end, written to
            `properties.end_datetime`.
        asset_key: Key for the data asset (default `"data"`).
        asset_media_type: Optional media type for the asset.
        with_proj: Populate the `proj` extension from the grid.
        with_raster: Populate `raster:bands` (data_type + nodata).
        precision: Decimal places for the reprojected footprint.

    Returns:
        dict: The STAC Item (a GeoJSON Feature).
    """
    # Imported here to avoid the dataset <-> stac import cycle at load time.
    from pyramids.dataset._stac import to_stac_item

    return to_stac_item(
        self,
        item_id,
        asset_href=asset_href,
        datetime=datetime,
        start_datetime=start_datetime,
        end_datetime=end_datetime,
        asset_key=asset_key,
        asset_media_type=asset_media_type,
        with_proj=with_proj,
        with_raster=with_raster,
        precision=precision,
    )

read_file(path, read_only=True, file_i=0, *, vsi=None) classmethod #

Open a raster from a path, URL, or archive member.

Plain local paths, /vsi* paths, and URL schemes (http(s)://, s3://, gs://, az:// / abfs://, file://) are all accepted — URLs are transparently rewritten to GDAL's virtual filesystem (GDAL fetches via HTTP range requests for http(s)). Compressed archives are detected from the extension; pass vsi= to be explicit about it (e.g. an archive with an unusual extension, or to open a specific member by index).

Parameters:

Name Type Description Default
path str | Path

Path or URL of the file to open.

required
read_only bool

File mode; set to False to open in update mode.

True
file_i int

Which member to open when path is (or is forced to be) a multi-file archive. Default 0.

0
vsi str | None

Treat path as an archive of this kind and open member file_i from inside it: "zip", "tar" (also "tar.gz" / "tgz"), "gzip" (also "gz"), or "auto" (infer from the extension). Default Nonepath is opened directly / extension-sniffed as before. Works for archives reachable locally or over the network (/vsizip//vsicurl/… is built automatically) provided the file name carries a recognised archive extension — GDAL's archive handlers key off the extension, so an extension-less download URL must first be fetched and saved with a .zip name (or written to /vsimem/<name>.zip via :func:osgeo.gdal.FileFromMemBuffer).

None

Returns:

Name Type Description
Dataset Dataset

Opened dataset instance.

See Also
  • :meth:read_array: read the values stored in a dataset band.
  • :meth:from_bytes: open a raster held in memory.
  • :meth:pyramids.dataset.DatasetCollection.from_archive: open every member of an archive as a temporal stack.
Source code in src/pyramids/dataset/dataset.py
@classmethod
def read_file(
    cls,
    path: str | Path,
    read_only=True,
    file_i: int = 0,
    *,
    vsi: str | None = None,
) -> Dataset:
    """Open a raster from a path, URL, or archive member.

    Plain local paths, ``/vsi*`` paths, and URL schemes
    (``http(s)://``, ``s3://``, ``gs://``, ``az://`` / ``abfs://``,
    ``file://``) are all accepted — URLs are transparently rewritten to
    GDAL's virtual filesystem (GDAL fetches via HTTP range requests for
    ``http(s)``). Compressed archives are detected from the extension; pass
    ``vsi=`` to be explicit about it (e.g. an archive with an unusual
    extension, or to open a specific member by index).

    Args:
        path (str | Path):
            Path or URL of the file to open.
        read_only (bool):
            File mode; set to ``False`` to open in update mode.
        file_i (int):
            Which member to open when ``path`` is (or is forced to be) a
            multi-file archive. Default ``0``.
        vsi (str | None):
            Treat ``path`` as an archive of this kind and open member
            ``file_i`` from inside it: ``"zip"``, ``"tar"`` (also
            ``"tar.gz"`` / ``"tgz"``), ``"gzip"`` (also ``"gz"``), or
            ``"auto"`` (infer from the extension). Default ``None`` —
            ``path`` is opened directly / extension-sniffed as before.
            Works for archives reachable locally or over the network
            (``/vsizip//vsicurl/…`` is built automatically) **provided the
            file name carries a recognised archive extension** — GDAL's
            archive handlers key off the extension, so an extension-less
            download URL must first be fetched and saved with a ``.zip``
            name (or written to ``/vsimem/<name>.zip`` via
            :func:`osgeo.gdal.FileFromMemBuffer`).

    Returns:
        Dataset:
            Opened dataset instance.

    See Also:
        - :meth:`read_array`: read the values stored in a dataset band.
        - :meth:`from_bytes`: open a raster held in memory.
        - :meth:`pyramids.dataset.DatasetCollection.from_archive`: open
          *every* member of an archive as a temporal stack.
    """
    src = _io.read_file(path, read_only=read_only, file_i=file_i, vsi=vsi)
    return cls(src, access="read_only" if read_only else "write")

from_bytes(data, *, suffix='.tif', name=None, read_only=True) classmethod #

Open a raster held in memory as a byte string.

Writes data to a temporary GDAL /vsimem/ path and opens it — no on-disk temp file needed. Useful for HTTP response bodies (requests.get(url).content), object-store get_object payloads, database blobs, and test fixtures.

This is not a URL helper. Reading from a URL is already supported by :meth:read_file, which rewrites http(s)://, s3://, gs://, az:// / abfs:// and file:// to GDAL /vsi* paths. Use from_bytes only when you already hold the bytes.

The /vsimem/ entry is removed automatically when the returned :class:Dataset is garbage-collected (:func:weakref.finalize); :meth:close does not need to be called for cleanup. Note that an in-memory dataset is not picklable — :meth:__reduce__ raises TypeError for /vsimem/ paths; call :meth:to_file first to anchor it to disk before sending it to another process.

Parameters:

Name Type Description Default
data bytes | bytearray | memoryview

Raw bytes of a raster (GeoTIFF, ASCII grid, ...). For NetCDF bytes use :meth:pyramids.netcdf.NetCDF.from_bytes.

required
suffix str

Extension hint for GDAL's driver detection. Needed only for headerless formats (e.g. ESRI ASCII grid: suffix=".asc"); GDAL sniffs anything with a magic header regardless. Defaults to ".tif".

'.tif'
name str | None

Optional label recorded as the dataset's :attr:file_name (cosmetic only — it is still an in-memory dataset). Defaults to None.

None
read_only bool

Open the dataset read-only. Defaults to True.

True

Returns:

Name Type Description
Dataset Dataset

The opened in-memory dataset.

Raises:

Type Description
TypeError

data is not a bytes-like object.

ValueError

GDAL could not open the bytes (corrupt / truncated payload, or a headerless format without a suffix hint).

Examples:

  • Open the bytes of a downloaded GeoTIFF and inspect it (the bytes here come from a file, but they could just as well be requests.get(url).content):
    >>> from pathlib import Path
    >>> from pyramids.dataset import Dataset
    >>> data = Path("tests/data/acc4000.tif").read_bytes()
    >>> ds = Dataset.from_bytes(data, name="downloaded-scene")
    >>> ds.band_count
    1
    >>> ds.shape
    (1, 13, 14)
    >>> ds.epsg
    32618
    >>> ds.file_name
    'downloaded-scene'
    >>> ds.close()
    
  • The bytes path yields the same data as opening the file directly:
    >>> from pathlib import Path
    >>> from pyramids.dataset import Dataset
    >>> data = Path("tests/data/acc4000.tif").read_bytes()
    >>> from_bytes = Dataset.from_bytes(data)
    >>> from_file = Dataset.read_file("tests/data/acc4000.tif")
    >>> from_bytes.shape == from_file.shape
    True
    >>> from_bytes.epsg == from_file.epsg
    True
    
  • An in-memory dataset cannot be pickled — anchor it to disk first:
    >>> import pickle
    >>> from pathlib import Path
    >>> from pyramids.dataset import Dataset
    >>> data = Path("tests/data/acc4000.tif").read_bytes()
    >>> try:
    ...     pickle.dumps(Dataset.from_bytes(data))
    ... except TypeError as exc:
    ...     print("to_file" in str(exc))
    True
    
See Also
  • :meth:read_file: open a raster from a path or URL.
  • :meth:to_file: write an in-memory dataset to disk.
  • :meth:pyramids.netcdf.NetCDF.from_bytes: the NetCDF variant.
Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_bytes(
    cls,
    data: bytes | bytearray | memoryview,
    *,
    suffix: str = ".tif",
    name: str | None = None,
    read_only: bool = True,
) -> Dataset:
    """Open a raster held in memory as a byte string.

    Writes ``data`` to a temporary GDAL ``/vsimem/`` path and opens
    it — no on-disk temp file needed. Useful for HTTP response
    bodies (``requests.get(url).content``), object-store
    ``get_object`` payloads, database blobs, and test fixtures.

    This is **not** a URL helper. Reading from a URL is already
    supported by :meth:`read_file`, which rewrites ``http(s)://``,
    ``s3://``, ``gs://``, ``az://`` / ``abfs://`` and ``file://``
    to GDAL ``/vsi*`` paths. Use ``from_bytes`` only when you
    already hold the bytes.

    The ``/vsimem/`` entry is removed automatically when the
    returned :class:`Dataset` is garbage-collected
    (:func:`weakref.finalize`); :meth:`close` does not need to be
    called for cleanup. Note that an in-memory dataset is **not
    picklable** — :meth:`__reduce__` raises ``TypeError`` for
    ``/vsimem/`` paths; call :meth:`to_file` first to anchor it to
    disk before sending it to another process.

    Args:
        data: Raw bytes of a raster (GeoTIFF, ASCII grid, ...). For
            NetCDF bytes use :meth:`pyramids.netcdf.NetCDF.from_bytes`.
        suffix: Extension hint for GDAL's driver detection. Needed
            only for headerless formats (e.g. ESRI ASCII grid:
            ``suffix=".asc"``); GDAL sniffs anything with a magic
            header regardless. Defaults to ``".tif"``.
        name: Optional label recorded as the dataset's
            :attr:`file_name` (cosmetic only — it is still an
            in-memory dataset). Defaults to ``None``.
        read_only: Open the dataset read-only. Defaults to ``True``.

    Returns:
        Dataset: The opened in-memory dataset.

    Raises:
        TypeError: ``data`` is not a bytes-like object.
        ValueError: GDAL could not open the bytes (corrupt /
            truncated payload, or a headerless format without a
            ``suffix`` hint).

    Examples:
        - Open the bytes of a downloaded GeoTIFF and inspect it (the
          bytes here come from a file, but they could just as well be
          ``requests.get(url).content``):
            ```python
            >>> from pathlib import Path
            >>> from pyramids.dataset import Dataset
            >>> data = Path("tests/data/acc4000.tif").read_bytes()
            >>> ds = Dataset.from_bytes(data, name="downloaded-scene")
            >>> ds.band_count
            1
            >>> ds.shape
            (1, 13, 14)
            >>> ds.epsg
            32618
            >>> ds.file_name
            'downloaded-scene'
            >>> ds.close()

            ```
        - The bytes path yields the same data as opening the file directly:
            ```python
            >>> from pathlib import Path
            >>> from pyramids.dataset import Dataset
            >>> data = Path("tests/data/acc4000.tif").read_bytes()
            >>> from_bytes = Dataset.from_bytes(data)
            >>> from_file = Dataset.read_file("tests/data/acc4000.tif")
            >>> from_bytes.shape == from_file.shape
            True
            >>> from_bytes.epsg == from_file.epsg
            True

            ```
        - An in-memory dataset cannot be pickled — anchor it to disk first:
            ```python
            >>> import pickle
            >>> from pathlib import Path
            >>> from pyramids.dataset import Dataset
            >>> data = Path("tests/data/acc4000.tif").read_bytes()
            >>> try:
            ...     pickle.dumps(Dataset.from_bytes(data))
            ... except TypeError as exc:
            ...     print("to_file" in str(exc))
            True

            ```

    See Also:
        - :meth:`read_file`: open a raster from a path or URL.
        - :meth:`to_file`: write an in-memory dataset to disk.
        - :meth:`pyramids.netcdf.NetCDF.from_bytes`: the NetCDF variant.
    """
    src, vsi_path = _io.bytes_to_gdal(data, suffix=suffix, read_only=read_only)
    try:
        obj = cls(src, access="read_only" if read_only else "write")
    except Exception as e:
        src = None
        _io.silent_unlink(vsi_path)
        raise ValueError(
            "could not open the supplied bytes as a raster dataset "
            f"(the data may be corrupt or truncated): {e}"
        ) from e
    obj._vsimem_path = vsi_path
    weakref.finalize(obj, _io.silent_unlink, vsi_path)
    if name is not None:
        obj._file_name = str(name)
    return obj

copy(path=None) #

Deep copy.

Parameters:

Name Type Description Default
path str

Destination path to save the copied dataset. If None is passed, the copied dataset is created in memory.

None

Returns:

Name Type Description
Dataset Dataset

An independent copy. Access mode of the returned

Dataset Dataset
Dataset
  • path is None (in-memory copy) → access mode of the source is preserved. A copy() of a read-only source stays read-only at the pyramids level (the underlying MEM driver is always writable; pyramids enforces the flag itself).
Dataset
  • path is not None (on-disk copy) → "write", because the caller has just created a new file they presumably want to populate.
Source code in src/pyramids/dataset/dataset.py
def copy(self, path: str | Path | None = None) -> Dataset:
    """Deep copy.

    Args:
        path (str, optional):
            Destination path to save the copied dataset. If None
            is passed, the copied dataset is created in memory.

    Returns:
        Dataset: An independent copy. Access mode of the returned
        Dataset:

        * `path is None` (in-memory copy) → access mode of the
          source is preserved. A `copy()` of a read-only source
          stays read-only at the pyramids level (the underlying
          MEM driver is always writable; pyramids enforces the
          flag itself).
        * `path is not None` (on-disk copy) → `"write"`,
          because the caller has just created a new file they
          presumably want to populate.
    """
    if path is None:
        path = ""
        driver = "MEM"
        new_access = self._access
    else:
        driver = "GTiff"
        new_access = "write"

    src = gdal.GetDriverByName(driver).CreateCopy(str(path), self._raster)
    return Dataset(src, access=new_access)

close() #

Close the dataset.

Safe to call multiple times — subsequent calls after the first are no-ops.

Source code in src/pyramids/dataset/dataset.py
def close(self) -> None:
    """Close the dataset.

    Safe to call multiple times — subsequent calls after the first are no-ops.
    """
    if self._raster is not None:
        self._raster.FlushCache()
        self._raster = None

create(cell_size, rows, columns, dtype, bands, top_left_corner, epsg, no_data_value=None, path=None) classmethod #

Create a new dataset and fill it with the no_data_value.

The new dataset will have an array filled with the no_data_value.

Parameters:

Name Type Description Default
cell_size int | float

Cell size.

required
rows int

Number of rows.

required
columns int

Number of columns.

required
dtype str

Data type.

required
bands int | None

Number of bands to create in the output raster.

required
top_left_corner Tuple

Coordinates of the top left corner point.

required
epsg int

EPSG number to identify the projection of the coordinates in the created raster.

required
no_data_value float | None

No data value.

None
path str

Path on disk; if None, the dataset is created in memory. Default is None.

None

Returns:

Name Type Description
Dataset Dataset

A new dataset

Source code in src/pyramids/dataset/dataset.py
@classmethod
def create(
    cls,
    cell_size: int | float,
    rows: int,
    columns: int,
    dtype: str,
    bands: int,
    top_left_corner: tuple,
    epsg: int,
    no_data_value: Any | None = None,
    path: str | Path | None = None,
) -> Dataset:
    """Create a new dataset and fill it with the no_data_value.

    The new dataset will have an array filled with the no_data_value.

    Args:
        cell_size (int|float):
            Cell size.
        rows (int):
            Number of rows.
        columns (int):
            Number of columns.
        dtype (str):
            Data type.
        bands (int|None):
            Number of bands to create in the output raster.
        top_left_corner (Tuple):
            Coordinates of the top left corner point.
        epsg (int):
            EPSG number to identify the projection of the coordinates in the created raster.
        no_data_value (float|None):
            No data value.
        path (str, optional):
            Path on disk; if None, the dataset is created in memory. Default is None.

    Returns:
        Dataset: A new dataset
    """
    gdal_dtype = numpy_to_gdal_dtype(dtype)
    crs_wkt = sr_from_epsg(epsg).ExportToWkt()
    geotransform = (
        top_left_corner[0],
        cell_size,
        0,
        top_left_corner[1],
        0,
        -1 * cell_size,
    )
    return cls._build_dataset(
        columns,
        rows,
        bands,
        gdal_dtype,
        geotransform,
        crs_wkt,
        no_data_value,
        path=path,
    )

from_features(features, *, cell_size=None, template=None, column_name=None) classmethod #

Rasterize a :class:FeatureCollection into a new :class:Dataset.

Burns the values from column_name (or every attribute column if None) into a single-band or multi-band raster. When a template Dataset is given, the output adopts its geotransform, cell size, row/column count, and no-data value. Otherwise cell_size controls the resolution and the extent is derived from :attr:FeatureCollection.total_bounds.

Parameters:

Name Type Description Default
features FeatureCollection

The vector to rasterize.

required
cell_size int | float | None

Cell size for the new raster. Required unless template is given.

None
template Dataset | None

Optional template raster. When supplied, the output inherits its geotransform and no-data value.

None
column_name str | list[str] | None

Attribute column(s) to burn as band values. None burns every non-geometry column as a separate band. Mixed-dtype column lists are promoted to the smallest numpy dtype that holds every selected column without lossy cast (numpy result-type rules).

None

Returns:

Name Type Description
Dataset Dataset

The burned raster.

Raises:

Type Description
ValueError

cell_size missing or non-positive, column_name empty or referencing missing columns.

TypeError

template is not a Dataset, or column_name is not str / list / None.

CRSError

features.epsg is None, or template.epsg!= features.epsg.

Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_features(
    cls,
    features: FeatureCollection,
    *,
    cell_size: Any | None = None,
    template: Dataset | None = None,
    column_name: str | list[str] | None = None,
) -> Dataset:
    """Rasterize a :class:`FeatureCollection` into a new :class:`Dataset`.

    Burns the values from `column_name` (or every attribute
    column if `None`) into a single-band or multi-band raster.
    When a `template` Dataset is given, the output adopts its
    geotransform, cell size, row/column count, and no-data value.
    Otherwise `cell_size` controls the resolution and the extent
    is derived from :attr:`FeatureCollection.total_bounds`.

    Args:
        features (FeatureCollection):
            The vector to rasterize.
        cell_size (int | float | None):
            Cell size for the new raster. Required unless
            `template` is given.
        template (Dataset | None):
            Optional template raster. When supplied, the output
            inherits its geotransform and no-data value.
        column_name (str | list[str] | None):
            Attribute column(s) to burn as band values. `None`
            burns every non-geometry column as a separate band.
            Mixed-dtype column lists are promoted to the smallest
            numpy dtype that holds every selected column without
            lossy cast (numpy result-type rules).

    Returns:
        Dataset: The burned raster.

    Raises:
        ValueError: `cell_size` missing or non-positive,
            `column_name` empty or referencing missing columns.
        TypeError: `template` is not a Dataset, or
            `column_name` is not `str` / `list` / `None`.
        CRSError: `features.epsg` is `None`, or
            `template.epsg!= features.epsg`.
    """
    return rasterize_features(
        features,
        cls,
        cell_size=cell_size,
        template=template,
        column_name=column_name,
    )

from_points(points, value_column, *, algorithm='invdist:power=2.0:smoothing=0.0', cell_size=None, width=None, height=None, bbox=None, epsg=None) classmethod #

Interpolate scattered point samples onto a regular grid (gdal.Grid).

The GDAL-native equivalent of gdal_grid — turns an irregular point layer (gauge readings, soundings, station observations) into a continuous single-band raster. The output extent defaults to the points' bounding box and the resolution is set by cell_size (or an explicit width/height).

Parameters:

Name Type Description Default
points FeatureCollection

A point :class:FeatureCollection carrying value_column.

required
value_column str

Numeric attribute column to interpolate (the Z field).

required
algorithm str

A gdal.Grid algorithm string. Defaults to inverse-distance weighting ("invdist:power=2.0:smoothing=0.0"). Other options include "invdistnn", "nearest", "linear", and "average".

'invdist:power=2.0:smoothing=0.0'
cell_size float | None

Output pixel size in the points' CRS units. Required unless both width and height are given.

None
width int | None

Output width in pixels. Overrides cell_size on the x axis.

None
height int | None

Output height in pixels. Overrides cell_size on the y axis.

None
bbox tuple[float, float, float, float] | None

(minx, miny, maxx, maxy) output extent. Defaults to the points' total bounds.

None
epsg int | None

Output EPSG code. Defaults to the points' CRS.

None

Returns:

Name Type Description
Dataset Dataset

A single-band raster of the interpolated surface.

Raises:

Type Description
ValueError

value_column missing, output bounds degenerate, or neither cell_size nor width+height provided.

FailedToSaveError

gdal.Grid produced no dataset.

Examples:

  • Inverse-distance interpolate four corner readings onto a 1-degree grid and read back the surface shape:
    >>> from shapely.geometry import Point
    >>> from geopandas import GeoDataFrame
    >>> from pyramids.feature import FeatureCollection
    >>> from pyramids.dataset import Dataset
    >>> gdf = GeoDataFrame(
    ...     {"rain": [10.0, 20.0, 30.0, 40.0]},
    ...     geometry=[Point(0, 0), Point(10, 0), Point(0, 10), Point(10, 10)],
    ...     crs="EPSG:4326",
    ... )
    >>> ds = Dataset.from_points(FeatureCollection(gdf), "rain", cell_size=1.0)
    >>> (ds.rows, ds.columns, ds.band_count)
    (10, 10, 1)
    
  • Use nearest-neighbour with an explicit output size:
    >>> from shapely.geometry import Point
    >>> from geopandas import GeoDataFrame
    >>> from pyramids.feature import FeatureCollection
    >>> from pyramids.dataset import Dataset
    >>> gdf = GeoDataFrame(
    ...     {"z": [1.0, 2.0, 3.0, 4.0]},
    ...     geometry=[Point(0, 0), Point(5, 0), Point(0, 5), Point(5, 5)],
    ...     crs="EPSG:4326",
    ... )
    >>> ds = Dataset.from_points(
    ...     FeatureCollection(gdf), "z", algorithm="nearest", width=5, height=5
    ... )
    >>> ds.columns
    5
    
Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_points(
    cls,
    points: FeatureCollection,
    value_column: str,
    *,
    algorithm: str = "invdist:power=2.0:smoothing=0.0",
    cell_size: float | None = None,
    width: int | None = None,
    height: int | None = None,
    bbox: tuple[float, float, float, float] | None = None,
    epsg: Any | None = None,
) -> Dataset:
    """Interpolate scattered point samples onto a regular grid (``gdal.Grid``).

    The GDAL-native equivalent of ``gdal_grid`` — turns an irregular point
    layer (gauge readings, soundings, station observations) into a
    continuous single-band raster. The output extent defaults to the points'
    bounding box and the resolution is set by ``cell_size`` (or an explicit
    ``width``/``height``).

    Args:
        points (FeatureCollection):
            A point :class:`FeatureCollection` carrying ``value_column``.
        value_column (str):
            Numeric attribute column to interpolate (the Z field).
        algorithm (str):
            A ``gdal.Grid`` algorithm string. Defaults to inverse-distance
            weighting (``"invdist:power=2.0:smoothing=0.0"``). Other options
            include ``"invdistnn"``, ``"nearest"``, ``"linear"``, and
            ``"average"``.
        cell_size (float | None):
            Output pixel size in the points' CRS units. Required unless both
            ``width`` and ``height`` are given.
        width (int | None):
            Output width in pixels. Overrides ``cell_size`` on the x axis.
        height (int | None):
            Output height in pixels. Overrides ``cell_size`` on the y axis.
        bbox (tuple[float, float, float, float] | None):
            ``(minx, miny, maxx, maxy)`` output extent. Defaults to the
            points' total bounds.
        epsg (int | None):
            Output EPSG code. Defaults to the points' CRS.

    Returns:
        Dataset: A single-band raster of the interpolated surface.

    Raises:
        ValueError: ``value_column`` missing, output bounds degenerate, or
            neither ``cell_size`` nor ``width``+``height`` provided.
        FailedToSaveError: ``gdal.Grid`` produced no dataset.

    Examples:
        - Inverse-distance interpolate four corner readings onto a 1-degree
          grid and read back the surface shape:
            ```python
            >>> from shapely.geometry import Point
            >>> from geopandas import GeoDataFrame
            >>> from pyramids.feature import FeatureCollection
            >>> from pyramids.dataset import Dataset
            >>> gdf = GeoDataFrame(
            ...     {"rain": [10.0, 20.0, 30.0, 40.0]},
            ...     geometry=[Point(0, 0), Point(10, 0), Point(0, 10), Point(10, 10)],
            ...     crs="EPSG:4326",
            ... )
            >>> ds = Dataset.from_points(FeatureCollection(gdf), "rain", cell_size=1.0)
            >>> (ds.rows, ds.columns, ds.band_count)
            (10, 10, 1)

            ```
        - Use nearest-neighbour with an explicit output size:
            ```python
            >>> from shapely.geometry import Point
            >>> from geopandas import GeoDataFrame
            >>> from pyramids.feature import FeatureCollection
            >>> from pyramids.dataset import Dataset
            >>> gdf = GeoDataFrame(
            ...     {"z": [1.0, 2.0, 3.0, 4.0]},
            ...     geometry=[Point(0, 0), Point(5, 0), Point(0, 5), Point(5, 5)],
            ...     crs="EPSG:4326",
            ... )
            >>> ds = Dataset.from_points(
            ...     FeatureCollection(gdf), "z", algorithm="nearest", width=5, height=5
            ... )
            >>> ds.columns
            5

            ```
    """
    return grid_points(
        points,
        value_column,
        cls,
        algorithm=algorithm,
        cell_size=cell_size,
        width=width,
        height=height,
        bbox=bbox,
        epsg=epsg,
    )

create_from_array(arr, top_left_corner=None, cell_size=None, geo=None, epsg=4326, no_data_value=DEFAULT_NO_DATA_VALUE, driver_type='MEM', path=None) classmethod #

Create a new dataset from an array.

Parameters:

Name Type Description Default
arr ndarray

Numpy array.

required
top_left_corner Tuple[float, float]

The coordinates of the top left corner of the dataset.

None
cell_size int | float

Cell size in the same units of the coordinate reference system defined by the epsg parameter.

None
geo Tuple[float, float, float, float, float, float]

Geotransform tuple (minimum lon/x, pixel-size, rotation, maximum lat/y, rotation, pixel-size).

None
epsg int

Integer reference number to the projection (https://epsg.io/).

4326
no_data_value Any

No data value to mask the cells out of the domain. The default is -9999.

DEFAULT_NO_DATA_VALUE
driver_type str

Driver type ["GTiff", "MEM", "netcdf"]. Default is "MEM".

'MEM'
path str

Path to save the driver.

None

Returns:

Name Type Description
Dataset Dataset

Dataset object will be returned.

Source code in src/pyramids/dataset/dataset.py
@classmethod
def create_from_array(  # type: ignore[override]
    cls,
    arr: np.ndarray,
    top_left_corner: tuple[float, float] | None = None,
    cell_size: int | float | None = None,
    geo: tuple[float, float, float, float, float, float] | None = None,
    epsg: str | int = 4326,
    no_data_value: Any | list = DEFAULT_NO_DATA_VALUE,
    driver_type: str = "MEM",
    path: str | Path | None = None,
) -> Dataset:
    """Create a new dataset from an array.

    Args:
        arr (np.ndarray):
            Numpy array.
        top_left_corner (Tuple[float, float], optional):
            The coordinates of the top left corner of the dataset.
        cell_size (int|float, optional):
            Cell size in the same units of the coordinate reference system defined by the `epsg`
            parameter.
        geo (Tuple[float, float, float, float, float, float], optional):
            Geotransform tuple (minimum lon/x, pixel-size, rotation, maximum lat/y, rotation,
            pixel-size).
        epsg (int):
            Integer reference number to the projection (https://epsg.io/).
        no_data_value (Any, optional):
            No data value to mask the cells out of the domain. The default is -9999.
        driver_type (str, optional):
            Driver type ["GTiff", "MEM", "netcdf"]. Default is "MEM".
        path (str, optional):
            Path to save the driver.

    Returns:
        Dataset:
            Dataset object will be returned.
    """
    if geo is None:
        if top_left_corner is None or cell_size is None:
            raise ValueError(
                "Either top_left_corner and cell_size or geo should be provided."
            )
        geo = (
            top_left_corner[0],
            cell_size,
            0,
            top_left_corner[1],
            0,
            -1 * cell_size,
        )

    if arr.ndim == 2:
        bands = 1
        rows = int(arr.shape[0])
        cols = int(arr.shape[1])
    else:
        bands = arr.shape[0]
        rows = int(arr.shape[1])
        cols = int(arr.shape[2])

    return cls._build_dataset(
        cols,
        rows,
        bands,
        numpy_to_gdal_dtype(arr),
        geo,
        sr_from_epsg(int(epsg)).ExportToWkt(),
        no_data_value,
        driver=driver_type,
        path=path,
        array=arr,
    )

dataset_like(src, array, path=None) classmethod #

Create a new dataset like another dataset.

dataset_like method creates a Dataset from an array like another source dataset. The new dataset will have the same projection, coordinates or the top left corner of the original dataset, cell size, no_data_velue, and number of rows and columns. the array and the source dataset should have the same number of columns and rows

Parameters:

Name Type Description Default
src Dataset

source raster to get the spatial information

required
array ndarray

data to store in the new dataset.

required
path str

path to save the new dataset, if not given, the method will return in-memory dataset.

None

Returns:

Name Type Description
Dataset Dataset

if the path is given, the method will save the new raster to the given path, else the method will return an in-memory dataset.

Source code in src/pyramids/dataset/dataset.py
@classmethod
def dataset_like(
    cls,
    src: Dataset,
    array: np.ndarray,
    path: str | Path | None = None,
) -> Dataset:
    """Create a new dataset like another dataset.

    dataset_like method creates a Dataset from an array like another source dataset. The new dataset
    will have the same `projection`, `coordinates` or the `top left corner` of the original dataset,
    `cell size`, `no_data_velue`, and number of `rows` and `columns`.
    the array and the source dataset should have the same number of columns and rows

    Args:
        src (Dataset):
            source raster to get the spatial information
        array (ndarray):
            data to store in the new dataset.
        path (str, optional):
            path to save the new dataset, if not given, the method will return in-memory dataset.

    Returns:
        Dataset:
            if the `path` is given, the method will save the new raster to the given path, else the
            method will return an in-memory dataset.
    """
    if not isinstance(array, np.ndarray):
        raise TypeError("array should be of type numpy array")

    bands = 1 if array.ndim == 2 else array.shape[0]
    return cls._build_dataset(
        src.columns,
        src.rows,
        bands,
        numpy_to_gdal_dtype(array),
        src.geotransform,
        src.crs,
        src.no_data_value[0],
        path=path,
        array=array,
    )

from_band_files(files, *, band_names=None, align=False, no_data_value=_INHERIT_NO_DATA, path=None) classmethod #

Stack N single-band rasters into one multi-band :class:Dataset.

Each input file becomes one band, in order, with its name preserved. This is the natural target for an Earth Engine default download (<assetSlug>.<bandName>.tif — one file per band), a Landsat Collection-2 scene (per-band .TIF), or a Sentinel-2 SAFE (per-band JP2s).

By default all inputs must already share the same grid and CRS; pass align=True to resample mismatched rasters onto the first file's grid (nearest-neighbour, via :meth:align). When the inputs have different numpy dtypes the output dtype is the smallest type that holds every input without a lossy cast.

Parameters:

Name Type Description Default
files list[str | Path]

Paths (or URLs / /vsi* strings) of the single-band rasters to stack. Order is preserved as band order.

required
band_names list[str] | None

Explicit band names, one per file. When None (default) names are derived from the file names (<slug>.<band>.tif<band>; dotless stems are kept whole; duplicates get a _<n> suffix).

None
align bool

When False (default), a grid/CRS mismatch among the inputs raises :class:AlignmentError. When True, every input is resampled onto files[0]'s grid first.

False
no_data_value Any

No-data value stamped on the output bands. When omitted, it is inherited from the source rasters (a warning is issued if they disagree, and the first file's value wins; if no source declares one, the output has none). Pass an explicit value (including None for "no no-data sentinel") to override.

_INHERIT_NO_DATA
path str | Path | None

Output .tif path. When None (default) the result is an in-memory dataset.

None

Returns:

Name Type Description
Dataset Dataset

A multi-band dataset with band_count == len(files)

Dataset

and band_names set.

Raises:

Type Description
ValueError

files is empty, band_names length does not match files, an input has more than one band, or path does not end in .tif.

AlignmentError

align=False and the inputs do not share a grid/CRS.

CRSError

An input raster has no CRS.

Examples:

  • Stack three per-band GeoTIFFs into one 3-band dataset; band names come from the file names:
    >>> import numpy as np
    >>> import tempfile, os
    >>> from pyramids.dataset import Dataset
    >>> d = tempfile.mkdtemp()
    >>> paths = []
    >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3), ("scene.B4.tif", 4)]:
    ...     p = os.path.join(d, name)
    ...     _ = Dataset.create_from_array(
    ...         np.full((4, 5), val, dtype="int16"),
    ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
    ...     ).close()
    ...     paths.append(p)
    >>> ds = Dataset.from_band_files(paths)
    >>> ds.band_count
    3
    >>> ds.band_names
    ['B2', 'B3', 'B4']
    >>> [int(ds.read_array(band=i).flat[0]) for i in range(3)]
    [2, 3, 4]
    
  • Override the band names explicitly:
    >>> ds = Dataset.from_band_files(paths, band_names=["blue", "green", "red"])
    >>> ds.band_names
    ['blue', 'green', 'red']
    
  • Mismatched grids are rejected unless align=True:
    >>> odd = os.path.join(d, "odd.tif")
    >>> _ = Dataset.create_from_array(
    ...     np.zeros((8, 9), dtype="int16"),
    ...     top_left_corner=(0, 0), cell_size=0.5, epsg=4326, path=odd,
    ... ).close()
    >>> try:
    ...     Dataset.from_band_files([paths[0], odd])
    ... except AlignmentError as exc:
    ...     print("align=True" in str(exc))
    True
    >>> aligned = Dataset.from_band_files([paths[0], odd], align=True)
    >>> aligned.band_count
    2
    >>> (aligned.rows, aligned.columns) == (
    ...     Dataset.read_file(paths[0]).rows,
    ...     Dataset.read_file(paths[0]).columns,
    ... )
    True
    
See Also
  • :meth:align: resample one dataset onto another's grid.
  • :meth:create_from_array: build a dataset from a numpy array.
  • :meth:pyramids.dataset.DatasetCollection.from_files: stack rasters along time instead of along bands.
Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_band_files(
    cls,
    files: list[str | Path],
    *,
    band_names: list[str] | None = None,
    align: bool = False,
    no_data_value: Any = _INHERIT_NO_DATA,
    path: str | Path | None = None,
) -> Dataset:
    """Stack N single-band rasters into one multi-band :class:`Dataset`.

    Each input file becomes one band, in order, with its name preserved.
    This is the natural target for an Earth Engine default download
    (``<assetSlug>.<bandName>.tif`` — one file per band), a Landsat
    Collection-2 scene (per-band ``.TIF``), or a Sentinel-2 SAFE
    (per-band JP2s).

    By default all inputs must already share the same grid and CRS;
    pass ``align=True`` to resample mismatched rasters onto the first
    file's grid (nearest-neighbour, via :meth:`align`). When the inputs
    have different numpy dtypes the output dtype is the smallest type
    that holds every input without a lossy cast.

    Args:
        files: Paths (or URLs / ``/vsi*`` strings) of the single-band
            rasters to stack. Order is preserved as band order.
        band_names: Explicit band names, one per file. When ``None``
            (default) names are derived from the file names
            (``<slug>.<band>.tif`` → ``<band>``; dotless stems are kept
            whole; duplicates get a ``_<n>`` suffix).
        align: When ``False`` (default), a grid/CRS mismatch among the
            inputs raises :class:`AlignmentError`. When ``True``, every
            input is resampled onto ``files[0]``'s grid first.
        no_data_value: No-data value stamped on the output bands. When
            omitted, it is inherited from the source rasters (a warning
            is issued if they disagree, and the first file's value
            wins; if no source declares one, the output has none). Pass
            an explicit value (including ``None`` for "no no-data
            sentinel") to override.
        path: Output ``.tif`` path. When ``None`` (default) the result
            is an in-memory dataset.

    Returns:
        Dataset: A multi-band dataset with ``band_count == len(files)``
        and ``band_names`` set.

    Raises:
        ValueError: ``files`` is empty, ``band_names`` length does not
            match ``files``, an input has more than one band, or ``path``
            does not end in ``.tif``.
        AlignmentError: ``align=False`` and the inputs do not share a
            grid/CRS.
        CRSError: An input raster has no CRS.

    Examples:
        - Stack three per-band GeoTIFFs into one 3-band dataset; band
          names come from the file names:
            ```python
            >>> import numpy as np
            >>> import tempfile, os
            >>> from pyramids.dataset import Dataset
            >>> d = tempfile.mkdtemp()
            >>> paths = []
            >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3), ("scene.B4.tif", 4)]:
            ...     p = os.path.join(d, name)
            ...     _ = Dataset.create_from_array(
            ...         np.full((4, 5), val, dtype="int16"),
            ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
            ...     ).close()
            ...     paths.append(p)
            >>> ds = Dataset.from_band_files(paths)
            >>> ds.band_count
            3
            >>> ds.band_names
            ['B2', 'B3', 'B4']
            >>> [int(ds.read_array(band=i).flat[0]) for i in range(3)]
            [2, 3, 4]

            ```
        - Override the band names explicitly:
            ```python
            >>> ds = Dataset.from_band_files(paths, band_names=["blue", "green", "red"])
            >>> ds.band_names
            ['blue', 'green', 'red']

            ```
        - Mismatched grids are rejected unless ``align=True``:
            ```python
            >>> odd = os.path.join(d, "odd.tif")
            >>> _ = Dataset.create_from_array(
            ...     np.zeros((8, 9), dtype="int16"),
            ...     top_left_corner=(0, 0), cell_size=0.5, epsg=4326, path=odd,
            ... ).close()
            >>> try:
            ...     Dataset.from_band_files([paths[0], odd])
            ... except AlignmentError as exc:
            ...     print("align=True" in str(exc))
            True
            >>> aligned = Dataset.from_band_files([paths[0], odd], align=True)
            >>> aligned.band_count
            2
            >>> (aligned.rows, aligned.columns) == (
            ...     Dataset.read_file(paths[0]).rows,
            ...     Dataset.read_file(paths[0]).columns,
            ... )
            True

            ```

    See Also:
        - :meth:`align`: resample one dataset onto another's grid.
        - :meth:`create_from_array`: build a dataset from a numpy array.
        - :meth:`pyramids.dataset.DatasetCollection.from_files`: stack
          rasters along *time* instead of along *bands*.
    """
    resolved_paths = [str(_io._parse_path(str(p))) for p in files]
    if not resolved_paths:
        raise ValueError("from_band_files requires at least one file")

    datasets = [cls.read_file(p) for p in resolved_paths]
    for p, ds in zip(resolved_paths, datasets):
        if ds.band_count != 1:
            raise ValueError(
                f"{p!r} has {ds.band_count} bands; from_band_files expects exactly "
                "one band per file"
            )
        if not ds.crs:
            raise CRSError(f"{p!r} has no CRS; cannot stack rasters without a CRS")

    template = datasets[0]

    if band_names is not None:
        out_names = list(band_names)
        if len(out_names) != len(resolved_paths):
            raise ValueError(
                f"band_names has {len(out_names)} entries but {len(resolved_paths)} "
                "files were given"
            )
    else:
        out_names = _derive_band_names(resolved_paths)

    if no_data_value is _INHERIT_NO_DATA:
        source_nd = [ds.no_data_value[0] for ds in datasets]
        present = [v for v in source_nd if v is not None]
        if not present:
            resolved_nd: Any | None = None
        else:
            resolved_nd = source_nd[0] if source_nd[0] is not None else present[0]
            # NaN != NaN, so plain set() over-reports disagreement for
            # float-NaN sentinels (the GeoTIFF default for float rasters).
            # Normalise NaN to a single key so we only warn when distinct
            # *real* values are present.
            distinct = {
                "__nan__" if isinstance(v, float) and np.isnan(v) else v
                for v in present
            }
            if len(distinct) > 1:
                warnings.warn(
                    f"source rasters disagree on no-data value ({sorted(set(present))}); "
                    f"using {resolved_nd!r}",
                    stacklevel=2,
                )
    else:
        resolved_nd = no_data_value

    if path is not None and not str(path).lower().endswith(".tif"):
        # TypeError to match ``_create_dataset`` (used by every other
        # factory: ``create_from_array``, ``dataset_like`` etc.) — keeping
        # one convention across the public surface.
        raise TypeError("the path to save the stacked raster should end with .tif")

    if not align:
        for p, ds in zip(resolved_paths[1:], datasets[1:]):
            if not _same_grid(template, ds):
                raise AlignmentError(
                    f"{p!r} does not share the grid/CRS of {resolved_paths[0]!r}; "
                    "pass align=True to resample mismatched rasters onto the first "
                    "file's grid"
                )

    # gdal.BuildVRT(separate=True) does not promote dtypes (it truncates the
    # wider bands) — take that low-memory band-by-band path only when the
    # grids already match and every input shares one dtype. Otherwise read
    # the (possibly resampled) band arrays and let numpy pick the common dtype.
    uniform_dtype = len({ds.gdal_dtype[0] for ds in datasets}) == 1

    if align or not uniform_dtype:
        if align:
            # Resample every input onto the first file's grid in the
            # promoted dtype. Dataset.align adopts the alignment source's
            # dtype, so cast the template first to avoid truncating wider
            # inputs (e.g. a float band onto an int template).
            target_np_dtype = np.result_type(
                *(ds.numpy_dtype[0] for ds in datasets)
            )
            grid_template = cls.create_from_array(
                template.read_array(band=0).astype(target_np_dtype, copy=False),
                geo=template.geotransform,
                epsg=template.epsg,
                no_data_value=resolved_nd,
            )
            # Dataset.align uses the source's no_data_value to fill the warp
            # destination, so the aligned fringe carries the SOURCE's sentinel.
            # When sources disagree on nodata (resolved_nd is the first one
            # by "first-wins" policy + a UserWarning), bands whose source's
            # sentinel != resolved_nd would still have that sentinel in the
            # fringe, which would no longer match the output band's declared
            # nodata. Remap so what's in the array matches what's declared.
            # Sources that already match the template grid skip the full
            # gdal.Warp round-trip and just astype, which is lossless.
            band_arrays = []
            for ds_i in datasets:
                if _same_grid(template, ds_i):
                    arr = ds_i.read_array(band=0).astype(
                        target_np_dtype, copy=False
                    )
                else:
                    arr = ds_i.align(grid_template).read_array(band=0)
                band_arrays.append(
                    _remap_nodata_to(arr, ds_i.no_data_value[0], resolved_nd)
                )
        else:
            band_arrays = [ds.read_array(band=0) for ds in datasets]
        stacked = np.stack(band_arrays, axis=0)
        obj = cls._build_dataset(
            template.columns,
            template.rows,
            len(resolved_paths),
            numpy_to_gdal_dtype(stacked),
            template.geotransform,
            template.crs,
            resolved_nd,
            path=path,
            array=stacked,
        )
    else:
        vrt = gdal.BuildVRT("", resolved_paths, separate=True)
        if (
            vrt is None
        ):  # pragma: no cover - BuildVRT returns None only on bad input
            raise AlignmentError(
                f"gdal.BuildVRT could not stack {resolved_paths!r}"
            )
        if path is not None:
            dst = gdal.GetDriverByName("GTiff").CreateCopy(
                str(path), vrt, strict=1, options=["COMPRESS=LZW"]
            )
        else:
            dst = gdal.GetDriverByName("MEM").CreateCopy("", vrt, strict=1)
        vrt = None
        # BuildVRT(separate=True) carries each source band's no-data through;
        # honour an explicit override (including ``None`` = drop it).
        for i in range(dst.RasterCount):
            band = dst.GetRasterBand(i + 1)
            if resolved_nd is None:
                band.DeleteNoDataValue()
            else:
                band.SetNoDataValue(float(resolved_nd))
        obj = cls(dst, access="write")

    obj.band_names = out_names
    obj._raster.FlushCache()
    return obj

from_archive(url_or_path, *, kind='auto', member_glob='*', band_names=None, align=False, no_data_value=_INHERIT_NO_DATA, path=None) classmethod #

Open every raster in an archive and merge them into one multi-band Dataset.

Lists the archive's members (locally or over the network — a remote ZIP is read via the chained /vsizip//vsicurl/… path) and hands them to :meth:from_band_files. For "one Dataset per member" (a temporal stack) use :meth:pyramids.dataset.DatasetCollection.from_archive instead.

The archive's file name must carry a recognised extension (.zip / .tar / .tar.gz / .gz) — GDAL's archive handlers key off the extension. An extension-less download URL (e.g. an Earth Engine getDownloadURL ending in :getPixels) must first be fetched and saved with a .zip name (or written to /vsimem/<name>.zip via :func:osgeo.gdal.FileFromMemBuffer) before calling this.

Parameters:

Name Type Description Default
url_or_path str | Path

Path or URL of the archive (.zip / .tar / .tar.gz / .gz).

required
kind str

Archive kind — "zip", "tar" (also "tar.gz" / "tgz"), "gzip" (also "gz"), or "auto" (default, infer from the extension).

'auto'
member_glob str

:mod:fnmatch pattern selecting which members to stack. Default "*" (all top-level members, sorted by name). Pass e.g. "*.tif" for an archive that also ships sidecar files.

'*'
band_names list[str] | None

Explicit per-band names; None derives them from the member names (see :meth:from_band_files).

None
align bool

When True, resample mismatched members onto the first member's grid instead of raising :class:AlignmentError.

False
no_data_value Any

No-data value for the output bands; omitted means "inherit from the members".

_INHERIT_NO_DATA
path str | Path | None

Output .tif path; None keeps the result in memory.

None

Returns:

Name Type Description
Dataset Dataset

A multi-band dataset, one band per matching archive member.

Raises:

Type Description
FileFormatNotSupportedError

kind="auto" and the extension is not recognised, or the archive could not be listed.

FileNotFoundError

No member matched member_glob.

ValueError / AlignmentError / CRSError

As for :meth:from_band_files.

Examples:

  • Stack the raster members of a local ZIP into one multi-band dataset (band names come from the member names):
    >>> import os, tempfile, zipfile
    >>> import numpy as np
    >>> from pyramids.dataset import Dataset
    >>> d = tempfile.mkdtemp()
    >>> members = []
    >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3)]:
    ...     p = os.path.join(d, name)
    ...     _ = Dataset.create_from_array(
    ...         np.full((4, 5), val, dtype="int16"),
    ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
    ...     ).close()
    ...     members.append(p)
    >>> zip_path = os.path.join(d, "download.zip")
    >>> with zipfile.ZipFile(zip_path, "w") as zf:
    ...     for m in members:
    ...         zf.write(m, arcname=os.path.basename(m))
    >>> ds = Dataset.from_archive(zip_path, member_glob="*.tif")
    >>> ds.band_count
    2
    >>> ds.band_names
    ['B2', 'B3']
    >>> [int(ds.read_array(band=i).flat[0]) for i in range(2)]
    [2, 3]
    
See Also
  • :meth:from_band_files: stack a known list of single-band rasters.
  • :meth:pyramids.dataset.DatasetCollection.from_archive: open each member as a separate timestep instead of merging them into bands.
Source code in src/pyramids/dataset/dataset.py
@classmethod
def from_archive(
    cls,
    url_or_path: str | Path,
    *,
    kind: str = "auto",
    member_glob: str = "*",
    band_names: list[str] | None = None,
    align: bool = False,
    no_data_value: Any = _INHERIT_NO_DATA,
    path: str | Path | None = None,
) -> Dataset:
    """Open every raster in an archive and merge them into one multi-band Dataset.

    Lists the archive's members (locally or over the network — a remote ZIP
    is read via the chained ``/vsizip//vsicurl/…`` path) and hands them to
    :meth:`from_band_files`. For "one Dataset per member" (a temporal stack)
    use :meth:`pyramids.dataset.DatasetCollection.from_archive` instead.

    The archive's file name must carry a recognised extension (``.zip`` /
    ``.tar`` / ``.tar.gz`` / ``.gz``) — GDAL's archive handlers key off the
    extension. An extension-less download URL (e.g. an Earth Engine
    ``getDownloadURL`` ending in ``:getPixels``) must first be fetched and
    saved with a ``.zip`` name (or written to ``/vsimem/<name>.zip`` via
    :func:`osgeo.gdal.FileFromMemBuffer`) before calling this.

    Args:
        url_or_path: Path or URL of the archive (``.zip`` / ``.tar`` /
            ``.tar.gz`` / ``.gz``).
        kind: Archive kind — ``"zip"``, ``"tar"`` (also ``"tar.gz"`` /
            ``"tgz"``), ``"gzip"`` (also ``"gz"``), or ``"auto"`` (default,
            infer from the extension).
        member_glob: :mod:`fnmatch` pattern selecting which members to stack.
            Default ``"*"`` (all top-level members, sorted by name). Pass e.g.
            ``"*.tif"`` for an archive that also ships sidecar files.
        band_names: Explicit per-band names; ``None`` derives them from the
            member names (see :meth:`from_band_files`).
        align: When ``True``, resample mismatched members onto the first
            member's grid instead of raising :class:`AlignmentError`.
        no_data_value: No-data value for the output bands; omitted means
            "inherit from the members".
        path: Output ``.tif`` path; ``None`` keeps the result in memory.

    Returns:
        Dataset: A multi-band dataset, one band per matching archive member.

    Raises:
        FileFormatNotSupportedError: ``kind="auto"`` and the extension is
            not recognised, or the archive could not be listed.
        FileNotFoundError: No member matched ``member_glob``.
        ValueError / AlignmentError / CRSError: As for :meth:`from_band_files`.

    Examples:
        - Stack the raster members of a local ZIP into one multi-band dataset
          (band names come from the member names):
            ```python
            >>> import os, tempfile, zipfile
            >>> import numpy as np
            >>> from pyramids.dataset import Dataset
            >>> d = tempfile.mkdtemp()
            >>> members = []
            >>> for name, val in [("scene.B2.tif", 2), ("scene.B3.tif", 3)]:
            ...     p = os.path.join(d, name)
            ...     _ = Dataset.create_from_array(
            ...         np.full((4, 5), val, dtype="int16"),
            ...         top_left_corner=(0, 0), cell_size=1.0, epsg=4326, path=p,
            ...     ).close()
            ...     members.append(p)
            >>> zip_path = os.path.join(d, "download.zip")
            >>> with zipfile.ZipFile(zip_path, "w") as zf:
            ...     for m in members:
            ...         zf.write(m, arcname=os.path.basename(m))
            >>> ds = Dataset.from_archive(zip_path, member_glob="*.tif")
            >>> ds.band_count
            2
            >>> ds.band_names
            ['B2', 'B3']
            >>> [int(ds.read_array(band=i).flat[0]) for i in range(2)]
            [2, 3]

            ```

    See Also:
        - :meth:`from_band_files`: stack a known list of single-band rasters.
        - :meth:`pyramids.dataset.DatasetCollection.from_archive`: open each
          member as a separate timestep instead of merging them into bands.
    """
    dir_vsi = _io._archive_dir_vsi(url_or_path, kind)
    members = _io._archive_members(dir_vsi, member_glob)
    member_paths = [f"{dir_vsi}/{m}" for m in members]
    return cls.from_band_files(
        member_paths,
        band_names=band_names,
        align=align,
        no_data_value=no_data_value,
        path=path,
    )