Skip to content

Utilities

align_rasters(input_images, output_images, *, resampling_method='bilinear', tap=False, resolution='highest', window_size=None, debug_logs=False, image_parallel_workers=None, window_parallel_workers=None)

Aligns multiple rasters to a common resolution and grid using specified resampling.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_images (str | List[str], required)

Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Align.tif), ["/input/one.tif", "/input/two.tif"].

required
resampling_method Literal['nearest', 'bilinear', 'cubic']

Resampling method to use; default is "bilinear".

'bilinear'
tap bool

If True, aligns outputs to target-aligned pixels (GDAL's -tap); default is False.

False
resolution Literal['highest', 'average', 'lowest']

Strategy for choosing target resolution; default is "highest".

'highest'
window_size WindowSize

Tiling strategy for windowed alignment.

None
debug_logs DebugLogs

If True, prints debug output.

False
image_parallel_workers ImageParallelWorkers

Parallelization strategy for image-level alignment.

None
window_parallel_workers WindowParallelWorkers

Parallelization strategy for within-image window alignment.

None

Returns:

Type Description
None

None

Source code in spectralmatch/utils.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def align_rasters(
    input_images: Universal.SearchFolderOrListFiles,
    output_images: Universal.CreateInFolderOrListFiles,
    *,
    resampling_method: Literal["nearest", "bilinear", "cubic"] = "bilinear",
    tap: bool = False,
    resolution: Literal["highest", "average", "lowest"] = "highest",
    window_size: Universal.WindowSize = None,
    debug_logs: Universal.DebugLogs = False,
    image_parallel_workers: Universal.ImageParallelWorkers = None,
    window_parallel_workers: Universal.WindowParallelWorkers = None,
) -> None:
    """
    Resample a set of rasters onto one shared pixel size.

    Every input is opened once to collect its pixel size and CRS; a single target
    pixel size is derived from those according to `resolution`, and each image is
    then handed to `_align_process_image` either sequentially or through an executor.

    Args:
        input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Align.tif), ["/input/one.tif", "/input/two.tif"].
        resampling_method (Literal["nearest", "bilinear", "cubic"], optional): Resampling method forwarded to the per-image worker; default is "bilinear".
        tap (bool, optional): If True, aligns outputs to target-aligned pixels (GDAL's -tap); default is False.
        resolution (Literal["highest", "average", "lowest"], optional): How the target pixel size is picked from the inputs: "highest" takes the per-axis minimum pixel size, "lowest" the per-axis maximum, "average" the per-axis mean; default is "highest".
        window_size (Universal.WindowSize, optional): Tiling strategy forwarded to the per-image worker.
        debug_logs (Universal.DebugLogs, optional): If True, prints the raster count and the chosen target resolution.
        image_parallel_workers (Universal.ImageParallelWorkers, optional): When truthy, unpacked into `_get_executor` to run one task per image; otherwise images are processed one after another.
        window_parallel_workers (Universal.WindowParallelWorkers, optional): Forwarded unchanged to the per-image worker.

    Returns:
        None

    Raises:
        ValueError: If the input rasters do not all share the same CRS.
    """

    print("Start align rasters")

    Universal.validate(
        input_images=input_images,
        output_images=output_images,
        debug_logs=debug_logs,
        window_size=window_size,
        image_parallel_workers=image_parallel_workers,
        window_parallel_workers=window_parallel_workers,
    )

    source_paths = _resolve_paths(
        "search", input_images, kwargs={"default_file_pattern": "*.tif"}
    )
    destination_paths = _resolve_paths(
        "create",
        output_images,
        kwargs={
            "paths_or_bases": source_paths,
            "default_file_pattern": "$_Align.tif",
        },
    )
    names = _resolve_paths("name", source_paths)

    if debug_logs:
        print(f"{len(source_paths)} rasters to align")

    # Gather pixel size and CRS of every input in a single pass.
    pixel_sizes = []
    seen_crs = []
    for source_path in source_paths:
        with rasterio.open(source_path) as dataset:
            pixel_sizes.append(dataset.res)
            seen_crs.append(dataset.crs)

    # Alignment does not reproject, so mixed CRSs are rejected up front.
    if len(set(seen_crs)) > 1:
        raise ValueError("Input rasters must have the same CRS.")

    # Rows are images, columns are the two components of `src.res`.
    pixel_size_table = np.array(pixel_sizes)
    candidates = {
        "highest": pixel_size_table.min(axis=0),
        "lowest": pixel_size_table.max(axis=0),
        "average": pixel_size_table.mean(axis=0),
    }
    target_res = candidates[resolution]

    if debug_logs:
        print(f"Target resolution: {target_res}")

    # One positional-argument tuple per image, in the order the worker expects.
    jobs = []
    for source_path, destination_path, name in zip(
        source_paths, destination_paths, names
    ):
        jobs.append(
            (
                name,
                window_parallel_workers,
                source_path,
                destination_path,
                target_res,
                resampling_method,
                tap,
                window_size,
                debug_logs,
            )
        )

    if not image_parallel_workers:
        for job in jobs:
            _align_process_image(*job)
        return

    with _get_executor(*image_parallel_workers) as executor:
        pending = [executor.submit(_align_process_image, *job) for job in jobs]
        for finished in as_completed(pending):
            # Calling result() re-raises any exception from the worker.
            finished.result()

mask_rasters(input_images, output_images, vector_mask=None, window_size=None, debug_logs=False, image_parallel_workers=None, window_parallel_workers=None, include_touched_pixels=False, custom_nodata_value=None)

Applies a vector-based mask to one or more rasters, with support for image- and window-level parallelism.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_images (str | List[str], required)

Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Clip.tif), ["/input/one.tif", "/input/two.tif"].

required
vector_mask VectorMask

Tuple ("include"/"exclude", vector path, optional field name) or None.

None
window_size WindowSize

Strategy for tiling rasters during processing.

None
debug_logs DebugLogs

If True, prints debug information.

False
image_parallel_workers ImageParallelWorkers

Strategy for parallelizing image-level masking.

None
window_parallel_workers WindowParallelWorkers

Strategy for parallelizing masking within windows.

None
include_touched_pixels bool

If True, includes pixels touched by mask geometry edges; default is False.

False

Returns:

Type Description
None

None

Source code in spectralmatch/utils.py
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
def mask_rasters(
    input_images: Universal.SearchFolderOrListFiles,
    output_images: Universal.CreateInFolderOrListFiles,
    vector_mask: Universal.VectorMask = None,
    window_size: Universal.WindowSize = None,
    debug_logs: Universal.DebugLogs = False,
    image_parallel_workers: Universal.ImageParallelWorkers = None,
    window_parallel_workers: Universal.WindowParallelWorkers = None,
    include_touched_pixels: bool = False,
    custom_nodata_value: Universal.CustomNodataValue = None,
) -> None:
    """
    Applies a vector-based mask to one or more rasters, with support for image- and window-level parallelism.

    Inputs and outputs are paired by position, so two inputs that share a file
    name (for example from different folders) are each processed with their own
    output path.

    Args:
        input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Clip.tif), ["/input/one.tif", "/input/two.tif"].
        vector_mask (Universal.VectorMask, optional): Tuple ("include"/"exclude", vector path, optional field name) or None.
        window_size (Universal.WindowSize, optional): Strategy for tiling rasters during processing.
        debug_logs (Universal.DebugLogs, optional): If True, prints debug information.
        image_parallel_workers (Universal.ImageParallelWorkers, optional): Strategy for parallelizing image-level masking.
        window_parallel_workers (Universal.WindowParallelWorkers, optional): Strategy for parallelizing masking within windows.
        include_touched_pixels (bool, optional): If True, includes pixels touched by mask geometry edges; default is False.
        custom_nodata_value (Universal.CustomNodataValue, optional): Validated here and forwarded unchanged to the per-image worker; presumably the nodata value used for masked pixels (confirm against `_mask_raster_process_image`).

    Returns:
        None

    Raises:
        ValueError: If fewer output paths than input paths were resolved.
    """
    # Validate parameters
    Universal.validate(
        input_images=input_images,
        output_images=output_images,
        debug_logs=debug_logs,
        vector_mask=vector_mask,
        window_size=window_size,
        image_parallel_workers=image_parallel_workers,
        window_parallel_workers=window_parallel_workers,
        custom_nodata_value=custom_nodata_value,
    )

    input_image_paths = _resolve_paths(
        "search", input_images, kwargs={"default_file_pattern": "*.tif"}
    )
    output_image_paths = _resolve_paths(
        "create",
        output_images,
        kwargs={
            "paths_or_bases": input_image_paths,
            "default_file_pattern": "$_Clip.tif",
        },
    )

    if debug_logs:
        print(f"Input images: {input_image_paths}")
        print(f"Output images: {output_image_paths}")

    # Every input needs its own output; fail loudly instead of silently
    # dropping the trailing inputs when pairing by position.
    if len(output_image_paths) < len(input_image_paths):
        raise ValueError(
            f"Resolved {len(output_image_paths)} output paths for "
            f"{len(input_image_paths)} input rasters."
        )

    image_parallel, image_backend, image_max_workers = _resolve_parallel_config(
        image_parallel_workers
    )
    window_parallel, window_backend, window_max_workers = _resolve_parallel_config(
        window_parallel_workers
    )

    # Pair paths positionally rather than through a name-keyed dict: a dict
    # collapses inputs that share a base name, so one file would be skipped and
    # another processed twice.
    parallel_args = [
        (
            window_parallel,
            window_max_workers,
            window_backend,
            in_path,
            out_path,
            os.path.splitext(os.path.basename(in_path))[0],  # image name
            vector_mask,
            window_size,
            debug_logs,
            include_touched_pixels,
            custom_nodata_value,
        )
        for in_path, out_path in zip(input_image_paths, output_image_paths)
    ]

    if image_parallel:
        with _get_executor(image_backend, image_max_workers) as executor:
            futures = [
                executor.submit(_mask_raster_process_image, *args)
                for args in parallel_args
            ]
            for future in as_completed(futures):
                # result() re-raises any exception raised inside the worker.
                future.result()
    else:
        for args in parallel_args:
            _mask_raster_process_image(*args)

merge_rasters(input_images, output_image_path, *, image_parallel_workers=None, window_parallel_workers=None, window_size=None, debug_logs=False, output_dtype=None, custom_nodata_value=None)

Merges multiple rasters into a single mosaic aligned to the union extent and minimum resolution.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_image_path str

Path to save the merged output raster.

required
image_parallel_workers ImageParallelWorkers

Strategy for parallelizing image-level merging.

None
window_parallel_workers WindowParallelWorkers

Strategy for within-image window merging.

None
window_size WindowSize

Tiling strategy for processing windows.

None
debug_logs DebugLogs

If True, prints debug output.

False
output_dtype CustomOutputDtype

Output data type; defaults to input type if None.

None
custom_nodata_value CustomNodataValue

NoData value to use; defaults to first input's value.

None

Returns:

Type Description
None

None

Source code in spectralmatch/utils.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
def merge_rasters(
    input_images: Universal.SearchFolderOrListFiles,
    output_image_path: str,
    *,
    image_parallel_workers: Universal.ImageParallelWorkers = None,
    window_parallel_workers: Universal.WindowParallelWorkers = None,
    window_size: Universal.WindowSize = None,
    debug_logs: Universal.DebugLogs = False,
    output_dtype: Universal.CustomOutputDtype = None,
    custom_nodata_value: Universal.CustomNodataValue = None,
) -> None:
    """
    Merges multiple rasters into a single mosaic aligned to the union extent and minimum resolution.

    The output grid spans the union of all input bounds and uses the smallest
    pixel size found among the inputs on each axis. Each (image, band, window)
    combination is processed by `_merge_raster_process_window`; the returned
    buffer is written into the output wherever it differs from the nodata value,
    so where inputs overlap the window written last wins. With image-level
    parallelism that order follows task completion.

    Args:
        input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_image_path (str): Path to save the merged output raster.
        image_parallel_workers (Universal.ImageParallelWorkers, optional): Strategy for parallelizing image-level merging.
        window_parallel_workers (Universal.WindowParallelWorkers, optional): Strategy for within-image window merging. It is validated here but not otherwise used by this function.
        window_size (Universal.WindowSize, optional): Tiling strategy for processing windows.
        debug_logs (Universal.DebugLogs, optional): If True, prints debug output.
        output_dtype (Universal.CustomOutputDtype, optional): Output data type; defaults to the first input's first-band dtype if not given.
        custom_nodata_value (Universal.CustomNodataValue, optional): NoData value to use, including 0; defaults to the first input's value when None.

    Returns:
        None
    """

    print("Start raster merging")

    # Validate parameters
    Universal.validate(
        input_images=input_images,
        debug_logs=debug_logs,
        custom_nodata_value=custom_nodata_value,
        output_dtype=output_dtype,
        window_size=window_size,
        image_parallel_workers=image_parallel_workers,
        window_parallel_workers=window_parallel_workers,
    )

    input_image_paths = _resolve_paths(
        "search", input_images, kwargs={"default_file_pattern": "*.tif"}
    )

    _check_raster_requirements(
        input_image_paths,
        debug_logs,
        check_geotransform=True,
        check_crs=True,
        check_bands=True,
        check_nodata=True,
    )

    image_names = [os.path.splitext(os.path.basename(p))[0] for p in input_image_paths]
    # NOTE(review): keyed by base name, so inputs sharing a file name collapse
    # to one entry; the worker context is keyed the same way.
    input_image_path_pairs = dict(zip(image_names, input_image_paths))

    # Compare against None explicitly: 0 is a valid (and common) nodata value
    # and must not fall through to the first input's nodata.
    if custom_nodata_value is not None:
        nodata_value = custom_nodata_value
    else:
        with rasterio.open(input_image_paths[0]) as src:
            nodata_value = src.nodata

    # NaN never compares equal to itself, so a NaN nodata needs np.isnan to be
    # recognised; np.isnan(None) raises TypeError, which means "not NaN" here.
    try:
        nodata_is_nan = bool(np.isnan(nodata_value))
    except TypeError:
        nodata_is_nan = False

    if debug_logs:
        print(f"Merging {len(input_image_paths)} rasters into: {output_image_path}")

    # Compute union bounds and min resolution
    bounds_list = []
    res_x_list, res_y_list = [], []
    for path in input_image_paths:
        with rasterio.open(path) as src:
            bounds_list.append(src.bounds)
            res_x, res_y = src.res
            res_x_list.append(res_x)
            res_y_list.append(res_y)

    minx = min(b.left for b in bounds_list)
    miny = min(b.bottom for b in bounds_list)
    maxx = max(b.right for b in bounds_list)
    maxy = max(b.top for b in bounds_list)

    res_x = min(res_x_list)
    res_y = min(res_y_list)

    # Round up so the grid fully covers the union extent.
    width = int(np.ceil((maxx - minx) / res_x))
    height = int(np.ceil((maxy - miny) / res_y))

    # Origin at the top-left corner; negative y scale because rows run downwards.
    transform = Affine.translation(minx, maxy) * Affine.scale(res_x, -res_y)

    # Output metadata starts from the first input and overrides the grid fields.
    with rasterio.open(input_image_paths[0]) as src:
        meta = src.meta.copy()
        meta.update(
            {
                "height": height,
                "width": width,
                "transform": transform,
                "count": src.count,
                "dtype": output_dtype or src.dtypes[0],
                "nodata": nodata_value,
            }
        )

    # Determine multiprocessing and worker count
    image_parallel, image_backend, image_max_workers = _resolve_parallel_config(
        image_parallel_workers
    )

    parallel_args = []
    for name, path in input_image_path_pairs.items():
        with rasterio.open(path) as src:
            # Windows depend only on the dataset and window_size, so resolve
            # them once per image and reuse them for every band.
            windows = list(_resolve_windows(src, window_size))
            for band in range(src.count):
                for window in windows:
                    parallel_args.append(
                        (
                            window,
                            band,
                            meta["dtype"],
                            debug_logs,
                            name,
                            src.transform,
                            transform,
                            nodata_value,
                        )
                    )

    # Pre-initialize WorkerContext
    init_worker = WorkerContext.init
    init_args_map = {
        name: ("raster", path) for name, path in input_image_path_pairs.items()
    }

    def _write_window(dst, band, dst_window, buf):
        """Blend one processed window into the output, keeping existing data under nodata pixels."""
        if buf is None:
            return
        existing = dst.read(band + 1, window=dst_window)
        valid_mask = ~np.isnan(buf) if nodata_is_nan else buf != nodata_value
        dst.write(np.where(valid_mask, buf, existing), band + 1, window=dst_window)

    # Create the output file first, then reopen it for in-place read/write.
    with rasterio.open(output_image_path, "w", **meta):
        pass
    with rasterio.open(output_image_path, "r+", **meta) as dst:
        if image_parallel:
            with _get_executor(
                image_backend,
                image_max_workers,
                initializer=init_worker,
                initargs=(init_args_map,),
            ) as executor:
                futures = [
                    executor.submit(_merge_raster_process_window, *args)
                    for args in parallel_args
                ]
                # Writes happen in this process only; workers just return buffers.
                for future in as_completed(futures):
                    band, dst_window, buf = future.result()
                    _write_window(dst, band, dst_window, buf)
        else:
            WorkerContext.init(init_args_map)
            try:
                for args in parallel_args:
                    band, dst_window, buf = _merge_raster_process_window(*args)
                    _write_window(dst, band, dst_window, buf)
            finally:
                # Release the worker context even if a window fails.
                WorkerContext.close()
    if debug_logs:
        print("Raster merging complete")

merge_vectors(input_vectors, merged_vector_path, method, debug_logs=False, create_name_attribute=None)

Merge multiple vector files using the specified geometric method.

Parameters:

Name Type Description Default
input_vectors str | List[str]

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.gpkg", "/input/folder" (assumes *.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].

required
merged_vector_path str

Path to save merged output.

required
method Literal['intersection', 'union', 'keep']

Merge strategy.

required
debug_logs bool

If True, print debug information.

False
create_name_attribute Optional[Tuple[str, str]]

Tuple of (field_name, separator) to add a combined name field.

None

Returns:

Type Description
None

None

Source code in spectralmatch/utils.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def merge_vectors(
    input_vectors: Universal.SearchFolderOrListFiles,
    merged_vector_path: str,
    method: Literal["intersection", "union", "keep"],
    debug_logs: bool = False,
    create_name_attribute: Optional[Tuple[str, str]] = None,
) -> None:
    """
    Merge multiple vector files using the specified geometric method.

    Args:
        input_vectors (str | List[str]): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.gpkg", "/input/folder" (assumes *.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
        merged_vector_path (str): Path to save merged output. Its parent folder is created if missing.
        method (Literal["intersection", "union", "keep"]): Merge strategy.
            "keep" concatenates all features and, when a name field is requested, tags each feature with its own source file name.
            "union" concatenates all features; a requested name field holds the joined names of all inputs.
            "intersection" overlays the inputs pairwise with `how="intersection"`, dropping attribute columns of later inputs that clash with earlier ones; a requested name field holds the joined names of all inputs.
        debug_logs (bool): If True, print debug information. It is not currently used by this function.
        create_name_attribute (Optional[Tuple[str, str]]): Tuple of (field_name, separator) to add a combined name field.

    Returns:
        None

    Raises:
        ValueError: If `method` is not supported or no input vectors are found.
    """
    print("Start vector merge")

    # Reject a bad method before touching the filesystem or reading any data.
    if method not in ("intersection", "union", "keep"):
        raise ValueError(f"Unsupported merge method: {method}")

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    output_dir = os.path.dirname(merged_vector_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    input_vector_paths = _resolve_paths(
        "search", input_vectors, kwargs={"default_file_pattern": "*.gpkg"}
    )
    if not input_vector_paths:
        raise ValueError("No input vectors found to merge.")

    # Read every file exactly once; all three methods work from these frames.
    input_names = [
        os.path.splitext(os.path.basename(path))[0] for path in input_vector_paths
    ]
    geoms = [gpd.read_file(path) for path in input_vector_paths]

    field_name = None
    combined_name_value = None
    if create_name_attribute:
        field_name, sep = create_name_attribute
        combined_name_value = sep.join(input_names)

    if method == "keep":
        # Each feature keeps the name of the file it came from.
        if field_name:
            for gdf, name in zip(geoms, input_names):
                gdf[field_name] = name
        merged = gpd.GeoDataFrame(
            pd.concat(geoms, ignore_index=True), crs=geoms[0].crs
        )

    elif method == "union":
        merged = gpd.GeoDataFrame(pd.concat(geoms, ignore_index=True), crs=geoms[0].crs)
        if create_name_attribute:
            merged[field_name] = combined_name_value

    else:  # method == "intersection"
        merged = geoms[0]
        for gdf in geoms[1:]:
            # Drop clashing attribute columns so the overlay keeps the
            # left-hand values instead of producing suffixed duplicates.
            shared_cols = set(merged.columns).intersection(gdf.columns) - {"geometry"}
            gdf = gdf.drop(columns=shared_cols)
            merged = gpd.overlay(merged, gdf, how="intersection", keep_geom_type=True)
        if create_name_attribute:
            merged[field_name] = combined_name_value

    merged.to_file(merged_vector_path)