Skip to content

Create Masks

band_math(input_images, output_images, threshold_math, *, debug_logs=False, custom_nodata_value=None, cache=None, image_threads=None, io_threads=None, tile_threads=None, window_size=None, custom_output_dtype=None, calculation_dtype='float32')

Applies a thresholding operation to input raster images using a mathematical expression string.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_images (str | List[str], required)

Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Threshold.tif), ["/input/one.tif", "/input/two.tif"].

required
threshold_math str

A muparser‑compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, … and you can use C‑style comparison and logical operators (such as >, <, >=, <=, ==, !=, &&, ||, !) along with parentheses and ternary ? : constructs—for example, ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile‑based thresholds are supported: write 5%B1 to substitute the 5th‑percentile value of band 1 into the expression before evaluation.

required
debug_logs bool

If True, prints debug messages.

False
custom_nodata_value float | int | None

Override the dataset's nodata value.

None
cache float | None

Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.

None
image_threads Literal["cpu"] | int | None

Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.

None
io_threads Literal["cpu"] | int | None

Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.

None
tile_threads Literal["cpu"] | int | None

Parallelism for tile-level operations. "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.

None
window_size WindowSize

Window tiling strategy for memory-efficient processing.

None
custom_output_dtype CustomOutputDtype

Output data type override.

None
calculation_dtype CalculationDtype

Internal computation dtype.

'float32'

Returns:

Type Description
List[str]

List[str]: Paths to the thresholded output images.

Source code in spectralmatch/mask/mask.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
def band_math(
    input_images: Universal.SearchFolderOrListFiles,
    output_images: Universal.CreateInFolderOrListFiles,
    threshold_math: str,
    *,
    debug_logs: Universal.DebugLogs = False,
    custom_nodata_value: Universal.CustomNodataValue = None,
    cache: Universal.Cache = None,
    image_threads: Universal.Threads = None,
    io_threads: Universal.Threads = None,
    tile_threads: Universal.Threads = None,
    window_size: Universal.WindowSize = None,
    custom_output_dtype: Universal.CustomOutputDtype = None,
    calculation_dtype: Universal.CalculationDtype = "float32",
) -> List[str]:
    """
    Applies a thresholding operation to input raster images using a mathematical expression string.

    The nodata value and the output dtype are resolved once from the first input image and then used for every image.

    Args:
        input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Threshold.tif), ["/input/one.tif", "/input/two.tif"].
        threshold_math (str): A muparser-compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, ... and you can use C-style comparison and logical operators (such as >, <, >=, <=, ==, !=, &&, ||, !) along with parentheses and ternary ? : constructs, for example ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile-based thresholds are supported: write 5%B1 to substitute the 5th-percentile value of band 1 into the expression before evaluation.
        debug_logs (bool, optional): If True, prints debug messages.
        custom_nodata_value (float | int | None, optional): Override the dataset's nodata value.
        cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
        image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
        io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
        tile_threads (Literal["cpu"] | int | None): Parallelism for tile-level operations. "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
        window_size (WindowSize, optional): Window tiling strategy for memory-efficient processing.
        custom_output_dtype (CustomOutputDtype, optional): Output data type override.
        calculation_dtype (CalculationDtype, optional): Internal computation dtype.

    Returns:
        List[str]: Paths to the thresholded output images.

    Raises:
        ValueError: If the resolved input, output and name lists do not have the same length.
        Exception: Propagates any error from processing individual images.
    """

    # `cache` is validated together with the other options, mirroring
    # process_raster_values_to_vector_polygons, before it is handed to GDAL below.
    Universal.validate(
        input_images=input_images,
        output_images=output_images,
        debug_logs=debug_logs,
        custom_nodata_value=custom_nodata_value,
        cache=cache,
        image_threads=image_threads,
        io_threads=io_threads,
        tile_threads=tile_threads,
        window_size=window_size,
        custom_output_dtype=custom_output_dtype,
        calculation_dtype=calculation_dtype,
    )

    # Set gdal params
    _set_gdal_cache(cache, debug_logs)
    _set_gdal_workers(io_threads, debug_logs)

    # Resolve input and output paths based on the provided specification (folder, glob or list).
    input_image_paths = _resolve_paths(
        "search", input_images, kwargs={"default_file_pattern": "*.tif"}
    )
    output_image_paths = _resolve_paths(
        "create",
        output_images,
        kwargs={
            "paths_or_bases": input_image_paths,
            "default_file_pattern": "$_Threshold.tif",
        },
    )
    image_names = _resolve_paths("name", input_image_paths)

    if debug_logs:
        print(f"Input images: {input_image_paths}")
        print(f"Output images: {output_image_paths}")

    # Dtype and nodata are taken from the first image and shared by all images.
    nodata_value = _resolve_nodata_value(input_image_paths[0], custom_nodata_value)
    output_dtype = _resolve_gdal_dtype(custom_output_dtype, input_image_paths[0], debug_logs)

    # Check raster requirements
    _check_raster_requirements(
        input_image_paths,
        debug_logs,
        check_geotransform=True,
        check_crs=True,
        check_bands=True,
        check_nodata=True,
    )

    # Determine multiprocessing and worker count
    image_backend = "thread"  # "thread" or "process"
    image_threads_on, image_thread_workers = _resolve_parallel_config(image_threads)
    tile_thread_on, tile_thread_workers = _resolve_parallel_config(tile_threads)

    # Process each image
    if debug_logs:
        print("Thresholding and saving results for:")

    # strict=True: a length mismatch between the path lists raises here, before
    # any image is processed, instead of silently dropping the unmatched images.
    image_args = [
        (
            in_path,
            out_path,
            name,
            threshold_math,
            debug_logs,
            nodata_value,
            tile_thread_on,
            tile_thread_workers,
            window_size,
            output_dtype,
            calculation_dtype,
        )
        for in_path, out_path, name in zip(
            input_image_paths, output_image_paths, image_names, strict=True
        )
    ]

    if image_threads_on:
        with _get_executor(image_backend, image_thread_workers) as executor:
            futures = [executor.submit(_band_math_process_image, *args) for args in image_args]
            for future in as_completed(futures):
                # result() re-raises any exception raised inside the worker.
                future.result()
    else:
        for args in image_args:
            _band_math_process_image(*args)

    return output_image_paths

create_cloud_mask_with_omnicloudmask(input_images, output_images, red_band_index, green_band_index, nir_band_index, *, down_sample_m=None, debug_logs=False, image_threads=None, omnicloud_kwargs=None)

Generates cloud masks from input images using OmniCloudMask, with optional downsampling and multiprocessing.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_images (str | List[str], required)

Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_CloudClip.tif), ["/input/one.tif", "/input/two.tif"].

required
red_band_index int

Index of red band in the image.

required
green_band_index int

Index of green band in the image.

required
nir_band_index int

Index of NIR band in the image.

required
down_sample_m float

If set, resamples input to this resolution in meters. Recommended to use a target resolution of 10 m or lower.

None
debug_logs bool

If True, prints progress and debug info.

False
image_threads Literal["cpu"] | int | None

Enables parallel execution. Note: "process" does not work on macOS due to PyTorch MPS limitations.

None
omnicloud_kwargs dict | None

Additional arguments forwarded to predict_from_array.

None

Raises:

Type Description
Exception

Propagates any error from processing individual images.

Source code in spectralmatch/mask/mask.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def create_cloud_mask_with_omnicloudmask(
    input_images: Universal.SearchFolderOrListFiles,
    output_images: Universal.CreateInFolderOrListFiles,
    red_band_index: int,
    green_band_index: int,
    nir_band_index: int,
    *,
    down_sample_m: float | None = None,
    debug_logs: Universal.DebugLogs = False,
    image_threads: Universal.Threads = None,
    omnicloud_kwargs: dict | None = None,
) -> None:
    """
    Generates cloud masks from input images using OmniCloudMask, with optional downsampling and multiprocessing.

    Args:
        input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_CloudClip.tif), ["/input/one.tif", "/input/two.tif"].
        red_band_index (int): Index of red band in the image.
        green_band_index (int): Index of green band in the image.
        nir_band_index (int): Index of NIR band in the image.
        down_sample_m (float | None, optional): If set, resamples input to this resolution in meters. Recommended to use a target resolution of 10 m or lower.
        debug_logs (bool, optional): If True, prints progress and debug info.
        image_threads (Literal["cpu"] | int | None): Enables parallel execution across images. The executor backend is currently fixed to "thread"; note that "process" does not work on macOS due to PyTorch MPS limitations.
        omnicloud_kwargs (dict | None): Additional arguments forwarded to predict_from_array.

    Raises:
        ValueError: If the resolved input and output path lists do not have the same length.
        Exception: Propagates any error from processing individual images.
    """

    print("Start omnicloudmask")

    # image_threads is validated here as in the sibling entry points, so a bad
    # value is rejected before it reaches _resolve_parallel_config.
    Universal.validate(
        input_images=input_images,
        output_images=output_images,
        debug_logs=debug_logs,
        image_threads=image_threads,
    )

    input_image_paths = _resolve_paths(
        "search", input_images, kwargs={"default_file_pattern": "*.tif"}
    )
    output_image_paths = _resolve_paths(
        "create",
        output_images,
        kwargs={
            "paths_or_bases": input_image_paths,
            "default_file_pattern": "$_CloudClip.tif",
        },
    )

    # Determine multiprocessing and worker count
    image_backend = "thread"  # "thread" or "process"
    image_threads_on, image_thread_workers = _resolve_parallel_config(image_threads)

    if debug_logs:
        print(f"Input images: {input_image_paths}")
        print(f"Output images: {output_image_paths}")

    # strict=True: mismatched list lengths raise before any image is processed
    # instead of silently skipping the unmatched images.
    image_args = [
        (
            input_path,
            output_path,
            red_band_index,
            green_band_index,
            nir_band_index,
            down_sample_m,
            debug_logs,
            omnicloud_kwargs,
        )
        for input_path, output_path in zip(
            input_image_paths, output_image_paths, strict=True
        )
    ]

    if image_threads_on:
        with _get_executor(image_backend, image_thread_workers) as executor:
            futures = [
                executor.submit(_process_cloud_mask_image, *args) for args in image_args
            ]
            for future in as_completed(futures):
                # result() re-raises any exception raised inside the worker.
                future.result()
    else:
        for args in image_args:
            _process_cloud_mask_image(*args)

process_raster_values_to_vector_polygons(input_images, output_vectors, *, extraction_expression, custom_nodata_value=None, custom_output_dtype=None, cache=None, image_threads=None, io_threads=None, tile_threads=None, debug_logs=False, filter_by_polygon_size=None, polygon_buffer=0.0, value_mapping=None, estimate_statistics=True)

Converts raster values into vector polygons based on an expression and optional filtering logic.

Parameters:

Name Type Description Default
input_images (str | List[str], required)

Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].

required
output_vectors (str | List[str], required)

Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.gpkg", "/input/folder" (assumes $_Vectorized.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].

required
extraction_expression str

A muparser‑compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, … and you can use C‑style comparison and logical operators (such as >, <, >=, <=, ==, !=, &&, ||, !) along with parentheses and ternary ? : constructs—for example, ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile‑based thresholds are supported: write 5%B1 to substitute the 5th‑percentile value of band 1 into the expression before evaluation.

required
custom_nodata_value CustomNodataValue

Custom NoData value to override the default from the raster metadata.

None
custom_output_dtype CustomOutputDtype

Desired output data type. If not set, defaults to raster’s dtype.

None
cache float | None

Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.

None
image_threads Literal["cpu"] | int | None

Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.

None
io_threads Literal["cpu"] | int | None

Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.

None
tile_threads Literal["cpu"] | int | None

Parallelism for tile-level operations. "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.

None
debug_logs DebugLogs

Whether to print debug logs to the console.

False
filter_by_polygon_size str

Area filter for resulting polygons. Can be a number (e.g., ">100") or percentile (e.g., ">95%").

None
polygon_buffer float

Distance in coordinate units to buffer the resulting polygons. Default is 0.

0.0
value_mapping dict

Mapping from original raster values to new values. Use None to convert to NoData.

None
estimate_statistics bool

Whether to estimate statistics for percentile thresholds. Defaults to True.

True
Source code in spectralmatch/mask/utils_mask.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def process_raster_values_to_vector_polygons(
    input_images: Universal.SearchFolderOrListFiles,
    output_vectors: Universal.CreateInFolderOrListFiles,
    *,
    extraction_expression: str,
    custom_nodata_value: Universal.CustomNodataValue = None,
    custom_output_dtype: Universal.CustomOutputDtype = None,
    cache: Universal.Cache = None,
    image_threads: Universal.Threads = None,
    io_threads: Universal.Threads = None,
    tile_threads: Universal.Threads = None,
    debug_logs: Universal.DebugLogs = False,
    filter_by_polygon_size: str | None = None,
    polygon_buffer: float = 0.0,
    value_mapping: dict | None = None,
    estimate_statistics: bool = True,
):
    """
    Vectorizes rasters: each input image is turned into a polygon file according to an expression, with optional size filtering, buffering and value remapping.

    Args:
        input_images (str | List[str], required): Input files given as a glob path, a folder, or a list of paths, e.g. "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
        output_vectors (str | List[str], required): Output files given as a template path, a folder, or a list of paths (same length as the input), e.g. "/input/files/$.gpkg", "/input/folder" (assumes $_Vectorized.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
        extraction_expression (str): A muparser-compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, ... and C-style comparison and logical operators (>, <, >=, <=, ==, !=, &&, ||, !) may be combined with parentheses and ternary ? : constructs, for example ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile-based thresholds are supported: write 5%B1 to substitute the 5th-percentile value of band 1 into the expression before evaluation.
        custom_nodata_value (Universal.CustomNodataValue, optional): Custom NoData value to override the default from the raster metadata.
        custom_output_dtype (Universal.CustomOutputDtype, optional): Desired output data type. If not set, defaults to raster's dtype.
        cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
        image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
        io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
        tile_threads (Literal["cpu"] | int | None): Parallelism for tile-level operations. "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
        debug_logs (Universal.DebugLogs, optional): Whether to print debug logs to the console.
        filter_by_polygon_size (str, optional): Area filter for resulting polygons. Can be a number (e.g., ">100") or percentile (e.g., ">95%").
        polygon_buffer (float, optional): Distance in coordinate units to buffer the resulting polygons. Default is 0.
        value_mapping (dict, optional): Mapping from original raster values to new values. Use `None` to convert to NoData.
        estimate_statistics (bool, optional): Whether to estimate statistics for percentile thresholds. Defaults to True.
    """

    print("Start raster value extraction to polygons")

    # NOTE(review): custom_output_dtype is validated here but is not forwarded
    # to _process_image_to_polygons below - confirm whether that is intended.
    Universal.validate(
        input_images=input_images,
        output_images=output_vectors,
        custom_nodata_value=custom_nodata_value,
        custom_output_dtype=custom_output_dtype,
        cache=cache,
        image_threads=image_threads,
        io_threads=io_threads,
        tile_threads=tile_threads,
        debug_logs=debug_logs,
    )

    # GDAL-wide settings: cache first, then IO workers.
    _set_gdal_cache(cache, debug_logs)
    _set_gdal_workers(io_threads, debug_logs)

    # Expand the input/output specifications into concrete path lists.
    search_options = {"default_file_pattern": "*.tif"}
    raster_paths = _resolve_paths("search", input_images, kwargs=search_options)
    create_options = {
        "paths_or_bases": raster_paths,
        "default_file_pattern": "$_Vectorized.gpkg",
    }
    vector_paths = _resolve_paths("create", output_vectors, kwargs=create_options)

    if debug_logs:
        print(f"Input: {raster_paths}")
        print(f"Output: {vector_paths}")

    # Parallel configuration for the image level and the tile level.
    executor_kind = "thread"  # "thread" or "process"
    run_parallel, image_workers = _resolve_parallel_config(image_threads)
    tiles_parallel, tile_workers = _resolve_parallel_config(tile_threads)

    # Arguments that are identical for every image; each job prepends its own
    # (input path, output path) pair to them.
    common_args = (
        extraction_expression,
        filter_by_polygon_size,
        polygon_buffer,
        value_mapping,
        custom_nodata_value,
        debug_logs,
        estimate_statistics,
        tiles_parallel,
        tile_workers,
    )
    jobs = [
        (raster_path, vector_path, *common_args)
        for raster_path, vector_path in zip(raster_paths, vector_paths)
    ]

    # Sequential path: process the images one after another and stop.
    if not image_threads_on_flag(run_parallel):
        for job in jobs:
            _process_image_to_polygons(*job)
        return

    # Parallel path: submit everything, then surface worker errors as they finish.
    with _get_executor(executor_kind, image_workers) as executor:
        pending = []
        for job in jobs:
            pending.append(executor.submit(_process_image_to_polygons, *job))
        for finished in as_completed(pending):
            finished.result()


def image_threads_on_flag(flag):
    """Return the truthiness of the image-level parallelism switch unchanged."""
    return bool(flag)