Skip to content

LLM Prompt

Use this text to give an LLM context about this codebase; it includes function headers and docs.

# LLM Prompt

The following content includes function signatures and docstrings from Python source files, as well as relevant Markdown documentation. Each section is labeled by its relative file path. Use this as context to understand the project structure, purpose, and functionality.


## Python Section
### File: utils.py

def merge_vectors(input_vectors, merged_vector_path, method, debug_logs, create_name_attribute):
    """Merge multiple vector files using the specified geometric method.

Args:
    input_vectors (str | List[str]): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.gpkg", "/input/folder" (assumes *.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
    merged_vector_path (str): Path to save merged output.
    method (Literal["intersection", "union", "keep"]): Merge strategy.
    debug_logs (bool): If True, print debug information.
    create_name_attribute (Optional[Tuple[str, str]]): Tuple of (field_name, separator) to add a combined name field.

Returns:
    None"""

def align_rasters(input_images, output_images):
    """Aligns multiple rasters to a common resolution and grid using specified resampling.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Local.tif), ["/input/one.tif", "/input/two.tif"].
    resampling_method: "nearest" | "bilinear" | "cubic".
    tap: If True, snap output extent to target-aligned pixels (GDAL -tap behavior).
    resolution: "highest" (min px size), "average", or "lowest" (max px size).
    window_size: Tile size for output blocks; used for GTiff creation options.
    debug_logs: Verbose logging.
    cache: Cache for processing.
    image_threads: Python-level parallelism over images (e.g., ("process", 4)).
    io_threads: Sets GDAL_NUM_THREADS for internal GDAL multithreading (int or str).
    tile_threads: Sets GTiff/COG writer NUM_THREADS and Warp’s NUM_THREADS (int or str).

Returns:
    List[str]: Paths to the aligned output raster images."""

def _align_process_image(image_name, in_path, out_path, target_res, resampling_method, tap, window_size, tile_threads, debug_logs):
    """Align a single raster to a target resolution and grid using GDAL Warp.

Args:
    image_name (str): Identifier for the raster, used for logging.
    in_path (str): Path to the input raster file.
    out_path (str): Path where the aligned raster will be written.
    target_res (Tuple[float, float]): Target pixel resolution as ``(xres, yres)``.
    resampling_method (Literal["nearest", "bilinear", "cubic"]): Resampling algorithm.
    tap (bool): If True, snaps bounds to target-aligned pixels (GDAL -tap behavior).
    window_size (int): Tile size in pixels for output blocks (BLOCKXSIZE/BLOCKYSIZE).
    tile_threads (Optional[int | str]): Number of threads for GTiff/COG writer and Warp tile processing. If None, defaults to GDAL's internal behavior.
    debug_logs (bool): If True, print debug information during processing.

Returns:
    None"""

def compute_resolution(paths, strategy):

def merge_rasters(input_images, output_image_path):
    """Merges multiple rasters into a single output.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_image_path (str): Path to output mosaic.
    cache (int | Tuple[int, str] | None, optional): Controls GDAL cache size. Examples: 2048 (MB), (2, "GB"). Set None to use GDAL’s default. Applied via GDAL_CACHEMAX.
    window_parallel_workers (Tuple[Literal["process"], Literal["cpu"] | int] | None = None): Parallelization strategy at the window level within each image. Same format as image_parallel_workers. Threads are not supported. Set to None to disable.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    debug_logs (bool, optional): If True, prints progress. Defaults to False.
    output_dtype (str | None, optional): Data type for output rasters. Defaults to input image dtype.
    custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.
    resolution ("highest" | "average" | "lowest", optional): Strategy for computing merge resolution.
    window_size (int | None): Tile size for processing tiles. Defaults to None.
    build_overviews (bool, optional): If True, computes overviews. Defaults to False.

Returns:
    str: Path of the merged raster."""

def mask_rasters(input_images, output_images, vector_mask, window_size, debug_logs, cache, image_threads, io_threads, tile_threads, include_touched_pixels, custom_nodata_value):
    """Applies a vector-based mask to one or more rasters using GDAL Warp.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Local.tif), ["/input/one.tif", "/input/two.tif"].
    vector_mask (Universal.VectorMask, optional): Tuple ('include'|'exclude', vector_path, optional field name).
    window_size (int | None): Tile size for processing tiles. Defaults to None.
    debug_logs (bool, optional): If True, prints progress. Defaults to False.
    cache (int | Tuple[int, str] | None, optional): Controls GDAL cache size. Examples: 2048 (MB), (2, "GB"). Set None to use GDAL’s default. Applied via GDAL_CACHEMAX.
    window_parallel_workers (Tuple[Literal["process"], Literal["cpu"] | int] | None = None): Parallelization strategy at the window level within each image. Same format as image_parallel_workers. Threads are not supported. Set to None to disable.
    image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    include_touched_pixels (bool, optional): If True, uses all touched pixels for cutline mask.
    custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.

Returns:
    list: Output image paths after masking."""

def _mask_raster_process_image(input_image_path, output_image_path, image_name, mode, cutline_path, field_given, debug_logs, include_touched_pixels, custom_nodata_value, tile_threads, tile_thread_on):
    """Applies a GDAL Warp mask to a single image using cutline and nodata configuration.

Args:
    input_image_path (str): Path to the input raster.
    output_image_path (str): Path to the output masked raster.
    image_name (str): Short name for logging/debugging.
    mode (str | None): Whether to "include" or "exclude".
    cutline_path (str | None): Path to the cutline.
    field_given (bool): If a filter field was provided.
    debug_logs (bool): If True, prints processing information.
    include_touched_pixels (bool): If True, enables CUTLINE_ALL_TOUCHED for Warp.
    custom_nodata_value (Universal.CustomNodataValue): Nodata value for masked-out pixels.
    tile_threads (int | str | None): Number of threads for Warp block parallelism.
    tile_thread_on (bool): Whether tile-level multithreading is enabled.

Returns:
    None"""

def _prepare_cutline_sources(vector_mask, image_names, debug_logs):
    """Returns: (mode, per_image_path_or_None, original_vector_path, field_given)
  - If a field is given: per_image_path_or_None is dict[image_name] -> '/vsimem/.geojson' or None if no match
  - If no field is given: per_image_path_or_None is None, and you should pass the original vector as-is."""

def create_masked_vrts(input_image_path_pairs):
    """For each (name -> image_path), write:
  - mask_{name}.geojson  (cutline: include polys OR exclude complement)
  - vrt_{name}.vrt       (alpha = band1 nodata U cutline-outside)
Returns: dict[name, vrt_path]"""

def _set_gdal_cache(cache, debug_logs):

def _set_gdal_workers(io_threads, debug_logs):

def _resolve_gdal_dtype(override_dtype, input_image_path, debug_logs):
    """Resolve a valid GDAL data type string or image path for output.

Args:
    override_dtype (str | None): Desired GDAL dtype name (e.g., "UInt16"). If None, falls back to the dtype of the input image.
    input_image_path (str): Path to the input raster for fallback.
    debug_logs (bool): If True, prints debug information.

Returns:
    str: GDAL data type name (e.g., "Byte", "UInt16", "Float32")."""

def _resolve_window_size(window_size, input_image_path, debug_logs):
    """Resolve the output tile size (window size) for processing.

Args:
    window_size (int | None): Desired tile size. If None, fall back to the block size of the input raster (or full image size if untiled).
    input_image_path (str): Path to the input raster for fallback.
    debug_logs (bool): If True, prints debug information.

Returns:
    int: Tile size in pixels (square, width == height)."""

def _gdal_dtype_str_to_enum(s, default):

def _gdal_dtype_enum_to_name(dt):

def _get_gdal_bounds(path):
    """Compute spatial bounds of a raster.

Args:
    path (str): Path to the raster file.

Returns:
    tuple[float, float, float, float]: (min_x, min_y, max_x, max_y) bounds in dataset CRS."""

def _get_valid_count(band, approx_ok, force):
    """Get the valid pixel count of a raster band.

Args:
    band (gdal.Band): Raster band to compute stats for.
    approx_ok (bool): Allow approximate statistics (fast, may be inaccurate).
    force (bool): If True, force computation if stats are not cached.

Returns:
    valid_count"""

def compute_overviews(input_images_paths):
    """Compute and attach GDAL overviews for one or more raster images.

Args:
    input_images_paths (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_image_paths (str | List[str] | None): Defines output files as None to update input images or from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Global.tif), ["/input/one.tif", "/input/two.tif"].
    window_scales: Overview decimation factors (default: (2, 4, 8, 16, 32)).
    cache: GDAL cache size configuration.
    image_threads: Number of parallel workers for image-level processing.
    io_threads: GDAL IO worker configuration.
    tile_threads: GDAL internal threads for overview computation.
    debug_logs: Enable verbose logging.

Returns:
    List[str]: Paths of images that received overviews."""

def _process_image_overview(path):

def _copy_files_if_needed(src_paths, dst_paths):
    """Copy src to dst."""

### File: statistics.py

def compare_image_spectral_profiles_pairs(image_groups_dict, output_figure_path, title, xlabel, ylabel, line_width, estimate_stats):
    """Plots paired spectral profiles for before-and-after image comparisons.

Args:
    image_groups_dict (dict): Mapping of labels to image path pairs (before, after):
        {'Image A': [
            '/image/before/a.tif',
            '/image/after/a.tif'
        ],
        'Image B': [
            '/image/before/b.tif',
            '/image/after/b.tif'
        ]}
    output_figure_path (str): Path to save the resulting comparison figure.
    title (str): Title of the plot.
    xlabel (str): X-axis label.
    ylabel (str): Y-axis label.
    line_width (float, optional): Width of the spectral profile lines. Default is 1.
    estimate_stats (bool, optional): Whether to estimate band statistics. Default is True.

Outputs:
    Saves a spectral comparison plot showing pre- and post-processing profiles."""

def compare_spatial_spectral_difference_band_average(input_images, output_figure_path, title, diff_label, subtitle, scale):
    """Computes and visualizes the mean per-pixel spectral difference between two coregistered, equal-size images.

Args:
    input_images (list): List of two image file paths [before, after].
    output_figure_path (str): Path to save the resulting difference image (PNG).
    title (str): Title for the plot.
    diff_label (str): Label for the colorbar.
    subtitle (str): Subtitle text shown below the image.
    scale (tuple, optional): Tuple (vmin, vmax) to fix the color scale. Centered at 0.

Raises:
    ValueError: If the input list doesn't contain exactly two image paths, or shapes mismatch."""

def compare_before_after_all_images(input_images_1, input_images_2, output_figure_path, title, ylabel_1, ylabel_2, image_names):
    """Creates a two-row image grid to compare before-and-after raster pairs with consistent per-row contrast stretching. Each column shows a pair of aligned images with transparent nodata. Supports 1- and 3-band rasters.

Args:
    input_images_1 (list): List of file paths to the "before" images (top row).
    input_images_2 (list): List of file paths to the "after" images (bottom row).
    output_figure_path (str): Destination path to save the output PNG figure.
    title (str): Title of the entire figure.
    ylabel_1 (str): Y-axis label for the top row.
    ylabel_2 (str): Y-axis label for the bottom row.
    image_names (list, optional): List of image names to use as column titles. Must match the number of image pairs.

Raises:
    AssertionError: If input lists have mismatched lengths or if `image_names` does not match image count.

Output:
    Saves a PNG file with the comparison figure."""

def read_as_3band(ds):

def compute_row_stretch(paths):

### File: cli.py

def _cli_version():

def _build_cli():

def main():

### File: types_and_validation.py

def _validate_threads(x, name):

def validate():

def validate_match():

def validate_global_regression():

def validate_local_block_adjustment():

### File: handlers.py

def _resolve_paths(mode, input):
    """Resolves a list of paths based on the mode and input format.

Args:
    mode (Literal["search", "create", "match", "name"]): Type of operation to perform.
    input (str | List[str]): Either a list of file paths or a folder/template string.
    kwargs (dict, optional): Additional keyword arguments passed to the resolved function.

Returns:
    List[str]: List of resolved paths."""

def search_paths(search_pattern):
    """Search for files using a glob pattern, or a folder with a default file pattern.

Args:
    search_pattern (str, required): Defines input files from a glob path or folder. Specify like: "/input/files/*.tif" or "/input/folder" (while passing default_file_pattern like: '*.tif').
    default_file_pattern (str, optional): Used when `search_pattern` is a directory. If not set and `search_pattern` is a folder, raises an error.
    recursive (bool, optional): Whether to search recursively.
    match_to_paths (Tuple[List[str], str], optional): Matches input files to a reference list using a regex.
    debug_logs (bool, optional): Whether to print matched paths.

Returns:
    List[str]: Sorted list of matched file paths.

Raises:
    ValueError: If `search_pattern` is a directory and `default_file_pattern` is not provided."""

def create_paths(template_pattern, paths_or_bases):
    """Create output paths using a filename template_pattern and a list of reference paths or names.

Args:
    template_pattern (str, required): Defines output files from a template path or folder to match input paths or names. Specify like: "/input/files/$.tif" or "/input/folder" (while passing default_file_pattern like: '$.tif').
    paths_or_bases (List[str]): List of full paths or base names to derive the replace_symbol from.
    default_file_pattern (str, optional): Used if `template_pattern` is a directory.
    debug_logs (bool): Whether to print the created paths.
    replace_symbol (str): Placeholder symbol in the template to replace with base names.
    create_folders (bool): Whether to create output folders if they don't exist.

Returns:
    List[str]: List of constructed file paths.

Raises:
    ValueError: If `template_pattern` is a directory and `default_file_pattern` is not provided."""

def match_paths(input_match_paths, reference_paths, match_regex, debug_logs):
    """Match `reference_paths` to `input_match_paths` using a regex applied to the basenames of `input_match_paths`. The extracted key must be a substring of the reference filename.

Args:
    input_match_paths (List[str]): List of candidate paths to extract keys from.
    reference_paths (List[str]): List of reference paths to align to.
    match_regex (str): Regex applied to basenames of input_match_paths to extract a key to match via *inclusion* in reference_paths (e.g. "(.*)_LocalMatch\.gpkg$" (without one of the backslashes)).
    debug_logs (bool): If True, print matched and unmatched file basenames.

Returns:
    List[Optional[str]]: A list the same length as `reference_paths` where each
    element is the matched path from `input_match_paths` or None.

Raises:
    ValueError: If output list length does not match reference_paths length."""

def _check_raster_requirements(input_image_paths, debug_logs, check_geotransform, check_crs, check_bands, check_nodata, check_resolution):
    """Validates a list of raster image paths to ensure they are compatible for processing.

Args:
    input_image_paths (list[str]): Paths to input raster images.
    debug_logs (bool): If True, prints debug messages.
    check_geotransform (bool): Check that all images have a valid geotransform.
    check_crs (bool): Check that all images have the same CRS.
    check_bands (bool): Check that all images have the same number of bands.
    check_nodata (bool): Check that all images have the same nodata values per band.
    check_resolution (bool): Check that all images have the same resolution.

Returns:
    bool: True if all checks pass.

Raises:
    ValueError: If any check fails."""

def _resolve_nodata_value(image_path, custom_nodata_value):
    """Get the NoData value for a raster.

Args:
    image_path (str): Path to the raster image.
    custom_nodata_value (float | int | None, optional): If provided, overrides the image's NoData.

Returns:
    float | None: The chosen NoData value."""

### File: utils_multiprocessing.py

def _choose_context(prefer_fork):
    """Chooses the most appropriate multiprocessing context based on platform and preference.

Args:
    prefer_fork (bool): If True, prefers "fork" context where available; default is True.

Returns:
    mp.context.BaseContext: Selected multiprocessing context ("fork", "forkserver", or "spawn")."""

def _resolve_parallel_config(workers):
    """Parses a parallel worker config into execution flags and worker count.

Args:
    workers ("cpu" | int | None): Number of workers.
        - "cpu" → use os.cpu_count()
        - int   → use that many workers
        - None  → disables parallelism

Returns:
    Tuple[bool, Optional[int]]:
        - Whether to run in parallel,
        - Number of workers."""

def _get_executor(backend, max_workers, initializer, initargs):
    """Creates a parallel executor (process or thread) with optional initialization logic.

Args:
    backend (str): Execution backend, either "process" or "thread".
    max_workers (int): Maximum number of worker processes or threads.
    initializer (Callable, optional): Function to initialize worker context.
    initargs (tuple, optional): Arguments to pass to the initializer.

Returns:
    Executor: An instance of ThreadPoolExecutor or ProcessPoolExecutor.

Raises:
    ValueError: If the backend is not "process" or "thread"."""

### File: seamline/voronoi_center_seamline.py

def voronoi_center_seamline(input_images, output_mask):
    """Generates a Voronoi-based seamline mask from edge-matching polygons (EMPs) and writes the result to a vector file.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_mask (str): Output path for the final seamline polygon vector file.
    aoi_path (str, optional): Path to an AOI vector file to clip overlapping image polygons; default is None.
    min_point_spacing (float, optional): Minimum spacing between Voronoi seed points; default is 10.
    min_cut_length (float, optional): Minimum cutline segment length to retain; default is 0.
    debug_logs (Universal.DebugLogs, optional): Enables debug print statements if True; default is False.
    image_field_name (str, optional): Name of the attribute field for image ID in output; default is 'image'.
    debug_vectors_path (str | None, optional): Optional path to save debug layers (cutlines, intersections).

Outputs:
    Saves a polygon seamline layer to `output_mask`, and optionally saves intermediate cutlines to `debug_vectors_path`."""

def _densify_polygon(poly, target_spacing):
    """Return evenly spaced points along a polygon's exterior by arc-length.

Args:
    poly: shapely Polygon
    target_spacing: target arc-length spacing between seeds (density goal)

Returns:
    list[(x, y)]  -- open list (first point not repeated at end)"""

def polygonal_intersection(a, b, buffer_eps):
    """Returns only the polygonal portion of a ∩ b. If the intersection is line-like or point-like, it buffers slightly to form a polygon.

Args:
    a (Polygon): First input geometry.
    b (Polygon): Second input geometry.
    buffer_eps (float): Small buffer distance to 'inflate' line/point intersections.

Returns:
    Polygon or MultiPolygon"""

def _compute_centerline(a, b, min_point_spacing, min_cut_length, debug_logs, crs, debug_vectors_path):
    """Computes a Voronoi-based centerline between two overlapping polygons.

Args:
    a (Polygon): First polygon.
    b (Polygon): Second polygon.
    min_point_spacing (float): Minimum spacing between seed points for Voronoi generation.
    min_cut_length (float): Minimum segment length to include in the centerline graph.
    debug_logs (bool, optional): If True, prints debug information; default is False.
    crs (optional): Coordinate reference system used for optional debug output.
    debug_vectors_path (optional): Path to save debug Voronoi cells; if None, skips saving.

Returns:
    LineString: Shortest centerline path computed through the Voronoi diagram of the overlap."""

def _segment_emp(emp, cuts, debug_logs):
    """Segments an EMP polygon by sequentially applying centerline cuts, retaining the piece containing the centroid.

Args:
    emp (Polygon): The original EMP polygon to segment.
    cuts (List[LineString]): List of cutlines to apply.
    debug_logs (bool, optional): If True, prints debug info; default is False.

Returns:
    Polygon: The segmented portion of the EMP containing the original centroid."""

def _save_intersection_points(a, b, path, crs, pair_id):
    """Saves intersection points between the boundaries of two polygons to a GeoPackage layer.

Args:
    a (Polygon): First polygon.
    b (Polygon): Second polygon.
    path (str): Path to the output GeoPackage file.
    crs: Coordinate reference system for the output.
    pair_id (str): Identifier for the polygon pair, saved as an attribute.

Returns:
    None"""

def _save_voronoi_cells(voronoi_cells, path, crs, layer_name):
    """Saves Voronoi polygon geometries to a specified GeoPackage layer.

Args:
    voronoi_cells (GeometryCollection): Collection of Voronoi polygon geometries.
    path (str): Path to the output GeoPackage file.
    crs: Coordinate reference system for the output layer.
    layer_name (str, optional): Name of the layer to write; default is "voronoi_cells".

Returns:
    None"""

def _save_emp_outlines(emps, image_paths, path, crs, image_field_name, layer_name):
    """Save initial EMP polygons (one per image) to a GPKG layer."""

def _save_seed_points(pts, path, crs, layer_name):
    """Saves Voronoi seed points to a GeoPackage layer.

Args:
    pts (list[tuple]): List of (x, y) seed coordinates.
    path (str): Path to the output GeoPackage.
    crs: Coordinate reference system.
    layer_name (str, optional): Layer name. Defaults to 'voronoi_seeds'."""

def _emp_polygon_from_image(path, eight_connected):
    """Extract the largest valid EMP polygon from a raster mask using GDAL.

Args:
    path (str): Path to the input raster file.
    eight_connected (bool, optional): Use 8-connectedness for polygonization. Default is True.

Returns:
    shapely.geometry.Polygon | ogr.Geometry: The largest EMP polygon."""

def _mask_by_aoi(polygons, aoi_path):
    """Clip polygons by an AOI layer from file.

Args:
    polygons (list[Polygon]): Input seamline polygons.
    aoi_path (str): Path to vector file containing AOI polygon(s).

Returns:
    list[Polygon]: List of clipped polygons (empties dropped)."""

### File: mask/mask.py

def create_cloud_mask_with_omnicloudmask(input_images, output_images, red_band_index, green_band_index, nir_band_index):
    """Generates cloud masks from input images using OmniCloudMask, with optional downsampling and multiprocessing.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_CloudMask.tif), ["/input/one.tif", "/input/two.tif"].
    red_band_index (int): Index of red band in the image.
    green_band_index (int): Index of green band in the image.
    nir_band_index (int): Index of NIR band in the image.
    down_sample_m (float, optional): If set, resamples input to this resolution in meters. Recommended to use a target resolution of 10 m or lower.
    debug_logs (bool, optional): If True, prints progress and debug info.
    image_threads (Literal["cpu"] | int | None): Enables parallel execution. Note: "process" does not work on macOS due to PyTorch MPS limitations.
    omnicloud_kwargs (dict | None): Additional arguments forwarded to predict_from_array.

Raises:
    Exception: Propagates any error from processing individual images."""

def _process_cloud_mask_image(input_image_path, output_mask_path, red_band_index, green_band_index, nir_band_index, down_sample_m, debug_logs, omnicloud_kwargs):
    """Processes a single image to generate a cloud mask using OmniCloudMask.

Args:
    input_image_path (str): Path to input image.
    output_mask_path (str): Path to save output mask.
    red_band_index (int): Index of red band.
    green_band_index (int): Index of green band.
    nir_band_index (int): Index of NIR band.
    down_sample_m (float): Target resolution (if resampling).
    debug_logs (bool): If True, print progress info.
    omnicloud_kwargs (dict | None): Passed to predict_from_array.

Raises:
    Exception: If any step in reading, prediction, or writing fails."""

def band_math(input_images, output_images, threshold_math):
    """Applies a thresholding operation to input raster images using a mathematical expression string.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Threshold.tif), ["/input/one.tif", "/input/two.tif"].
    threshold_math (str): A muparser‑compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, ..., and you can use C‑style comparison and logical operators (such as >, <, >=, <=, ==, !=, &&, ||, !) along with parentheses and ternary ? : constructs—for example, ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile‑based thresholds are supported: write 5%B1 to substitute the 5th‑percentile value of band 1 into the expression before evaluation.
    debug_logs (bool, optional): If True, prints debug messages.
    custom_nodata_value (float | int | None, optional): Override the dataset's nodata value.
    cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
    image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    window_size (WindowSize, optional): Window tiling strategy for memory-efficient processing.
    custom_output_dtype (CustomOutputDtype, optional): Output data type override.
    calculation_dtype (CalculationDtype, optional): Internal computation dtype.

Returns:
    List[str]: Paths to the thresholded output images."""

def _band_math_process_image(input_image_path, output_image_path, name, threshold_math, debug_logs, nodata_value, tile_threads_on, tile_thread_workers, window_size, output_dtype, calculation_dtype):
    """Processes a single input raster image using a threshold expression and writes the result to disk.

Args:
    input_image_path (str): Path to input raster image.
    output_image_path (str): Path to save the output thresholded image.
    name (str): Image name for worker context.
    threshold_math (str): Expression string to evaluate pixel-wise conditions.
    debug_logs (bool): Enable debug logging.
    nodata_value (float | int | None): Value considered as nodata.
    tile_threads_on (bool): Enable GDAL multithreaded tiling if ``True``.
    tile_thread_workers (int): Number of worker threads for GDAL tiling.
    window_size: Window tiling size for memory efficiency.
    output_dtype: Output raster data type.
    calculation_dtype: Data type used for internal calculations.

Returns:
    None"""

def _calculate_threshold_from_percent(input_image_path, threshold, band_index, debug_logs, nodata_value, bins, estimate_statistics):
    """Compute a percentile value for a raster band using GDAL.

Args:
    input_image_path: Path to the input raster file.
    threshold: Desired percentile (e.g., 95 for the 95th percentile).
    band_index: 1-based index of the band to process.
    debug_logs: If True, print debug information.
    nodata_value: Value to be treated as nodata.
    bins: Number of histogram bins (default is 1000).
    estimate_statistics: If True, allow GDAL to approximate min/max and histogram for speed.

Returns:
    float: The pixel value corresponding to the requested percentile."""

def replace_percent_with_threshold(match):

### File: mask/utils_mask.py

def process_raster_values_to_vector_polygons(input_images, output_vectors):
    """Converts raster values into vector polygons based on an expression and optional filtering logic.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_vectors (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.gpkg", "/input/folder" (assumes $_Vectorized.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
    extraction_expression (str): A muparser‑compatible expression applied to the raster bands, see https://github.com/beltoforion/muparser. Bands are referenced as B1, B2, …, and you can use C‑style comparison and logical operators (such as >, <, >=, <=, ==, !=, &&, ||, !) along with parentheses and ternary ? : constructs—for example, ((B1 > 5) && (B2 < 10)) ? 1 : 0. Percentile‑based thresholds are supported: write 5%B1 to substitute the 5th‑percentile value of band 1 into the expression before evaluation.
    custom_nodata_value (Universal.CustomNodataValue, optional): Custom NoData value to override the default from the raster metadata.
    custom_output_dtype (Universal.CustomOutputDtype, optional): Desired output data type. If not set, defaults to raster’s dtype.
    cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
    image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    debug_logs (Universal.DebugLogs, optional): Whether to print debug logs to the console.
    filter_by_polygon_size (str, optional): Area filter for resulting polygons. Can be a number (e.g., ">100") or percentile (e.g., ">95%").
    polygon_buffer (float, optional): Distance in coordinate units to buffer the resulting polygons. Default is 0.
    value_mapping (dict, optional): Mapping from original raster values to new values. Use `None` to convert to NoData.
    estimate_statistics (bool, optional): Whether to estimate statistics for percentile thresholds. Defaults to True."""

def _process_image_to_polygons(input_image_path, output_vector_path, extraction_expression, filter_by_polygon_size, polygon_buffer, value_mapping, custom_nodata_value, debug_logs, estimate_statistics, tile_thread_on, tile_thread_workers):
    """Processes a single raster file and extracts polygons based on logical expressions and optional filters.

Args:
    input_image_path (str): Path to the input raster image.
    output_vector_path (str): Output file path for the resulting vector file (GeoPackage format).
    extraction_expression (str): Logical expression using band indices (e.g., "b1 > 5 & b2 < 10").
    filter_by_polygon_size (str): Area filter for polygons. Supports direct comparisons (">100") or percentiles ("90%").
    polygon_buffer (float): Amount of buffer to apply to polygons in projection units.
    value_mapping (dict): Dictionary mapping original raster values to new ones. Set value to `None` to mark as NoData.
    custom_nodata_value: Custom NoData value to use during processing.
    debug_logs (bool): Whether to print debug logging information.
    estimate_statistics (bool): Whether to estimate statistics for percentile thresholds.
    tile_thread_on (bool): Whether to use tiled processing.
    tile_thread_workers (int): Number of worker threads to use for tiled processing."""

def _sub(m):

def _apply_mapping_feat(feat):

def _area_ok(geom_area, areas):

### File: match/global_regression.py

def global_regression(input_images, output_images):
    """Performs global radiometric normalization across overlapping images using least squares regression.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Global.tif), ["/input/one.tif", "/input/two.tif"].
    calculation_dtype (str, optional): Precision for internal calculations. Defaults to "float32".
    output_dtype (str | None, optional): Data type for output rasters. Defaults to input image dtype.
    vector_mask (Tuple[Literal["include", "exclude"], str, Optional[str]] | None): Mask to limit stats calculation to specific areas in the format of a tuple with two or three items: literal "include" or "exclude" the mask area, str path to the vector file, optional str of field name in vector file that *includes* (can be substring) input image name to filter geometry by. Loaded stats won't have this applied to them. The matching solution is still applied to these areas in the output. Defaults to None for no mask.
    debug_logs (bool, optional): If True, prints debug info and progress. Defaults to False.
    custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.
    cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
    image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    estimate_stats (bool): If True, use an estimate algorithm to calculate the mean and sd to increase processing speeds. If False, use the exact algorithm. Defaults to True.
    window_size (int | None): Output image tile size. Defaults to input image tile size.
    save_as_cog (bool): If True, saves output as a Cloud-Optimized GeoTIFF using proper band and block order.
    specify_model_images (Tuple[Literal["exclude", "include"], List[str]] | None): First item in the tuple sets whether to 'include' or 'exclude' the listed images from model building statistics. Second item is the list of image names (without their extension) to apply the criteria to. For example, if this param is set to 'include' only one image, all other images will be matched to that one image. Defaults to no exclusion.
    custom_mean_factor (float, optional): Weight for mean constraints in regression. Defaults to 1.0.
    custom_std_factor (float, optional): Weight for standard deviation constraints in regression. Defaults to 1.0.
    save_adjustments (str | None, optional): The output path of a .json file to save adjustments parameters. Defaults to not saving.
    load_adjustments (str | None, optional): If set, loads saved whole and overlapping statistics only for images that exist in the .json file. Other images will still have their statistics calculated. Defaults to None.
    build_overviews (bool, optional): If True, computes overviews. Defaults to False.

Returns:
    List[str]: Paths to the globally adjusted output raster images."""

def _solve_global_model(num_bands, num_total, all_image_names, included_names, input_image_names, all_overlap_stats, all_whole_stats, custom_mean_factor, custom_std_factor, overlapping_pairs, debug_logs, apply_size_weighting):
    """Computes global radiometric normalization parameters (scale and offset) for each image and band using least squares regression.

Args:
    num_bands: Number of image bands.
    num_total: Total number of images (including loaded).
    all_image_names: Ordered list of all image names.
    included_names: Subset of images used to constrain the model.
    input_image_names: Names of input images to apply normalization to.
    all_overlap_stats: Pairwise overlap statistics per band.
    all_whole_stats: Whole-image stats (mean, std) per band.
    custom_mean_factor: Weight for mean constraints.
    custom_std_factor: Weight for std constraints.
    overlapping_pairs: Pairs of overlapping images.
    debug_logs: If True, prints debug information.
    apply_size_weighting (bool): Whether to use the overlap size to weight its influence.

Returns:
    np.ndarray: Adjustment parameters of shape (bands, 2 * num_images, 1)."""

def _apply_adjustments_process_image(tile_thread_on, tile_thread_workers, image_name, input_image_path, output_image_path, scale, offset, num_bands, nodata_val, window_size, output_dtype, calculation_dtype, save_as_cog, debug_logs):
    """Applies per-band linear radiometric adjustments to an image using GDAL VRT metadata and materializes the result as a GeoTIFF or COG. Each band is transformed according to: y = a * x + b, where `a` = scale and `b` = offset.

Args:
    tile_thread_on (bool): Enable multithreaded GDAL translation for tile-level work.
    tile_thread_workers (int): Number of worker threads if `tile_thread_on=True`.
    image_name (str): Basename (no extension) of the input image; used for temporary files.
    input_image_path (str): Path to the input image to adjust.
    output_image_path (str): Path where the adjusted raster will be written.
    scale (np.ndarray): 1D array of per-band scale coefficients (length = num_bands).
    offset (np.ndarray): 1D array of per-band offset coefficients (length = num_bands).
    num_bands (int): Number of data bands to adjust (alpha not included).
    nodata_val (int | float | None): NoData value to assign to output bands, if provided.
    window_size: Window size used for tiling (currently not applied at pixel level).
    output_dtype (str | None): Desired GDAL output data type (e.g., "UInt16"). If None, preserves source dtype.
    calculation_dtype (str | None): Desired GDAL calculation dtype.
    save_as_cog (bool): If True, writes output as Cloud-Optimized GeoTIFF (COG); otherwise, writes a standard tiled GeoTIFF.
    debug_logs (bool, optional): If True, print detailed logging about the process. Defaults to False.

Returns:
    None"""

def _save_adjustments(save_path, input_image_names, all_params, all_whole_stats, all_overlap_stats, num_bands, calculation_dtype):
    """Saves adjustment parameters, whole-image stats, and overlap stats in a nested JSON format.

Args:
    save_path (str): Output JSON path.
    input_image_names (List[str]): List of input image names.
    all_params (np.ndarray): Adjustment parameters, shape (bands, 2 * num_images, 1).
    all_whole_stats (dict): Per-image stats (keyed by image name).
    all_overlap_stats (dict): Per-pair overlap stats (keyed by image name).
    num_bands (int): Number of bands.
    calculation_dtype (str): Precision for saving values (e.g., "float32")."""

def _validate_adjustment_model_structure(model):
    """Validates the structure of a loaded adjustment model dictionary.

Ensures that:
- Each top-level key is an image name mapping to a dictionary.
- Each image has 'adjustments' and 'whole_stats' with per-band keys like 'band_0'.
- Each band entry in 'adjustments' contains 'scale' and 'offset'.
- Each band entry in 'whole_stats' contains 'mean', 'std', and 'size'.
- If present, 'overlap_stats' maps to other image names with valid per-band statistics.

The expected model structure is a dictionary with this format:

{
    "image_name_1": {
        "adjustments": {
            "band_0": {"scale": float, "offset": float},
            "band_1": {"scale": float, "offset": float},
            ...
        },
        "whole_stats": {
            "band_0": {"mean": float, "std": float, "size": int},
            "band_1": {"mean": float, "std": float, "size": int},
            ...
        },
        "overlap_stats": {
            "image_name_2": {
                "band_0": {"mean": float, "std": float, "size": int},
                "band_1": {"mean": float, "std": float, "size": int},
                ...
            },
            ...
        }
    },
    ...
}

- Keys are image basenames (without extension).
- Band keys are of the form "band_0", "band_1", etc.
- All numerical values are stored as floats (except 'size', which is an int).

Args:
    model (dict): Parsed JSON adjustment model.

Raises:
    ValueError: If any structural issues or missing keys are detected."""

def _print_constraint_system(constraint_matrix, adjustment_params, observed_values_vector, overlap_pairs, image_names_with_id):
    """Prints the constraint matrix system with labeled rows and columns for debugging regression inputs.

Args:
    constraint_matrix (ndarray): Coefficient matrix used in the regression system.
    adjustment_params (ndarray): Solved adjustment parameters (regression output).
    observed_values_vector (ndarray): Target values in the regression system.
    overlap_pairs (tuple): Pairs of overlapping image indices used in constraints.
    image_names_with_id (list of tuple): List of (ID, name) pairs corresponding to each image's position in the system.

Returns:
    None"""

def _find_overlaps(image_bounds_dict):
    """Finds all pairs of image names with overlapping spatial bounds.

Args:
    image_bounds_dict: Map of image name -> (minx, miny, maxx, maxy).

Returns:
    Tuple of (name_i, name_j) pairs with overlapping extents."""

def _overlap_stats_process_image(tile_thread_on, tile_thread_workers, num_bands, input_image_path_i, input_image_path_j, name_i, name_j, bound_i, bound_j, estimate_stats, debug_logs):
    """Computes per-band overlap statistics between two images using alpha masks, without VRT recursion.

Args:
    tile_thread_on (bool): Enable multithreaded GDAL warp/translate operations for per-tile work.
    tile_thread_workers (int): Number of worker threads when `tile_thread_on=True`.
    num_bands (int): Number of data bands to analyze.
    input_image_path_i (str): Path to image I.
    input_image_path_j (str): Path to image J.
    name_i (str): Basename (no extension) for image I; used as a key in outputs.
    name_j (str): Basename (no extension) for image J; used as a key in outputs.
    bound_i: (minx, miny, maxx, maxy) bounds for image I (dataset CRS).
    bound_j: (minx, miny, maxx, maxy) bounds for image J (dataset CRS).
    estimate_stats (bool): If True, use GDAL’s approximate statistics; if False, use exact.
    debug_logs (bool, optional): If True, print progress and intermediate details. Defaults to False.

Returns:
    Dict[str, Dict[str, Dict[int, Dict[str, float]]]]: Nested mapping of overlap stats:
        {
          name_i: {
            name_j: {
              band_index: {"mean": float, "std": float, "size": int},  # band_index is 0-based
              ...
            }
          },
          name_j: {
            name_i: { ... }  # symmetric entries
          }
        }"""

def _whole_stats_process_image(tile_thread_on, tile_thread_worker, input_image_path, num_bands, image_name, estimate_stats, debug_logs):
    """Computes whole-image statistics (mean, standard deviation, valid pixel count) for each band of a masked raster.

Args:
    tile_thread_on (bool): Enable multithreaded GDAL operations for tile-level work.
    tile_thread_worker (int): Number of worker threads if `tile_thread_on=True`.
    input_image_path (str): Path to an input raster (VRT with alpha/nodata applied).
    num_bands (int): Number of data bands to compute stats for (alpha excluded).
    image_name (str): Basename (no extension) of the image; used as key in output.
    estimate_stats (bool): If True, use GDAL’s approximate statistics; if False, compute exact statistics.
    debug_logs (bool): If True, print detailed per-band statistics.

Returns:
    Dict[str, Dict[int, Dict[str, float]]]: Mapping of image name to per-band stats:
        {
          image_name: {
            band_index: {
              "mean": float,  # band mean
              "std": float,   # band standard deviation
              "size": int     # count of valid pixels (shared across bands)
            },
            ...
          }
        }"""

def _make_stats_vrt(base_vrt, out_vrt):

### File: match/local_block_adjustment.py

def local_block_adjustment(input_images, output_images):
    """Performs local radiometric adjustment on a set of raster images using block-based statistics.

Args:
    input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
    output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Global.tif), ["/input/one.tif", "/input/two.tif"].
    calculation_dtype (str, optional): Precision for internal calculations. Defaults to "float32".
    output_dtype (str | None, optional): Data type for output rasters. Defaults to input image dtype.
    vector_mask (Tuple[Literal["include", "exclude"], str, Optional[str]] | None): A mask limiting pixels to include when calculating stats for each block in the format of a tuple with two or three items: literal "include" or "exclude" the mask area, str path to the vector file, optional str of field name in vector file that *includes* (can be substring) input image name to filter geometry by. It is only applied when calculating local blocks, as the reference map is calculated as the mean of all local blocks. Loaded block maps won't have this applied unless it was used when calculating them. The matching solution is still applied to these areas in the output. Defaults to None for no mask.
    debug_logs (bool, optional): If True, prints debug info and progress. Defaults to False.
    custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.
    cache (float | None): Controls GDAL cache size in GB. Defaults to preset cache size. Applied via GDAL_CACHEMAX.
    image_threads (Literal["cpu"] | int | None): Parallelism for per-image operations. "cpu" to get number of cores, int to assign number, and None to disable image level parallelism.
    io_threads (Literal["cpu"] | int | None): Parallelism for IO operations. "cpu" to get number of cores, int to assign number, and None to disable io level parallelism.
    tile_threads (Literal["cpu"] | int | None): "cpu" to get number of cores, int to assign number, and None to disable tile level parallelism.
    window_size (int | None): Output image tile size. Defaults to input image tile size.
    save_as_cog (bool): If True, saves output as a Cloud-Optimized GeoTIFF using proper band and block order.
    number_of_blocks (int | tuple | Literal["coefficient_of_variation"]): int as a target number of blocks per image, tuple to manually set the total blocks width and height, "coefficient_of_variation" to find the number of blocks based on this metric.
    alpha (float, optional): Blending factor between reference and local means. Defaults to 1.0.
    correction_method (Literal["gamma", "linear", "offset"], optional): Local correction method. Defaults to "gamma". Offset is recommended for images with negative values.
    save_block_maps (tuple(str, str) | None): If enabled, saves block maps for review, to resume processing later, or to add additional images to the reference map.
        - First str is the path to save the global block map.
        - Second str is the path to save the local block maps, which must include "$", which will be replaced by the image name (because there are multiple local maps).
    load_block_maps (Tuple[str, List[str]] | Tuple[str, None] | Tuple[None, List[str]] | None, optional):
        Controls loading of precomputed block maps. Can be one of:
            - Tuple[str, List[str]]: Load both reference and local block maps.
            - Tuple[str, None]: Load only the reference block map.
            - Tuple[None, List[str]]: Load only the local block maps.
            - None: Do not load any block maps.
        This supports partial or full reuse of precomputed block maps:
            - Local block maps will still be computed for each input image that is not linked to a local block map by the image's name being *included* in the local block map's name (file name).
            - The reference block map will only be calculated (mean of all local blocks) if not set.
            - The reference map defines the reference block statistics and the local maps define per-image local block statistics.
            - Both reference and local maps must have the same canvas extent and dimensions which will be used to set those values.
    override_bounds_canvas_coords (Tuple[float, float, float, float] | None): Manually set (min_x, min_y, max_x, max_y) bounds to override the computed/loaded canvas extent. If you wish to have a larger extent than the current images, you can manually set this, along with setting a fixed number of blocks, to anticipate images will expand beyond the current extent.
    build_overviews (bool, optional): If True, computes overviews. Defaults to False.

Returns:
    List[str]: Paths to the locally adjusted output raster images."""

def _get_pre_computed_block_maps(load_block_maps, calculation_dtype, debug_logs):
    """Load pre-computed block mean maps from files.

Args:
    load_block_maps (Tuple[str, List[str]] | Tuple[str, None] | Tuple[None, List[str]]):
        - Tuple[str, List[str]]: Load both reference and local block maps.
        - Tuple[str, None]: Load only the reference block map.
        - Tuple[None, List[str]]: Load only the local block maps.
    calculation_dtype (str): Numpy dtype to use for reading.
    debug_logs (bool): To print debug statements or not.

Returns:
    Tuple[
        dict[str, np.ndarray],             # block_local_means
        Optional[np.ndarray],              # block_reference_mean
        Optional[int],                     # num_row
        Optional[int],                     # num_col
        Optional[Tuple[float, float, float, float]]  # bounds_canvas_coords
    ]"""

def _get_bounding_rect_images_block_space(block_local_means):
    """Compute block-space bounding rectangles for each image based on valid block values.

Args:
    block_local_means (dict[str, np.ndarray]): Per-image block means
        with shape (num_row, num_col, num_bands).

Returns:
    dict[str, tuple[int, int, int, int]]: Each entry maps image name to
        (min_row, min_col, max_row, max_col)."""

def _compute_reference_blocks(block_local_means, calculation_dtype):
    """Computes reference block means across images by averaging non-NaN local block means.

Args:
    block_local_means (dict[str, np.ndarray]): Per-image block mean arrays.
    calculation_dtype (str): Numpy dtype for output array.

Returns:
    np.ndarray: Reference block map of shape (num_row, num_col, num_bands)"""

def _apply_adjustment_process_image(name, img_path, out_path, num_bands, block_reference_mean, block_local_mean, bounds_canvas_coords, window_size, num_row, num_col, nodata_val, alpha, correction_method, calculation_dtype, output_dtype, debug_logs, tile_thread_on, tile_thread_workers, save_as_cog, estimate_stats):
    """Apply local radiometric adjustment (linear or gamma) to a raster image using block-based reference and local mean surfaces. Builds parameter surfaces as rasters, warps them to the image grid, and creates a VRT with per-pixel expressions, then materializes the output as GTiff or COG.

Args:
    name (str): Identifier for the image (basename, no extension).
    img_path (str): Path to the input raster image.
    out_path (str): Path where the adjusted raster will be written.
    num_bands (int): Number of spectral bands to process.
    block_reference_mean (np.ndarray): Block-level reference mean values per band.
    block_local_mean (np.ndarray): Block-level local mean values per band.
    bounds_canvas_coords (tuple): Geographic bounds of the image canvas (xmin, ymin, xmax, ymax).
    window_size (int | None): Output block size used for tiling.
    num_row (int): Number of block rows.
    num_col (int): Number of block columns.
    nodata_val (float): NoData value to assign to output bands.
    alpha (float): Scaling factor applied in gamma correction formula.
    correction_method (Literal["gamma", "linear"]): Radiometric correction method.
    calculation_dtype (str): Intermediate calculation data type (GDAL type string).
    output_dtype: GDAL output data type (enum or string).
    debug_logs (bool): If True, print debug information.
    tile_thread_on (bool): If True, enable multithreaded warp/translate operations.
    tile_thread_workers (int): Number of worker threads if `tile_thread_on=True`.
    save_as_cog (bool): If True, write output as Cloud-Optimized GeoTIFF (COG).

Returns:
    None"""

def _get_bounding_rectangle(images):
    """Return the combined extent (minx, miny, maxx, maxy) of rasters. Accepts a list of file paths, single path, or single GDAL dataset.

Args:
    images (List[str] | gdal.Dataset): List of raster file paths, single path, or single GDAL dataset.

Returns:
    Tuple[float, float, float, float]: (min_x, min_y, max_x, max_y) of the combined extent."""

def _compute_mosaic_coefficient_of_variation(image_paths, nodata_value, reference_std, reference_mean, base_block_size, band_index, estimate_statistics, debug_logs):
    """Estimates block size for local adjustment using the coefficient of variation across input images.

Args:
    image_paths (List[str]): List of input raster file paths.
    nodata_value (float): Value representing NoData in the input rasters.
    reference_std (float, optional): Reference standard deviation for comparison. Defaults to 45.0.
    reference_mean (float, optional): Reference mean for comparison. Defaults to 125.0.
    base_block_size (Tuple[int, int], optional): Base block size (rows, cols). Defaults to (10, 10).
    band_index (int, optional): Band index to use for statistics (1-based). Defaults to 1.
    estimate_statistics (bool, optional): If True, estimates statistics for each block. Defaults to True.
    debug_logs (bool, optional): If True, print logs.

Returns:
    Tuple[int, int]: Estimated block size (rows, cols) adjusted based on coefficient of variation."""

def _calculate_block_process_image(name, image_path, bounds_canvas_coords, num_row, num_col, num_bands, debug_logs, nodata_value, calculation_dtype, tile_thread_on, tile_thread_workers):
    """Compute area-weighted block means over a target grid using GDAL Warp.

Args:
  name: Identifier carried through to the return tuple.
  image_path: Path to the source raster (VRT/GeoTIFF/etc.).
  bounds_canvas_coords: (x_min, y_min, x_max, y_max) in the source CRS (projection taken from `image_path`).
  num_row: Output grid height (rows).
  num_col: Output grid width (columns).
  num_bands: Number of bands to read from the warped raster.
  debug_logs: If True, emit progress and NaN counts to stdout.
  nodata_value: Source NoData value to treat as invalid (if present).
  calculation_dtype: Target NumPy dtype for the output array (e.g., "float32").
  tile_thread_on: If True, enable multithreaded warping.
  tile_thread_workers: Number of worker threads when `tile_thread_on` is True.

Returns:
  Tuple[str, np.ndarray]: `(name, block_mean)` where `block_mean` has shape `(num_row, num_col, num_bands)` and dtype `calculation_dtype`. Cells with no valid input samples are NaN."""

def _download_block_map(block_map, bounding_rect, output_image_path, srs, dtype, nodata_value, width, height, write_bands, delete_output):
    """Writes a 3D block map to a raster file, creating or updating specified bands within a target window.

Args:
    block_map (np.ndarray): Block data of shape (rows, cols, bands).
    bounding_rect (tuple): Spatial extent (minx, miny, maxx, maxy).
    output_image_path (str): Path to the output raster file.
    srs (str): SRS to save image with.
    dtype (str): Data type for output.
    nodata_value (float): NoData value to write.
    width (int): Full raster width.
    height (int): Full raster height.
    write_bands (tuple[int] | None): 0-based band indices to write; all if None.

Output:
    Writes the `block_map` array to `output_image_path`, either creating a new raster or updating an existing one."""

def _compute_block_size(input_image_array_path, target_blocks_per_image, bounds_canvas_coords):
    """Calculates the number of rows and columns for dividing a bounding rectangle into target-sized blocks.

Args:
    input_image_array_path (list): List of image paths to determine total image count.
    target_blocks_per_image (int | float): Desired number of blocks per image.
    bounds_canvas_coords (tuple): Bounding box covering all images (minx, miny, maxx, maxy).

Returns:
    Tuple[int, int]: Number of rows (num_row) and columns (num_col) for the block grid."""

def _smooth_array(input_array, nodata_value, scale_factor):
    """Applies Gaussian smoothing to an array while preserving NoData regions.

Args:
    input_array (np.ndarray): 2D array to be smoothed.
    nodata_value (Optional[float], optional): Value representing NoData. Treated as NaN during smoothing. Defaults to None.
    scale_factor (float, optional): Sigma value for the Gaussian filter. Controls smoothing extent. Defaults to 1.0.

Returns:
    np.ndarray: Smoothed array with NoData regions preserved or restored."""

def _write_block_raster(arr2d, path, dtype):

def _warp_to_image_grid(src_path, dst_path):

## Markdown Section
### File: rrn_methods.md
# Relative Radiometric Normalization (RRN) Methods

RRN methods differ not only in the algorithms used to adjust image values but also in the requirements images must have and other techniques that can be used in conjunction. The following taxonomy summarizes the core dimensions along which RRN techniques vary:

 - **Matching algorithm:** The core transformation applied to align radiometry between images.
 - **Geometric alignment required:** The level of spatial alignment necessary for the method.
 - **Pixel selection (PIFs/RCS):** How pseudo-invariant features/control sets are identified.
 - **Adjustment scope:** How corrections are applied to the images.
 - **Overlap:** Whether the method requires overlapping pixels.
 - **Pixel units:** The radiometric units the method is able to operate on.
 - **Bands:** Whether band relationships are preserved.
 - **Target reference:** What the target image is normalized to.

Multiple matching algorithms can be used in conjunction with multiple pixel selection methods. Note that the most restrictive method will dictate the image requirements (e.g. if using `Global regression` with `Overlapping area` the `Geometric alignment` will need to be `Moderate`). The specific matching algorithm used in each method is flexible and not fixed; it may involve least squares, RANSAC, Theil–Sen, Huber, or other forms of robust regression.

## Matching Algorithms

| Matching algorithm                      | Description                                                                                            | Geometric alignment | Adjustment granularity          | Applies                        | Overlap required | Pixel units | Bands                  | Target reference                           | Year introduced | Key papers                                                                               | Software                                                                                                                                                                                                |
| --------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------- | ------------------------------- | ------------------------------ | ---------------- | ----------- | ---------------------- | ------------------------------------------ | --------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Histogram Matching (HM)                 | Matches histogram distributions between images                                                         | None                | Global                          | Lookup table                   | No               | Any         | Independent            | Reference histogram                        | 1980s           |                                                                                          | ENVI; [HistMatch QGIS Plugin](https://github.com/Gustavoohs/HistMatch); ArcGIS Pro; IMAGINE Mosaic Pro; [landsat R library via histmatch()](https://cran.r-project.org/web/packages/landsat/index.html) |
| Minimum–Maximum Scale Normalization     | Linearly scales pixel values to match reference min/max                                                | None                | Global                          | Min/max                        | No               | Any         | Independent            | Reference min/max                          | 1980s           |                                                                                          |                                                                                                                                                                                                         |
| Mean–Standard Deviation Regression      | Fits linear regression using mean and std dev                                                          | None                | Global                          | Gain/offset                    | No               | Any         | Independent/Correlated | Reference mean/std                         | 1980s           |                                                                                          | ArcGIS Pro; [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch)                                                                                              |
| Overlapping pixel-wise Linear Regression | Fits linear regression using overlapping pairs of pixels                                               | Co-registered       | Model                           | Gain/offset                    | Yes              | Any         | Independent/Correlated | Reference image pixels                     | 1980s           |                                                                                          | ArcGIS Pro; [landsat R library via relnorm()](https://cran.r-project.org/web/packages/landsat/index.html)                                                                                               |
| Block adjusted gamma correction         | Adjusts local brightness via block-based gamma scaling                                                 | Moderate            | Blocks/interpolation resolution | Power function                 | Yes              | Any         | Independent            | Reference block map (mean of local blocks) |                 |                                                                                          | [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch)                                                                                                          |
| CCA/KCCA-Based                          | Finding the most correlated combinations between images                                                | Co-registered       | CCA space resolution            | Matrix                         | Yes              | Any         | Correlated             | Reference canonical components             |                 |                                                                                          |                                                                                                                                                                                                         |
| Dodging                                 | Smooths brightness using low-pass filtering to reduce lighting artifacts                               | Co-registered       | Blur resolution                 | Low-pass brightness correction | Yes              | Any         | Independent            | Blur created brightness values             |                 |                                                                                          | ArcGIS Pro; IMAGINE Mosaic Pro                                                                                                                                                                          |
| Illumination Equalization               | Models and removes large-scale illumination differences across images                                  | Co-registered       | Surface model resolution        | Modeled lighting correction    | Yes              | Any         | Independent            | Computed illumination values               |                 |                                                                                          | IMAGINE Mosaic Pro                                                                                                                                                                                      |
| Wavelet reconstruction                  | Uses ancillary data to model and reconstruct image values at multiple detail levels                    | Co-registered       | Ancillary data resolution       | Decomposition/reconstruction   | Yes              | Any         | Correlated             | Ancillary data                             |                 | [(Gan et al., 2021)](https://doi.org/10.1109/JSTARS.2021.3069855)                        |                                                                                                                                                                                                         |
| Dual-reference affine interpolation     | Models corrections from the two nearest reference images and applies temporally weighted interpolation | Co-registered       | Model                           | Gain/offset                    | Yes              | Any         | Independent            | Two closest high-quality reference images  | 2020            | [(Hessel et al., 2020)](https://isprs-annals.copernicus.org/articles/V-2-2020/845/2020/) | [rrn-multisensor-multidate Python scripts](https://github.com/chlsl/rrn-multisensor-multidate)                                                                                                          |

## Pixel Selection

| Pixel selection (PIFs/RCS)              | Description                                                                                                                                                                       | Type        | Geometric alignment | Overlap required | Pixel units                        | Year introduced | Key papers                                                                                             | Software                                                                                       |
| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | ------------------- | ---------------- | ---------------------------------- | --------------- | ------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------- |
| Whole image                             | Uses all pixels without selection or masking                                                                                                                                      | None        | None                | No               | Any                                |                 |                                                                                                        |                                                                                                |
| Overlapping area                        | Uses only pixels in the spatial overlap between images                                                                                                                            | None        | Moderate            | Yes              | Any                                |                 |                                                                                                        |                                                                                                |
| Manual polygons or pixels               | User-defined areas or points chosen as invariant                                                                                                                                  | Manual      | None                | No               | Any                                |                 |                                                                                                        |                                                                                                |
| Manual threshold                        | Selects pixels based on value threshold                                                                                                                                           | Threshold   | None                | No               | Any                                |                 |                                                                                                        |                                                                                                |
| Dark/Bright Set (DB)                    | Selects darkest and brightest pixels assumed to be invariant                                                                                                                      | Threshold   | None                | No               | Any/reflectance may perform better |                 |                                                                                                        |                                                                                                |
| NDVI ratio                              | Uses vegetation indices to isolate vegetated areas for normalization                                                                                                              | Band ratio  | None                | No               | Reflectance                        |                 |                                                                                                        | [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch) |
| K-T ratio                               | Uses the Kauth–Thomas transformation to identify invariant pixels in greenness–brightness space                                                                                   | Band ratio  | None                | No               | Reflectance                        |                 | [(Hall et al., 1991)](https://www.sciencedirect.com/science/article/pii/003442579190062B?via%3Dihub)   | [landsat R library via RCS()](https://cran.r-project.org/web/packages/landsat/index.html)      |
| Urban materials ratio                   | Assumes that certain **man-made surfaces** (e.g., roads, rooftops) have **stable reflectance over time** and uses their statistical properties to correct radiometric differences | Band ratio  | None                | No               | Reflectance                        | 1988            | [(Schott et al., 1988)](https://www.sciencedirect.com/science/article/pii/0034425788901162?via%3Dihub) | [landsat R library via PIF()](https://cran.r-project.org/web/packages/landsat/index.html)      |
| No-change  Scattergrams (NC)            | Selects pixels near the scatterplot diagonal where reference and target values match closely                                                                                      | Statistical | Co-registered       | Yes              | Any                                |                 | [(De Carvalho et al., 2013)](https://www.mdpi.com/2072-4292/5/6/2763)                                  |                                                                                                |
| Multivariate Alteration Detection (MAD) | Identifies invariant pixels by transforming image differences into uncorrelated components; selects pixels with minimal change across all bands                                   | Statistical | Co-registered       | Yes              | Any                                |                 |                                                                                                        |                                                                                                |
| Iteratively Reweighted MAD (IR-MAD)     | Refines MAD by reweighting pixels to improve change detection                                                                                                                     | Statistical | Co-registered       | Yes              | Any                                |                 | [(Canty & Nielsen, 2008)](https://doi.org/10.1016/j.rse.2007.07.013)                                   | [ArrNorm Python scripts](https://github.com/SMByC/ArrNorm)                                     |
| Multi-Rule-Based Normalization          | Combines several selection rules to identify invariant pixels                                                                                                                     | Statistical | None                | No               | Any                                |                 |                                                                                                        |                                                                                                |
| PCA                                     | Uses principal component analysis to identify pseudo-invariant pixels along the major axis of multitemporal scatterplots                                                          | Statistical | Co-registered       | Yes              | Any                                | 2002            | [(Du et al., 2002)](https://www.sciencedirect.com/science/article/pii/S0034425702000299?via%3Dihub)    |                                                                                                |
| Gradient angle similarity               | Selecting the 10% of pixels with the smallest gradient angle differences between an image and its reference                                                                       | Statistical | Co-registered       | Yes              | Any                                | 2020            | [(Hessel et al., 2020)](https://isprs-annals.copernicus.org/articles/V-2-2020/845/2020/)               | [rrn-multisensor-multidate Python scripts](https://github.com/chlsl/rrn-multisensor-multidate) |
| Feature-Based (Keypoint) RRN            | Matches distinctive features between images and uses their correspondence to guide normalization                                                                                  | Geometric   | Moderate            | Yes              | Any                                |                 |                                                                                                        |                                                                                                |
| Location-Independent RRN (LIRRN)        | Groups pixels by brightness or spectral similarity, then matches these groups between images to perform group-wise normalization                                                  | Geometric   | Moderate            | Yes              | Any                                | 2024            | [(Moghimi et al., 2024)](https://www.mdpi.com/1424-8220/24/7/2272)                                     | [LIRRN MATLAB scripts](https://github.com/ArminMoghimi/LIRRN/tree/main)                        |


### File: cli.md
# Command Line Interface

## Installation
The command line interface is installed automatically when the Python library is installed. See instructions on the installation [page](https://spectralmatch.github.io/spectralmatch/installation/). Use the API reference or `COMMAND --help` to see the options that can be passed into the Python functions.

## Usage

Print general help:

1
spectralmatch --help
Print help for a specific command:
1
spectralmatch COMMAND --help
Print installed version:
1
spectralmatch --version
Run a specific command:
1
spectralmatch COMMAND [OPTIONS]
## Commands {commands_content} ### File: installation.md # Installation Methods --- ## Installation as a QGIS Plugin ### 1. Get QGIS [Download](https://qgis.org/download/) and install QGIS. > This plugin requires Python ≥ 3.10 and ≤ 3.12. QGIS ships with different versions of Python; to check which one you have, go to QGIS > About QGIS in the QGIS menu. If your version of Python is not supported, you can update your QGIS (if available) or install it containerized with conda: `conda create --name qgis_env python=3.12`, `conda activate qgis_env`, `conda install -c conda-forge qgis`, then `qgis` to start the program. ### 2. Install spectralmatch QGIS plugin - Go to Plugins → Manage and Install Plugins
 - Find spectralmatch in the list, install, and enable it - Find the plugin in the Processing Toolbox ### 3. Install spectralmatch Python library The plugin will attempt to automatically install all Python dependencies that it requires in the QGIS Python interpreter. It uses [QPIP](https://github.com/opengisch/qpip), in addition to custom installation scripts, to do this. If it is unable to, the user must manually locate the QGIS Python interpreter and install the spectralmatch Python library and all of its dependencies. --- ## Installation via pip as a Python Library and CLI ### 1. System requirements Before installing, ensure you have the following system-level prerequisites: - Python ≥ 3.10 and ≤ 3.12 - PROJ ≥ 9.3 - GDAL ≥ 3.11 - pip An easy way to install these dependencies is to use [Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions):
1
2
conda create -n spectralmatch python=3.12 "gdal>=3.11" "proj>=9.3" -c conda-forge
conda activate spectralmatch
### 2. Install spectralmatch You can automatically install the library via [PyPI](https://pypi.org/). (this method installs only the core code as a library):
1
pip install spectralmatch
--- ## Installation via pixi as a Python Library and CLI Installing via pixi can be easier as it handles the system level dependencies:
1
2
3
4
pixi init myproject
cd myproject
pixi add "python>=3.10,<3.13" "gdal>=3.11"
pixi add spectralmatch --pypi
## Installation from Source ### 1. Clone the Repository
1
2
git clone https://github.com/spectralmatch/spectralmatch.git
cd spectralmatch
> Assuming you have Make installed, you can then run `make install-setup` to automatically complete the remaining setup steps. ### 2. System requirements Before installing, ensure you have the following system-level prerequisites: - Python ≥ 3.10 and ≤ 3.12 - PROJ ≥ 9.3 - GDAL ≥ 3.11 An easy way to install these dependencies is to use [Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions):
1
2
conda create -n spectralmatch python=3.12 "gdal>=3.11" "proj>=9.3" -c conda-forge
conda activate spectralmatch
### 3. Install Dependencies The `pyproject.toml` defines **core** dependencies to run the library and optional **dev** and **docs** dependencies.
1
2
3
4
pip install . # Normal dependencies
pip install -e ".[dev]"   # Developer dependencies
pip install -e ".[docs]"  # Documentation dependencies
pip install -e ".[qgis-build]" # Build qgis plugin
### File: contributing.md # Contributing Guide Thank you for your interest in contributing. The sections below outline how the library is structured, how to submit changes, and the conventions to follow when developing new features or improving existing functionality. For convenience, you can copy [this](/spectralmatch/llm_prompt/) auto-updated LLM priming prompt with function headers and docs. --- ## Collaboration Instructions We welcome all contributions to the project! Please be respectful and work towards improving the library. To get started: 1. [Create an issue](https://github.com/spectralmatch/spectralmatch/issues/new) describing the feature or bug or just to ask a question. Provide relevant context, desired timeline, any assistance needed, who will be responsible for the work, anticipated results, and any other details. 2. [Fork the repository](https://github.com/spectralmatch/spectralmatch/fork) and create a new feature branch. 3. Make your changes and add any necessary tests. 4. Open a Pull Request against the main repository. --- ## Design Philosophy - Keep code concise and simple - Adapt code for large datasets with windows, multiprocessing, progressive computations, etc. - Keep code modular and have descriptive names - Use PEP 8 code formatting - Use functions that are already created when possible - Combine similar params into one multi-value parameter - Use similar naming convention and input parameter format as other functions. - Create docstrings (Google style), tests, and update the docs for new functionality --- ## Extensible Function Types Relative Radiometric Normalization (RRN) methods often differ in how images are matched, pixels are selected, and seamlines are created. This library organizes those into distinct Python packages, while other operations like aligning rasters, applying masks, merging images, and calculating statistics are more consistent across techniques and are treated as standard utilities. 
### Matching functions Used to adjust the pixel values of images to ensure radiometric consistency across scenes. These functions compute differences between images and apply transformations so that brightness, contrast, or spectral characteristics align across datasets. ### Masking functions (PIF/RCS) Used to define which parts of an image should be kept or discarded based on spatial criteria. These functions apply vector-based filters or logical rules to isolate regions of interest, remove clouds, or exclude invalid data from further processing. ### Seamline functions Used to determine optimal boundaries between overlapping image regions. These functions generate cutlines that split image footprints in a way that minimizes visible seams and balances spatial coverage, often relying on geometric relationships between overlapping areas. --- ## Standard UI Reusable types are organized into the types and validation module. Use these types directly as the types of params inside functions where applicable. Use the appropriate _resolve... function to resolve these inputs into usable variables. ### Input/Output The input_name parameter defines how the input files are determined and accepts either a str or a list. If given as a str, it should be either a folder path (in which case default_file_pattern must be set) or a complete glob pattern file path. Functions should default to searching for all appropriately formatted files in the input folder (for example "*.tif"). Alternatively, it can be a list of full file paths to individual input images. For example: - input_images="/input/files/*.tif" (does not require default_file_pattern) - input_images="/input/folder" (requires default_file_pattern to be set), - input_images=["/input/one.tif", "/input/two.tif", ...] (does not require default_file_pattern) The output_name parameter defines how output filenames are determined and accepts either a str or a list. 
If given as a str, it should be either a folder path (in which case default_file_pattern must be set) or a complete template pattern file path. Functions should default to templating with the basename, an underscore, and the processing step (for example "$_Global"). Alternatively, it may be a list of full output paths, which must match the number of input images. For example: - output_images="/output/files/$.tif" (does not require default_file_pattern) - output_images="/output/folder" (requires default_file_pattern to be set), - output_images=["/output/one.tif", "/output/two.tif", ...] (does not require default_file_pattern) The _resolve_paths function handles creating folders for output files. Folders and files are distinguished by the presence of a "." in the basename.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Params
input_name # For example: input_images
output_name # For example: output_images

# Types
SearchFolderOrListFiles = str | List[str] # Required
CreateInFolderOrListFiles = str | List[str] # Required

# Resolve
input_image_paths = _resolve_paths("search", input_images, kwargs={"default_file_pattern":"*.tif"})
output_image_paths = _resolve_paths("create", output_images, kwargs={"paths_or_bases":input_image_paths, "default_file_pattern":"$_Global.tif"})
image_names = _resolve_paths("name", input_image_paths)
# This pattern can also be used with other input types like vectors
### Output dtype The custom_output_dtype parameter specifies the data type for output rasters and defaults to the input image’s data type if not provided.
1
2
3
4
5
6
7
8
# Param
custom_output_dtype

# Type
CustomOutputDtype = str | None # Default: None

# Resolve
output_dtype = _resolve_output_dtype(input_image_paths[0], custom_output_dtype)
### Nodata Value The custom_nodata_value parameter overrides the input nodata value from the first raster in the input rasters if set.
1
2
3
4
5
6
7
8
# Param
custom_nodata_value

# Type
CustomNodataValue = float | int | None # Default: None

# Resolve
nodata_value = _resolve_nodata_value(input_image_paths[0], custom_nodata_value)
### Debug Logs The debug_logs parameter enables printing of debug information; it defaults to False. Functions should begin by printing "Start {process name}", while all other print statements should be conditional on debug_logs being True. When printing the image being processed, use the image name and not the image path.
1
2
3
4
5
6
7
# Param
debug_logs

# Type
DebugLogs = bool # Default: False

# No resolve function necessary
### Vector Mask The vector_mask parameter limits statistics calculations to specific areas and is given as a tuple with two or three items: a literal "include" or "exclude" to define how the mask is applied, a string path to the vector file, and an optional field name used to match geometries based on the input image name (substring match allowed). Defaults to None for no mask.
1
2
3
4
5
6
7
# Param
vector_mask

# Type
VectorMask = Tuple[Literal["include", "exclude"], str, Optional[str]] | None

# No resolve function necessary
### Parallel Workers The image_threads parameter defines the parallelization strategy at the image level. It accepts "cpu" or an int to enable multiprocessing across all available CPU cores or the number specified, respectively. Set it to None to disable image-level parallelism. io_threads sets the number of reading and writing threads via the GDAL_NUM_THREADS GDAL configuration option. tile_threads is passed into the GDAL commands as the NUM_THREADS param to determine tile-level threading. The cache param sets the GDAL cache size (given in GB) via SetCacheMax.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Params
cache
image_threads 
io_threads
tile_threads

# Types
Threads = Literal["cpu"] | int | None
Cache = float | None

# Resolve
_set_gdal_cache(cache, debug_logs)
_set_gdal_workers(io_threads, debug_logs)

image_backend = "thread" # "thread" or "process"
image_threads_on, image_thread_workers = _resolve_parallel_config(image_threads)
tile_thread_on, tile_thread_workers = _resolve_parallel_config(tile_threads)

# Main process example
image_args = [(tile_thread_on, tile_thread_workers, other_args, ...) for arg in inputs]
if image_threads_on:
    with _get_executor(image_backend, image_thread_workers) as executor:
        futures = [executor.submit(_name_process_image, *args) for args in image_args]
        for future in as_completed(futures):
            future.result()
else:
    for args in image_args:
        _name_process_image(*args)

def _name_process_image(tile_thread_on, tile_thread_workers, other_args, ...):
    # Process image
### Windows The window_size parameter sets the output tile size and if not set will default to the input tile size. It should be an int or None.
1
2
3
4
5
6
7
8
# Param
window_size

# Types
WindowSize = int | None

# Resolve within image process if possible
window_size = _resolve_window_size(window_size, input_image_path, debug_logs)
### COGs The save_as_cog parameter, when set to True, saves the output as a Cloud-Optimized GeoTIFF with correct band and block ordering.
1
2
3
4
5
6
7
# Param
save_as_cog

# Type
SaveAsCog = bool # Default: True

# No resolve function necessary
--- ## Validate Inputs The validate methods are used to check that input parameters follow expected formats before processing begins. There are different validation methods for different scopes—some are general-purpose (e.g., Universal.validate) and others apply to specific contexts like matching (Match.validate_match). These functions raise clear errors when inputs are misconfigured, helping catch issues early and enforce consistent usage patterns across the library.
1
2
3
4
5
6
7
8
9
# Validate params example
Universal.validate(
    input_images=input_images,
    output_images=output_images,
    vector_mask=vector_mask,
)
Match.validate_match(
    specify_model_images=specify_model_images,
    )
--- ## File Cleanup Temporary generated files can be deleted once they are no longer needed via this command:
1
make clean
--- ## Docs Docs are deployed on push or merge at the main branch, or use the following commands: ### Serve docs locally Runs a local dev server at http://localhost:8000.
1
make docs-serve
### Build static site Generates the static site into the site/ folder.
1
make docs-build
### Deploy to GitHub Pages Deploys built site using mkdocs gh-deploy.
1
make docs-deploy
--- ## Versioning Automatically create a GitHub release, PyPI package, and QGIS plugin with each version. All three distributions share the same version number and are deployed with GitHub Actions. New versions will be released when sufficient new functionality or bug fixes have been added.
1
make version version=1.2.3
--- ## Code Formatting This project uses [black](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html) for code formatting and ruff for linting. ### Set Up Pre-commit Hooks (Recommended) To maintain code consistency use this hook to check and correct code formatting automatically:
1
2
pre-commit install
pre-commit run --all-files
### Manual Formatting **Format code:** Automatically formats all Python files with black.
1
make format
**Check formatting:** Checks that all code is formatted (non-zero exit code if not).
1
make check-format
**Lint code:** Runs ruff to catch style and quality issues.
1
make lint
--- ## Testing [pytest](https://docs.pytest.org/) is used for testing. Tests will automatically be run when merging into main but they can also be run locally via:
1
make test
To test an individual folder or file:
1
make test-file path=path/to/folder_or_file
## Building Python Library and QGIS Plugin Locally Use these commands to build packages locally:
1
2
make qgis-build # Build QGIS plugin
make python-build # Build python library
### File: formats_and_requirements.md # File Formats and Input Requirements ## Input Raster Requirements Input rasters must meet specific criteria to ensure compatibility during processing. These are checked by _check_raster_requirements(): - Have a valid geotransform - Share the same coordinate reference system (CRS) - Have an identical number of bands - Use consistent nodata values Additionally, all rasters should: - Be a `.tif` file - Have overlap which represents the same data in each raster - Have a consistent spectral profile ## Regression Parameters File Regression parameters can be stored in a `json` file which includes: - Adjustments: Per-band scale and offset values applied to each image. - Whole Stats: Per-band mean, std, and size representing overall image statistics. - Overlap Stats: Per-image pair mean, std, and size for overlapping geometry regions. The structure is a dictionary keyed by images basenames (no extension) with the following format:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
{
  "image_name": {
    "adjustments": {
      "band_0": {"scale": float, "offset": float},
      ...
    },
    "whole_stats": {
      "band_0": {"mean": float, "std": float, "size": int},
      ...
    },
    "overlap_stats": {
      "other_image": {
        "band_0": {"mean": float, "std": float, "size": int},
        ...
      },
      ...
    }
  },
  ...
}
This format represents the following: For each image_name there are adjustments, whole_stats, and overlap_stats entries. Within adjustments, each band has a scale and an offset. Within whole_stats, and within each other-image entry of overlap_stats, each band has a mean, std, and size (number of pixels). Each band key follows the format band_0, band_1, etc. Mean and std are floats and size is an integer. This structure is validated by `_validate_adjustment_model_structure()` before use to ensure consistency and completeness across images and bands. Global regression does not actually use the 'adjustments' field because the adjustments are recalculated on every run. ## Block Maps File Block maps are spatial summaries of raster data, where each block represents the mean values of a group of pixels over a fixed region. They are used to reduce image resolution while preserving local radiometric characteristics, enabling efficient comparison and adjustment across images. Each map is structured as a grid of blocks with values for each spectral band. They can be saved as regular GeoTIFF files and together store this information: block_local_means, block_reference_mean, num_row, num_col, bounds_canvas_coords. There are two types of block maps, although their format is exactly the same: - **Local Block Map:** Each block stores the mean value of all pixels within its boundary for a single image. - **Reference Block Map:** Each block is the mean of all images' means for its boundary; simply the mean of all local block maps. Both block maps have the shape `num_row, num_col, num_bands`; however, there are multiple local block maps (one for each image) and only one reference block map. Once a reference block map is created it is unique to its input images and cannot be accurately modified to add additional images. However, images can be 'brought' to a reference block map even if they were not involved in its creation as long as it covers that image. 
### File: index.md # spectralmatch: relative radiometric normalization toolkit for raster mosaics and time series [![PyPI version](https://img.shields.io/pypi/v/spectralmatch.svg)](https://pypi.org/project/spectralmatch/) [![QGIS Plugin](https://img.shields.io/badge/QGIS-Plugin-589632?logo=qgis)](https://plugins.qgis.org/plugins/spectralmatch_qgis/) [![Your-License-Badge](https://img.shields.io/badge/License-MIT-green)](#) [![codecov](https://codecov.io/gh/spectralmatch/spectralmatch/graph/badge.svg?token=03JTHNK76C)](https://codecov.io/gh/spectralmatch/spectralmatch) [![Open in Cloud Shell](https://img.shields.io/badge/Launch-Google_Cloud_Shell-blue?logo=googlecloud)](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https://github.com/spectralmatch/spectralmatch&cloudshell_working_dir=.) [![📋 Copy LLM Prompt](https://img.shields.io/badge/📋_Copy-LLM_Prompt-brightgreen)](https://spectralmatch.github.io/spectralmatch/llm_prompt) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15312878.svg)](https://doi.org/10.5281/zenodo.15312878) [![DOI](https://joss.theoj.org/papers/10.21105/joss.08974/status.svg)](https://doi.org/10.21105/joss.08974) ## Overview ![Global and Local Matching](./images/spectralmatch.png) Spectralmatch provides algorithms to perform relative radiometric normalization (RRN) to enhance spectral consistency across raster mosaics and time series. It is built for geoscientific use, with a sensor- and unit-agnostic design, optimized for automation and efficiency on arbitrarily many images and bands, and works well with Very High Resolution Imagery (VHRI) as it does not require pixel co-registration. In addition to matching algorithms, the software supports cloud and vegetation masking, pseudo invariant feature (PIF) based exclusion, seamline network generation, raster merging, and plotting statistics. The toolkit is available as an open-source Python library, command line interface, and QGIS plugin. 
> Please cite as: Lindiwe et al., (2026). Spectralmatch: relative radiometric normalization toolkit for raster mosaics and time series. Journal of Open Source Software, 11(117), 8974, https://doi.org/10.21105/joss.08974 ## Features - **Automated, Efficient, and Scalable:** Designed for large-scale workflows with no manual steps, leveraging multiprocessing and Cloud Optimized GeoTIFF support for fast, efficient processing across images, windows, and bands. - **Resumable Processing:** Save image stats and block maps for quicker reprocessing. - **Integrated Seamline and Cloud Masking:** Generate seamlines and detect clouds within the same workflow. - **Specify Model Images:** Include all or only specified images in the matching solution to bring all images to a central tendency or to the selected images' spectral profile. - **Consistent Multi-image Analysis:** Performs minimal necessary adjustments to achieve inter-image consistency while preserving the original spectral characteristics. - **Sensor and Unit Agnostic:** Supports optical imagery from handheld cameras, drones, crewed aircraft, and satellites for reliable single sensor and multi-sensor analysis, while preserving spectral integrity across all pixel units—including negative values and reflectance. - **Enhanced Imagery:** Helpful when performing mosaics and time series analysis by blending large image collections and normalizing them over time, providing consistent, high-quality data for machine learning and other analytical tasks. - **Open Source and Collaborative:** Free under the MIT License with a modular design that supports community contributions and easy development of new features and workflows. Accessible through a Python library, command line interface, and QGIS plugin. --- ## Current Matching Algorithms ### Global to local matching This technique is derived from 'An auto-adapting global-to-local color balancing method for optical imagery mosaic' by Yu et al., 2017 (DOI: 10.1016/j.isprsjprs.2017.08.002). 
It is particularly useful for very high-resolution imagery (satellite or otherwise) and works in a two-phase process. First, this method applies least squares regression to estimate scale and offset parameters that align the histograms of all images toward a shared spectral center. This is achieved by constructing a global model based on the overlapping areas of adjacent images, where the spectral relationships are defined. This global model ensures that each image conforms to a consistent radiometric baseline while preserving overall color fidelity. However, global correction alone cannot capture intra-image variability so a second local adjustment phase is performed. The overlap areas are divided into smaller blocks, and each block’s mean is used to fine-tune the color correction. This block-wise tuning helps maintain local contrast and reduces visible seams, resulting in seamless and spectrally consistent mosaics with minimal distortion. ![Histogram matching graph](./images/matching_histogram.png) *Comparison of three WorldView-3 images from Puʻu Waʻawaʻa, Hawaiʻi before and after processing with global regression and local block adjustment using spectralmatch. The top left shows images before processing, the middle left shows images after processing, the bottom left shows images mosaiced before and after processing, and lastly, the right shows the spectral profile of all images.* #### Assumptions - **Consistent Spectral Profile:** The true spectral response of overlapping areas remains the same throughout the images. - **Least Squares Modeling:** A least squares approach can effectively model and fit all images' spectral profiles. - **Scale and Offset Adjustment:** Applying scale and offset corrections can effectively harmonize images. - **Minimized Color Differences:** The best color correction is achieved when color differences are minimized. 
- **Geometric Alignment:** Images are assumed to be geometrically aligned with known relative positions via a geotransform. However, they only need to be roughly aligned as pixel co-registration is not required. - **Global Consistency:** Overlapping color differences are consistent across the entire image. - **Local Adjustments:** Block-level color differences result from the global application of adjustments. --- ## Installation > For additional installation instructions see the [Installation Methods](https://spectralmatch.github.io/spectralmatch/installation/) in the docs. ### Installation as a QGIS Plugin In the [QGIS](https://qgis.org/download/) plugin manager, install 'spectralmatch' and find it in the Processing Toolbox. ### Installation as a Python Library and CLI Ensure you have the following system-level prerequisites: `Python ≥ 3.10 and ≤ 3.12`, `pip`, `PROJ ≥ 9.3`, and `GDAL ≥ 3.11`; then use pip to install the library:
1
2
3
conda create -n spectralmatch python=3.12 "gdal>=3.11" "proj>=9.3" -c conda-forge
conda activate spectralmatch
pip install spectralmatch
--- ## Usage Example scripts and sample data are provided to verify a successful installation and help you get started quickly in the repository at [`/docs/examples`](https://github.com/spectralmatch/spectralmatch/blob/main/docs/examples/) and downloadable [here](https://download-directory.github.io/?url=https://github.com/spectralmatch/spectralmatch/tree/main/docs/examples&filename=spectralmatch_examples). This is an example mosaic workflow using folders for each step:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
from spectralmatch import *

working_directory = "/path/to/working/directory"
input_folder = os.path.join(working_directory, "Input")
global_folder = os.path.join(working_directory, "GlobalMatch")
local_folder = os.path.join(working_directory, "LocalMatch")
aligned_folder = os.path.join(working_directory, "Aligned")
clipped_folder = os.path.join(working_directory, "Clipped")

global_regression(
    input_images=input_folder,
    output_images=global_folder,
)

local_block_adjustment(
    input_images=global_folder,
    output_images=local_folder,
)

align_rasters(
    input_images=local_folder,
    output_images=aligned_folder,
    tap=True,
)

voronoi_center_seamline(
    input_images=aligned_folder,
    output_mask=os.path.join(working_directory, "ImageMasks.gpkg"),
    image_field_name="image",
)

mask_rasters(
    input_images=aligned_folder,
    output_images=clipped_folder,
    vector_mask=("include", os.path.join(working_directory, "ImageMasks.gpkg"), "image"),
)

merge_rasters(
    input_images=clipped_folder,
    output_image_path=os.path.join(working_directory, "MergedImage.tif"),
)
--- ## Documentation Documentation is available at [spectralmatch.github.io/spectralmatch/](https://spectralmatch.github.io/spectralmatch/). --- ## Contributing Guide Contributing Guide is available at [spectralmatch.github.io/spectralmatch/contributing](https://spectralmatch.github.io/spectralmatch/contributing/). --- ## License This project is licensed under the MIT License. See [LICENSE](https://github.com/spectralmatch/spectralmatch/blob/main/LICENSE) for details. ## Project Support This library was developed at the [Spatial Data Analysis and Visualization Lab (SDAV)](https://hilo.hawaii.edu/sdav/) at the University of Hawaii at Hilo by Kanoa Lindiwe. Funding was partly provided by the [Hau‘oli Mau Loa Foundation](https://www.hauolimauloa.org/), in addition to the National Science Foundation EPSCoR grant 2149133, Change Hawaiʻi: Harnessing the Data Revolution for Island Resilience. ### File: api/utils.md ::: spectralmatch.utils ### File: api/seamline.md ::: spectralmatch.seamline.voronoi_center_seamline ### File: api/mask.md ::: spectralmatch.mask.mask ::: spectralmatch.mask.utils_mask ### File: api/statistics.md ::: spectralmatch.statistics ### File: api/match.md ::: spectralmatch.match.global_regression ::: spectralmatch.match.local_block_adjustment ### File: api/handlers.md ::: spectralmatch.handlers