Use this text to provide an LLM with context about this codebase, including function headers and docs.
# LLM Prompt
The following content includes function signatures and docstrings from Python source files, as well as relevant Markdown documentation. Each section is labeled by its relative file path. Use this as context to understand the project structure, purpose, and functionality.
## Python Section
### File: utils_multiprocessing.py
def _choose_context(prefer_fork):
"""Chooses the most appropriate multiprocessing context based on platform and preference.
Args:
prefer_fork (bool): If True, prefers "fork" context where available; default is True.
Returns:
mp.context.BaseContext: Selected multiprocessing context ("fork", "forkserver", or "spawn")."""
def _resolve_parallel_config(config):
"""Parses a parallel worker config into execution flags and worker count.
Args:
config (Tuple["process" | "thread", "cpu" | int] | None): Parallelization strategy; None disables parallelism.
Returns:
Tuple[bool, Optional[str], Optional[int]]:
- Whether to run in parallel,
- The backend ("process" or "thread"),
- Number of workers."""
def _get_executor(backend, max_workers, initializer, initargs):
"""Creates a parallel executor (process or thread) with optional initialization logic.
Args:
backend (str): Execution backend, either "process" or "thread".
max_workers (int): Maximum number of worker processes or threads.
initializer (Callable, optional): Function to initialize worker context.
initargs (tuple, optional): Arguments to pass to the initializer.
Returns:
Executor: An instance of ThreadPoolExecutor or ProcessPoolExecutor.
Raises:
ValueError: If the backend is not "process" or "thread"."""
def _run_parallel_images(image_paths, run_parallel_windows, image_parallel_workers, window_parallel_workers):
"""Runs a window-level processing function across multiple images, with optional image-level parallelism.
Args:
image_paths (List[str]): List of input image file paths.
run_parallel_windows (Callable): Function to run on each image, accepting (path, window_parallel_workers).
image_parallel_workers (Tuple["process" | "thread", "cpu" | int] | None): Strategy for image-level parallelism.
window_parallel_workers (Tuple["process" | "thread", "cpu" | int] | None): Passed to `run_parallel_windows` for window-level parallelism.
Returns:
None"""
def _run_parallel_windows(windows, process_fn, window_parallel_workers):
"""Runs a processing function on a list of windows, with optional parallel execution.
Args:
windows (List[Any]): List of window-like objects to process.
process_fn (Callable[[Any], Any]): Function to run on each window.
window_parallel_workers (Tuple["process" | "thread", "cpu" | int] | None): Parallel execution strategy; None disables parallelism.
Returns:
None"""
def _resolve_windows(dataset, window_size):
"""Generates a list of windows for reading a raster dataset based on the given tiling strategy.
Args:
dataset (rasterio.DatasetReader): Open raster dataset.
window_size (int | Tuple[int, int] | Literal["internal", "block"] | None):
Tiling strategy:
- int: square tile size,
- (int, int): custom width and height in pixels,
- "internal": use native tiling of dataset,
- "block": tile by block layout defined in `block_params`,
- None: single full-image window.
block_params (Tuple[int, int, Tuple[float, float, float, float]] | None, optional):
Required if window_size is "block". A tuple of:
- number of block rows (int),
- number of block columns (int),
- bounding box (minx, miny, maxx, maxy) of canvas extent in image coordinates.
Returns:
List[Window]: List of rasterio Windows that cover the dataset."""
def _create_windows(width, height, tile_width, tile_height):
"""Generates tiled windows across a raster based on specified dimensions.
Args:
width (int): Total width of the raster.
height (int): Total height of the raster.
tile_width (int): Width of each tile.
tile_height (int): Height of each tile.
Yields:
rasterio.windows.Window: A window representing a tile's position and size."""
def init(cls, config):
"""Initializes per-process context from a typed config dictionary.
Each entry maps a key to a tuple describing how to initialize a resource:
- ('raster', filepath): Open raster with rasterio.
- ('shm', shm_name): Attach to shared memory.
- ('array', shm_name, shape, dtype): Create NumPy array from shared memory.
- ('value', literal): Store a direct Python value.
Examples:
{
"input": ("raster", "/path/to/image.tif"),
"weights": ("array", "shm_weights", (512, 512), "float32"),
"debug": ("value", True)
}
Resources are stored in WorkerContext.cache and accessed via WorkerContext.get(key)."""
def get(cls, key):
def close(cls):
### File: types_and_validation.py
def validate():
def validate_match():
def validate_global_regression():
def validate_local_block_adjustment():
def _validate_window_param(val):
### File: handlers.py
def _resolve_output_dtype(dataset, custom_output_dtype):
"""Resolves the output dtype for a raster operation.
Args:
dataset (rasterio.io.DatasetReader): The input dataset to derive default dtype from.
custom_output_dtype (str | None): A user-specified output dtype, or None to use dataset dtype.
Returns:
str: The resolved output dtype."""
def _resolve_nodata_value(dataset, custom_nodata_value):
"""Determine the appropriate nodata value for a raster dataset.
Priority is given to a user-provided custom nodata value. If not provided, the function attempts to use the nodata value defined in the dataset metadata. Returns None if neither is available.
Args:
dataset (rasterio.io.DatasetReader): The opened raster dataset.
custom_nodata_value (float | int | None): Optional user-defined nodata value.
Returns:
float | int | None: The resolved nodata value, or None if unavailable."""
def _resolve_paths(mode, input):
"""Resolves a list of input paths based on the mode and input format.
Args:
mode (Literal["search", "create", "match", "name"]): Type of operation to perform.
input (str | List[str]): Either a list of file paths or a folder/template string.
kwargs (dict, optional): Additional keyword arguments passed to the resolved function.
Returns:
List[str]: List of resolved paths."""
def search_paths(search_pattern):
"""Search for files using a glob pattern, or a folder with a default file pattern.
Args:
search_pattern (str, required): Defines input files from a glob path or folder. Specify like: "/input/files/*.tif" or "/input/folder" (while passing default_file_pattern like: '*.tif')
default_file_pattern (str, optional): Used when `search_pattern` is a directory. If not set and `search_pattern` is a folder, raises an error.
recursive (bool, optional): Whether to search recursively.
match_to_paths (Tuple[List[str], str], optional): Matches input files to a reference list using a regex.
debug_logs (bool, optional): Whether to print matched paths.
Returns:
List[str]: Sorted list of matched file paths.
Raises:
ValueError: If `search_pattern` is a directory and `default_file_pattern` is not provided."""
def create_paths(template_pattern, paths_or_bases):
"""Create output paths using a filename template_pattern and a list of reference paths or names.
Args:
template_pattern (str, required): Defines output files from a glob path or folder to match input paths or names. Specify like: "/input/files/$.tif" or "/input/folder" (while passing default_file_pattern like: '$.tif')
paths_or_bases (List[str]): List of full paths or base names to derive the replace_symbol from.
default_file_pattern (str, optional): Used if `template_pattern` is a directory.
debug_logs (bool): Whether to print the created paths.
replace_symbol (str): Placeholder symbol in the template to replace with base names.
create_folders (bool): Whether to create output folders if they don't exist.
Returns:
List[str]: List of constructed file paths.
Raises:
ValueError: If `template_pattern` is a directory and `default_file_pattern` is not provided."""
def match_paths(input_match_paths, reference_paths, match_regex, debug_logs):
"""Match `reference_paths` to `input_match_paths` using a regex applied to the basenames of `input_match_paths`. The extracted key must be a substring of the reference filename.
Args:
input_match_paths (List[str]): List of candidate paths to extract keys from.
reference_paths (List[str]): List of reference paths to align to.
match_regex (str): Regex applied to basenames of input_match_paths to extract a key to match via *inclusion* in reference_paths (e.g. "(.*)_LocalMatch\.gpkg$" (without one of the backslashes)).
debug_logs (bool): If True, print matched and unmatched file basenames.
Returns:
List[Optional[str]]: A list the same length as `reference_paths` where each
element is the matched path from `input_match_paths` or None.
Raises:
ValueError: If output list length does not match reference_paths length."""
def _check_raster_requirements(input_image_paths, debug_logs, check_geotransform, check_crs, check_bands, check_nodata, check_resolution):
"""Validates a list of raster image paths to ensure they are compatible for processing.
Args:
input_image_paths (list[str]): Paths to input raster images.
debug_logs (bool): If True, prints debug messages.
check_geotransform (bool): Check that all images have a valid geotransform.
check_crs (bool): Check that all images have the same CRS.
check_bands (bool): Check that all images have the same number of bands.
check_nodata (bool): Check that all images have the same nodata values per band.
check_resolution (bool): Check that all images have the same resolution.
Returns:
bool: True if all checks pass.
Raises:
ValueError: If any check fails."""
def _get_nodata_value(input_image_paths, custom_nodata_value):
"""Determines the NoData value to use from a list of raster images or a custom override.
Args:
input_image_paths (List[str]): List of raster image paths.
custom_nodata_value (float, optional): User-defined NoData value.
Returns:
float | None: The determined NoData value, or None if unavailable.
Warnings:
Emits a warning if a custom value overrides the image value or if no value is found."""
### File: cli.py
def _cli_version():
def _build_cli():
def main():
### File: utils.py
def merge_vectors(input_vectors, merged_vector_path, method, debug_logs, create_name_attribute):
"""Merge multiple vector files using the specified geometric method.
Args:
input_vectors (str | List[str]): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.gpkg", "/input/folder" (assumes *.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
merged_vector_path (str): Path to save merged output.
method (Literal["intersection", "union", "keep"]): Merge strategy.
debug_logs (bool): If True, print debug information.
create_name_attribute (Optional[Tuple[str, str]]): Tuple of (field_name, separator) to add a combined name field.
Returns:
None"""
def align_rasters(input_images, output_images):
"""Aligns multiple rasters to a common resolution and grid using specified resampling.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Align.tif), ["/input/one.tif", "/input/two.tif"].
resampling_method (Literal["nearest", "bilinear", "cubic"], optional): Resampling method to use; default is "bilinear".
tap (bool, optional): If True, aligns outputs to target-aligned pixels (GDAL's -tap); default is False.
resolution (Literal["highest", "average", "lowest"], optional): Strategy for choosing target resolution; default is "highest".
window_size (Universal.WindowSize, optional): Tiling strategy for windowed alignment.
debug_logs (Universal.DebugLogs, optional): If True, prints debug output.
image_parallel_workers (Universal.ImageParallelWorkers, optional): Parallelization strategy for image-level alignment.
window_parallel_workers (Universal.WindowParallelWorkers, optional): Parallelization strategy for within-image window alignment.
Returns:
None"""
def _align_process_image(image_name, window_parallel, in_path, out_path, target_res, resampling_method, tap, window_size, debug_logs):
"""Aligns a single raster image to a target resolution and grid, optionally in parallel by window.
Args:
image_name (str): Identifier for the image, used for worker context management.
window_parallel (Universal.WindowParallelWorkers): Optional multiprocessing config for window-level alignment.
in_path (str): Path to the input raster.
out_path (str): Path to save the aligned output raster.
target_res (Tuple[float, float]): Target resolution (x, y) to resample the raster to.
resampling_method (str): Resampling method: "nearest", "bilinear", or "cubic".
tap (bool): If True, aligns raster to target-aligned pixels (GDAL-style -tap).
window_size (Universal.WindowSize): Tiling strategy for dividing the image into windows.
debug_logs (bool): If True, prints debug output.
Returns:
None"""
def _align_process_window(src_window, dst_window, band_idx, dst_transform, resampling_method, nodata, debug_logs, image_name):
"""Aligns a single raster window for one band using reproject with a shared dataset.
Args:
src_window (Window): Source window to read.
dst_window (Window): Output window (used to compute offset transform and for saving).
band_idx (int): Band index to read.
dst_transform: The full transform of the output raster.
resampling_method: Reprojection resampling method.
nodata: NoData value.
debug_logs: Print debug info if True.
image_name: Key to fetch the raster from WorkerContext.
Returns:
Tuple[int, Window, np.ndarray]: Band index, destination window, and aligned data buffer."""
def merge_rasters(input_images, output_image_path):
"""Merges multiple rasters into a single mosaic aligned to the union extent and minimum resolution.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_image_path (str): Path to save the merged output raster.
image_parallel_workers (Universal.ImageParallelWorkers, optional): Strategy for parallelizing image-level merging.
window_parallel_workers (Universal.WindowParallelWorkers, optional): Strategy for within-image window merging.
window_size (Universal.WindowSize, optional): Tiling strategy for processing windows.
debug_logs (Universal.DebugLogs, optional): If True, prints debug output.
output_dtype (Universal.CustomOutputDtype, optional): Output data type; defaults to input type if None.
custom_nodata_value (Universal.CustomNodataValue, optional): NoData value to use; defaults to first input's value.
Returns:
None"""
def _merge_raster_process_window(window, band_idx, dtype, debug_logs, image_name, src_transform, dst_transform, nodata_value):
"""Processes a single raster window for merging by reading, masking, and mapping it to the destination grid.
Args:
window (Window): Source window to read.
band_idx (int): Zero-based band index to process.
dtype (str): Data type to cast the read block to.
debug_logs (bool): If True, prints debug output.
image_name (str): Identifier for accessing the source dataset from WorkerContext.
src_transform: Affine transform of the source image.
dst_transform: Affine transform of the destination mosaic.
nodata_value (Universal.CustomNodataValue): Value representing NoData pixels.
Returns:
tuple[int, Window, np.ndarray]: Band index, destination window, and processed data block (or None if fully masked)."""
def mask_rasters(input_images, output_images, vector_mask, window_size, debug_logs, image_parallel_workers, window_parallel_workers, include_touched_pixels, custom_nodata_value):
"""Applies a vector-based mask to one or more rasters, with support for image- and window-level parallelism.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Clip.tif), ["/input/one.tif", "/input/two.tif"].
vector_mask (Universal.VectorMask, optional): Tuple ("include"/"exclude", vector path, optional field name) or None.
window_size (Universal.WindowSize, optional): Strategy for tiling rasters during processing.
debug_logs (Universal.DebugLogs, optional): If True, prints debug information.
image_parallel_workers (Universal.ImageParallelWorkers, optional): Strategy for parallelizing image-level masking.
window_parallel_workers (Universal.WindowParallelWorkers, optional): Strategy for parallelizing masking within windows.
include_touched_pixels (bool, optional): If True, includes pixels touched by mask geometry edges; default is False.
Returns:
None"""
def _mask_raster_process_image(window_parallel, max_workers, backend, input_image_path, output_image_path, image_name, vector_mask, window_size, debug_logs, include_touched_pixels, custom_nodata_value):
"""Processes a single raster image by applying a vector mask, optionally in parallel by window.
Args:
window_parallel (bool): Whether to use parallel processing at the window level.
max_workers (int): Maximum number of worker processes or threads.
backend (str): Execution backend, e.g., "process".
input_image_path (str): Path to the input raster.
output_image_path (str): Path to save the masked output raster.
image_name (str): Identifier for the raster used in worker context.
vector_mask (Universal.VectorMask): Masking config as ("include"/"exclude", path, optional field).
window_size (Universal.WindowSize): Strategy for tiling the raster into windows.
debug_logs (bool): If True, enables debug output.
include_touched_pixels (bool): If True, includes pixels touched by mask geometry boundaries.
Returns:
None"""
def _mask_raster_process_window(win, band_idx, image_name, nodata, geoms, invert, include_touched_pixels):
"""Applies a vector-based mask to a single raster window and returns the masked data.
Args:
win (Window): Raster window to process.
band_idx (int): Zero-based band index to read.
image_name (str): Identifier for the raster in the WorkerContext.
nodata (int | float): Value to assign to masked-out pixels.
geoms (list | None): List of geometries to mask with, or None to skip masking.
invert (bool): If True, masks outside the geometries (exclude mode).
include_touched_pixels (bool): If True, includes pixels touched by mask boundaries.
Returns:
tuple[Window, np.ndarray]: The window and its corresponding masked data array."""
### File: statistics.py
def compare_image_spectral_profiles_pairs(image_groups_dict, output_figure_path, title, xlabel, ylabel, line_width):
"""Plots paired spectral profiles for before-and-after image comparisons.
Args:
image_groups_dict (dict): Mapping of labels to image path pairs (before, after):
{'Image A': [
'/image/before/a.tif',
'/image/after/a.tif'
],
'Image B': [
'/image/before/b.tif',
'/image/after/b.tif'
]}
output_figure_path (str): Path to save the resulting comparison figure.
title (str): Title of the plot.
xlabel (str): X-axis label.
ylabel (str): Y-axis label.
line_width (float, optional): Width of the spectral profile lines. Default is 1.
Outputs:
Saves a spectral comparison plot showing pre- and post-processing profiles."""
def compare_spatial_spectral_difference_band_average(input_images, output_figure_path, title, diff_label, subtitle, scale):
"""Computes and visualizes the mean per-pixel spectral difference between two coregistered, equal-size images.
Args:
input_images (list): List of two image file paths [before, after].
output_figure_path (str): Path to save the resulting difference image (PNG).
title (str): Title for the plot.
diff_label (str): Label for the colorbar.
subtitle (str): Subtitle text shown below the image.
scale (tuple, optional): Tuple (vmin, vmax) to fix the color scale. Centered at 0.
Raises:
ValueError: If the input list doesn't contain exactly two image paths, or shapes mismatch."""
def compare_before_after_all_images(input_images_1, input_images_2, output_figure_path, title, ylabel_1, ylabel_2, image_names):
def compute_row_stretch(paths):
### File: seamline/voronoi_center_seamline.py
def voronoi_center_seamline(input_images, output_mask):
"""Generates a Voronoi-based seamline mask from edge-matching polygons (EMPs) and writes the result to a vector file.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_mask (str): Output path for the final seamline polygon vector file.
min_point_spacing (float, optional): Minimum spacing between Voronoi seed points; default is 10.
min_cut_length (float, optional): Minimum cutline segment length to retain; default is 0.
debug_logs (Universal.DebugLogs, optional): Enables debug print statements if True; default is False.
image_field_name (str, optional): Name of the attribute field for image ID in output; default is 'image'.
debug_vectors_path (str | None, optional): Optional path to save debug layers (cutlines, intersections).
Outputs:
Saves a polygon seamline layer to `output_mask`, and optionally saves intermediate cutlines to `debug_vectors_path`."""
def _read_mask(path, debug_logs):
"""Reads a raster mask and returns a binary array where valid data is True.
Args:
path (str): Path to the input raster file.
debug_logs (bool, optional): If True, enables debug output; default is False.
Returns:
Tuple[np.ndarray, Affine]: A binary mask array and the associated affine transform."""
def _seamline_mask(mask, transform, debug_logs):
"""Extracts polygons from a binary mask and returns the largest as the EMP.
Args:
mask (np.ndarray): Binary mask where True indicates valid area.
transform (Affine): Affine transform associated with the mask.
debug_logs (bool, optional): If True, prints debug info; default is False.
Returns:
Polygon: The largest extracted polygon from the mask."""
def _densify_polygon(poly, dist, debug_logs):
"""Densifies the exterior of the largest polygon by inserting points at regular intervals.
Args:
poly (Polygon | GeometryCollection): Input geometry to densify.
dist (float): Maximum distance between inserted points.
debug_logs (bool, optional): If True, prints debug info; default is False.
Returns:
List[Tuple[float, float]]: List of (x, y) coordinates with added intermediate points."""
def _compute_centerline(a, b, min_point_spacing, min_cut_length, debug_logs, crs, debug_vectors_path):
"""Computes a Voronoi-based centerline between two overlapping polygons.
Args:
a (Polygon): First polygon.
b (Polygon): Second polygon.
min_point_spacing (float): Minimum spacing between seed points for Voronoi generation.
min_cut_length (float): Minimum segment length to include in the centerline graph.
debug_logs (bool, optional): If True, prints debug information; default is False.
crs (optional): Coordinate reference system used for optional debug output.
debug_vectors_path (optional): Path to save debug Voronoi cells; if None, skips saving.
Returns:
LineString: Shortest centerline path computed through the Voronoi diagram of the overlap."""
def _segment_emp(emp, cuts, debug_logs):
"""Segments an EMP polygon by sequentially applying centerline cuts, retaining the piece containing the centroid.
Args:
emp (Polygon): The original EMP polygon to segment.
cuts (List[LineString]): List of cutlines to apply.
debug_logs (bool, optional): If True, prints debug info; default is False.
Returns:
Polygon: The segmented portion of the EMP containing the original centroid."""
def _save_intersection_points(a, b, path, crs, pair_id):
"""Saves intersection points between the boundaries of two polygons to a GeoPackage layer.
Args:
a (Polygon): First polygon.
b (Polygon): Second polygon.
path (str): Path to the output GeoPackage file.
crs: Coordinate reference system for the output.
pair_id (str): Identifier for the polygon pair, saved as an attribute.
Returns:
None"""
def _save_voronoi_cells(voronoi_cells, path, crs, layer_name):
"""Saves Voronoi polygon geometries to a specified GeoPackage layer.
Args:
voronoi_cells (GeometryCollection): Collection of Voronoi polygon geometries.
path (str): Path to the output GeoPackage file.
crs: Coordinate reference system for the output layer.
layer_name (str, optional): Name of the layer to write; default is "voronoi_cells".
Returns:
None"""
### File: match/lirrn.py
def main():
"""Run LIRRN normalization on selected subject and reference images."""
def _show_rgb(img, title):
"""Display RGB or first three bands of a multiband image."""
def lirrn(p_n, sub_img, ref_img):
"""Perform location-independent relative radiometric normalization."""
def _linear_reg(sub, ref):
"""Fit a linear model: ref ≈ a * sub + b."""
def _sample_selection(n, a, b, idx):
"""Select sample pairs from quantized subject/reference image regions using minimal distance matching."""
### File: match/local_block_adjustment.py
def local_block_adjustment(input_images, output_images):
"""Performs local radiometric adjustment on a set of raster images using block-based statistics.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Local.tif), ["/input/one.tif", "/input/two.tif"].
calculation_dtype (str, optional): Precision for internal calculations. Defaults to "float32".
output_dtype (str | None, optional): Data type for output rasters. Defaults to input image dtype.
vector_mask (Tuple[Literal["include", "exclude"], str, Optional[str]] | None): A mask limiting pixels to include when calculating stats for each block in the format of a tuple with two or three items: literal "include" or "exclude" the mask area, str path to the vector file, optional str of field name in vector file that *includes* (can be substring) input image name to filter geometry by. It is only applied when calculating local blocks, as the reference map is calculated as the mean of all local blocks. Loaded block maps won't have this applied unless it was used when calculating them. The matching solution is still applied to these areas in the output. Defaults to None for no mask.
debug_logs (bool, optional): If True, prints progress. Defaults to False.
custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.
image_parallel_workers (Tuple[Literal["process", "thread"], Literal["cpu"] | int] | None = None): Parallelization strategy at the image level. Provide a tuple like ("process", "cpu") to use multiprocessing with all available cores. Threads are supported too. Set to None to disable.
window_parallel_workers (Tuple[Literal["process"], Literal["cpu"] | int] | None = None): Parallelization strategy at the window level within each image. Same format as image_parallel_workers. Threads are not supported. Set to None to disable.
window_size (int | Tuple[int, int] | Literal["block"] | None): Tile size for processing: int for square tiles, (width, height) for custom size, or "block" to set as the size of the block map, None for full image. Defaults to None.
save_as_cog (bool, optional): If True, saves as COG. Defaults to False.
number_of_blocks (int | tuple | Literal["coefficient_of_variation"]): int as a target number of blocks per image, tuple to manually set the total blocks in width and height, or "coefficient_of_variation" to find the number of blocks based on this metric.
alpha (float, optional): Blending factor between reference and local means. Defaults to 1.0.
correction_method (Literal["gamma", "linear"], optional): Local correction method. Defaults to "gamma".
save_block_maps (tuple(str, str) | None): If enabled, saves block maps for review, to resume processing later, or to add additional images to the reference map.
- First str is the path to save the global block map.
- Second str is the path to save the local block maps, which must include "$" which will be replaced by the image name (because there are multiple local maps).
load_block_maps (Tuple[str, List[str]] | Tuple[str, None] | Tuple[None, List[str]] | None, optional):
Controls loading of precomputed block maps. Can be one of:
- Tuple[str, List[str]]: Load both reference and local block maps.
- Tuple[str, None]: Load only the reference block map.
- Tuple[None, List[str]]: Load only the local block maps.
- None: Do not load any block maps.
This supports partial or full reuse of precomputed block maps:
- Local block maps will still be computed for each input image that is not linked to a local block map by the image's name being *included* in the local block map's file name.
- The reference block map will only be calculated (mean of all local blocks) if not set.
- The reference map defines the reference block statistics and the local maps define per-image local block statistics.
- Both reference and local maps must have the same canvas extent and dimensions which will be used to set those values.
override_bounds_canvas_coords (Tuple[float, float, float, float] | None): Manually set (min_x, min_y, max_x, max_y) bounds to override the computed/loaded canvas extent. If you wish to have a larger extent than the current images, you can manually set this, along with setting a fixed number of blocks, to anticipate images will expand beyond the current extent.
block_valid_pixel_threshold (float): Minimum fraction of valid pixels required to include a block (0–1).
Returns:
List[str]: Paths to the locally adjusted output raster images."""
def _validate_input_params(input_images, output_images, custom_nodata_value, number_of_blocks, alpha, calculation_dtype, output_dtype, debug_logs, window_size, save_as_cog, correction_method, image_parallel_workers, window_parallel_workers, save_block_maps, load_block_maps, override_bounds_canvas_coords, vector_mask, block_valid_pixel_threshold):
"""Validates input parameters for `local_block_adjustment`.
Raises:
TypeError or ValueError with a concise message if any parameter is improperly set."""
def _get_pre_computed_block_maps(load_block_maps, calculation_dtype, debug_logs):
"""Load pre-computed block mean maps from files.
Args:
load_block_maps (Tuple[str, List[str]] | Tuple[str, None] | Tuple[None, List[str]]):
- Tuple[str, List[str]]: Load both reference and local block maps.
- Tuple[str, None]: Load only the reference block map.
- Tuple[None, List[str]]: Load only the local block maps.
calculation_dtype (str): Numpy dtype to use for reading.
debug_logs (bool): To print debug statements or not.
Returns:
Tuple[
dict[str, np.ndarray], # block_local_means
Optional[np.ndarray], # block_reference_mean
Optional[int], # num_row
Optional[int], # num_col
Optional[Tuple[float, float, float, float]] # bounds_canvas_coords
]"""
def _get_bounding_rect_images_block_space(block_local_means):
"""Compute block-space bounding rectangles for each image based on valid block values.
Args:
block_local_means (dict[str, np.ndarray]): Per-image block means
with shape (num_row, num_col, num_bands).
Returns:
dict[str, tuple[int, int, int, int]]: Each entry maps image name to
(min_row, min_col, max_row, max_col)."""
def _compute_reference_blocks(block_local_means, calculation_dtype):
"""Computes reference block means across images by averaging non-NaN local block means.
Args:
block_local_means (dict[str, np.ndarray]): Per-image block mean arrays.
calculation_dtype (str): Numpy dtype for output array.
Returns:
np.ndarray: Reference block map of shape (num_row, num_col, num_bands)"""
def _apply_adjustment_process_image(name, img_path, out_path, num_bands, block_reference_mean, block_local_mean, bounds_image_block_space, bounds_canvas_coords, window_size, num_row, num_col, nodata_val, alpha, correction_method, calculation_dtype, output_dtype, debug_logs, parallel, backend, max_workers, save_as_cog):
"""Applies local radiometric adjustment to a single image using reference and local block statistics.
Args:
name (str): Image identifier.
img_path (str): Path to the input image.
out_path (str): Path to save the adjusted output image.
num_bands (int): Number of bands in the image.
block_reference_mean (np.ndarray): Global reference block mean array.
block_local_mean (np.ndarray): Image-specific local block mean array.
bounds_image_block_space (tuple): Block-space bounding box for the image.
bounds_canvas_coords (tuple): Full canvas extent for normalization.
window_size: Tiling strategy for processing.
num_row (int): Number of block rows.
num_col (int): Number of block columns.
nodata_val (float): Value representing missing data.
alpha (float): Blending factor for adjustment.
correction_method (str): Method to apply ("gamma" or "linear").
calculation_dtype (str): Dtype used for internal computation.
output_dtype (str): Dtype used for writing output.
debug_logs (bool): If True, logs progress.
parallel (bool): Whether to use multiprocessing for window-level processing.
backend (str): Backend to use for parallelism.
max_workers (int): Number of workers to use if parallel.
Writes:
The adjusted image to `out_path`."""
def _apply_adjustment_process_window(name, window, band_idx, num_row, num_col, bounds_canvas_coords, nodata_val, alpha, correction_method, calculation_dtype):
"""Applies radiometric correction to a single raster window using bilinear-interpolated block statistics.
Args:
name (str): Image identifier.
window (Window): Raster window to process.
band_idx (int): Band index (0-based).
num_row (int): Number of block rows in the canvas.
num_col (int): Number of block columns in the canvas.
bounds_canvas_coords (tuple): Spatial extent of the full block canvas.
nodata_val (float | int): NoData value in the raster.
alpha (float): Blending factor for correction.
correction_method (str): Either "gamma" or "linear".
calculation_dtype (str): Data type for intermediate calculations.
Returns:
Tuple[Window, int, np.ndarray]: The window, band index, and corrected data array."""
def _get_bounding_rectangle(image_paths):
"""Calculates the bounding rectangle that encompasses all input raster images.
Args:
image_paths (List[str]): List of raster file paths.
Returns:
Tuple[float, float, float, float]: (min_x, min_y, max_x, max_y) of the combined extent."""
def _compute_mosaic_coefficient_of_variation(image_paths, nodata_value, reference_std, reference_mean, base_block_size, band_index, calculation_dtype, debug_logs):
"""Estimates block size for local adjustment using the coefficient of variation across input images.
Args:
image_paths (List[str]): List of input raster file paths.
nodata_value (float): Value representing NoData in the input rasters.
reference_std (float, optional): Reference standard deviation for comparison. Defaults to 45.0.
reference_mean (float, optional): Reference mean for comparison. Defaults to 125.0.
base_block_size (Tuple[int, int], optional): Base block size (rows, cols). Defaults to (10, 10).
band_index (int, optional): Band index to use for statistics (1-based). Defaults to 1.
calculation_dtype (str, optional): Data type for computation. Defaults to "float32".
Returns:
Tuple[int, int]: Estimated block size (rows, cols) adjusted based on coefficient of variation."""
def _calculate_block_process_image(name, image_path, bounds_canvas_coords, num_row, num_col, num_bands, window_size, debug_logs, nodata_value, calculation_dtype, vector_mask, block_valid_pixel_threshold, parallel, backend, max_workers):
"""Computes per-block mean statistics for a single image by aggregating pixel values into a block grid.
Args:
name (str): Image identifier.
image_path (str): Path to the input raster.
bounds_canvas_coords (tuple): Full extent of the block canvas (minx, miny, maxx, maxy).
num_row (int): Number of block rows.
num_col (int): Number of block columns.
num_bands (int): Number of image bands.
window_size (tuple or "block" or None): Tiling strategy for processing.
debug_logs (bool): If True, prints progress info.
nodata_value (float): Value used to identify invalid pixels.
calculation_dtype (str): Numpy dtype for internal arrays.
vector_mask (tuple or None): Optional spatial mask to include/exclude regions.
block_valid_pixel_threshold (float): Minimum valid pixel ratio to include block.
parallel (bool): Whether to use multiprocessing for tiles.
backend (str): Parallel execution backend ("process" or "thread").
max_workers (int): Number of parallel workers.
Returns:
Tuple[str, np.ndarray, np.ndarray]: (Image name, block mean array, block pixel count array)"""
def _calculate_block_process_window(band_index, window, name, geoms, invert, nodata_value, calculation_dtype, transform, block_shape, bounds_canvas_coords):
"""Aggregates pixel values within a raster window into a block grid for one band.
Args:
band_index (int): Band index to process (0-based).
window (Window): Raster window to read.
name (str): Image identifier used to retrieve dataset.
geoms (list or None): Optional vector geometries for masking.
invert (bool): Whether to invert the mask.
nodata_value (float): NoData value in the raster.
calculation_dtype (str): Data type for computation.
transform: Affine transform of the dataset.
block_shape (tuple): (num_row, num_col) of the block grid.
bounds_canvas_coords (tuple): Extent of the full canvas (minx, miny, maxx, maxy).
Returns:
Optional[Tuple[np.ndarray, np.ndarray]]: (Sum of values per block, count of valid pixels per block),
or None if the window has no valid pixels."""
def _weighted_bilinear_interpolation(C_B, x_frac, y_frac):
"""Performs bilinear interpolation on a 2D array while handling NaN values using a validity mask.
Args:
C_B (np.ndarray): 2D array with possible NaNs to interpolate.
x_frac (np.ndarray): Fractional x-coordinates for interpolation.
y_frac (np.ndarray): Fractional y-coordinates for interpolation.
Returns:
np.ndarray: Interpolated values at the specified fractional coordinates, with NaNs preserved where data is invalid."""
def _download_block_map(block_map, bounding_rect, output_image_path, projection, dtype, nodata_value, width, height, write_bands, window, delete_output):
"""Writes a 3D block map to a raster file, creating or updating specified bands within a target window.
Args:
block_map (np.ndarray): Block data of shape (rows, cols, bands).
bounding_rect (tuple): Spatial extent (minx, miny, maxx, maxy).
output_image_path (str): Path to the output raster file.
projection (rasterio.CRS): Coordinate reference system.
dtype (str): Data type for output.
nodata_value (float): NoData value to write.
width (int): Full raster width.
height (int): Full raster height.
write_bands (tuple[int] | None): 0-based band indices to write; all if None.
window (Window | None): Raster window to write into; defaults to full image.
Output:
Writes the `block_map` array to `output_image_path`, either creating a new raster or updating an existing one."""
def _compute_block_size(input_image_array_path, target_blocks_per_image, bounds_canvas_coords):
"""Calculates the number of rows and columns for dividing a bounding rectangle into target-sized blocks.
Args:
input_image_array_path (list): List of image paths to determine total image count.
target_blocks_per_image (int | float): Desired number of blocks per image.
bounds_canvas_coords (tuple): Bounding box covering all images (minx, miny, maxx, maxy).
Returns:
Tuple[int, int]: Number of rows (num_row) and columns (num_col) for the block grid."""
def _apply_gamma_correction(arr_in, Mrefs, Mins, alpha):
"""Applies gamma correction to input pixel values based on reference and input block means.
Args:
arr_in (np.ndarray): Input pixel values to be corrected.
Mrefs (np.ndarray): Reference block means.
Mins (np.ndarray): Local block means of the input image.
alpha (float, optional): Scaling factor applied to the corrected output. Defaults to 1.0.
Returns:
Tuple[np.ndarray, np.ndarray]:
- Gamma-corrected pixel values.
- Gamma values used in the correction.
Raises:
ValueError: If any value in Mins is zero or negative."""
def _smooth_array(input_array, nodata_value, scale_factor):
"""Applies Gaussian smoothing to an array while preserving NoData regions.
Args:
input_array (np.ndarray): 2D array to be smoothed.
nodata_value (Optional[float], optional): Value representing NoData. Treated as NaN during smoothing. Defaults to None.
scale_factor (float, optional): Sigma value for the Gaussian filter. Controls smoothing extent. Defaults to 1.0.
Returns:
np.ndarray: Smoothed array with NoData regions preserved or restored."""
### File: match/global_regression.py
def global_regression(input_images, output_images):
"""Performs global radiometric normalization across overlapping images using least squares regression.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Global.tif), ["/input/one.tif", "/input/two.tif"].
calculation_dtype (str, optional): Data type used for internal calculations. Defaults to "float32".
output_dtype (str | None, optional): Data type for output rasters. Defaults to input image dtype.
vector_mask (Tuple[Literal["include", "exclude"], str, Optional[str]] | None): Mask to limit stats calculation to specific areas in the format of a tuple with two or three items: literal "include" or "exclude" the mask area, str path to the vector file, optional str of field name in vector file that *includes* (can be substring) input image name to filter geometry by. Loaded stats won't have this applied to them. The matching solution is still applied to these areas in the output. Defaults to None for no mask.
debug_logs (bool, optional): If True, prints debug information and constraint matrices. Defaults to False.
custom_nodata_value (float | int | None, optional): Overrides detected NoData value. Defaults to None.
image_parallel_workers (Tuple[Literal["process", "thread"], Literal["cpu"] | int] | None = None): Parallelization strategy at the image level. Provide a tuple like ("process", "cpu") to use multiprocessing with all available cores. Threads are supported too. Set to None to disable.
window_parallel_workers (Tuple[Literal["process"], Literal["cpu"] | int] | None = None): Parallelization strategy at the window level within each image. Same format as image_parallel_workers. Threads are not supported. Set to None to disable.
window_size (int | Tuple[int, int] | Literal["internal"] | None): Tile size for reading and writing: int for square tiles, tuple for (width, height), "internal" to use raster's native tiling, or None for full image. "internal" enables efficient streaming from COGs.
save_as_cog (bool): If True, saves output as a Cloud-Optimized GeoTIFF using proper band and block order.
specify_model_images (Tuple[Literal["exclude", "include"], List[str]] | None): First item in the tuple sets whether to 'include' or 'exclude' the listed images from model-building statistics. Second item is the list of image names (without their extension) to apply the criteria to. For example, if this param is set to 'include' only one image, all other images will be matched to that one image. Defaults to no exclusion.
custom_mean_factor (float, optional): Weight for mean constraints in regression. Defaults to 1.0.
custom_std_factor (float, optional): Weight for standard deviation constraints in regression. Defaults to 1.0.
save_adjustments (str | None, optional): The output path of a .json file in which to save adjustment parameters. Defaults to not saving.
load_adjustments (str | None, optional): If set, loads saved whole and overlapping statistics only for images that exist in the .json file. Other images will still have their statistics calculated. Defaults to None.
Returns:
List[str]: Paths to the globally adjusted output raster images."""
def _solve_global_model(num_bands, num_total, all_image_names, included_names, input_image_names, all_overlap_stats, all_whole_stats, custom_mean_factor, custom_std_factor, overlapping_pairs, debug_logs):
"""Computes global radiometric normalization parameters (scale and offset) for each image and band using least squares regression.
Args:
num_bands: Number of image bands.
num_total: Total number of images (including loaded).
all_image_names: Ordered list of all image names.
included_names: Subset of images used to constrain the model.
input_image_names: Names of input images to apply normalization to.
all_overlap_stats: Pairwise overlap statistics per band.
all_whole_stats: Whole-image stats (mean, std) per band.
custom_mean_factor: Weight for mean constraints.
custom_std_factor: Weight for std constraints.
overlapping_pairs: Pairs of overlapping images.
debug_logs: If True, prints debug information.
Returns:
np.ndarray: Adjustment parameters of shape (bands, 2 * num_images, 1)."""
def _apply_adjustments_process_image(image_name, input_image_path, output_image_path, scale, offset, num_bands, nodata_val, window_size, calculation_dtype, output_dtype, window_parallel, window_backend, window_max_workers, save_as_cog, debug_logs):
"""Applies scale and offset adjustments to each band of an input image and writes the result to the output path.
Args:
image_name: Identifier for the image in the worker context.
input_image_path: Path to the input raster image.
output_image_path: Path to save the adjusted output image.
scale: Per-band scale factors (1D array of length num_bands).
offset: Per-band offset values (1D array of length num_bands).
num_bands: Number of image bands.
nodata_val: NoData value to preserve during adjustment.
window_size: Tiling strategy for processing (None, int, tuple, or "internal").
calculation_dtype: Data type for computation.
output_dtype: Output data type (defaults to input type if None).
window_parallel: Whether to parallelize over windows.
window_backend: Backend to use for window-level parallelism ("process").
window_max_workers: Number of parallel workers for window processing.
debug_logs: If True, prints debug info during processing.
Returns:
None"""
def _save_adjustments(save_path, input_image_names, all_params, all_whole_stats, all_overlap_stats, num_bands, calculation_dtype):
"""Saves adjustment parameters, whole-image stats, and overlap stats in a nested JSON format.
Args:
save_path (str): Output JSON path.
input_image_names (List[str]): List of input image names.
all_params (np.ndarray): Adjustment parameters, shape (bands, 2 * num_images, 1).
all_whole_stats (dict): Per-image stats (keyed by image name).
all_overlap_stats (dict): Per-pair overlap stats (keyed by image name).
num_bands (int): Number of bands.
calculation_dtype (str): Precision for saving values (e.g., "float32")."""
def _validate_adjustment_model_structure(model):
"""Validates the structure of a loaded adjustment model dictionary.
Ensures that:
- Each top-level key is an image name mapping to a dictionary.
- Each image has 'adjustments' and 'whole_stats' with per-band keys like 'band_0'.
- Each band entry in 'adjustments' contains 'scale' and 'offset'.
- Each band entry in 'whole_stats' contains 'mean', 'std', and 'size'.
- If present, 'overlap_stats' maps to other image names with valid per-band statistics.
The expected model structure is a dictionary with this format:
{
"image_name_1": {
"adjustments": {
"band_0": {"scale": float, "offset": float},
"band_1": {"scale": float, "offset": float},
...
},
"whole_stats": {
"band_0": {"mean": float, "std": float, "size": int},
"band_1": {"mean": float, "std": float, "size": int},
...
},
"overlap_stats": {
"image_name_2": {
"band_0": {"mean": float, "std": float, "size": int},
"band_1": {"mean": float, "std": float, "size": int},
...
},
...
}
},
...
}
- Keys are image basenames (without extension).
- Band keys are of the form "band_0", "band_1", etc.
- All numerical values are stored as floats (except 'size', which is an int).
Args:
model (dict): Parsed JSON adjustment model.
Raises:
ValueError: If any structural issues or missing keys are detected."""
def _apply_adjustments_process_window(window, band_idx, a, b, nodata, calculation_dtype, debug_logs, image_name):
"""Applies a global linear transformation (scale and offset) to a raster tile.
Args:
window (Window): Rasterio window specifying the region to process.
band_idx (int): Band index to read and adjust.
a (float): Multiplicative factor for normalization.
b (float): Additive offset for normalization.
nodata (int | float): NoData value to ignore during processing.
calculation_dtype (str): Data type to cast the block for computation.
debug_logs (bool): If True, prints processing information.
image_name (str): Key to fetch the raster from WorkerContext.
Returns:
Tuple[Window, np.ndarray]: Window and the adjusted data block."""
def _print_constraint_system(constraint_matrix, adjustment_params, observed_values_vector, overlap_pairs, image_names_with_id):
"""Prints the constraint matrix system with labeled rows and columns for debugging regression inputs.
Args:
constraint_matrix (ndarray): Coefficient matrix used in the regression system.
adjustment_params (ndarray): Solved adjustment parameters (regression output).
observed_values_vector (ndarray): Target values in the regression system.
overlap_pairs (tuple): Pairs of overlapping image indices used in constraints.
image_names_with_id (list of tuple): List of (ID, name) pairs corresponding to each image's position in the system.
Returns:
None"""
def _find_overlaps(image_bounds_dict):
"""Finds all pairs of image names with overlapping spatial bounds.
Args:
image_bounds_dict (dict): Dictionary mapping image names to their rasterio bounds.
Returns:
Tuple[Tuple[str, str], ...]: Pairs of image names with overlapping extents."""
def _overlap_stats_process_image(parallel, max_workers, backend, num_bands, input_image_path_i, input_image_path_j, name_i, name_j, bound_i, bound_j, nodata_i, nodata_j, vector_mask, window_size, debug_logs):
"""Computes per-band overlap statistics (mean, std, pixel count) between two images over their intersecting area.
Args:
parallel: Whether to use multiprocessing for window processing.
max_workers: Number of workers for parallel processing.
backend: Parallelization backend ("process").
num_bands: Number of image bands.
input_image_path_i: Path to the first image.
input_image_path_j: Path to the second image.
name_i: Identifier for the first image.
name_j: Identifier for the second image.
bound_i: BoundingBox of the first image.
bound_j: BoundingBox of the second image.
nodata_i: NoData value for the first image.
nodata_j: NoData value for the second image.
vector_mask: Optional mask to include/exclude regions, with optional field filter.
window_size: Windowing strategy for tile processing.
debug_logs: If True, prints overlap bounds and status.
Returns:
dict: Nested stats dictionary for each image pair and band."""
def _overlap_stats_process_window(win, band, col_min_i, row_min_i, name_i, name_j, nodata_i, nodata_j, geoms_i, geoms_j, invert, interpolation_method):
"""Processes a single overlapping window between two images, applying masks and interpolation if needed,
and returns valid pixel pairs for statistical comparison.
Args:
win: Window in image i's coordinate space.
band: Band index to process.
col_min_i: Column offset of overlap region in image i.
row_min_i: Row offset of overlap region in image i.
name_i: Image i identifier in WorkerContext.
name_j: Image j identifier in WorkerContext.
nodata_i: NoData value for image i.
nodata_j: NoData value for image j.
geoms_i: Optional list of geometries for masking image i.
geoms_j: Optional list of geometries for masking image j.
invert: Whether to invert the mask logic (exclude vs include).
interpolation_method: OpenCV interpolation method for resampling. 1 is bilinear interpolation.
Returns:
Tuple of valid pixel arrays (image_i_values, image_j_values), or None if no valid pixels found."""
def _fit_windows_to_pixel_bounds(windows, row_min, row_max, col_min, col_max, row_offset, col_offset):
"""Adjusts a list of image-relative windows to ensure they fit within specified pixel bounds, based on a global offset.
Args:
windows: List of rasterio Window objects (relative to an image region).
row_min, row_max: Global pixel row bounds to clip against.
col_min, col_max: Global pixel column bounds to clip against.
row_offset, col_offset: Offsets to convert window-relative coordinates to global coordinates.
Returns:
List[Window]: Windows cropped to the specified global bounds."""
def _whole_stats_process_image(parallel, max_workers, backend, input_image_path, nodata, num_bands, image_name, vector_mask, window_size, debug_logs):
"""Calculates whole-image statistics (mean, std, and valid pixel count) per band, with optional masking and window-level parallelism.
Args:
parallel: Whether to enable multiprocessing for window processing.
max_workers: Number of parallel workers to use.
backend: Multiprocessing backend ("process").
input_image_path: Path to the input raster.
nodata: NoData value to exclude from stats.
num_bands: Number of bands to process.
image_name: Identifier for use in WorkerContext.
vector_mask: Optional mask tuple to include/exclude specific regions, with optional field filter.
window_size: Tiling strategy (None, int, tuple, or "internal").
debug_logs: If True, prints debug messages.
Returns:
dict: Per-band statistics {image_name: {band: {mean, std, size}}}."""
def _whole_stats_process_window(win, band_idx, image_name, nodata, geoms, invert):
"""Extracts valid pixel values from a raster window, optionally applying a geometry mask.
Args:
win: Rasterio window to read.
band_idx: Band index to read (0-based).
image_name: Identifier for the image in WorkerContext.
nodata: NoData value to exclude.
geoms: Optional list of geometries for masking.
invert: If True, inverts the mask (exclude instead of include).
Returns:
np.ndarray or None: 1D array of valid pixel values, or None if none found."""
### File: mask/mask.py
def create_cloud_mask_with_omnicloudmask(input_images, output_images, red_band_index, green_band_index, nir_band_index):
"""Generates cloud masks from input images using OmniCloudMask, with optional downsampling and multiprocessing.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_CloudMask.tif), ["/input/one.tif", "/input/two.tif"].
red_band_index (int): Index of red band in the image.
green_band_index (int): Index of green band in the image.
nir_band_index (int): Index of NIR band in the image.
down_sample_m (float, optional): If set, resamples input to this resolution in meters.
debug_logs (bool, optional): If True, prints progress and debug info.
image_parallel_workers (ImageParallelWorkers, optional): Enables parallel execution. Note: "process" does not work on macOS due to PyTorch MPS limitations.
omnicloud_kwargs (dict | None): Additional arguments forwarded to predict_from_array.
Raises:
Exception: Propagates any error from processing individual images."""
def _process_cloud_mask_image(input_image_path, output_mask_path, red_band_index, green_band_index, nir_band_index, down_sample_m, debug_logs, omnicloud_kwargs):
"""Processes a single image to generate a cloud mask using OmniCloudMask.
Args:
input_image_path (str): Path to input image.
output_mask_path (str): Path to save output mask.
red_band_index (int): Index of red band.
green_band_index (int): Index of green band.
nir_band_index (int): Index of NIR band.
down_sample_m (float): Target resolution (if resampling).
debug_logs (bool): If True, print progress info.
omnicloud_kwargs (dict | None): Passed to predict_from_array.
Raises:
Exception: If any step in reading, prediction, or writing fails."""
def create_ndvi_raster(input_images, output_images, nir_band_index, red_band_index):
"""Computes NDVI masks for one or more images and writes them to disk.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Vegetation.tif), ["/input/one.tif", "/input/two.tif"].
nir_band_index: Band index for NIR (1-based).
red_band_index: Band index for Red (1-based).
custom_output_dtype: Optional output data type (e.g., "float32").
window_size: Tile size or mode for window-based processing.
debug_logs: Whether to print debug messages.
image_parallel_workers: Parallelism strategy for image-level processing.
window_parallel_workers: Parallelism strategy for window-level processing.
Output:
NDVI raster saved to output_images."""
def _ndvi_process_image(input_path, output_path, image_name, nir_band_index, red_band_index, custom_output_dtype, window_size, debug_logs, window_parallel_workers):
"""Processes a single image for NDVI using windowed strategy."""
def _ndvi_process_window(image_name, window, nir_band_index, red_band_index, debug_logs):
"""Computes NDVI for a single window of a raster."""
def band_math(input_images, output_images, custom_math):
"""Applies custom band math expression to a list of input images and writes the results.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Math.tif), ["/input/one.tif", "/input/two.tif"].
custom_math (str): Python-compatible math expression using bands (e.g., "b1 + b2 / 2").
debug_logs (bool, optional): If True, prints debug messages.
custom_nodata_value (Any, optional): Override nodata value in source image.
image_parallel_workers (int | str | None, optional): Controls image-level parallelism.
window_parallel_workers (int | str | None, optional): Controls window-level parallelism.
window_size (tuple[int, int] | None, optional): Size of processing windows (width, height).
custom_output_dtype (str | None, optional): Output image data type (e.g., "uint16").
calculation_dtype (str | None, optional): Computation data type (e.g., "float32")."""
def _band_math_process_image(input_image_path, output_image_path, name, custom_math, debug_logs, nodata_value, window_parallel_workers, window_size, band_indices, output_dtype, calculation_dtype):
"""Processes a single image by evaluating a custom math expression per pixel block.
Args:
input_image_path (str): Path to the input image.
output_image_path (str): Path to save the result.
name (str): Dataset identifier for use in worker context.
custom_math (str): Math expression using band variables (e.g., "b1 - b2").
debug_logs (bool): If True, prints debug information.
nodata_value (Any): Value to treat as nodata during processing.
window_parallel_workers (int | str | None): Parallelism setting for window processing.
window_size (tuple[int, int]): Size of the processing window (width, height).
band_indices (list[int]): List of 1-based band indices used in the expression.
output_dtype (str): Output data type (e.g., "uint16").
calculation_dtype (str): Intermediate computation data type (e.g., "float32")."""
def _band_math_process_window(name, window, custom_math, debug_logs, nodata_value, band_indices, calculation_dtype):
"""Computes the result of a band math expression within a raster window.
Args:
name (str): Dataset identifier to retrieve the open raster.
window (rasterio.windows.Window): Raster window to process.
custom_math (str): Math expression to evaluate (e.g., "b1 * b2").
debug_logs (bool): If True, prints window-level debug messages.
nodata_value (Any): Value representing nodata in the input bands.
band_indices (list[int]): Band indices referenced in the expression.
calculation_dtype (str): Data type used for evaluation.
Returns:
tuple: (band index, window, computed result as ndarray)"""
### File: mask/utils_mask.py
def threshold_raster(input_images, output_images, threshold_math):
"""Applies a thresholding operation to input raster images using a mathematical expression string.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_images (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.tif", "/input/folder" (assumes $_Threshold.tif), ["/input/one.tif", "/input/two.tif"].
threshold_math (str): A logical expression string using bands (e.g., "b1 > 5", "b1 > 5 & b2 < 10"). Supports: Band references: b1, b2, ...; Operators: >, <, >=, <=, ==, !=, &, |, ~, and (); Percentile-based thresholds: use e.g. "5%b1" to use the 5th percentile of band 1.
debug_logs (bool, optional): If True, prints debug messages.
custom_nodata_value (float | int | None, optional): Override the dataset's nodata value.
image_parallel_workers (ImageParallelWorkers, optional): Parallelism config for image-level processing.
window_parallel_workers (WindowParallelWorkers, optional): Parallelism config for window-level processing.
window_size (WindowSize, optional): Window tiling strategy for memory-efficient processing.
custom_output_dtype (CustomOutputDtype, optional): Output data type override.
calculation_dtype (CalculationDtype, optional): Internal computation dtype."""
def _threshold_process_image(input_image_path, output_image_path, name, threshold_math, debug_logs, nodata_value, window_parallel_workers, window_size, output_dtype, calculation_dtype):
"""Processes a single input raster image using a threshold expression and writes the result to disk.
Args:
input_image_path (str): Path to input raster image.
output_image_path (str): Path to save the output thresholded image.
name (str): Image name for worker context.
threshold_math (str): Expression string to evaluate pixel-wise conditions.
debug_logs (bool): Enable debug logging.
nodata_value (float | int | None): Value considered as nodata.
window_parallel_workers: Parallel config for window-level processing.
window_size: Window tiling size for memory efficiency.
output_dtype: Output raster data type.
calculation_dtype: Data type used for internal calculations."""
def _threshold_process_window(name, window, threshold_math, debug_logs, nodata_value, calculation_dtype):
"""Applies the threshold logic to a single image window.
Args:
name (str): Image identifier for WorkerContext access.
window (rasterio.windows.Window): Window to read and process.
threshold_math (str): Logical expression for thresholding using b1, b2, etc.
debug_logs (bool): Enable debug logs.
nodata_value (float | int | None): Value considered as nodata.
calculation_dtype: Dtype to cast bands for threshold computation.
Returns:
Tuple[int, rasterio.windows.Window, np.ndarray]: Band index, processed window, thresholded data mask (1 for true, 0 for false)."""
def _calculate_threshold_from_percent(input_image_path, threshold, band_index):
"""Calculates a threshold value based on a percentile of valid (non-nodata) pixel values in a raster.
Args:
input_image_path (str): Path to input raster image.
threshold (str): Percent string (e.g., "5%") indicating the percentile to compute.
band_index (int): Band index to evaluate.
debug_logs (bool, optional): If True, prints debug info.
nodata_value (float | int | None, optional): Value treated as nodata.
window_parallel_workers: Optional parallel config.
window_size: Tiling strategy.
calculation_dtype (str): Internal dtype used for calculations.
bins (int): Number of bins for histogram.
Returns:
float: Threshold value corresponding to the requested percentile."""
def process_raster_values_to_vector_polygons(input_images, output_vectors):
"""Converts raster values into vector polygons based on an expression and optional filtering logic.
Args:
input_images (str | List[str], required): Defines input files from a glob path, folder, or list of paths. Specify like: "/input/files/*.tif", "/input/folder" (assumes *.tif), ["/input/one.tif", "/input/two.tif"].
output_vectors (str | List[str], required): Defines output files from a template path, folder, or list of paths (with the same length as the input). Specify like: "/input/files/$.gpkg", "/input/folder" (assumes $_Vectorized.gpkg), ["/input/one.gpkg", "/input/two.gpkg"].
custom_nodata_value (Universal.CustomNodataValue, optional): Custom NoData value to override the default from the raster metadata.
custom_output_dtype (Universal.CustomOutputDtype, optional): Desired output data type. If not set, defaults to raster’s dtype.
image_parallel_workers (Universal.ImageParallelWorkers, optional): Controls parallelism across input images. Can be an integer, executor string, or boolean.
window_parallel_workers (Universal.WindowParallelWorkers, optional): Controls parallelism within a single image by processing windows in parallel.
window_size (Universal.WindowSizeWithBlock, optional): Size of each processing block (width, height), or a strategy string such as "block" or "whole".
debug_logs (Universal.DebugLogs, optional): Whether to print debug logs to the console.
extraction_expression (str): Logical expression to identify pixels of interest using band references (e.g., "b1 > 10 & b2 < 50").
filter_by_polygon_size (str, optional): Area filter for resulting polygons. Can be a number (e.g., ">100") or percentile (e.g., ">95%").
polygon_buffer (float, optional): Distance in coordinate units to buffer the resulting polygons. Default is 0.
value_mapping (dict, optional): Mapping from original raster values to new values. Use `None` to convert to NoData."""
def _process_image_to_polygons(input_image_path, output_vector_path, extraction_expression, filter_by_polygon_size, polygon_buffer, value_mapping, custom_nodata_value, custom_output_dtype, window_parallel, window_backend, window_max_workers, window_size, debug_logs):
"""Processes a single raster file and extracts polygons based on logical expressions and optional filters.
Args:
input_image_path (str): Path to the input raster image.
output_vector_path (str): Output file path for the resulting vector file (GeoPackage format).
extraction_expression (str): Logical expression using band indices (e.g., "b1 > 5 & b2 < 10").
filter_by_polygon_size (str): Area filter for polygons. Supports direct comparisons (">100") or percentiles ("90%").
polygon_buffer (float): Amount of buffer to apply to polygons in projection units.
value_mapping (dict): Dictionary mapping original raster values to new ones. Set value to `None` to mark as NoData.
custom_nodata_value: Custom NoData value to use during processing.
custom_output_dtype: Output data type for raster if relevant in future I/O steps.
window_parallel: Whether to parallelize over raster windows.
window_backend: Backend used for window-level parallelism (e.g., "thread", "process").
window_max_workers: Max number of parallel workers for window-level processing.
window_size: Tuple or strategy defining how the raster should be split into windows.
debug_logs (bool): Whether to print debug logging information."""
def _process_window(window, band_indices, expression, value_mapping, nodata_value):
"""Processes a single window of a raster image to extract polygons matching an expression.
Args:
window (rasterio.windows.Window): Raster window to process.
band_indices (list[int]): List of band indices required by the expression (e.g., [1, 2]).
expression (str): Logical expression involving bands (e.g., "b1 > 10 & b2 < 50").
value_mapping (dict): Dictionary mapping original raster values to new ones or to NoData.
nodata_value (int | float): NoData value to exclude from analysis.
Returns:
list[dict]: List of dictionaries with keys `"value"` and `"geometry"` representing polygons."""
def replace_percent_with_threshold(match):
## Markdown Section
### File: cli.md
# Command Line Interface
## Installation
The command line interface will be installed automatically when the Python library is installed. See instructions on the installation [page](https://spectralmatch.github.io/spectralmatch/installation/). Use the API reference or a command's --help output to see the options that can be passed into the Python functions.
## Usage
Print general help:
Print help for a specific command:
| spectralmatch COMMAND --help
|
Print installed version:
Run a specific command:
| spectralmatch COMMAND [OPTIONS]
|
## Commands
{commands_content}
### File: rrn_methods.md
# Relative Radiometric Normalization (RRN) Methods
RRN methods differ not only in the algorithms used to adjust image values but also in the requirements they place on the input images and in the other techniques they can be combined with. The following taxonomy summarizes the core dimensions along which RRN techniques vary:
- **Matching algorithm:** The core transformation applied to align radiometry between images.
- **Geometric alignment required:** The level of spatial alignment necessary for the method.
- **Pixel selection (PIFs/RCS):** How pseudo-invariant features/control sets are identified.
- **Adjustment scope:** How corrections are applied to the images.
- **Overlap:** Whether the method requires overlapping pixels.
- **Pixel units:** The radiometric units the method is able to operate on.
- **Bands:** Whether band relationships are preserved.
- **Target reference:** What the target image is normalized to.
Multiple matching algorithms can be used in conjunction with multiple pixel selection methods. Note that the most restrictive method will dictate the image requirements (e.g. if using `Global regression` with `Overlapping area` the `Geometric alignment` will need to be `Moderate`). The specific matching algorithm used in each method is flexible and not fixed; it may involve least squares, RANSAC, Theil–Sen, Huber, or other forms of robust regression.
## Matching Algorithms
| Matching algorithm | Description | Geometric alignment | Adjustment granularity | Applies | Overlap required | Pixel units | Bands | Target reference | Year introduced | Key papers | Software |
| --------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------- | ------------------------------- | ------------------------------ | ---------------- | ----------- | ---------------------- | ------------------------------------------ | --------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Histogram Matching (HM)                 | Matches histogram distributions between images                                                         | None                | Global                          | Lookup table                   | No               | Any         | Independent            | Reference histogram                        | 1980s           |                                                                                          | ENVI; [HistMatch QGIS Plugin](https://github.com/Gustavoohs/HistMatch); ArcGIS Pro; IMAGINE Mosaic Pro; [landsat R library via histmatch()](https://cran.r-project.org/web/packages/landsat/index.html) |
| Minimum–Maximum Scale Normalization | Linearly scales pixel values to match reference min/max | None | Global | Min/max | No | Any | Independent | Reference min/max | 1980s | | |
| Mean–Standard Deviation Regression | Fits linear regression using mean and std dev | None | Global | Gain/offset | No | Any | Independent/Correlated | Reference mean/std | 1980s | | ArcGIS Pro; [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch) |
| Overlapping pixel-wise Linear Regression | Fits linear regression using overlapping pairs of pixels                                               | Co-registered       | Model                           | Gain/offset                    | Yes              | Any         | Independent/Correlated | Reference image pixels                     | 1980s           |                                                                                          | ArcGIS Pro; [landsat R library via relnorm()](https://cran.r-project.org/web/packages/landsat/index.html)                                                                                               |
| Block adjusted gamma correction | Adjusts local brightness via block-based gamma scaling | Moderate | Blocks/interpolation resolution | Power function | Yes | Any | Independent | Reference block map (mean of local blocks) | | | [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch) |
| CCA/KCCA-Based | Finding the most correlated combinations between images | Co-registered | CCA space resolution | Matrix | Yes | Any | Correlated | Reference canonical components | | | |
| Dodging | Smooths brightness using low-pass filtering to reduce lighting artifacts | Co-registered | Blur resolution | Low-pass brightness correction | Yes | Any | Independent | Blur created brightness values | | | ArcGIS Pro; IMAGINE Mosaic Pro |
| Illumination Equalization | Models and removes large-scale illumination differences across images | Co-registered | Surface model resolution | Modeled lighting correction | Yes | Any | Independent | Computed illumination values | | | IMAGINE Mosaic Pro |
| Wavelet reconstruction | Uses ancillary data to model and reconstruct image values at multiple detail levels | Co-registered | Ancillary data resolution | Decomposition/reconstruction | Yes | Any | Correlated | Ancillary data | | [(Gan et al., 2021)](https://doi.org/10.1109/JSTARS.2021.3069855) | |
| Dual-reference affine interpolation | Models corrections from the two nearest reference images and applies temporally weighted interpolation | Co-registered | Model | Gain/offset | Yes | Any | Independent | Two closest high-quality reference images | 2020 | [(Hessel et al., 2020)](https://isprs-annals.copernicus.org/articles/V-2-2020/845/2020/) | [rrn-multisensor-multidate Python scripts](https://github.com/chlsl/rrn-multisensor-multidate) |
## Pixel Selection
| Pixel selection (PIFs/RCS) | Description | Type | Geometric alignment | Overlap required | Pixel units | Year introduced | Key papers | Software |
| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | ------------------- | ---------------- | ---------------------------------- | --------------- | ------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------- |
| Whole image | Uses all pixels without selection or masking | None | None | No | Any | | | |
| Overlapping area | Uses only pixels in the spatial overlap between images | None | Moderate | Yes | Any | | | |
| Manual polygons or pixels | User-defined areas or points chosen as invariant | Manual | None | No | Any | | | |
| Manual threshold | Selects pixels based on value threshold | Threshold | None | No | Any | | | |
| Dark/Bright Set (DB) | Selects darkest and brightest pixels assumed to be invariant | Threshold | None | No | Any/reflectance may perform better | | | |
| NDVI ratio | Uses vegetation indices to isolate vegetated areas for normalization | Band ratio | None | No | Reflectance | | | [spectralmatch Python library and QGIS plugin](https://github.com/spectralmatch/spectralmatch) |
| K-T ratio | Uses the Kauth–Thomas transformation to identify invariant pixels in greenness–brightness space | Band ratio | None | No | Reflectance | | [(Hall et al., 1991)](https://www.sciencedirect.com/science/article/pii/003442579190062B?via%3Dihub) | [landsat R library via RCS()](https://cran.r-project.org/web/packages/landsat/index.html) |
| Urban materials ratio | Assumes that certain **man-made surfaces** (e.g., roads, rooftops) have **stable reflectance over time** and uses their statistical properties to correct radiometric differences | Band ratio | None | No | Reflectance | 1988 | [(Schott et al., 1988)](https://www.sciencedirect.com/science/article/pii/0034425788901162?via%3Dihub) | [landsat R library via PIF()](https://cran.r-project.org/web/packages/landsat/index.html) |
| No-change Scattergrams (NC) | Selects pixels near the scatterplot diagonal where reference and target values match closely | Statistical | Co-registered | Yes | Any | | [(De Carvalho et al., 2013)](https://www.mdpi.com/2072-4292/5/6/2763) | |
| Multivariate Alteration Detection (MAD) | Identifies invariant pixels by transforming image differences into uncorrelated components; selects pixels with minimal change across all bands | Statistical | Co-registered | Yes | Any | | | |
| Iteratively Reweighted MAD (IR-MAD) | Refines MAD by reweighting pixels to improve change detection | Statistical | Co-registered | Yes | Any | | [(Canty & Nielsen, 2008)](https://doi.org/10.1016/j.rse.2007.07.013) | [ArrNorm Python scripts](https://github.com/SMByC/ArrNorm) |
| Multi-Rule-Based Normalization | Combines several selection rules to identify invariant pixels | Statistical | None | No | Any | | | |
| PCA | Uses principal component analysis to identify pseudo-invariant pixels along the major axis of multitemporal scatterplots | Statistical | Co-registered | Yes | Any | 2002 | [(Du et al., 2002)](https://www.sciencedirect.com/science/article/pii/S0034425702000299?via%3Dihub) | |
| Gradient angle similarity | Selecting the 10% of pixels with the smallest gradient angle differences between an image and its reference | Statistical | Co-registered | Yes | Any | 2020 | [(Hessel et al., 2020)](https://isprs-annals.copernicus.org/articles/V-2-2020/845/2020/) | [rrn-multisensor-multidate Python scripts](https://github.com/chlsl/rrn-multisensor-multidate) |
| Feature-Based (Keypoint) RRN | Matches distinctive features between images and uses their correspondence to guide normalization | Geometric | Moderate | Yes | Any | | | |
| Location-Independent RRN (LIRRN)        | Groups pixels by brightness or spectral similarity, then matches these groups between images to perform group-wise normalization                                                  | Geometric   | Moderate            | Yes              | Any                                | 2024            | [(Moghimi et al., 2024)](https://www.mdpi.com/1424-8220/24/7/2272)                                     | [LIRRN MATLAB scripts](https://github.com/ArminMoghimi/LIRRN/tree/main)                        |
### File: formats_and_requirements.md
# File Formats and Input Requirements
## Input Raster Requirements
Input rasters must meet specific criteria to ensure compatibility during processing. These are checked by _check_raster_requirements():
- Have a valid geotransform
- Share the same coordinate reference system (CRS)
- Have an identical number of bands
- Use consistent nodata values
Additionally, all rasters should:
- Be a `.tif` file
- Have overlap which represents the same data in each raster
- Have a consistent spectral profile
## Regression Parameters File
Regression parameters can be stored in a `json` file which includes:
- Adjustments: Per-band scale and offset values applied to each image.
- Whole Stats: Per-band mean, std, and size representing overall image statistics.
- Overlap Stats: Per-image pair mean, std, and size for overlapping geometry regions.
The structure is a dictionary keyed by image basenames (no extension) with the following format:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 | {
"image_name": {
"adjustments": {
"band_0": {"scale": float, "offset": float},
...
},
"whole_stats": {
"band_0": {"mean": float, "std": float, "size": int},
...
},
"overlap_stats": {
"other_image": {
"band_0": {"mean": float, "std": float, "size": int},
...
},
...
}
},
...
}
|
This format represents the following: each image_name has adjustments, whole_stats, and overlap_stats. Within adjustments, each band has a scale and an offset. Within whole_stats, and within each paired image under overlap_stats, each band has a mean, std, and size (number of pixels). Each band key follows the format band_0, band_1, etc. Mean and std are floats and size is an integer.
This structure is validated by `_validate_adjustment_model_structure()` before use to ensure consistency and completeness across images and bands. Global regression does not actually use the 'adjustments' field because the adjustments are recalculated on every run.
## Block Maps File
Block maps are spatial summaries of raster data, where each block represents the mean values of a group of pixels over a fixed region. They are used to reduce image resolution while preserving local radiometric characteristics, enabling efficient comparison and adjustment across images. Each map is structured as a grid of blocks with values for each spectral band. They can be saved as regular GeoTIFF files and together store this information: block_local_means, block_reference_mean, num_row, num_col, bounds_canvas_coords.
There are two types of block maps, although their format is exactly the same:
- **Local Block Map:** Each block stores the mean value of all pixels within its boundary for a single image.
- **Reference Block Map:** Each block is the mean of all images' means for its boundary; simply the mean of all local block maps.
Both block maps have the shape: `num_row, num_col, num_bands`, however, there are multiple (one for each image) local block maps and only one reference block map. Once a reference block map is created it is unique to its input images and cannot be accurately modified to add additional images. However, images can be 'brought' to a reference block map even if they were not involved in its creation as long as it covers that image.
### File: index.md
# spectralmatch: Performant Relative Radiometric Normalization toolkit with Pseudo-Invariant Features, seamlines, and other utilities for mosaics and time series
[](#)
[](https://codecov.io/gh/spectralmatch/spectralmatch)
[](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https://github.com/spectralmatch/spectralmatch&cloudshell_working_dir=.)
[](https://spectralmatch.github.io/spectralmatch/llm_prompt)
[](https://pypi.org/project/spectralmatch/)
## Overview

*spectralmatch* provides a Python library, command line interface, and QGIS plugin with multiple algorithms to perform Relative Radiometric Normalization (RRN). It also includes utilities for generating seamlines, cloud masks, Pseudo-Invariant Features, statistics, preprocessing, and more.
## Features
- **Automated, Efficient, and Scalable:** Designed for large-scale workflows with no manual steps, leveraging multiprocessing and Cloud Optimized GeoTIFF support for fast, efficient processing across images, windows, and bands.
- **Resumable Processing:** Save image stats and block maps for quicker reprocessing.
- **Integrated Seamline and Cloud Masking:** Generate seamlines and detect clouds within the same workflow.
- **Specify Model Images:** Include all or specified images in the matching solution to bring all images to a central tendency or to the selected images' spectral profile.
- **Consistent Multi-image Analysis:** Performs minimal necessary adjustments to achieve inter-image consistency while preserving the original spectral characteristics.
- **Sensor and Unit Agnostic:** Supports optical imagery from handheld cameras, drones, crewed aircraft, and satellites for reliable single sensor and multi-sensor analysis, while preserving spectral integrity across all pixel units—including negative values and reflectance.
- **Enhanced Imagery:** Helpful when performing mosaics and time series analysis by blending large image collections and normalizing them over time, providing consistent, high-quality data for machine learning and other analytical tasks.
- **Open Source and Collaborative:** Free under the MIT License with a modular design that supports community contributions and easy development of new features and workflows. Accessible through a python library, command line interface, and QGIS plugin.
---
## Current Matching Algorithms
### Global to local matching
This technique is derived from 'An auto-adapting global-to-local color balancing method for optical imagery mosaic' by Yu et al., 2017 (DOI: 10.1016/j.isprsjprs.2017.08.002). It is particularly useful for very high-resolution imagery (satellite or otherwise) and works in a two-phase process.
First, this method applies least squares regression to estimate scale and offset parameters that align the histograms of all images toward a shared spectral center. This is achieved by constructing a global model based on the overlapping areas of adjacent images, where the spectral relationships are defined. This global model ensures that each image conforms to a consistent radiometric baseline while preserving overall color fidelity.
However, global correction alone cannot capture intra-image variability so a second local adjustment phase is performed. The overlap areas are divided into smaller blocks, and each block’s mean is used to fine-tune the color correction. This block-wise tuning helps maintain local contrast and reduces visible seams, resulting in seamless and spectrally consistent mosaics with minimal distortion.

*Mean spectral profiles of five cloud masked Landsat images before and after applying Normalized Difference Vegetation Index masking, global regression, and local block adjustment with spectralmatch.*
#### Assumptions
- **Consistent Spectral Profile:** The true spectral response of overlapping areas remains the same throughout the images.
- **Least Squares Modeling:** A least squares approach can effectively model and fit all images' spectral profiles.
- **Scale and Offset Adjustment:** Applying scale and offset corrections can effectively harmonize images.
- **Minimized Color Differences:** The best color correction is achieved when color differences are minimized.
- **Geometric Alignment:** Images are assumed to be geometrically aligned with known relative positions via a geotransform. However, they only need to be roughly aligned as pixel co-registration is not required.
- **Global Consistency:** Overlapping color differences are consistent across the entire image.
- **Local Adjustments:** Block-level color differences result from the global application of adjustments.
---
## Installation [(Detailed methods)](https://spectralmatch.github.io/spectralmatch/installation/)
### Installation as a QGIS Plugin
Install the spectralmatch plugin in QGIS and find the tools in the Processing Toolbox. Install the spectralmatch Python library with the tools in the `setup` toolbox or manually.
### Installation as a Python Library and CLI
Ensure you have the following system-level prerequisites: `Python ≥ 3.10`, `pip`, `PROJ ≥ 9.3`, and `GDAL = 3.10.2`. Use this command to install the library:
| pip install spectralmatch
|
---
## Usage
Example scripts and sample data are provided to verify a successful installation and help you get started quickly in the repository at [`/docs/examples`](https://github.com/spectralmatch/spectralmatch/blob/main/docs/examples/) and downloadable [here](https://download-directory.github.io/?url=https://github.com/spectralmatch/spectralmatch/tree/main/docs/examples&filename=spectralmatch_examples).
This is an example mosaic workflow using folders for each step:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 | working_directory = "/path/to/working/directory"
input_folder = os.path.join(working_directory, "Input")
global_folder = os.path.join(working_directory, "GlobalMatch")
local_folder = os.path.join(working_directory, "LocalMatch")
aligned_folder = os.path.join(working_directory, "Aligned")
clipped_folder = os.path.join(working_directory, "Clipped")
global_regression(
input_images=input_folder,
output_images=global_folder,
)
local_block_adjustment(
input_images=global_folder,
output_images=local_folder,
)
align_rasters(
input_images=local_folder,
output_images=aligned_folder,
tap=True,
)
voronoi_center_seamline(
input_images=aligned_folder,
output_mask=os.path.join(working_directory, "ImageMasks.gpkg"),
image_field_name="image",
)
mask_rasters(
input_images=aligned_folder,
output_images=clipped_folder,
vector_mask=("include", os.path.join(working_directory, "ImageMasks.gpkg"), "image"),
)
merge_rasters(
input_images=clipped_folder,
output_image_path=os.path.join(working_directory, "MergedImage.tif"),
)
|
---
## Documentation
Documentation is available at [spectralmatch.github.io/spectralmatch/](https://spectralmatch.github.io/spectralmatch/).
---
## Contributing Guide
Contributing Guide is available at [spectralmatch.github.io/spectralmatch/contributing](https://spectralmatch.github.io/spectralmatch/contributing/).
---
## License
This project is licensed under the MIT License. See [LICENSE](https://github.com/spectralmatch/spectralmatch/blob/main/LICENSE) for details.
### File: contributing.md
# Contributing Guide
Thank you for your interest in contributing. The sections below outline how the library is structured, how to submit changes, and the conventions to follow when developing new features or improving existing functionality.
For convenience, you can copy [this](/spectralmatch/llm_prompt/) auto updated LLM priming prompt with function headers and docs.
---
## Collaboration Instructions
We welcome all contributions to the project! Please be respectful and work towards improving the library. To get started:
1. [Create an issue](https://github.com/spectralmatch/spectralmatch/issues/new) describing the feature or bug or just to ask a question. Provide relevant context, desired timeline, any assistance needed, who will be responsible for the work, anticipated results, and any other details.
2. [Fork the repository](https://github.com/spectralmatch/spectralmatch/fork) and create a new feature branch.
3. Make your changes and add any necessary tests.
4. Open a Pull Request against the main repository.
---
## Design Philosophy
- Keep code concise and simple
- Adapt code for large datasets with windows, multiprocessing, progressive computations, etc
- Keep code modular and have descriptive names
- Use PEP 8 code formatting
- Use functions that are already created when possible
- Combine similar params into one multi-value parameter
- Use similar naming convention and input parameter format as other functions.
- Create docstrings (Google style), tests, and update the docs for new functionality
---
## Extensible Function Types
Relative Radiometric Normalization (RRN) methods often differ in how images are matched, pixels are selected, and seamlines are created. This library organizes those into distinct Python packages, while other operations like aligning rasters, applying masks, merging images, and calculating statistics are more consistent across techniques and are treated as standard utilities.
### Matching functions
Used to adjust the pixel values of images to ensure radiometric consistency across scenes. These functions compute differences between images and apply transformations so that brightness, contrast, or spectral characteristics align across datasets.
### Masking functions (PIF/RCS)
Used to define which parts of an image should be kept or discarded based on spatial criteria. These functions apply vector-based filters or logical rules to isolate regions of interest, remove clouds, or exclude invalid data from further processing.
### Seamline functions
Used to determine optimal boundaries between overlapping image regions. These functions generate cutlines that split image footprints in a way that minimizes visible seams and balances spatial coverage, often relying on geometric relationships between overlapping areas.
---
## Standard UI
Reusable types are organized into the types and validation module. Use these types directly as the types of params inside functions where applicable. Use the appropriate _resolve... function to resolve these inputs into usable variables.
### Input/Output
The input_name parameter defines how the input files are determined and accepts either a str or a list. If given as a str, it should be either a folder path (in which case default_file_pattern must be set) or a complete glob pattern file path. Functions should default to searching for all appropriately formatted files in the input folder (for example "*.tif"). Alternatively, it can be a list of full file paths to individual input images. For example:
- input_images="/input/files/*.tif" (does not require default_file_pattern)
- input_images="/input/folder" (requires default_file_pattern to be set),
- input_images=["/input/one.tif", "/input/two.tif", ...] (does not require default_file_pattern)
The output_name parameter defines how output filenames are determined and accepts either a str or a list. If given as a str, it should be either a folder path (in which case default_file_pattern must be set) or a complete template pattern file path. Functions should default to templating with basename, underscore, processing step (for example "$_Global"). Alternatively, it may be a list of full output paths, which must match the number of input images. For example:
- output_images="/output/files/$.tif" (does not require default_file_pattern)
- output_images="/output/folder" (requires default_file_pattern to be set),
- output_images=["/output/one.tif", "/output/two.tif", ...] (does not require default_file_pattern)
The _resolve_paths function handles creating folders for output files. Folders and files are distinguished by the presence of a "." in the basename.
1
2
3
4
5
6
7
8
9
10
11
12
13 | # Params
input_name # For example: input_images
output_name # For example: output_images
# Types
SearchFolderOrListFiles = str | List[str] # Required
CreateInFolderOrListFiles = str | List[str] # Required
# Resolve
input_image_paths = _resolve_paths("search", input_images, kwargs={"default_file_pattern":"*.tif"})
output_image_paths = _resolve_paths("create", output_images, kwargs={"paths_or_bases":input_image_paths, "default_file_pattern":"$_Global.tif"})
image_names = _resolve_paths("name", input_image_paths)
# This pattern can also be used with other input types like vectors
|
### Output dtype
The custom_output_dtype parameter specifies the data type for output rasters and defaults to the input image’s data type if not provided.
| # Param
custom_output_dtype
# Type
CustomOutputDtype = str | None # Default: None
# Resolve
output_dtype = _resolve_output_dtype(rasterio.DatasetReader, custom_output_dtype)
|
### Nodata Value
The custom_nodata_value parameter overrides the input nodata value from the first raster in the input rasters if set.
| # Param
custom_nodata_value
# Type
CustomNodataValue = float | int | None # Default: None
# Resolve
nodata_value = _resolve_nodata_value(rasterio.DatasetReader, custom_nodata_value)
|
### Debug Logs
The debug_logs parameter enables printing of debug information; it defaults to False. Functions should begin by printing "Start {process name}", while all other print statements should be conditional on debug_logs being True. When printing the image being processed, use the image name and not the image path.
| # Param
debug_logs
# Type
DebugLogs = bool # Default: False
# No resolve function necessary
|
### Vector Mask
The vector_mask parameter limits statistics calculations to specific areas and is given as a tuple with two or three items: a literal "include" or "exclude" to define how the mask is applied, a string path to the vector file, and an optional field name used to match geometries based on the input image name (substring match allowed). Defaults to None for no mask.
| # Param
vector_mask
# Type
VectorMask = Tuple[Literal["include", "exclude"], str, Optional[str]] | None
# No resolve function necessary
|
### Parallel Workers
The image_parallel_workers parameter defines the parallelization strategy at the image level. It accepts a tuple such as ("process", "cpu") to enable multiprocessing across all available CPU cores, or you can use "thread" as the backend if threading is preferred. Set it to None to disable image-level parallelism. The window_parallel_workers parameter controls parallelization within each image at the window level and follows the same format. Setting it to None disables window-level parallelism. Processing windows should be done one band at a time for scalability.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 | # Params
image_parallel_workers
window_parallel_workers
# Types
ImageParallelWorkers = Tuple[Literal["process", "thread"], Literal["cpu"] | int] | None
WindowParallelWorkers = Tuple[Literal["process"], Literal["cpu"] | int] | None
# Resolve
image_parallel, image_backend, image_max_workers = _resolve_parallel_config(image_parallel_workers)
window_parallel, window_backend, window_max_workers = _resolve_parallel_config(window_parallel_workers)
# Main process example
image_args = [(arg, other_args, ...) for arg in inputs]
if image_parallel:
with _get_executor(image_backend, image_max_workers) as executor:
futures = [executor.submit(_name_process_image, *arg) for arg in image_args]
for future in as_completed(futures):
result = future.result()
else:
for arg in image_args:
result = _name_process_image(*arg)
def _name_process_image(image_name, arg_1, arg_2, ...):
with rasterio.open(input_image_path) as src:
# Open output image as well if saving to image
windows = _resolve_windows(src, window_size)
window_args = [(window, other_args, ...) for window in windows]
if window_parallel:
with _get_executor(
window_backend,
window_max_workers,
initializer=WorkerContext.init,
initargs=({image_name: ("raster", input_image_path)},)
) as executor:
futures = [executor.submit(_name_process_window, *arg) for arg in window_args]
for future in as_completed(futures):
band, window, result = future.result()
# Save result to variable or dataset
else:
WorkerContext.init({image_name: ("raster", input_image_path)})
for arg in window_args:
band, window, buf = _name_process_window(*arg)
# Save result to variable or dataset
WorkerContext.close()
def _name_process_window(image_name, arg_1, arg_2, ...):
ds = WorkerContext.get(image_name)
# Process result to return
return band, window, data
|
### Windows
The window_size parameter sets the tile size for reading and writing, using an integer for square tiles, a tuple for custom dimensions, "internal" to use the raster’s native tiling (ideal for efficient streaming from COGs), or None to process the full image at once.
| # Param
window_size
# Types
WindowSize = int | Tuple[int, int] | Literal["internal"] | None
WindowSizeWithBlock = int | Tuple[int, int] | Literal["internal", "block"] | None
# Resolve
windows = _resolve_windows(rasterio.DatasetReader, window_size)
|
### COGs
The save_as_cog parameter, when set to True, saves the output as a Cloud-Optimized GeoTIFF with correct band and block ordering.
| # Param
save_as_cog
# Type
SaveAsCog = bool # Default: True
# No resolve function necessary
|
---
## Validate Inputs
The validate methods are used to check that input parameters follow expected formats before processing begins. There are different validation methods for different scopes—some are general-purpose (e.g., Universal.validate) and others apply to specific contexts like matching (Match.validate_match). These functions raise clear errors when inputs are misconfigured, helping catch issues early and enforce consistent usage patterns across the library.
| # Validate params example
Universal.validate(
input_images=input_images,
output_images=output_images,
vector_mask=vector_mask,
)
Match.validate_match(
specify_model_images=specify_model_images,
)
|
---
## File Cleanup
Temporary generated files can be deleted once they are no longer needed via this command:
---
## Docs
### Serve docs locally
Runs a local dev server at http://localhost:8000.
### Build static site
Generates the static site into the site/ folder.
### Deploy to GitHub Pages
Deploys built site using mkdocs gh-deploy.
---
## Versioning
Uses git tag to create annotated version tags and push them. This also syncs to PyPI. New versions will be released when the maintainer determines sufficient new functionality has been added.
---
## Code Formatting
This project uses [black](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html) for code formatting and ruff for linting.
### Set Up Pre-commit Hooks (Recommended)
To maintain code consistency use this hook to check and correct code formatting automatically:
| pre-commit install
pre-commit run --all-files
|
### Manual Formatting
**Format code:** Automatically formats all Python files with black.
**Check formatting:** Checks that all code is formatted (non-zero exit code if not).
**Lint code:** Runs ruff to catch style and quality issues.
---
## Testing
[pytest](https://docs.pytest.org/) is used for testing. Tests will automatically be run when merging into main but they can also be run locally via:
To test an individual folder or file:
| make test-file path=path/to/folder_or_file
|
### File: installation.md
# Installation Methods
---
## Installation as a QGIS Plugin
### 1. Get QGIS
[Download](https://qgis.org/download/) and install QGIS.
> This plugin requires python>=3.10. QGIS ships with different versions of Python; to check which one you have, go to QGIS > About QGIS in the QGIS menu. If your version is out of date, you can use `conda install qgis` to create a containerized version of QGIS and then `qgis` to start the program.
### 2. Install spectralmatch QGIS plugin
- Go to Plugins → Manage and Install Plugins…
- Find spectralmatch in the list, install, and enable it
- Find the plugin in the Processing Toolbox
### 3. Install spectralmatch Python library
Use the tools in the `setup` toolbox to install the spectralmatch Python library, or install it manually. You must install the Python library into the QGIS Python interpreter for the tool to work.
---
## Installation as a Python Library and CLI
### 1. System requirements
Before installing, ensure you have the following system-level prerequisites:
- Python ≥ 3.10
- PROJ ≥ 9.3
- GDAL = 3.10.2
- pip
An easy way to install these dependencies is to use [Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions):
| conda create -n spectralmatch python=3.10 "gdal=3.10.2" "proj>=9.3" -c conda-forge
conda activate spectralmatch
|
### 2. Install spectralmatch
You can install the library automatically from [PyPI](https://pypi.org/) (this method installs only the core code as a library):
| pip install spectralmatch
|
---
## Installation from Source
### 1. Clone the Repository
| git clone https://github.com/spectralmatch/spectralmatch.git
cd spectralmatch
|
> Assuming you have Make installed, you can then run `make install-setup` to automatically complete the remaining setup steps.
### 2. System requirements
Before installing, ensure you have the following system-level prerequisites:
- Python ≥ 3.10
- PROJ ≥ 9.3
- GDAL = 3.10.2
An easy way to install these dependencies is to use [Miniconda](https://www.anaconda.com/docs/getting-started/miniconda/install#quickstart-install-instructions):
| conda create -n spectralmatch python=3.10 "gdal=3.10.2" "proj>=9.3" -c conda-forge
conda activate spectralmatch
|
### 3. Install Dependencies
The `pyproject.toml` defines **core** dependencies to run the library and optional **dev** and **docs** dependencies.
| pip install . # normal dependencies
pip install -e ".[dev]" # developer dependencies
pip install -e ".[docs]" # documentation dependencies
|
### File: api/seamline.md
::: spectralmatch.seamline.voronoi_center_seamline
### File: api/utils.md
::: spectralmatch.utils
### File: api/match.md
::: spectralmatch.match.global_regression
::: spectralmatch.match.local_block_adjustment
### File: api/statistics.md
::: spectralmatch.statistics
### File: api/mask.md
::: spectralmatch.mask.mask
::: spectralmatch.mask.utils_mask
### File: api/handlers.md
::: spectralmatch.handlers