Skip to content

Data Handlers

create_paths(template_pattern, paths_or_bases, *, default_file_pattern=None, debug_logs=False, replace_symbol='$', create_folders=True)

Create output paths using a filename template_pattern and a list of reference paths or names.

Parameters:

Name Type Description Default
template_pattern (str, required)

Defines output files from a template path or folder to match input paths or names. Specify like: "/input/files/$.tif" or "/input/folder" (while passing default_file_pattern like: '$.tif')

required
paths_or_bases List[str]

List of full paths or base names to derive the replace_symbol from.

required
default_file_pattern str

Used if template_pattern is a directory.

None
debug_logs bool

Whether to print the created paths.

False
replace_symbol str

Placeholder symbol in the template to replace with base names.

'$'
create_folders bool

Whether to create output folders if they don't exist.

True

Returns:

Type Description
List[str]

List[str]: List of constructed file paths.

Raises:

Type Description
ValueError

If template_pattern is a directory and default_file_pattern is not provided.

Source code in spectralmatch/handlers.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def create_paths(
    template_pattern: str,
    paths_or_bases: List[str],
    *,
    default_file_pattern: str | None = None,
    debug_logs: bool = False,
    replace_symbol: str = "$",
    create_folders: bool = True,
) -> List[str]:
    """
    Create output paths using a filename template_pattern and a list of reference paths or names.

    Args:
        template_pattern (str, required): Defines output files from a glob path or folder to match input paths or names. Specify like: "/input/files/$.tif" or "/input/folder" (while passing default_file_pattern like: '$.tif')
        paths_or_bases (List[str]): List of full paths or base names to derive the replace_symbol from.
        default_file_pattern (str, optional): Used if `template_pattern` is a directory.
        debug_logs (bool): Whether to print the created paths.
        replace_symbol (str): Placeholder symbol in the template to replace with base names.
        create_folders (bool): Whether to create output folders if they don't exist.

    Returns:
        List[str]: List of constructed file paths.

    Raises:
        ValueError: If `template_pattern` is a directory and `default_file_pattern` is not provided.
    """
    if not os.path.basename(template_pattern).count("."):
        if not default_file_pattern:
            raise ValueError(
                "Template is a directory, but no default_file_pattern was provided."
            )
        template_pattern = os.path.join(template_pattern, default_file_pattern)

    output_paths = []
    for ref in paths_or_bases:
        base = (
            os.path.splitext(os.path.basename(ref))[0]
            if ("/" in ref or "\\" in ref)
            else os.path.splitext(ref)[0]
        )
        filename = template_pattern.replace(replace_symbol, base)
        output_paths.append(filename)

    if create_folders:
        for path in output_paths:
            os.makedirs(os.path.dirname(path), exist_ok=True)

    if debug_logs:
        print(f"Created {len(output_paths)} paths:")
        for p in output_paths:
            print(f"  {p}")

    return output_paths

match_paths(input_match_paths, reference_paths, match_regex, debug_logs=False)

Match reference_paths to input_match_paths using a regex applied to the basenames of input_match_paths. The extracted key must be a substring of the reference filename.

Parameters:

Name Type Description Default
input_match_paths List[str]

List of candidate paths to extract keys from.

required
reference_paths List[str]

List of reference paths to align to.

required
match_regex str

Regex applied to basenames of input_match_paths to extract a key to match via inclusion in reference_paths (e.g. "(.*)_LocalMatch\.gpkg$").

required
debug_logs bool

If True, print matched and unmatched file basenames.

False

Returns:

Type Description
List[Optional[str]]

List[Optional[str]]: A list the same length as reference_paths where each element is the matched path from input_match_paths or None.

Raises:

Type Description
ValueError

If output list length does not match reference_paths length.

Source code in spectralmatch/handlers.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def match_paths(
    input_match_paths: List[str],
    reference_paths: List[str],
    match_regex: str,
    debug_logs: bool = False,
) -> List[Optional[str]]:
    """
    Match `reference_paths` to `input_match_paths` using a regex applied to the basenames of `input_match_paths`. The extracted key must be a substring of the reference filename.

    The key is the first capture group of the regex; if the regex has no
    groups, or the first group did not participate in the match, the whole
    match is used instead. Inputs that yield an empty key are ignored because
    an empty string is a substring of every filename. When several inputs
    yield the same key, the last one wins. When several keys are contained in
    one reference basename, the longest key is preferred (so "img10" is
    chosen over "img1" for "img10.tif"); ties keep input order.

    Args:
        input_match_paths (List[str]): List of candidate paths to extract keys from.
        reference_paths (List[str]): List of reference paths to align to.
        match_regex (str): Regex applied to basenames of input_match_paths to extract a key to match via *inclusion* in reference_paths (e.g. "(.*)_LocalMatch[.]gpkg$").
        debug_logs (bool): If True, print matched and unmatched file basenames.

    Returns:
        List[Optional[str]]: A list the same length as `reference_paths` where each
        element is the matched path from `input_match_paths` or None.

    Raises:
        ValueError: If output list length does not match reference_paths length.
    """
    pattern = re.compile(match_regex)

    # Extract keys from input_match_paths
    match_keys = {}
    for mpath in input_match_paths:
        match = pattern.search(os.path.basename(mpath))
        if not match:
            continue
        key = match.group(1) if match.groups() else None
        if key is None:
            # No groups, or an optional group that did not participate.
            key = match.group(0)
        if not key:
            # "" is contained in every basename and would match everything.
            continue
        match_keys[key] = mpath

    # Longest keys first so the most specific key wins; sorted() is stable,
    # so equally long keys keep their insertion order.
    candidates = sorted(
        match_keys.items(), key=lambda item: len(item[0]), reverse=True
    )

    # Match each reference path
    matched_list: List[Optional[str]] = []
    for rpath in reference_paths:
        rbase = os.path.basename(rpath)
        matched = next((mpath for key, mpath in candidates if key in rbase), None)
        matched_list.append(matched)

    # Defensive invariant: one output entry per reference path.
    if len(matched_list) != len(reference_paths):
        raise ValueError("Matched list length does not match reference_paths length.")

    if debug_logs:
        used_matches = {mpath for mpath in matched_list if mpath is not None}
        print(f"Matched {len(used_matches)} of {len(input_match_paths)} input path(s):")
        for rpath, mpath in zip(reference_paths, matched_list):
            shown = os.path.basename(mpath) if mpath is not None else None
            print(f"  {os.path.basename(rpath)} -> {shown}")
        unmatched = [p for p in input_match_paths if p not in used_matches]
        if unmatched:
            print("Unmatched input paths:")
            for mpath in unmatched:
                print(f"  {os.path.basename(mpath)}")

    return matched_list

search_paths(search_pattern, *, default_file_pattern=None, recursive=False, match_to_paths=None, debug_logs=False)

Search for files using a glob pattern, or a folder with a default file pattern.

Parameters:

Name Type Description Default
search_pattern (str, required)

Defines input files from a glob path or folder. Specify like: "/input/files/*.tif" or "/input/folder" (while passing default_file_pattern like: '*.tif')

required
default_file_pattern str

Used when search_pattern is a directory. If not set and search_pattern is a folder, raises an error.

None
recursive bool

Whether to search recursively.

False
match_to_paths Tuple[List[str], str]

Matches input files to a reference list using a regex.

None
debug_logs bool

Whether to print matched paths.

False

Returns:

Type Description
List[str]

List[str]: Sorted list of matched file paths.

Raises:

Type Description
ValueError

If search_pattern is a directory and default_file_pattern is not provided.

Source code in spectralmatch/handlers.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def search_paths(
    search_pattern: str,
    *,
    default_file_pattern: str | None = None,
    recursive: bool = False,
    match_to_paths: Tuple[List[str], str] | None = None,
    debug_logs: bool = False,
) -> List[str]:
    """
    Search for files using a glob pattern, or a folder with a default file pattern.

    Args:
        search_pattern (str, required): Defines input files from a glob path or folder. Specify like: "/input/files/*.tif" or "/input/folder" (while passing default_file_pattern like: '*.tif')
        default_file_pattern (str, optional): Used when `pattern` is a directory. If not set and `pattern` is a folder, raises an error.
        recursive (bool, optional): Whether to search recursively.
        match_to_paths (Tuple[List[str], str], optional): Matches input files to a reference list using a regex.
        debug_logs (bool, optional): Whether to print matched paths.

    Returns:
        List[str]: Sorted list of matched file paths.

    Raises:
        ValueError: If `search_pattern` is a directory and `default_file_pattern` is not provided.
    """
    if not os.path.basename(search_pattern).count("."):
        if not default_file_pattern:
            raise ValueError(
                "Pattern is a directory, but no default_file_pattern was provided."
            )
        search_pattern = os.path.join(search_pattern, default_file_pattern)

    input_paths = sorted(glob.glob(search_pattern, recursive=recursive))

    if debug_logs:
        print(f"Found {len(input_paths)} file(s) matching: {search_pattern}")

    if match_to_paths:
        input_paths = match_paths(input_paths, *match_to_paths)

    return input_paths