Benchmark Multithreading
In [ ]:
Copied!
import os, shutil, tempfile, time
from pathlib import Path
import os, shutil, tempfile, time
from pathlib import Path
In [ ]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from rasterio.transform import from_origin
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from rasterio.transform import from_origin
In [ ]:
Copied!
from spectralmatch import global_regression, local_block_adjustment
from spectralmatch import global_regression, local_block_adjustment
In [ ]:
Copied!
def make_fake_rasters(out_dir, n_images, width, height, nodata=0):
out_dir = Path(out_dir)
out_dir.mkdir(parents=True, exist_ok=True)
profile = dict(
driver="GTiff",
width=width,
height=height,
count=8,
dtype="uint16",
nodata=nodata,
crs="EPSG:3857",
transform=from_origin(0, 0, 1, 1),
tiled=True,
blockxsize=512,
blockysize=512,
compress="LZW",
)
rng = np.random.default_rng(seed=42)
paths = []
for i in range(n_images):
p = out_dir / f"fake_{i+1}_{width}px.tif"
with rasterio.open(p, "w", **profile) as dst:
for b in range(1, 9):
data = rng.integers(1, 1000, size=(height, width), dtype="uint16")
data[0, 0] = nodata
dst.write(data, indexes=b)
paths.append(str(p))
return paths
def make_fake_rasters(out_dir, n_images, width, height, nodata=0):
out_dir = Path(out_dir)
out_dir.mkdir(parents=True, exist_ok=True)
profile = dict(
driver="GTiff",
width=width,
height=height,
count=8,
dtype="uint16",
nodata=nodata,
crs="EPSG:3857",
transform=from_origin(0, 0, 1, 1),
tiled=True,
blockxsize=512,
blockysize=512,
compress="LZW",
)
rng = np.random.default_rng(seed=42)
paths = []
for i in range(n_images):
p = out_dir / f"fake_{i+1}_{width}px.tif"
with rasterio.open(p, "w", **profile) as dst:
for b in range(1, 9):
data = rng.integers(1, 1000, size=(height, width), dtype="uint16")
data[0, 0] = nodata
dst.write(data, indexes=b)
paths.append(str(p))
return paths
In [ ]:
Copied!
SIZES = [2_048, 4_096, 6_144, 8_192, 10_240, 12_288]
NUM_IMAGES = 2
TILE_SIZE = (1024, 1024)
MAX_WORKERS = 32
SIZES = [2_048, 4_096, 6_144, 8_192, 10_240, 12_288]
NUM_IMAGES = 2
TILE_SIZE = (1024, 1024)
MAX_WORKERS = 32
In [ ]:
Copied!
WORK_DIR = Path(__file__).parent / "bench_output"
WORK_DIR.mkdir(exist_ok=True)
WORK_DIR = Path(__file__).parent / "bench_output"
WORK_DIR.mkdir(exist_ok=True)
In [ ]:
Copied!
SERIAL, PARALLEL = [], []
SERIAL, PARALLEL = [], []
In [ ]:
Copied!
for sz in SIZES:
print(f"\n=== {sz} × {sz} px ({NUM_IMAGES} images) ===")
tmp = Path(tempfile.mkdtemp(prefix=f"fake_{sz}px_", dir=WORK_DIR))
imgs = make_fake_rasters(tmp, NUM_IMAGES, sz, sz)
t0 = time.time()
g_dir = tmp / "serial_g"
l_dir = tmp / "serial_l"
global_regression(
imgs,
g_dir,
custom_mean_factor=3,
custom_std_factor=1,
window_size=TILE_SIZE,
parallel=False,
debug_logs=False,
)
glob_imgs = sorted((g_dir / "Images").glob("*.tif"))
local_block_adjustment(
[str(p) for p in glob_imgs],
l_dir,
target_blocks_per_image=100,
window_size=TILE_SIZE,
custom_nodata_value=-9999,
parallel=False,
debug_logs=False,
)
SERIAL.append(time.time() - t0)
print(f"serial : {SERIAL[-1]:.1f} s")
t0 = time.time()
g_dir = tmp / "parallel_g"
l_dir = tmp / "parallel_l"
global_regression(
imgs,
g_dir,
custom_mean_factor=3,
custom_std_factor=1,
window_size=TILE_SIZE,
parallel=True,
max_workers=MAX_WORKERS,
debug_logs=False,
)
glob_imgs = sorted((g_dir / "Images").glob("*.tif"))
local_block_adjustment(
[str(p) for p in glob_imgs],
l_dir,
target_blocks_per_image=100,
window_size=TILE_SIZE,
custom_nodata_value=-9999,
parallel=True,
max_workers=MAX_WORKERS,
debug_logs=False,
)
PARALLEL.append(time.time() - t0)
print(f"parallel : {PARALLEL[-1]:.1f} s")
shutil.rmtree(tmp, ignore_errors=True)
for sz in SIZES:
print(f"\n=== {sz} × {sz} px ({NUM_IMAGES} images) ===")
tmp = Path(tempfile.mkdtemp(prefix=f"fake_{sz}px_", dir=WORK_DIR))
imgs = make_fake_rasters(tmp, NUM_IMAGES, sz, sz)
t0 = time.time()
g_dir = tmp / "serial_g"
l_dir = tmp / "serial_l"
global_regression(
imgs,
g_dir,
custom_mean_factor=3,
custom_std_factor=1,
window_size=TILE_SIZE,
parallel=False,
debug_logs=False,
)
glob_imgs = sorted((g_dir / "Images").glob("*.tif"))
local_block_adjustment(
[str(p) for p in glob_imgs],
l_dir,
target_blocks_per_image=100,
window_size=TILE_SIZE,
custom_nodata_value=-9999,
parallel=False,
debug_logs=False,
)
SERIAL.append(time.time() - t0)
print(f"serial : {SERIAL[-1]:.1f} s")
t0 = time.time()
g_dir = tmp / "parallel_g"
l_dir = tmp / "parallel_l"
global_regression(
imgs,
g_dir,
custom_mean_factor=3,
custom_std_factor=1,
window_size=TILE_SIZE,
parallel=True,
max_workers=MAX_WORKERS,
debug_logs=False,
)
glob_imgs = sorted((g_dir / "Images").glob("*.tif"))
local_block_adjustment(
[str(p) for p in glob_imgs],
l_dir,
target_blocks_per_image=100,
window_size=TILE_SIZE,
custom_nodata_value=-9999,
parallel=True,
max_workers=MAX_WORKERS,
debug_logs=False,
)
PARALLEL.append(time.time() - t0)
print(f"parallel : {PARALLEL[-1]:.1f} s")
shutil.rmtree(tmp, ignore_errors=True)
In [ ]:
Copied!
plt.figure(figsize=(8, 5))
plt.plot(SIZES, SERIAL, "-o", label="serial")
plt.plot(SIZES, PARALLEL, "-o", label=f"parallel ({MAX_WORKERS} workers)")
plt.xlabel("Raster width = height (pixels)")
plt.ylabel("Total runtime: global + local (seconds)")
plt.title("Pipeline runtime vs. image size (8-band, 2 images)")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
plt.figure(figsize=(8, 5))
plt.plot(SIZES, SERIAL, "-o", label="serial")
plt.plot(SIZES, PARALLEL, "-o", label=f"parallel ({MAX_WORKERS} workers)")
plt.xlabel("Raster width = height (pixels)")
plt.ylabel("Total runtime: global + local (seconds)")
plt.title("Pipeline runtime vs. image size (8-band, 2 images)")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()