We often want a small, well-curated set of satellite chips for teaching and prototyping. This exercise shows how to search the Microsoft Planetary Computer STAC, prefer clear scenes, clip to an area-of-interest, and save compact band stacks.
Parameters for this exercise
Run this cell to define a small AOI centered on the UCSB campus and a modest time window. Keep N_SAMPLES small so this finishes quickly.
from pathlib import Path
import math

# Output directory for this demo run
OUT_DIR = Path("../data/hls_santabarbara")

# Center on UCSB campus and buffer ~10 km (adjust as you like)
center_lat, center_lon = 34.4138, -119.8489
buffer_km = 10

# Time window to search (narrow = faster)
DATE_START = "2018-01-01"
DATE_END = "2024-12-31"

# Choose HLSS30 (Sentinel-2) or HLSL30 (Landsat)
HLS_COLLECTION = "HLSS30"  # or "HLSL30"

# Keep downloads small for a quick exercise
N_SAMPLES = 2

# Basic filtering
MAX_CLOUD = 20  # % cloud cover threshold
PREFERRED_MONTHS = list(range(4, 11))  # bias to April-Oct for fewer clouds

# Bands to export (common for HLS at 30 m)
BANDS = ["B02", "B03", "B04", "B05", "B06", "B07", "B08", "B11", "B12"]

print("Params set:")
print("  AOI center:", center_lat, center_lon)
print("  Buffer (km):", buffer_km)
print("  Dates:", DATE_START, "to", DATE_END)
print("  Product:", HLS_COLLECTION)
print("  N_SAMPLES:", N_SAMPLES)
This cell runs the same search/selection logic and prints counts and examples, without downloading files. On the Microsoft Planetary Computer, HLS v2 collections are hls2-s30 (Sentinel-2) and hls2-l30 (Landsat).
import datetime as dt
from collections import Counter
from pystac_client import Client
import geopandas as gpd
from shapely.geometry import Point

# Build the AOI: buffer the campus point in a metric CRS (California Albers),
# then project the buffered polygon back to WGS84 for the STAC query.
wgs84 = "EPSG:4326"
gdf = gpd.GeoDataFrame(geometry=[Point(center_lon, center_lat)], crs=wgs84)
gdf_m = gdf.to_crs("EPSG:3310")
aoi = gdf_m.buffer(buffer_km * 1000).to_crs(wgs84).iloc[0]

catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
coll_id = "hls2-s30" if HLS_COLLECTION.upper().startswith("HLSS30") else "hls2-l30"

search = catalog.search(
    collections=[coll_id],
    datetime=f"{DATE_START}/{DATE_END}",
    intersects=aoi.__geo_interface__,
    query={"eo:cloud_cover": {"lt": MAX_CLOUD}},
    limit=500,
)
# items() replaces get_items(), which pystac-client deprecated (FutureWarning).
items = list(search.items())
print("Search returned:", len(items), "items")

if not items:
    print("No items found. Try loosening dates, clouds, or enlarging buffer.")
else:
    months = [
        dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", "")).month
        for it in items
    ]
    clouds = [it.properties.get("eo:cloud_cover", None) for it in items]
    print("Month distribution (first 10 shown):", list(Counter(months).items())[:10])
    clouds_non_none = [c for c in clouds if c is not None]
    if clouds_non_none:
        print(
            "Cloud cover: min=", min(clouds_non_none),
            "median~", sorted(clouds_non_none)[len(clouds_non_none) // 2],
            "max=", max(clouds_non_none),
        )

    def sort_key(it):
        # Prefer drier months, then lower cloud cover, then earlier date.
        d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", ""))
        month_bias = 0 if d.month in PREFERRED_MONTHS else 1
        return (month_bias, it.properties.get("eo:cloud_cover", 100), d)

    items.sort(key=sort_key)
    print("Top 5 items after sort:")
    for it in items[:5]:
        d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", ""))
        print(" -", it.id, "date=", d.date(), "cloud=", it.properties.get("eo:cloud_cover", None))

    # Spreading selection (skip near-duplicates within +/- 10 days of a pick)
    selected = []
    used_days = set()
    for it in items:
        d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", ""))
        yearday = (d.year, d.timetuple().tm_yday)
        if any(
            abs(d.timetuple().tm_yday - ud) <= 10 and d.year == uy
            for (uy, ud) in used_days
        ):
            continue
        selected.append(it)
        used_days.add(yearday)
        if len(selected) >= N_SAMPLES:
            break

    print("Selected:", len(selected), "items (target=", N_SAMPLES, ")")
    for it in selected:
        d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", ""))
        print(" *", it.id, "->", d.date(), "cloud=", it.properties.get("eo:cloud_cover", None))
/Users/kellycaylor/mambaforge/envs/geoAI/lib/python3.11/site-packages/pystac_client/item_search.py:925: FutureWarning:
get_items() is deprecated, use items() instead
The block below defines a single function that performs the search, selection, clipping, and save steps. It is defined locally for this exercise so it works standalone in any environment.
from __future__ import annotationsimport datetime as dtfrom pathlib import Pathfrom typing import List, Optional, Sequenceimport geopandas as gpdfrom shapely.geometry import Pointimport rioxarray as rxr # noqa: F401 # rioxarray registers .rio accessor on xarrayfrom pystac_client import Clientimport planetary_computer as pcfrom stackstac import stackimport numpy as npdef download_hls_samples( out_dir: Path, center_lat: float, center_lon: float, buffer_km: float, date_start: str, date_end: str, stac_url: str="https://planetarycomputer.microsoft.com/api/stac/v1", hls_collection: str="HLSS30", n_samples: int=4, max_cloud: int=20, preferred_months: Optional[Sequence[int]] =tuple(range(4, 11)), bands: Sequence[str] = ("B02","B03","B04","B05","B06","B07","B08","B11","B12"), resolution: int=30, name_prefix: str="SantaBarbara", verbose: bool=False,) -> List[Path]:""" Download and crop up to `n_samples` HLS scenes near a point and save band stacks. - Searches the Microsoft Planetary Computer STAC for `hls_collection` within the given date window and AOI buffer. - Sorts by a light month bias (prefer clearer months), then cloud cover, then time. - Skips near-duplicates within Β±10 days to spread scenes across the window. - Stacks requested `bands` into a single GeoTIFF per scene. Returns a list of written file paths. 
""" out_dir = Path(out_dir) out_dir.mkdir(parents=True, exist_ok=True)# 1) Build AOI polygon from center and buffer wgs84 ="EPSG:4326" gdf = gpd.GeoDataFrame(geometry=[Point(center_lon, center_lat)], crs=wgs84) gdf_m = gdf.to_crs("EPSG:3310") # California Albers (metric) for buffering aoi = gdf_m.buffer(buffer_km *1000).to_crs(wgs84).iloc[0]# 2) STAC search (MPC HLS v2): use hls2-s30 (Sentinel-2) or hls2-l30 (Landsat) catalog = Client.open(stac_url) normalized = hls_collection.upper() coll_id ="hls2-s30"if normalized.startswith("HLSS30") else"hls2-l30"if verbose:print("[hls] using MPC collection:", coll_id) search = catalog.search( collections=[coll_id], intersects=aoi.__geo_interface__, datetime=f"{date_start}/{date_end}", query={"eo:cloud_cover": {"lt": float(max_cloud)}}, max_items=500, )# Some servers expose .items(), others .get_items()try: items =list(search.items())exceptException: items =list(search.get_items())if verbose:print("[hls] search returned:", len(items))ifnot items:return []# 3) Light preference for drier months, then lower clouds, then earlier datedef _sort_key(it): d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", "")) month_bias =0if (preferred_months and d.month in preferred_months) else1return (month_bias, it.properties.get("eo:cloud_cover", 100), d) items.sort(key=_sort_key)if verbose:print("[hls] after sort, first 3 ids:")for it in items[:3]: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" -", it.id, d.date(), it.properties.get("eo:cloud_cover", None))# 4) Choose spaced scenes (avoid near-duplicates within Β±10 days) selected = [] used_days: set[tuple[int, int]] =set()for it in items: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", "")) yearday = (d.year, d.timetuple().tm_yday)ifany(abs(d.timetuple().tm_yday - ud) <=10and d.year == uy for (uy, ud) in used_days):continue selected.append(it) used_days.add(yearday)iflen(selected) >= n_samples:breakif verbose:print("[hls] 
selected:", len(selected), "(target=", n_samples, ")")for it in selected: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" *", it.id, d.date(), it.properties.get("eo:cloud_cover", None))ifnot selected:return []# 5) Sign assets and write clipped band stacks signed_items = [pc.sign(it) for it in selected] product_suffix = ("S30"if hls_collection.upper().endswith("S30") else ("L30"if hls_collection.upper().endswith("L30") else"HLS") ) written: List[Path] = []for it_signed, it_orig inzip(signed_items, selected): props = it_orig.properties or {} tile_id = props.get("hls:tile_id") or props.get("mgrs:tile")ifnot tile_id: item_id =getattr(it_orig, "id", None)ifisinstance(item_id, str): parts = item_id.split(".")iflen(parts) >=3and parts[2].startswith("T"): tile_id = parts[2]ifnot tile_id: tile_id ="TXXXXXX" dt_iso = props.get("datetime") or"" dt_obj = dt.datetime.fromisoformat(dt_iso.replace("Z", "")) yyyyddd =f"{dt_obj.year}{dt_obj.timetuple().tm_yday:03d}" hhmmss = dt_obj.strftime("%H%M%S") out_name =f"{name_prefix}_HLS.{product_suffix}.{tile_id}.{yyyyddd}T{hhmmss}.v2.0_cropped.tif" out_path = out_dir / out_nametry:if verbose:print("[hls] writing:", out_path) single = {"type": "FeatureCollection", "features": [it_signed]}# Choose a consistent EPSG for stackstac to avoid CRS inference failures epsg_val = props.get("proj:epsg")ifnot epsg_val: utm_zone =Noneif tile_id andlen(tile_id) >=3and tile_id[1:3].isdigit(): utm_zone =int(tile_id[1:3])ifnot utm_zone: lon_center = (aoi.bounds[0] + aoi.bounds[2]) /2.0 utm_zone =int(math.floor((lon_center +180) /6) +1) northern =Trueif tile_id andlen(tile_id) >=4: lat_band = tile_id[3] northern = lat_band >='N'else: lat_center = (aoi.bounds[1] + aoi.bounds[3]) /2.0 northern = lat_center >=0 epsg_val = (32600if northern else32700) +int(utm_zone)# Select bands required by the demo in reflectance units export_bands = ( ["B02", "B03", "B04", "B8A", "B11", "B12"]if hls_collection.upper().startswith("HLSS30")else 
["B02", "B03", "B04", "B05", "B06", "B07"] ) da = stack( [it_signed], assets=export_bands, chunksize=1024, resolution=resolution, epsg=epsg_val, dtype="float64", rescale=True, fill_value=np.nan, ) da = da.compute()if"time"in da.dims:# Use the first (and only) time slice when stacking a single item da = da.isel(time=0, drop=True) da = da.transpose("band", "y", "x") da = da.assign_coords(band=("band", export_bands))# Ensure CRS is set to target EPSG and clip using AOI polygon da = da.rio.write_crs(f"EPSG:{epsg_val}", inplace=True) da_clipped = da.rio.clip([aoi.__geo_interface__], crs="EPSG:4326", drop=False) da_clipped.rio.to_raster( out_path, dtype="float64", compress="deflate", predictor=3, tiled=True, BIGTIFF="IF_SAFER", ) written.append(out_path)if verbose:print("[hls] wrote:", out_path)exceptExceptionas e:print("[hls] ERROR writing", out_path, "->", repr(e))return written
Run the download (small sample)
This will contact the Planetary Computer and write a couple of small Cloud-Optimized GeoTIFFs. If you are on a slow connection, reduce N_SAMPLES=1.
# Run the small-sample download. Failures are reported rather than raised so the
# directory listing below always runs.
try:
    saved = download_hls_samples(
        out_dir=OUT_DIR,
        center_lat=center_lat,
        center_lon=center_lon,
        buffer_km=buffer_km,
        date_start=DATE_START,
        date_end=DATE_END,
        stac_url="https://planetarycomputer.microsoft.com/api/stac/v1",
        hls_collection=HLS_COLLECTION,
        n_samples=N_SAMPLES,
        max_cloud=MAX_CLOUD,
        preferred_months=PREFERRED_MONTHS,
        bands=BANDS,
        name_prefix="SantaBarbara",
        verbose=True,
    )
    print(f"Saved {len(saved)} files:")
    for p in saved:
        print(" -", p)
except Exception as e:
    print("Run failed:", repr(e))

# List what is in OUT_DIR after the run (sizes help sanity-check the clipping)
print("\nListing OUT_DIR:", OUT_DIR, "(abs=", OUT_DIR.resolve(), ")")
if OUT_DIR.exists():
    for p in sorted(OUT_DIR.glob("*.tif")):
        try:
            size_kb = p.stat().st_size // 1024
        except Exception:
            size_kb = "?"
        print(" -", p.name, "[", size_kb, "KB ]")
else:
    print("OUT_DIR does not exist.")
/Users/kellycaylor/mambaforge/envs/geoAI/lib/python3.11/site-packages/pystac_client/item_search.py:925: FutureWarning:
get_items() is deprecated, use items() instead
A) intersects only: 584
sample: HLS.S30.T10SGD.2024366T184709.v2.0 2024-12-31 cloud= 36.0
Variant B — intersects + cloud filter
Precision: same spatial precision as A
Efficiency: reduces result volume; faster subsequent processing
Tradeoff: stricter cloud threshold can exclude valid scenes; consider seasonality
You should see .tif files in OUT_DIR (default ../data/hls_santabarbara) with names like SantaBarbara_HLS.S30.<TILE>.<YYYYDDD>T<HHMMSS>.v2.0_cropped.tif.
The band order is (band, y, x) and band names are attached to the band coordinate.
Try changing HLS_COLLECTION to HLSL30 (Landsat flavor) or adjust buffer_km and re-run to see the impact on scene availability.
Source Code
---title: "Download HLS (HLSS30/HLSL30) samples near UCSB"format: htmljupyter: geoai---## Why this mattersWe often want a small, well-curated set of satellite chips for teaching and prototyping. This exercise shows how to search the Microsoft Planetary Computer STAC, prefer clear scenes, clip to an area-of-interest, and save compact band stacks. ---## Parameters for this exerciseRun this cell to define a small AOI centered on the UCSB campus and a modest time window. Keep `N_SAMPLES` small so this finishes quickly.```{python}from pathlib import Pathimport math# Output directory for this demo runOUT_DIR = Path("../data/hls_santabarbara")# Center on UCSB campus and buffer ~7 km (adjust as you like)center_lat, center_lon =34.4138, -119.8489buffer_km =10# Time window to search (narrow = faster)DATE_START ="2018-01-01"DATE_END ="2024-12-31"# Choose HLSS30 (Sentinel-2) or HLSL30 (Landsat)HLS_COLLECTION ="HLSS30"# or "HLSL30"# Keep downloads small for a quick exerciseN_SAMPLES =2# Basic filteringMAX_CLOUD =20# % cloud cover thresholdPREFERRED_MONTHS =list(range(4, 11)) # bias to AprilβOct for fewer clouds# Bands to export (common for HLS at 30 m)BANDS = ["B02","B03","B04","B05","B06","B07","B08","B11","B12"]print("Params set:")print(" AOI center:", center_lat, center_lon)print(" Buffer (km):", buffer_km)print(" Dates:", DATE_START, "to", DATE_END)print(" Product:", HLS_COLLECTION)print(" N_SAMPLES:", N_SAMPLES)```---## Testing environment and pathsBefore we attempt to download any data, let's make sure our paths, environment, and write permissions are correct.```{python}import os, sysfrom pathlib import Pathprint("CWD:", os.getcwd())print("Python version:", sys.version.split()[0])print("OUT_DIR (raw):", OUT_DIR)try:print("OUT_DIR (abs):", OUT_DIR.resolve())exceptExceptionas e:print("OUT_DIR.resolve() failed:", e)print("OUT_DIR exists before mkdir:", OUT_DIR.exists())try: OUT_DIR.mkdir(parents=True, exist_ok=True) test_file = OUT_DIR /".write_test" 
test_file.write_text("ok", encoding="utf-8")print("Write test:", "success", "->", test_file) test_file.unlink(missing_ok=True)exceptExceptionas e:print("Write test: FAILED ->", repr(e))```---## STAC search previewThis cell runs the same search/selection logic and prints counts and examples, without downloading files. On the Microsoft Planetary Computer, HLS v2 collections are `hls2-s30` (Sentinel-2) and `hls2-l30` (Landsat).```{python}import datetime as dtfrom collections import Counterfrom pystac_client import Clientimport geopandas as gpdfrom shapely.geometry import Pointwgs84 ="EPSG:4326"gdf = gpd.GeoDataFrame(geometry=[Point(center_lon, center_lat)], crs=wgs84)gdf_m = gdf.to_crs("EPSG:3310")aoi = gdf_m.buffer(buffer_km *1000).to_crs(wgs84).iloc[0]catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")coll_id ="hls2-s30"if HLS_COLLECTION.upper().startswith("HLSS30") else"hls2-l30"search = catalog.search( collections=[coll_id], datetime=f"{DATE_START}/{DATE_END}", intersects=aoi.__geo_interface__, query={"eo:cloud_cover": {"lt": MAX_CLOUD}, }, limit=500,)items =list(search.get_items())print("Search returned:", len(items), "items")ifnot items:print("No items found. 
Try loosening dates, clouds, or enlarging buffer.")else: months = [dt.datetime.fromisoformat(it.properties["datetime"].replace("Z","")).month for it in items] clouds = [it.properties.get("eo:cloud_cover", None) for it in items]print("Month distribution (first 10 shown):", list(Counter(months).items())[:10]) clouds_non_none = [c for c in clouds if c isnotNone]if clouds_non_none:print("Cloud cover: min=", min(clouds_non_none), "median~", sorted(clouds_non_none)[len(clouds_non_none)//2], "max=", max(clouds_non_none))def sort_key(it): d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z","")) month_bias =0if d.month in PREFERRED_MONTHS else1return (month_bias, it.properties.get("eo:cloud_cover", 100), d) items.sort(key=sort_key)print("Top 5 items after sort:")for it in items[:5]: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" -", it.id, "date=", d.date(), "cloud=", it.properties.get("eo:cloud_cover", None))# Spreading selection (Β±10 days) selected = [] used_days =set()for it in items: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z","")) yearday = (d.year, d.timetuple().tm_yday)ifany(abs(d.timetuple().tm_yday - ud) <=10and d.year == uy for (uy, ud) in used_days):continue selected.append(it) used_days.add(yearday)iflen(selected) >= N_SAMPLES:breakprint("Selected:", len(selected), "items (target=", N_SAMPLES, ")")for it in selected: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" *", it.id, "->", d.date(), "cloud=", it.properties.get("eo:cloud_cover", None))```---## Reusable utility (defined in-notebook)The block below defines a single function that performs the search, selection, clipping, and save steps. 
It is defined locally for this exercise so it works standalone in any environment.```{python}from __future__ import annotationsimport datetime as dtfrom pathlib import Pathfrom typing import List, Optional, Sequenceimport geopandas as gpdfrom shapely.geometry import Pointimport rioxarray as rxr # noqa: F401 # rioxarray registers .rio accessor on xarrayfrom pystac_client import Clientimport planetary_computer as pcfrom stackstac import stackimport numpy as npdef download_hls_samples( out_dir: Path, center_lat: float, center_lon: float, buffer_km: float, date_start: str, date_end: str, stac_url: str="https://planetarycomputer.microsoft.com/api/stac/v1", hls_collection: str="HLSS30", n_samples: int=4, max_cloud: int=20, preferred_months: Optional[Sequence[int]] =tuple(range(4, 11)), bands: Sequence[str] = ("B02","B03","B04","B05","B06","B07","B08","B11","B12"), resolution: int=30, name_prefix: str="SantaBarbara", verbose: bool=False,) -> List[Path]:""" Download and crop up to `n_samples` HLS scenes near a point and save band stacks. - Searches the Microsoft Planetary Computer STAC for `hls_collection` within the given date window and AOI buffer. - Sorts by a light month bias (prefer clearer months), then cloud cover, then time. - Skips near-duplicates within Β±10 days to spread scenes across the window. - Stacks requested `bands` into a single GeoTIFF per scene. Returns a list of written file paths. 
""" out_dir = Path(out_dir) out_dir.mkdir(parents=True, exist_ok=True)# 1) Build AOI polygon from center and buffer wgs84 ="EPSG:4326" gdf = gpd.GeoDataFrame(geometry=[Point(center_lon, center_lat)], crs=wgs84) gdf_m = gdf.to_crs("EPSG:3310") # California Albers (metric) for buffering aoi = gdf_m.buffer(buffer_km *1000).to_crs(wgs84).iloc[0]# 2) STAC search (MPC HLS v2): use hls2-s30 (Sentinel-2) or hls2-l30 (Landsat) catalog = Client.open(stac_url) normalized = hls_collection.upper() coll_id ="hls2-s30"if normalized.startswith("HLSS30") else"hls2-l30"if verbose:print("[hls] using MPC collection:", coll_id) search = catalog.search( collections=[coll_id], intersects=aoi.__geo_interface__, datetime=f"{date_start}/{date_end}", query={"eo:cloud_cover": {"lt": float(max_cloud)}}, max_items=500, )# Some servers expose .items(), others .get_items()try: items =list(search.items())exceptException: items =list(search.get_items())if verbose:print("[hls] search returned:", len(items))ifnot items:return []# 3) Light preference for drier months, then lower clouds, then earlier datedef _sort_key(it): d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", "")) month_bias =0if (preferred_months and d.month in preferred_months) else1return (month_bias, it.properties.get("eo:cloud_cover", 100), d) items.sort(key=_sort_key)if verbose:print("[hls] after sort, first 3 ids:")for it in items[:3]: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" -", it.id, d.date(), it.properties.get("eo:cloud_cover", None))# 4) Choose spaced scenes (avoid near-duplicates within Β±10 days) selected = [] used_days: set[tuple[int, int]] =set()for it in items: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z", "")) yearday = (d.year, d.timetuple().tm_yday)ifany(abs(d.timetuple().tm_yday - ud) <=10and d.year == uy for (uy, ud) in used_days):continue selected.append(it) used_days.add(yearday)iflen(selected) >= n_samples:breakif verbose:print("[hls] 
selected:", len(selected), "(target=", n_samples, ")")for it in selected: d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" *", it.id, d.date(), it.properties.get("eo:cloud_cover", None))ifnot selected:return []# 5) Sign assets and write clipped band stacks signed_items = [pc.sign(it) for it in selected] product_suffix = ("S30"if hls_collection.upper().endswith("S30") else ("L30"if hls_collection.upper().endswith("L30") else"HLS") ) written: List[Path] = []for it_signed, it_orig inzip(signed_items, selected): props = it_orig.properties or {} tile_id = props.get("hls:tile_id") or props.get("mgrs:tile")ifnot tile_id: item_id =getattr(it_orig, "id", None)ifisinstance(item_id, str): parts = item_id.split(".")iflen(parts) >=3and parts[2].startswith("T"): tile_id = parts[2]ifnot tile_id: tile_id ="TXXXXXX" dt_iso = props.get("datetime") or"" dt_obj = dt.datetime.fromisoformat(dt_iso.replace("Z", "")) yyyyddd =f"{dt_obj.year}{dt_obj.timetuple().tm_yday:03d}" hhmmss = dt_obj.strftime("%H%M%S") out_name =f"{name_prefix}_HLS.{product_suffix}.{tile_id}.{yyyyddd}T{hhmmss}.v2.0_cropped.tif" out_path = out_dir / out_nametry:if verbose:print("[hls] writing:", out_path) single = {"type": "FeatureCollection", "features": [it_signed]}# Choose a consistent EPSG for stackstac to avoid CRS inference failures epsg_val = props.get("proj:epsg")ifnot epsg_val: utm_zone =Noneif tile_id andlen(tile_id) >=3and tile_id[1:3].isdigit(): utm_zone =int(tile_id[1:3])ifnot utm_zone: lon_center = (aoi.bounds[0] + aoi.bounds[2]) /2.0 utm_zone =int(math.floor((lon_center +180) /6) +1) northern =Trueif tile_id andlen(tile_id) >=4: lat_band = tile_id[3] northern = lat_band >='N'else: lat_center = (aoi.bounds[1] + aoi.bounds[3]) /2.0 northern = lat_center >=0 epsg_val = (32600if northern else32700) +int(utm_zone)# Select bands required by the demo in reflectance units export_bands = ( ["B02", "B03", "B04", "B8A", "B11", "B12"]if hls_collection.upper().startswith("HLSS30")else 
["B02", "B03", "B04", "B05", "B06", "B07"] ) da = stack( [it_signed], assets=export_bands, chunksize=1024, resolution=resolution, epsg=epsg_val, dtype="float64", rescale=True, fill_value=np.nan, ) da = da.compute()if"time"in da.dims:# Use the first (and only) time slice when stacking a single item da = da.isel(time=0, drop=True) da = da.transpose("band", "y", "x") da = da.assign_coords(band=("band", export_bands))# Ensure CRS is set to target EPSG and clip using AOI polygon da = da.rio.write_crs(f"EPSG:{epsg_val}", inplace=True) da_clipped = da.rio.clip([aoi.__geo_interface__], crs="EPSG:4326", drop=False) da_clipped.rio.to_raster( out_path, dtype="float64", compress="deflate", predictor=3, tiled=True, BIGTIFF="IF_SAFER", ) written.append(out_path)if verbose:print("[hls] wrote:", out_path)exceptExceptionas e:print("[hls] ERROR writing", out_path, "->", repr(e))return written```---## Run the download (small sample)This will contact the Planetary Computer and write a couple of small Cloud-Optimized GeoTIFFs. 
If you are on a slow connection, reduce `N_SAMPLES=1`.```{python}import ostry: saved = download_hls_samples( out_dir=OUT_DIR, center_lat=center_lat, center_lon=center_lon, buffer_km=buffer_km, date_start=DATE_START, date_end=DATE_END, stac_url="https://planetarycomputer.microsoft.com/api/stac/v1", hls_collection=HLS_COLLECTION, n_samples=N_SAMPLES, max_cloud=MAX_CLOUD, preferred_months=PREFERRED_MONTHS, bands=BANDS, name_prefix="SantaBarbara", verbose=True, )print(f"Saved {len(saved)} files:")for p in saved:print(" -", p)exceptExceptionas e:print("Run failed:", repr(e))# List what is in OUT_DIR after runprint("\nListing OUT_DIR:", OUT_DIR, "(abs=", OUT_DIR.resolve(), ")")if OUT_DIR.exists():for p insorted(OUT_DIR.glob("*.tif")):try: size_kb = p.stat().st_size //1024exceptException: size_kb ="?"print(" -", p.name, "[", size_kb, "KB ]")else:print("OUT_DIR does not exist.")```---## Catalog introspection and search variantsThis section explores tradeoffs for spatial and property filters when searching MPCβs HLS v2 collections (`hls2-s30` or `hls2-l30`).### Setup```{python}from pystac_client import Clientimport datetime as dtimport geopandas as gpdfrom shapely.geometry import Pointcatalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")# Build AOI polygon and bboxwgs84 ="EPSG:4326"gdf = gpd.GeoDataFrame(geometry=[Point(center_lon, center_lat)], crs=wgs84)gdf_m = gdf.to_crs("EPSG:3310")aoi = gdf_m.buffer(buffer_km *1000).to_crs(wgs84).iloc[0]bbox = aoi.boundscoll_id ="hls2-s30"if HLS_COLLECTION.upper().startswith("HLSS30") else"hls2-l30"print("Using collection:", coll_id)```#### Variant A β intersects only (precise geometry)- Precision: clips to the actual AOI polygon; avoids extra items outside the shape- Performance: may be slower than bbox for very complex polygons- Use when: AOI is irregular and you want tight spatial relevance```{python}search = catalog.search( collections=[coll_id], intersects=aoi.__geo_interface__, 
datetime=f"{DATE_START}/{DATE_END}", limit=100,)items =list(search.get_items())print("A) intersects only:", len(items))if items: it = items[0] d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" sample:", it.id, d.date(), "cloud=", it.properties.get("eo:cloud_cover"))```#### Variant B β intersects + cloud filter- Precision: same spatial precision as A- Efficiency: reduces result volume; faster subsequent processing- Tradeoff: stricter cloud threshold can exclude valid scenes; consider seasonality```{python}search = catalog.search( collections=[coll_id], intersects=aoi.__geo_interface__, datetime=f"{DATE_START}/{DATE_END}", query={"eo:cloud_cover": {"lt": MAX_CLOUD}}, limit=100,)items =list(search.get_items())print("B) intersects + cloud:", len(items))if items: it = items[0] d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" sample:", it.id, d.date(), "cloud=", it.properties.get("eo:cloud_cover"))```#### Variant C β bbox only (fast bounding box)- Performance: typically faster than polygon intersects- Coverage: may include items just outside the AOI polygon (since bbox is larger)- Use when: speed matters or AOI is roughly rectangular/simple```{python}search = catalog.search( collections=[coll_id], bbox=bbox, datetime=f"{DATE_START}/{DATE_END}", limit=100,)items =list(search.get_items())print("C) bbox only:", len(items))if items: it = items[0] d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" sample:", it.id, d.date(), "cloud=", it.properties.get("eo:cloud_cover"))```#### Variant D β bbox + cloud filter- Balanced: faster spatial test plus reduced results via cloud filter- Tradeoff: may still include items only partially overlapping the AOI polygon- Tip: you can refine later by client-side clipping```{python}search = catalog.search( collections=[coll_id], bbox=bbox, datetime=f"{DATE_START}/{DATE_END}", query={"eo:cloud_cover": {"lt": MAX_CLOUD}}, limit=100,)items 
=list(search.get_items())print("D) bbox + cloud:", len(items))if items: it = items[0] d = dt.datetime.fromisoformat(it.properties["datetime"].replace("Z",""))print(" sample:", it.id, d.date(), "cloud=", it.properties.get("eo:cloud_cover"))```---## What to notice- You should see `.tif` files in `OUT_DIR` (default `../data/hls_santabarbara`) with names like `SantaBarbara_HLS.S30.<TILE>.<YYYYDDD>T<HHMMSS>.v2.0_cropped.tif`.- The band order is `(band, y, x)` and band names are attached to the `band` coordinate.- Try changing `HLS_COLLECTION` to `HLSL30` (Landsat flavor) or adjust `buffer_km` and re-run to see the impact on scene availability.