# Source code for pyreduce.datasets

"""
Download example datasets for PyReduce.

Downloads tarballs from the PyReduce server and extracts them
to $REDUCE_DATA (or ~/REDUCE_DATA by default).
"""

import hashlib
import logging
import os
import tarfile
import urllib.request
from os.path import isfile, join

# Module-level logger; get_dataset() reports download/extraction status through
# it and _verify_checksum() reports checksum mismatches.
logger = logging.getLogger(__name__)

# Base URL of the dataset server. get_dataset() builds the download URL as
# SERVER + "<tarball_name>.tar.gz".
SERVER = "https://sme.astro.uu.se/pyreduce/"

# Map instrument name to (tarball_name, sha256_checksum).
#
# * The key is the name accepted by get_dataset() and is also used as the
#   sub-directory name below the data directory.
# * tarball_name is the file stem on the server; it is not always equal to the
#   key (see JWST_NIRISS -> "NIRISS" and KECK_NIRSPEC -> "NIRSPEC").
# * sha256_checksum is the expected SHA-256 hex digest of the downloaded
#   tarball; _verify_checksum() skips verification when it is None.
DATASETS = {
    "UVES": (
        "UVES",
        "1b44274b10e05b62e1d2f37b13495298359f91acaa5e7ee88214740a238bf4ab",
    ),
    "HARPS": (
        "HARPS",
        "c913b7c911de16ed6fc6be1525e315c9b041837bea7a8c7b909eb73b76a80406",
    ),
    "LICK_APF": (
        "LICK_APF",
        "6695064122f69143d5ec548364c93d4ff0f9f5a59c7187eacf7439a697cb523a",
    ),
    "MCDONALD": (
        "MCDONALD",
        "43b060fdb20fa2b1e77d7a391b9a6d2ab6bc7207f80f825d5182324994b4074e",
    ),
    "JWST_MIRI": (
        "JWST_MIRI",
        "569ec6b0a3b5d46fcdbf73c33332eca57aaea29c5ca306f7d685325fb3e5f451",
    ),
    "JWST_NIRISS": (
        "NIRISS",
        "f5a1a2894970471c27e7cbd73aed6027f72cd0df0900ea000119cacb852d72d3",
    ),
    "KECK_NIRSPEC": (
        "NIRSPEC",
        "7005bf1dc6953093866ab8a359d66aa9be8c26fa43d351dcb9c4c0aab1f80b61",
    ),
    "XSHOOTER": (
        "XSHOOTER",
        "4a3a86a50163b4d2136703f953f5e91e0867f8971f726642aba9b96bdb5f551e",
    ),
}


def get_data_dir():
    """Return the default directory for example datasets.

    Returns
    -------
    str
        The value of the ``REDUCE_DATA`` environment variable when it is
        defined (returned as-is, even if empty); otherwise ``~/REDUCE_DATA``
        with the leading ``~`` expanded by ``os.path.expanduser``.
    """
    try:
        return os.environ["REDUCE_DATA"]
    except KeyError:
        # Variable not defined at all: fall back to the per-user default.
        return os.path.expanduser("~/REDUCE_DATA")
def _download_with_progress(url, dest):
    """Download a file with progress indicator.

    The payload is first written to ``<dest>.part`` and only renamed to
    ``dest`` once the transfer has completed. An interrupted or failed
    download therefore never leaves a truncated file at ``dest`` that a later
    call could mistake for a complete one.

    Parameters
    ----------
    url : str
        URL to fetch (anything ``urllib.request.urlretrieve`` accepts).
    dest : str or path-like
        Final location of the downloaded file.

    Raises
    ------
    Exception
        Whatever ``urllib.request.urlretrieve`` raises (e.g.
        ``urllib.error.URLError``); it is re-raised after the partial file
        has been removed.
    """
    bytes_per_mb = 1024 * 1024

    def reporthook(block_num, block_size, total_size):
        downloaded = block_num * block_size
        if total_size > 0:
            # The final block is usually short, so block_num * block_size can
            # overshoot the real size; clamp so "x/y MB" never shows x > y.
            downloaded = min(downloaded, total_size)
            percent = downloaded * 100 // total_size
            mb_downloaded = downloaded / bytes_per_mb
            mb_total = total_size / bytes_per_mb
            # flush: the line has no trailing newline, so a line-buffered
            # stdout would otherwise not show the progress as it happens.
            print(
                f"\r {mb_downloaded:.1f}/{mb_total:.1f} MB ({percent}%)",
                end="",
                flush=True,
            )
        else:
            # Total size unknown (server sent no usable length).
            mb_downloaded = downloaded / bytes_per_mb
            print(f"\r {mb_downloaded:.1f} MB", end="", flush=True)

    partial = os.fspath(dest) + ".part"
    try:
        urllib.request.urlretrieve(url, partial, reporthook)
    except BaseException:
        # BaseException so that Ctrl-C also cleans up the partial file.
        try:
            os.remove(partial)
        except OSError:
            pass  # nothing was written yet
        raise
    finally:
        print()  # newline after progress
    # Atomic on the same filesystem: dest is either absent or complete.
    os.replace(partial, dest)


def _verify_checksum(filepath, expected_sha256):
    """Verify SHA256 checksum of a file.

    Parameters
    ----------
    filepath : str
        File to hash.
    expected_sha256 : str or None
        Expected SHA-256 hex digest (compared case-insensitively). ``None``
        disables verification.

    Returns
    -------
    bool
        ``True`` if ``expected_sha256`` is ``None`` or matches the file's
        digest, ``False`` otherwise (the mismatch is logged as an error).
    """
    if expected_sha256 is None:
        return True

    sha256 = hashlib.sha256()
    with open(filepath, "rb") as f:
        # Hash in chunks so large tarballs are never loaded fully into memory.
        for chunk in iter(lambda: f.read(65536), b""):
            sha256.update(chunk)

    actual = sha256.hexdigest()
    # hexdigest() is always lowercase; accept an upper-case expected value too.
    if actual != expected_sha256.lower():
        logger.error("Checksum mismatch for %s", filepath)
        logger.error(" Expected: %s", expected_sha256)
        logger.error(" Got: %s", actual)
        return False
    return True
def get_dataset(name, local_dir=None):
    """Download and extract a dataset.

    The tarball is stored as ``<local_dir>/<name>/<tarball_name>.tar.gz`` and
    unpacked into ``<local_dir>/<name>/raw``. A tarball that is already
    present is reused without downloading again, and archive members that
    already exist as regular files under ``raw`` are not extracted again.

    Parameters
    ----------
    name : str
        Name of the dataset (e.g., "UVES", "HARPS")
    local_dir : str, optional
        Directory to save data (default: $REDUCE_DATA or ~/REDUCE_DATA)

    Returns
    -------
    str
        Directory where the data was saved

    Raises
    ------
    ValueError
        If ``name`` is not a key of ``DATASETS``.
    RuntimeError
        If a freshly downloaded tarball fails checksum verification (the
        file is removed before raising).
    """
    if name not in DATASETS:
        available = ", ".join(sorted(DATASETS))
        raise ValueError(f"Unknown dataset '{name}'. Available: {available}")

    tarball_name, checksum = DATASETS[name]

    if local_dir is None:
        local_dir = get_data_dir()

    fname = f"{tarball_name}.tar.gz"
    data_dir = join(local_dir, name)
    filepath = join(data_dir, fname)

    os.makedirs(data_dir, exist_ok=True)

    if not isfile(filepath):
        url = SERVER + fname
        logger.info("Downloading %s from %s", name, url)
        logger.info("Saving to %s", data_dir)
        try:
            _download_with_progress(url, filepath)
        except BaseException:
            # An interrupted transfer (including Ctrl-C) must not leave a
            # truncated tarball behind: the next call would take the
            # "existing dataset" branch and skip checksum verification.
            if isfile(filepath):
                os.remove(filepath)
            raise
        if not _verify_checksum(filepath, checksum):
            os.remove(filepath)
            raise RuntimeError(f"Checksum verification failed for {name}")
    else:
        logger.info("Using existing dataset %s", name)

    # Extract
    raw_dir = join(data_dir, "raw")
    with tarfile.open(filepath) as tar:
        # Only unpack members that are not already present as regular files.
        members = [m for m in tar if not isfile(join(raw_dir, m.name))]
        if members:
            logger.info("Extracting %d files", len(members))
            if hasattr(tarfile, "data_filter"):
                # Reject absolute paths, "..", links that point outside
                # raw_dir and special files; interpreters without extraction
                # filter support fall through to the legacy call below.
                tar.extractall(path=raw_dir, members=members, filter="data")
            else:
                tar.extractall(path=raw_dir, members=members)

    return data_dir
# Convenience functions for each instrument
def UVES(local_dir=None):
    """Fetch the UVES example dataset (target: HD132205).

    Thin wrapper that calls ``get_dataset`` with the name ``"UVES"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "UVES"
    return get_dataset(dataset_name, local_dir=local_dir)
def HARPS(local_dir=None):
    """Fetch the HARPS example dataset (target: HD109200).

    Thin wrapper that calls ``get_dataset`` with the name ``"HARPS"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "HARPS"
    return get_dataset(dataset_name, local_dir=local_dir)
def LICK_APF(local_dir=None):
    """Fetch the Lick APF example dataset (target: KIC05005618).

    Thin wrapper that calls ``get_dataset`` with the name ``"LICK_APF"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "LICK_APF"
    return get_dataset(dataset_name, local_dir=local_dir)
def MCDONALD(local_dir=None):
    """Fetch the McDonald Observatory example dataset.

    Thin wrapper that calls ``get_dataset`` with the name ``"MCDONALD"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "MCDONALD"
    return get_dataset(dataset_name, local_dir=local_dir)
def JWST_MIRI(local_dir=None):
    """Fetch the JWST/MIRI example dataset (simulated with MIRIsim).

    Thin wrapper that calls ``get_dataset`` with the name ``"JWST_MIRI"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "JWST_MIRI"
    return get_dataset(dataset_name, local_dir=local_dir)
def JWST_NIRISS(local_dir=None):
    """Fetch the JWST/NIRISS example dataset (simulated with awesimsoss).

    Thin wrapper that calls ``get_dataset`` with the name ``"JWST_NIRISS"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "JWST_NIRISS"
    return get_dataset(dataset_name, local_dir=local_dir)
def KECK_NIRSPEC(local_dir=None):
    """Fetch the Keck/NIRSPEC example dataset (target: GJ1214).

    Thin wrapper that calls ``get_dataset`` with the name ``"KECK_NIRSPEC"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "KECK_NIRSPEC"
    return get_dataset(dataset_name, local_dir=local_dir)
def XSHOOTER(local_dir=None):
    """Fetch the XSHOOTER example dataset (target: Ux-Ori).

    Thin wrapper that calls ``get_dataset`` with the name ``"XSHOOTER"``.

    Parameters
    ----------
    local_dir : str, optional
        Directory to save data; forwarded unchanged to ``get_dataset``.

    Returns
    -------
    str
        The value returned by ``get_dataset`` (directory where the data
        was saved).
    """
    dataset_name = "XSHOOTER"
    return get_dataset(dataset_name, local_dir=local_dir)