What's the proper way to fetch DP1 difference images starting from visit image references?

I have a set of visit images fetched by the Butler via:

def fetch_visits(ra: float, dec: float, ti: Time, tf: Time) -> list[DatasetRef]:
    """Query the DP1 repository for ``visit_image`` datasets at a sky position.

    Only datasets whose visit/detector region overlaps the point
    ``(ra, dec)`` and whose visit timespan overlaps ``Timespan(ti, tf)``
    are selected.

    Parameters
    ----------
    ra, dec
        Coordinates substituted into ``POINT(:ra, :dec)`` in the query
        (presumably ICRS degrees -- confirm against the Butler docs).
    ti, tf
        Start and end of the time window handed to ``Timespan``.

    Returns
    -------
    The matching dataset references, ordered by ``visit.timespan.begin``.
    """
    repo = Butler("dp1", collections="LSSTComCam/DP1")
    # Spatial and temporal constraints, combined into one expression.
    constraint = " AND ".join(
        (
            "visit_detector_region.region OVERLAPS POINT(:ra, :dec)",
            "visit.timespan OVERLAPS :timespan",
        )
    )
    # Values for the ``:name`` placeholders used in the constraint.
    bound_values = {"ra": ra, "dec": dec, "timespan": Timespan(ti, tf)}
    return repo.query_datasets(
        "visit_image",
        where=constraint,
        bind=bound_values,
        order_by=["visit.timespan.begin"],
    )

I want to fetch all difference images associated with these visits.
At the moment, I’m using this function:

def fetch_differences(visits_refs: list[DatasetRef]) -> list[DatasetRef]:
    """Return the ``difference_image`` refs matching the given visit image refs.

    A difference image matches when its ``(visit, detector)`` pair is the
    same as that of one of the input references.

    Parameters
    ----------
    visits_refs
        References to visit images; their data IDs must carry ``visit``
        and ``detector`` keys.

    Returns
    -------
    The matching difference image references, in the same relative order
    as ``visits_refs``. Visit images without a difference image are simply
    absent from the result. An empty input yields an empty list without
    querying the repository.
    """
    # Nothing to look up; this also avoids building an empty ``IN ()`` list.
    if not visits_refs:
        return []

    def visit_detector(ref):
        """Return the (visit, detector) pair identifying ``ref``."""
        return ref.dataId["visit"], ref.dataId["detector"]

    # Map each (visit, detector) pair to its position in the original list.
    position = {visit_detector(ref): i for i, ref in enumerate(visits_refs)}
    # Unique visit IDs, first-seen order; several detectors may share a visit.
    visit_ids = tuple(dict.fromkeys(visit for visit, _ in position))

    butler = Butler("dp1", collections="LSSTComCam/DP1")
    # Query for all difference images matching the visit IDs. The IDs are
    # passed through ``bind`` rather than formatted into the expression.
    # Note: this may return refs from detectors not present in visits_refs,
    # so len(candidates) >= len(visits_refs).
    candidates = butler.query_datasets(
        "difference_image",
        where="visit IN (:visit_ids)",
        bind={"visit_ids": visit_ids},
    )
    # Keep only difference images with a matching (visit, detector) pair.
    matching = [ref for ref in candidates if visit_detector(ref) in position]
    # Restore the original order since the query doesn't preserve it.
    matching.sort(key=lambda ref: position[visit_detector(ref)])
    return matching

Which I use via:

# Look up the visit images first, then the difference images derived from them.
visits_refs = fetch_visits(ra, dec, ti, tf)
diff_refs = fetch_differences(visits_refs)
# Sanity check: exactly one difference image per visit image.
assert len(diff_refs) == len(visits_refs)

It seems to work but feels like a hack. What’s the proper way to get references to the difference images associated with a set of visit image dataset references?

As always, thank you.

There’s an “advanced” query context manager interface that can do uploads of DataCoordinate (ref.dataId) objects, and you can also use those data IDs as keys in dictionaries:

def fetch_differences(visits_refs: list[DatasetRef]) -> list[DatasetRef | None]:
    """Return the ``difference_image`` ref for each given visit image ref.

    The data IDs of ``visits_refs`` are joined into a single query, and the
    resulting difference images are matched back to the inputs by data ID.

    Parameters
    ----------
    visits_refs
        References to the visit images of interest.

    Returns
    -------
    A list parallel to ``visits_refs``: entry ``i`` is the difference image
    reference with the same data ID as ``visits_refs[i]``, or ``None`` if
    the query returned none. An empty input yields an empty list without
    querying the repository.
    """
    # With no data IDs there is nothing to join against (an empty upload is
    # presumably rejected by the query interface -- confirm), so stop early.
    if not visits_refs:
        return []
    butler = Butler("dp1", collections="LSSTComCam/DP1")
    visit_image_data_ids = [ref.dataId for ref in visits_refs]
    with butler.query() as query:
        # Restrict the query to exactly the data IDs of the input refs.
        query = query.join_data_coordinates(visit_image_data_ids)
        difference_refs_by_data_id = {
            ref.dataId: ref for ref in query.datasets("difference_image")
        }
    # ``.get`` yields None for any visit image whose difference image is missing.
    return [
        difference_refs_by_data_id.get(data_id)
        for data_id in visit_image_data_ids
    ]

Caveat: I wrote that from memory with no testing; please confirm that it actually works, or let me know if it doesn’t.

1 Like

For future reference, you do not have to build the list of visit IDs into the query string by hand, because you can write visit IN (:visits) in your where clause, with visits bound (via the bind argument) to a tuple of visit IDs.

1 Like

Thank you @jbosch and @timj !