Problem retrieving data from registry using Butler

Hello,
I am encountering a ValueError while attempting to retrieve the ccdVisitTable and calexp datasets using the LSST Butler. Here is the traceback for ccdVisitTable:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastores/file_datastore/get.py:220, in _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent)
    219 try:
--> 220     result = formatter.read(
    221         component=getInfo.component if isComponent else None,
    222         expected_size=recorded_size,
    223         cache_manager=cache_manager,
    224     )
    225 except (FileNotFoundError, FileIntegrityError):
    226     # This is expected for the case where the resource is missing
    227     # or the information we passed to the formatter about the file size
    228     # is incorrect.
    229     # Allow them to propagate up.

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/_formatter.py:513, in FormatterV2.read(self, component, expected_size, cache_manager)
    512 if self.can_read_from_local_file or self.can_read_from_uri:
--> 513     result = self.read_from_possibly_cached_local_file(
    514         component, expected_size, cache_manager=cache_manager
    515     )
    516     if result is not NotImplemented:

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/_formatter.py:759, in FormatterV2.read_from_possibly_cached_local_file(self, component, expected_size, cache_manager)
    758         if can_be_cached:
--> 759             cache_manager.move_to_cache(local_uri, cache_ref)
    761 return result

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:742, in DatastoreCacheManager.move_to_cache(self, uri, ref)
    740 # Run cache expiry to ensure that we have room for this
    741 # item.
--> 742 self._expire_cache()
    744 # The above reset the in-memory cache status. It's entirely possible
    745 # that another process has just cached this file (if multiple
    746 # processes are caching on read), so check our in-memory cache
    747 # before attempting to cache the dataset.

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:1016, in DatastoreCacheManager._expire_cache(self)
   1013 # Sync up cache. There is no file locking involved so for a shared
   1014 # cache multiple processes may be racing to delete files. Deleting
   1015 # a file that no longer exists is not an error.
-> 1016 self.scan_cache()
   1018 if self._expiration_mode == "files":

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:897, in DatastoreCacheManager.scan_cache(self)
    895     continue
--> 897 path_in_cache = self._register_cache_entry(file, can_exist=True)
    898 if path_in_cache:

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:877, in DatastoreCacheManager._register_cache_entry(self, cached_location, can_exist)
    876 try:
--> 877     details = CacheEntry.from_file(cached_location, root=self.cache_directory)
    878 except FileNotFoundError:

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:169, in CacheEntry.from_file(cls, file, root)
    168     raise ValueError(f"Supplied file {file} is not inside root {root}")
--> 169 id_, component, _ = _parse_cache_name(file_in_cache)
    171 stat = os.stat(file.ospath)

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastore/cache_manager.py:132, in _parse_cache_name(cached_location)
    131 parts = root.split("_")
--> 132 id_ = uuid.UUID(parts.pop(0))
    133 component = parts.pop(0) if parts else None

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/lib/python3.11/uuid.py:178, in UUID.__init__(self, hex, bytes, bytes_le, fields, int, version, is_safe)
    177 if len(hex) != 32:
--> 178     raise ValueError('badly formed hexadecimal UUID string')
    179 int = int_(hex, 16)

ValueError: badly formed hexadecimal UUID string

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In[2], line 15
     13 schema = process.measure_task()
     14 params = [{"t_0": 60700.5, "t_E": 110, "u_0": 0.5, "m_base": m} for m in np.linspace(18,24,n)]
---> 15 process.add_lc(params[0], ra=57.59451632893858, dec=-32.481152201226145)
     16 for p in params[1:]:
     17     process.add_lc(p, dist=0.2)

File ~/DP0_microlensing/task.py:72, in Run.add_lc(self, params, model, ra, dec, dist, plot)
     70 lc = LightCurve(ra, dec)
     71 if len(self.inj_lc) == 0:
---> 72     lc.collect_calexp(self.htm_level)
     73     self.calexp_data_ref = lc.calexp_data_ref
     74     self.mjds = lc.data["mjd"]

File ~/DP0_microlensing/light_curves.py:87, in LightCurve.collect_calexp(self, level)
     85 print(f"Found {len(datasetRefs)} calexps")
     86 # ccd_visit = list(butler.registry.queryDatasets("ccdVisitTable", collections=collections))[0] #
---> 87 butler.get('ccdVisitTable')
     88 # dataId = {'visit': 192350, 'detector': 175}
     89 # visitInfo = butler.get('calexp.visitInfo', dataId=dataId)
     90 mjds = []; detectors = [] ; visits = []; nans = []

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/direct_butler/_direct_butler.py:1174, in DirectButler.get(self, datasetRefOrType, dataId, parameters, collections, storageClass, timespan, **kwargs)
   1165 _LOG.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters)
   1166 ref = self._findDatasetRef(
   1167     datasetRefOrType,
   1168     dataId,
   (...)
   1172     **kwargs,
   1173 )
-> 1174 return self._datastore.get(ref, parameters=parameters, storageClass=storageClass)

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastores/chainedDatastore.py:394, in ChainedDatastore.get(self, ref, parameters, storageClass)
    392 for datastore in self.datastores:
    393     try:
--> 394         inMemoryObject = datastore.get(ref, parameters, storageClass=storageClass)
    395         log.debug("Found dataset %s in datastore %s", ref, datastore.name)
    396         return inMemoryObject

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastores/fileDatastore.py:2055, in FileDatastore.get(self, ref, parameters, storageClass)
   2052     ref = ref.overrideStorageClass(storageClass)
   2054 allGetInfo = self._prepare_for_direct_get(ref, parameters)
-> 2055 return get_dataset_as_python_object_from_get_info(
   2056     allGetInfo, ref=ref, parameters=parameters, cache_manager=self.cacheManager
   2057 )

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastores/file_datastore/get.py:443, in get_dataset_as_python_object_from_get_info(allGetInfo, ref, parameters, cache_manager)
    436 else:
    437     # For an assembled composite this could be a derived
    438     # component derived from a real component. The validity
    439     # of the parameters is not clear. For now validate against
    440     # the composite storage class
    441     getInfo.formatter.file_descriptor.storageClass.validateParameters(parameters)
--> 443 return _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent=isComponent)

File /opt/lsst/software/stack/conda/envs/lsst-scipipe-9.0.0/share/eups/Linux64/daf_butler/g588e17ab67+fac83bd63d/python/lsst/daf/butler/datastores/file_datastore/get.py:237, in _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent)
    235     if notes:
    236         notes = "\n" + notes
--> 237     raise ValueError(
    238         f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
    239         f" ({ref.datasetType.name} from {uri}): {e}{notes}"
    240     ) from e
    242 return post_process_get(
    243     result, ref.datasetType.storageClass, getInfo.assemblerParams, isComponent=isComponent
    244 )

ValueError: Failure from formatter 'lsst.daf.butler.formatters.parquet.ParquetFormatter' for dataset 42c2550b-c69e-4866-8afb-5a0106abb6e7 (ccdVisitTable from s3://butler-us-central1-panda-dev/dc2/2.2i/runs/DP0.2/v23_0_2/PREOPS-905/step7/20220501T161443Z/ccdVisitTable/ccdVisitTable_LSSTCam-imSim_2_2i_runs_DP0_2_v23_0_2_PREOPS-905_step7_20220501T161443Z.parq): badly formed hexadecimal UUID string

The same issue shows up when running the following section of tutorial 4a, Introduction to Data Access with the Butler:

datasetType = 'calexp'
dataId = {'visit': 192350, 'detector': 175}
calexp = butler.get(datasetType, dataId=dataId)

Both pieces of code were running without any issues a few weeks ago.
Thank you in advance for any help.

This looks like a known issue that we’re working on fixing (see a previous instance here). What seems to happen is that unexpected files end up in the cache folder, and the cache manager then fails to parse their names when it scans the cache.
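
For context on why a stray file triggers this particular error: judging from the traceback, cached files are expected to be named after the dataset's UUID, and _parse_cache_name roughly takes the part of the file name before the first underscore and hands it to uuid.UUID. A minimal sketch of that parsing step (the file names below are made up for illustration, not real cache entries):

import uuid

# Hypothetical examples of cache file names: the first follows the
# expected "<uuid>_<component>.<ext>" pattern, the second does not.
good = "42c2550b-c69e-4866-8afb-5a0106abb6e7_visitInfo.json"
bad = "some_leftover_tempfile.parq"

for name in (good, bad):
    prefix = name.split("_")[0]
    try:
        uuid.UUID(prefix)
        print(f"{name!r}: UUID prefix parsed OK")
    except ValueError as err:
        # This is the "badly formed hexadecimal UUID string" from the traceback.
        print(f"{name!r}: {err}")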

In the meantime, you can clear out that cache directory by doing the following from a terminal:

cd $DAF_BUTLER_CACHE_DIRECTORY
pwd
# make sure the directory name printed by the 'pwd' command
# ends in /butler_cache before going to the next step
rm *

After you clear the files from that directory, the issue should go away.
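
If you prefer to do the cleanup from Python (for example from inside the same notebook), something along these lines should be equivalent. This is just a sketch: it assumes DAF_BUTLER_CACHE_DIRECTORY is set in the environment and applies the same /butler_cache sanity check as the 'pwd' step above.

import os
from pathlib import Path

# Locate the Butler datastore cache directory from the environment.
cache_dir = Path(os.environ["DAF_BUTLER_CACHE_DIRECTORY"])

# Same safety check as the 'pwd' step: only proceed if this really
# looks like the butler cache directory.
assert cache_dir.name == "butler_cache", f"Unexpected directory: {cache_dir}"

# Remove regular files in the cache directory (subdirectories are left alone).
for entry in cache_dir.iterdir():
    if entry.is_file():
        entry.unlink()
        print(f"Removed {entry.name}")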

Let us know if you come across any similar issues!
