Hi
I’ve been trying to get some DP0.2 objectTables from the butler and have been getting errors. Initially these were HTTP 500 errors (after multiple retries), but the latest failures are incomplete reads. The URL being fetched appears to be https://sdfdatas3.slac.stanford.edu/rubin-dp02-products/2.2i/runs/DP0.2/v23_0_1/PREOPS-905/step3_31/20220307T050825Z/objectTable/5074/21/objectTable_5074_21_DC2_2_2i_runs_DP0_2_v23_0_1_PREOPS-905_step3_31_20220307T050825Z.parq
I know there was work on the RSP yesterday, so maybe some connections haven’t come back up? Or is it a permissions thing?
Example code snippet and error returned below.
Thanks
Mike
import lsst.daf.butler as dafButler
# 'dp02' is the repository label for the DP0.2 client/server butler on the RSP
butler = dafButler.Butler('dp02', collections='2.2i/runs/DP0.2')
dataId = {"skymap": "DC2", "tract": 5074, "patch": 21}
object_table = butler.get("objectTable", dataId)  # per-patch objectTable (parquet, read into a DataFrame)
---------------------------------------------------------------------------
IncompleteRead Traceback (most recent call last)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:754, in HTTPResponse._error_catcher(self)
753 try:
--> 754 yield
756 except SocketTimeout as e:
757 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
758 # there is yet no clean way to get at it from this context.
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:900, in HTTPResponse._raw_read(self, amt, read1)
890 if (
891 self.enforce_content_length
892 and self.length_remaining is not None
(...)
898 # raised during streaming, so all calls with incorrect
899 # Content-Length are caught.
--> 900 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
901 elif read1 and (
902 (amt != 0 and not data) or self.length_remaining == len(data)
903 ):
(...)
906 # `http.client.HTTPResponse`, so we close it here.
907 # See https://github.com/python/cpython/issues/113199
IncompleteRead: IncompleteRead(524288 bytes read, 136119870 more expected)
The above exception was the direct cause of the following exception:
ProtocolError Traceback (most recent call last)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/requests/models.py:820, in Response.iter_content.<locals>.generate()
819 try:
--> 820 yield from self.raw.stream(chunk_size, decode_content=True)
821 except ProtocolError as e:
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:1066, in HTTPResponse.stream(self, amt, decode_content)
1065 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
-> 1066 data = self.read(amt=amt, decode_content=decode_content)
1068 if data:
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:983, in HTTPResponse.read(self, amt, decode_content, cache_content)
979 while len(self._decoded_buffer) < amt and data:
980 # TODO make sure to initially read enough data to get past the headers
981 # For example, the GZ file header takes 10 bytes, we don't want to read
982 # it one byte at a time
--> 983 data = self._raw_read(amt)
984 decoded_data = self._decode(data, decode_content, flush_decoder)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:878, in HTTPResponse._raw_read(self, amt, read1)
876 fp_closed = getattr(self._fp, "closed", False)
--> 878 with self._error_catcher():
879 data = self._fp_read(amt, read1=read1) if not fp_closed else b""
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/contextlib.py:158, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
157 try:
--> 158 self.gen.throw(value)
159 except StopIteration as exc:
160 # Suppress StopIteration *unless* it's the same exception that
161 # was passed to throw(). This prevents a StopIteration
162 # raised inside the "with" statement from being suppressed.
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/urllib3/response.py:778, in HTTPResponse._error_catcher(self)
777 arg = f"Connection broken: {e!r}"
--> 778 raise ProtocolError(arg, e) from e
780 except (HTTPException, OSError) as e:
ProtocolError: ('Connection broken: IncompleteRead(524288 bytes read, 136119870 more expected)', IncompleteRead(524288 bytes read, 136119870 more expected))
During handling of the above exception, another exception occurred:
ChunkedEncodingError Traceback (most recent call last)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/datastores/file_datastore/get.py:220, in _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent)
219 try:
--> 220 result = formatter.read(
221 component=getInfo.component if isComponent else None,
222 expected_size=recorded_size,
223 cache_manager=cache_manager,
224 )
225 except (FileNotFoundError, FileIntegrityError):
226 # This is expected for the case where the resource is missing
227 # or the information we passed to the formatter about the file size
228 # is incorrect.
229 # Allow them to propagate up.
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/_formatter.py:515, in FormatterV2.read(self, component, expected_size, cache_manager)
514 if self.can_read_from_local_file or self.can_read_from_uri:
--> 515 result = self.read_from_possibly_cached_local_file(
516 component, expected_size, cache_manager=cache_manager
517 )
518 if result is not NotImplemented:
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/_formatter.py:714, in FormatterV2.read_from_possibly_cached_local_file(self, component, expected_size, cache_manager)
712 msg = ""
--> 714 with uri.as_local() as local_uri:
715 self._check_resource_size(self.file_descriptor.location.uri, expected_size, local_uri.size())
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/resources/g4a157353b6+d65b3c2b70/python/lsst/resources/_resourcePath.py:1113, in ResourcePath.as_local(self, multithreaded, tmpdir)
1112 raise ValueError(f"Temporary directory for as_local must be local resource not {temp_dir}")
-> 1113 local_src, is_temporary = self._as_local(multithreaded=multithreaded, tmpdir=temp_dir)
1114 local_uri = ResourcePath(local_src, isTemporary=is_temporary)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/resources/g4a157353b6+d65b3c2b70/python/lsst/resources/http.py:1548, in HttpResourcePath._as_local(self, multithreaded, tmpdir)
1547 with open(tmp_uri.ospath, "wb", buffering=buffer_size) as tmpFile:
-> 1548 for chunk in resp.iter_content(chunk_size=buffer_size):
1549 tmpFile.write(chunk)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/lib/python3.12/site-packages/requests/models.py:822, in Response.iter_content.<locals>.generate()
821 except ProtocolError as e:
--> 822 raise ChunkedEncodingError(e)
823 except DecodeError as e:
ChunkedEncodingError: ('Connection broken: IncompleteRead(524288 bytes read, 136119870 more expected)', IncompleteRead(524288 bytes read, 136119870 more expected))
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[1], line 4
2 butler = dafButler.Butler('dp02', collections='2.2i/runs/DP0.2')
3 dataId = {"skymap": "DC2", "tract": 5074, "patch": 21}
----> 4 object_table = butler.get("objectTable", dataId)
5 print(len(object_table))
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/remote_butler/_remote_butler.py:285, in RemoteButler.get(self, datasetRefOrType, dataId, parameters, collections, storageClass, timespan, **kwargs)
282 ref = ref.makeComponentRef(componentOverride)
283 ref = apply_storage_class_override(ref, datasetRefOrType, storageClass)
--> 285 return self._get_dataset_as_python_object(ref, model, parameters)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/remote_butler/_remote_butler.py:295, in RemoteButler._get_dataset_as_python_object(self, ref, model, parameters)
287 def _get_dataset_as_python_object(
288 self,
289 ref: DatasetRef,
(...)
293 # This thin wrapper method is here to provide a place to hook in a mock
294 # mimicking DatastoreMock functionality for use in unit tests.
--> 295 return get_dataset_as_python_object(
296 ref,
297 _to_file_payload(model),
298 parameters=parameters,
299 cache_manager=self._cache_manager,
300 )
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/datastores/fileDatastoreClient.py:88, in get_dataset_as_python_object(ref, payload, parameters, cache_manager)
86 if cache_manager is None:
87 cache_manager = DatastoreDisabledCacheManager()
---> 88 return get_dataset_as_python_object_from_get_info(
89 datastore_file_info, ref=ref, parameters=parameters, cache_manager=cache_manager
90 )
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/datastores/file_datastore/get.py:443, in get_dataset_as_python_object_from_get_info(allGetInfo, ref, parameters, cache_manager)
436 else:
437 # For an assembled composite this could be a derived
438 # component derived from a real component. The validity
439 # of the parameters is not clear. For now validate against
440 # the composite storage class
441 getInfo.formatter.file_descriptor.storageClass.validateParameters(parameters)
--> 443 return _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent=isComponent)
File /opt/lsst/software/stack/conda/envs/lsst-scipipe-10.0.0/share/eups/Linux64/daf_butler/g7096c7a84b+2e8246025c/python/lsst/daf/butler/datastores/file_datastore/get.py:237, in _read_artifact_into_memory(getInfo, ref, cache_manager, isComponent)
235 if notes:
236 notes = "\n" + notes
--> 237 raise ValueError(
238 f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
239 f" ({ref.datasetType.name} from {uri}): {e}{notes}"
240 ) from e
242 return post_process_get(
243 result, ref.datasetType.storageClass, getInfo.assemblerParams, isComponent=isComponent
244 )
ValueError: Failure from formatter 'lsst.daf.butler.formatters.parquet.ParquetFormatter' for dataset 62ae94aa-ec87-49a6-ac41-2fdb540b8a0b (objectTable from https://sdfdatas3.slac.stanford.edu/rubin-dp02-products/2.2i/runs/DP0.2/v23_0_1/PREOPS-905/step3_31/20220307T050825Z/objectTable/5074/21/objectTable_5074_21_DC2_2_2i_runs_DP0_2_v23_0_1_PREOPS-905_step3_31_20220307T050825Z.parq?AWSAccessKeyId=dp02user&Signature=gXC4R%2Fv8QUx47ep5wjfafVedrXQ%3D&Expires=1749833369): ('Connection broken: IncompleteRead(524288 bytes read, 136119870 more expected)', IncompleteRead(524288 bytes read, 136119870 more expected))
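In case it helps narrow things down, this is the kind of check I can run from the notebook to see whether the file itself is reachable, independent of the parquet read (just a sketch; I'm assuming butler.getURI behaves the same way through the client/server butler, which I haven't verified):

# Sketch only: get the (signed) URI for the same dataset and probe it directly,
# to separate "file missing / permissions" from "download breaking partway through".
uri = butler.getURI("objectTable", dataId)
print(uri)           # should be a signed https URL into the DP0.2 bucket
print(uri.exists())  # basic reachability / permissions check
print(uri.size())    # the traceback above was expecting ~136 MB in total

Happy to run anything else that would help with debugging.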