Skip to content

Commit

Permalink
fix: failed to load basin data when port was specified in location
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Dec 30, 2023
1 parent 68c8852 commit 1cf494e
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
- feat: allow nested basins
- feat: implement DCOR basins
- fix: make sure basins are always closed on context exit (#238)
- fix: failed to load basin data when port was specified in location
- enh: requests session pooling for fmt_http and fmt_dcor
- enh: implement context manager for RTDCBase
- ref: new http_utils submodule for managing HTTP connections
Expand Down
4 changes: 3 additions & 1 deletion dclab/http_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,9 @@ def is_url_available(url: str, ret_reason=False):
s.settimeout(1)
# Try to connect to the host
try:
s.connect((urlp.netloc, port))
# Use `hostname`, not `netloc`, because `netloc` contains
# the port number which we do not want here.
s.connect((urlp.hostname, port))
except (socket.gaierror, OSError):
reason = "no connection"
else:
Expand Down
4 changes: 3 additions & 1 deletion dclab/rtdc_dataset/fmt_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ def is_s3_object_available(url: str,
s.settimeout(1)
# Try to connect to the host
try:
s.connect((urlp.netloc, port))
# Use `hostname`, not `netloc`, because `netloc` contains
# the port number which we do not want here.
s.connect((urlp.hostname, port))
except (socket.gaierror, OSError):
pass
else:
Expand Down
14 changes: 14 additions & 0 deletions tests/test_rtdc_fmt_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,20 @@ def test_identifier():
assert ds.identifier == "f0104b0ca2e7d6960189c60fc8b4b986-14"


@pytest.mark.parametrize("netloc", [
    "objectstore.hpccloud.mpcdf.mpg.de",
    "objectstore.hpccloud.mpcdf.mpg.de:443"
])
def test_netloc_vs_hostname(netloc):
    """Open the same remote resource with and without an explicit port.

    The parametrization covers a bare host name and a ``host:port``
    network location; both must yield the same dataset.
    """
    bucket = "circle-5a7a053d-55fb-4f99-960c-f478d0bd418f"
    key = "resource/fb7/19f/b2-bd9f-817a-7d70-f4002af916f0"
    s3_url = f"https://{netloc}/{bucket}/{key}"

    with RTDC_HTTP(s3_url) as ds:
        # The dataset is expected to hold exactly 5000 events.
        assert len(ds) == 5000
        # Spot-check the first deformation value.
        first_deform = ds["deform"][0]
        assert np.allclose(first_deform, 0.009741939, rtol=0, atol=1e-7)


def test_open_public_s3_dataset():
# This is the calibration beads measurement.
# https://dcor.mpl.mpg.de/dataset/figshare-7771184-v2/
Expand Down
30 changes: 30 additions & 0 deletions tests/test_rtdc_fmt_http_basin.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,36 @@ def test_basin_as_dict(tmp_path):
assert bdict2["basin_descr"] == "an example http test basin"


def test_basin_as_dict_netloc_vs_hostname(tmp_path):
    """Load basin data from an HTTP location that includes a port number.

    A local .rtdc file is written that contains only metadata plus a
    remote HTTP basin whose location carries an explicit ``:443`` port.
    Opening that file must resolve the basin and expose its features.
    """
    rtdc_file = tmp_path.resolve() / "test_basin_http.rtdc"

    # Same resource as `http_url`, but with the port spelled out.
    http_url_netloc = ("https://objectstore.hpccloud.mpcdf.mpg.de:443/"
                       "circle-5a7a053d-55fb-4f99-960c-f478d0bd418f/"
                       "resource/fb7/19f/b2-bd9f-817a-7d70-f4002af916f0")

    with h5py.File(rtdc_file, "a") as h5_out:
        with RTDC_HTTP(http_url) as remote:
            with RTDCWriter(h5_out, mode="append") as writer:
                # Copy the metadata of the remote dataset ...
                writer.store_metadata(
                    remote.config.as_dict(pop_filtering=True))
                # ... and register the port-qualified URL as its basin.
                writer.store_basin(
                    basin_name="example basin",
                    basin_type="remote",
                    basin_format="http",
                    basin_locs=[http_url_netloc],
                    basin_descr="an example http test basin",
                )

    with new_dataset(rtdc_file) as ds:
        assert len(ds) == 5000
        # This failed in <0.56.0, because `netloc` was used instead of
        # `hostname` when connecting to the socket to check whether the
        # server is available.
        first_deform = ds["deform"][0]
        assert np.allclose(first_deform, 0.009741939, rtol=0, atol=1e-7)


@pytest.mark.parametrize("url", [
"https://example.com/nonexistentbucket/nonexistentkey",
f"https://objectstore.hpccloud.mpcdf.mpg.de/noexist-{uuid.uuid4()}/key",
Expand Down

0 comments on commit 1cf494e

Please sign in to comment.