From bbafd009a6d06a47760539754fdfcaedb3ba5c13 Mon Sep 17 00:00:00 2001
From: Weixiong Lin
Date: Thu, 19 Oct 2023 17:00:39 +0800
Subject: [PATCH] add progress bar to http_get

Signed-off-by: Weixiong Lin
---
Review notes (ignored by `git am`):
- The progress bar is now managed by a `with` block so it is closed even when
  the download raises part-way through.
- Progress is advanced by len(chunk) rather than the return value of write().
- The blob hashes on the `index` line predate this revision of the patch.

 scispacy/file_cache.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/scispacy/file_cache.py b/scispacy/file_cache.py
index d737dab..9ff9918 100644
--- a/scispacy/file_cache.py
+++ b/scispacy/file_cache.py
@@ -12,6 +12,7 @@ from hashlib import sha256
 
 import requests
 
+from tqdm import tqdm
 CACHE_ROOT = Path(os.getenv("SCISPACY_CACHE", str(Path.home() / ".scispacy")))
 DATASET_CACHE = str(CACHE_ROOT / "datasets")
 
@@ -96,9 +97,20 @@ def filename_to_url(filename: str, cache_dir: Optional[str] = None) -> Tuple[str
 
 def http_get(url: str, temp_file: IO) -> None:
+    """
+    Stream the response body for `url` into `temp_file`, displaying a
+    tqdm progress bar sized from the Content-Length header when present.
+    """
     req = requests.get(url, stream=True)
-    for chunk in req.iter_content(chunk_size=1024):
-        if chunk:  # filter out keep-alive new chunks
-            temp_file.write(chunk)
+    # Content-Length may be absent (e.g. chunked responses); fall back to 0.
+    total = int(req.headers.get("content-length", 0))
+    # The context manager closes the bar even if the download raises midway.
+    with tqdm(total=total, unit="iB", unit_scale=True, unit_divisor=1024) as pbar:
+        for chunk in req.iter_content(chunk_size=1024):
+            if chunk:  # filter out keep-alive new chunks
+                temp_file.write(chunk)
+                # Count received bytes directly; write() is not guaranteed to
+                # return a byte count for every file-like object.
+                pbar.update(len(chunk))
 
 
 def get_from_cache(url: str, cache_dir: Optional[str] = None) -> str: