Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store local paths as pathlib paths #48

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 22 additions & 47 deletions src/proxpi/_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import time
import shutil
import logging
import pathlib
import tempfile
import warnings
import functools
import posixpath
import threading
import dataclasses
import typing as t
Expand Down Expand Up @@ -574,7 +574,7 @@ class _CachedFile:

__slots__ = ("path", "size", "n_hits")

path: str
path: pathlib.Path
"""File path."""

size: int
Expand All @@ -584,40 +584,19 @@ class _CachedFile:
"""Number of cache hits."""


def _split_path(
path: str, split: t.Callable[[str], t.Tuple[str, str]]
) -> t.Generator[str, None, None]:
"""Split path into directory components.

Args:
path: path to split
split: path-split functions

Returns:
path parts generator
"""

parent, filename = split(path)
if not filename:
return
if parent:
yield from _split_path(parent, split)
yield filename


class _FileCache:
"""Package files cache."""

max_size: int
cache_dir: str
cache_dir: pathlib.Path
_cache_dir_provided: t.Union[str, None]
_files: t.Dict[str, t.Union[_CachedFile, Thread]]
_evict_lock: threading.Lock

def __init__(
self,
max_size: int,
cache_dir: str = None,
cache_dir: t.Union[str, pathlib.Path] = None,
download_timeout: float = 0.9,
session: requests.Session = None,
):
Expand All @@ -632,7 +611,7 @@ def __init__(
"""

self.max_size = max_size
self.cache_dir = os.path.abspath(cache_dir or tempfile.mkdtemp())
self.cache_dir = pathlib.Path(cache_dir or tempfile.mkdtemp()).absolute()
self.download_timeout = download_timeout
self.session = session or requests.Session()
self._cache_dir_provided = cache_dir
Expand All @@ -648,19 +627,16 @@ def __repr__(self):
)

def __del__(self):
if not self._cache_dir_provided and os.path.isdir(self.cache_dir):
if not self._cache_dir_provided and self.cache_dir.is_dir():
logger.debug(f"Deleting '{self.cache_dir}'")
shutil.rmtree(self.cache_dir)
shutil.rmtree(str(self.cache_dir))

def _populate_files_from_existing_cache_dir(self):
"""Populate from user-provided cache directory."""
for dirpath, _, filenames in os.walk(self.cache_dir):
for filename in filenames:
filepath = os.path.join(dirpath, filename)
size = os.path.getsize(filepath)
name = os.path.relpath(filepath, self.cache_dir)
if os.path != posixpath:
name = posixpath.join(*_split_path(name, os.path.split))
for filepath in self.cache_dir.glob("**/*"):
size = filepath.stat().st_size
name = str(pathlib.PurePosixPath(filepath.relative_to(self.cache_dir)))
if True: # minimise Git diff
self._files[name] = _CachedFile(filepath, size, n_hits=0)

@staticmethod
Expand All @@ -669,9 +645,9 @@ def _get_key(url: str) -> str:
"""Get file cache reference key from file URL."""
urlsplit = urllib.parse.urlsplit(url)
parent = _hostname_normalise_pattern.sub("-", urlsplit.hostname)
return posixpath.join(parent, *_split_path(urlsplit.path, posixpath.split))
return str(pathlib.PurePosixPath(parent) / urlsplit.path[1:])

def _download_file(self, url: str, path: str):
def _download_file(self, url: str, path: pathlib.Path) -> None:
"""Download a file.

Args:
Expand All @@ -688,13 +664,12 @@ def _download_file(self, url: str, path: str):
f"status={response.status_code}, body={response.text}"
)
return
parent, _ = os.path.split(path)
os.makedirs(parent, exist_ok=True)
with open(path, "wb") as f:
path.parent.mkdir(parents=True, exist_ok=True)
with open(str(path), "wb") as f:
for chunk in response.iter_content(None):
f.write(chunk)
key = self._get_key(url)
self._files[key] = _CachedFile(path, os.stat(path).st_size, 0)
self._files[key] = _CachedFile(path, path.stat().st_size, 0)
logger.debug(f"Finished downloading '{url_masked}'")

def _wait_for_existing_download(self, url: str) -> bool:
Expand All @@ -719,19 +694,19 @@ def _wait_for_existing_download(self, url: str) -> bool:
return True # default to original URL (due to timeout or HTTP error)
return False

def _get_cached(self, url: str) -> t.Union[str, None]:
def _get_cached(self, url: str) -> t.Union[pathlib.Path, None]:
"""Get file from cache."""
if url in self._files:
file = self._files[url]
assert isinstance(file, _CachedFile)
file.n_hits += 1
return file.path
return pathlib.Path(file.path)
return None

def _start_downloading(self, url: str):
"""Start downloading a file."""
key = self._get_key(url)
path = os.path.join(self.cache_dir, *_split_path(key, posixpath.split))
path = pathlib.Path(pathlib.PurePosixPath(self.cache_dir) / key)

thread = Thread(target=self._download_file, args=(url, path))
self._files[key] = thread
Expand All @@ -748,10 +723,10 @@ def _evict_lfu(self, url: str):
while existing_size + file_size > self.max_size and existing_size > 0:
existing_url = cache_keys.pop(0)
file = self._files.pop(existing_url)
os.unlink(file.path)
file.path.unlink()
existing_size -= file.size

def get(self, url: str) -> str:
def get(self, url: str) -> t.Union[str, pathlib.Path]:
"""Get a file using or updating cache.

Args:
Expand Down Expand Up @@ -884,7 +859,7 @@ def list_files(self, package_name: str) -> t.List[File]:
raise exc
return files

def get_file(self, package_name: str, file_name: str) -> str:
def get_file(self, package_name: str, file_name: str) -> t.Union[str, pathlib.Path]:
"""Get a file.

Args:
Expand Down
5 changes: 2 additions & 3 deletions src/proxpi/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import gzip
import zlib
import logging
import pathlib
import typing as t
import urllib.parse

import flask
import jinja2
Expand Down Expand Up @@ -203,8 +203,7 @@ def get_file(package_name: str, file_name: str):
except _cache.NotFound:
flask.abort(404)
raise
scheme = urllib.parse.urlparse(path).scheme
if scheme and scheme != "file":
if not isinstance(path, pathlib.Path):
return flask.redirect(path)
return flask.send_file(path, mimetype=_file_mime_type)

Expand Down