Skip to content

Commit

Permalink
feat: add LargeFile.chunk_file method
Browse files Browse the repository at this point in the history
  • Loading branch information
Void-ux committed May 12, 2024
2 parents d89bea4 + 337cf9f commit 4b69ebc
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 13 deletions.
6 changes: 1 addition & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# aiob2

---

<p align="center">
<a href="https://www.python.org/downloads/">
<img src="https://img.shields.io/pypi/pyversions/aiob2?style=for-the-badge" alt="Python version">
Expand All @@ -19,7 +17,7 @@

aiob2 is an asynchronous API wrapper for the [Backblaze B2 Bucket API](https://www.backblaze.com/b2/docs/calling.html).

It will allow you to interact with your B2 bucket, it's files and anything else that the B2 API allows in a modern, object-oriented fashion.
It will allow you to interact with your B2 bucket and its files in a modern, object-oriented fashion.

**NOTE:** There are API endpoints left to implement; they will be added eventually. To speed up this process you can submit a [pull request](https://github.com/Void-ux/aiob2/pulls) or [suggest it](https://github.com/Void-ux/aiob2/discussions/categories/ideas).

Expand Down Expand Up @@ -71,6 +69,4 @@ The `File` object's documentation can be found [here](https://aiob2.readthedocs.

## License

---

This project is released under the [MIT License](https://opensource.org/licenses/MIT).
14 changes: 13 additions & 1 deletion aiob2/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ async def upload_large_file(
bucket_id: str
The ID of the bucket to upload to.
file_name: str
The name of the file.
The name of the remote file.
content_type: str
The content type of the file once every part is combined together.
Expand Down Expand Up @@ -229,6 +229,7 @@ async def download_file_by_id(
self,
file_id: str,
*,
range_: Optional[str] = None,
content_disposition: Optional[str] = None,
content_language: Optional[str] = None,
expires: Optional[str] = None,
Expand All @@ -243,6 +244,10 @@ async def download_file_by_id(
-----------
file_id: :class:`str`
The file id of the file to be downloaded.
range_: Optional[:class:`str`]
A standard byte-range request, which will return just part of the stored file. For
example, "bytes=0-99" selects bytes 0 through 99 (inclusive) of the file, so it will
return the first 100 bytes.
content_disposition: Optional[:class:`str`]
Overrides the current 'b2-content-disposition' specified when the file was uploaded.
content_language: Optional[:class:`str`]
Expand All @@ -267,6 +272,7 @@ async def download_file_by_id(

data = await self._http.download_file_by_id(
file_id=file_id,
range_=range_,
content_disposition=content_disposition,
content_language=content_language,
expires=expires,
Expand All @@ -282,6 +288,7 @@ async def download_file_by_name(
file_name: str,
bucket_name: str,
*,
range_: Optional[str] = None,
content_disposition: Optional[str] = None,
content_language: Optional[str] = None,
expires: Optional[str] = None,
Expand All @@ -299,6 +306,10 @@ async def download_file_by_name(
bucket_name: :class:`str`
The bucket name of the file to be downloaded. This should only be specified if you have specified
file_name and not file_id.
range_: Optional[:class:`str`]
A standard byte-range request, which will return just part of the stored file. For
example, "bytes=0-99" selects bytes 0 through 99 (inclusive) of the file, so it will
return the first 100 bytes.
content_disposition: Optional[:class:`str`]
Overrides the current 'b2-content-disposition' specified when the file was uploaded.
content_language: Optional[:class:`str`]
Expand All @@ -324,6 +335,7 @@ async def download_file_by_name(
data = await self._http.download_file_by_name(
file_name=file_name,
bucket_name=bucket_name,
range_=range_,
content_disposition=content_disposition,
content_language=content_language,
expires=expires,
Expand Down
37 changes: 36 additions & 1 deletion aiob2/file.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

import datetime
import hashlib
from typing import Optional, Literal, Dict, List, Any
from typing import IO, Generator, Union, Optional, Literal, Dict, List, Any

from .utils import format_timestamp
from .http import HTTPClient, UploadPayload
from .models.file import LargeFilePart, PartialFile, File



class LargeFile(PartialFile):
"""Represents a large file being uploaded to Backblaze.
Expand Down Expand Up @@ -70,6 +73,38 @@ def __init__(
self.recommended_part_size: int = self._http._recommended_part_size # type: ignore
self.absolute_minimum_part_size: int = self._http._absolute_minimum_part_size # type: ignore

async def chunk_file(self, file: Union[str, IO[bytes]]) -> None:
    """|coro|

    Splits a file or binary buffer into parts of ``recommended_part_size``
    bytes and uploads each part, in order, with :meth:`upload_part`.

    The file object is always closed when this method returns or raises,
    including a buffer that was passed in by the caller.

    Parameters
    ----------
    file: Union[:class:`str`, IO[:class:`bytes`]]
        Either a path to a file, which is opened in binary read mode, or an
        already open binary buffer to read from.

    Raises
    ------
    RuntimeError
        The large file upload has been cancelled or is already finished.
    """
    if self._cancelled:
        raise RuntimeError('New parts cannot be uploaded to a cancelled large file upload')
    if self._finished:
        raise RuntimeError('New parts cannot be uploaded to an already complete large file')

    stream: IO[bytes] = open(file, 'rb') if isinstance(file, str) else file

    try:
        # Read the part size once so every part of this call uses the same size.
        part_size = self.recommended_part_size
        while True:
            data = stream.read(part_size)
            # An empty read marks the end of the stream; stop without
            # uploading an empty part.
            if not data:
                break
            await self.upload_part(data)
    finally:
        stream.close()

async def upload_part(self, content_bytes: bytes) -> LargeFilePart:
"""|coro|
Expand Down
25 changes: 19 additions & 6 deletions aiob2/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,17 @@ def __init__(
path: str,
*,
base: Optional[str] = None,
query_parameters: Optional = None,
**parameters: Any
) -> None:
self.method: Literal['GET', 'POST', 'PUT', 'DELETE'] = method
self.path = path
self.parameters = parameters
self.query_parameters = query_parameters
url = (base or self.BASE) + self.path
if parameters:
url = url.format_map({k: quote(v) if isinstance(v, str) else v for k, v in self.parameters.items()})

self.url: URL = URL(url, encoded=True)
self.url: URL = URL(url, encoded=True).with_query(query_parameters)

def __repr__(self) -> str:
return f'{self.method} {str(self.url)}'
Expand Down Expand Up @@ -414,7 +415,7 @@ async def request(
if self._authorization_token is MISSING and route.path != '/b2_authorize_account':
await self._find_authorization_token()
headers['Authorization'] = self._authorization_token
route = Route(route.method, route.path, base=self._api_url, **route.parameters)
route = Route(route.method, route.path, base=self._api_url, query_parameters=route.query_parameters, **route.parameters)

for tries in range(5):
if upload_info:
Expand Down Expand Up @@ -722,6 +723,7 @@ def download_file_by_id(
self,
*,
file_id: str,
range_: Optional[str] = None,
content_disposition: Optional[str] = None,
content_language: Optional[str] = None,
expires: Optional[str] = None,
Expand All @@ -732,6 +734,10 @@ def download_file_by_id(
) -> Response[Tuple[bytes, Dict[str, Any]]]:
headers = {
'Authorization': self._authorization_token,
'Range': range_,
}
headers = {key: value for key, value in headers.items() if value is not None}
query_parameters = {
'b2ContentDisposition': content_disposition,
'b2ContentLanguage': content_language,
'b2Expires': expires,
Expand All @@ -740,14 +746,15 @@ def download_file_by_id(
'b2ContentType': content_type,
'serverSideEncryption': server_side_encryption
}
headers = {key: value for key, value in headers.items() if value is not None}
query_parameters = {key: value for key, value in query_parameters.items() if value is not None}
params = {
'fileId': file_id
}
route = Route(
'GET',
'/b2api/v2/b2_download_file_by_id',
base=self._download_url
base=self._download_url,
query_parameters=query_parameters,
)

return self.request(route, headers=headers, params=params)
Expand All @@ -757,6 +764,7 @@ def download_file_by_name(
*,
file_name: str,
bucket_name: str,
range_: Optional[str] = None,
content_disposition: Optional[str] = None,
content_language: Optional[str] = None,
expires: Optional[str] = None,
Expand All @@ -767,6 +775,10 @@ def download_file_by_name(
) -> Response[Tuple[bytes, Dict[str, Any]]]:
headers = {
'Authorization': self._authorization_token,
'Range': range_,
}
headers = {key: value for key, value in headers.items() if value is not None}
query_parameters = {
'b2ContentDisposition': content_disposition,
'b2ContentLanguage': content_language,
'b2Expires': expires,
Expand All @@ -775,11 +787,12 @@ def download_file_by_name(
'b2ContentType': content_type,
'serverSideEncryption': server_side_encryption
}
headers = {key: value for key, value in headers.items() if value is not None}
query_parameters = {key: value for key, value in query_parameters.items() if value is not None}
route = Route(
'GET',
'/file/{bucket_name}/{file_name}',
base=self._download_url,
query_parameters=query_parameters,
bucket_name=bucket_name,
file_name=file_name
)
Expand Down
82 changes: 82 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,85 @@ async def test_download(self):
assert downloaded_file.content == path.read_bytes()

await client.close()

@pytest.mark.asyncio
@pytest.mark.order(2)
async def test_download_range(self):
    """Download a byte range of ``ValueStorage.test_upload_file`` by name and by id.

    Both download calls receive the same ``range_`` value and must return at
    most ``range_length`` bytes, with the reported content length equal to
    the size of the returned content.
    """
    client = Client(os.environ['KEY_ID'], os.environ['KEY'], log_level=logging.DEBUG)
    try:
        file = ValueStorage.test_upload_file
        assert isinstance(file, File)

        range_length = 100
        # The range end is inclusive, so the last requested byte is length - 1.
        range_str = f"bytes=0-{range_length - 1}"

        # Download (by name)

        downloaded_file = await client.download_file_by_name(
            file_name=file.name,
            bucket_name=os.environ['BUCKET_NAME'],
            range_=range_str
        )

        assert downloaded_file.name == file.name
        assert downloaded_file.id == file.id
        assert len(downloaded_file.content) <= range_length
        assert int(downloaded_file.content_length) <= range_length
        assert int(downloaded_file.content_length) == len(downloaded_file.content)

        # Download (by id)

        downloaded_file = await client.download_file_by_id(file_id=file.id, range_=range_str)

        assert downloaded_file.name == file.name
        assert downloaded_file.id == file.id
        assert len(downloaded_file.content) <= range_length
        assert int(downloaded_file.content_length) <= range_length
        assert int(downloaded_file.content_length) == len(downloaded_file.content)
    finally:
        # Close the client even when an assertion above fails.
        await client.close()

@pytest.mark.asyncio
@pytest.mark.order(2)
async def test_download_params(self):
    """Download ``ValueStorage.test_upload_file`` with response-header overrides.

    The by-name download passes content disposition, language, expires,
    encoding and type overrides and checks that the downloaded file reflects
    them. The by-id download is made without overrides and only checks the
    file identity and content.
    """
    client = Client(os.environ['KEY_ID'], os.environ['KEY'], log_level=logging.DEBUG)
    try:
        file = ValueStorage.test_upload_file
        assert isinstance(file, File)

        content_disposition = 'attachment; filename="filename.jpg"'
        content_language = 'de-DE'
        expires = 'Wed, 21 Oct 2015 07:28:00 GMT'
        # TODO: this is a valid value for cache-control, but DownloadedFile tries to parse it as a timestamp
        # cache_control = 'max-age=604800, must-revalidate'
        content_encoding = 'compress'
        content_type = 'text/html;charset=utf-8'

        # Download (by name)

        downloaded_file = await client.download_file_by_name(
            file_name=file.name,
            bucket_name=os.environ['BUCKET_NAME'],
            content_disposition=content_disposition,
            content_language=content_language,
            expires=expires,
            # cache_control=cache_control,
            content_encoding=content_encoding,
            content_type=content_type
        )

        assert downloaded_file.name == file.name
        assert downloaded_file.id == file.id
        assert downloaded_file.content == path.read_bytes()
        assert downloaded_file.content_disposition == content_disposition
        assert downloaded_file.content_language == content_language
        # TODO: cache-control header is not exposed by DownloadedFile, and the logic that sets expires needs some
        # attention
        # assert downloaded_file.cache_control == cache_control
        # assert downloaded_file.expires == expires
        assert downloaded_file.content_encoding == content_encoding
        assert downloaded_file.content_type == content_type

        # Download (by id)

        downloaded_file = await client.download_file_by_id(file_id=file.id)

        assert downloaded_file.name == file.name
        assert downloaded_file.id == file.id
        assert downloaded_file.content == path.read_bytes()
    finally:
        # Close the client even when an assertion above fails.
        await client.close()

0 comments on commit 4b69ebc

Please sign in to comment.