From 4a02a30bc9a339c1def9dcf5b184a21024ec8b26 Mon Sep 17 00:00:00 2001 From: Ferdinand Schlatt Date: Fri, 15 Nov 2024 16:06:48 +0100 Subject: [PATCH] flake8 --- lightning_ir/base/model.py | 2 +- lightning_ir/bi_encoder/tokenizer.py | 5 +++-- lightning_ir/lightning_utils/callbacks.py | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lightning_ir/base/model.py b/lightning_ir/base/model.py index c6da2a1..998ba0c 100644 --- a/lightning_ir/base/model.py +++ b/lightning_ir/base/model.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from functools import partial, wraps from pathlib import Path -from typing import Any, Callable, Literal, Mapping, Protocol, Sequence, Type, TypeVar +from typing import Any, Literal, Mapping, Protocol, Sequence, Type, TypeVar import torch from transformers import MODEL_MAPPING, BatchEncoding, BertModel diff --git a/lightning_ir/bi_encoder/tokenizer.py b/lightning_ir/bi_encoder/tokenizer.py index 11941d6..82e2169 100644 --- a/lightning_ir/bi_encoder/tokenizer.py +++ b/lightning_ir/bi_encoder/tokenizer.py @@ -53,7 +53,8 @@ def __init__( :param attend_to_doc_expanded_tokens: Whether to let non-expanded document tokens be able to attend to mask expanded document tokens, defaults to False :type attend_to_doc_expanded_tokens: bool, optional - :param add_marker_tokens: Whether to add marker tokens to the query and document input sequences, defaults to True + :param add_marker_tokens: Whether to add marker tokens to the query and document input sequences, + defaults to True :type add_marker_tokens: bool, optional :raises ValueError: If add_marker_tokens is True and a non-supported tokenizer is used """ @@ -127,7 +128,7 @@ def doc_token_id(self) -> int | None: return None def __call__(self, *args, warn: bool = True, **kwargs) -> BatchEncoding: - """Overrides the PretrainedTokenizer.__call___ method to warn the user to use :meth:`.tokenize_query` and + """Overrides the PretrainedTokenizer.__call___ method to warn the user to 
use :meth:`.tokenize_query` and :meth:`.tokenize_doc` methods instead. .. PretrainedTokenizer.__call__: \ diff --git a/lightning_ir/lightning_utils/callbacks.py b/lightning_ir/lightning_utils/callbacks.py index 238f75d..40e7188 100644 --- a/lightning_ir/lightning_utils/callbacks.py +++ b/lightning_ir/lightning_utils/callbacks.py @@ -3,7 +3,7 @@ import itertools from dataclasses import is_dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Sequence, Tuple, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Sequence, Tuple, TypeVar import pandas as pd import torch @@ -372,9 +372,11 @@ def __init__( :param dataset_id: Dataset id :type dataset_id: str - :param docs: Path to documents file or valid ir_datasets id from which documents should be taken, defaults to None + :param docs: Path to documents file or valid ir_datasets id from which documents should be taken, + defaults to None :type docs: str | None, optional - :param queries: Path to queries file or valid ir_datastes id from which queries should be taken, defaults to None + :param queries: Path to queries file or valid ir_datasets id from which queries should be taken, + defaults to None :type queries: str | None, optional :param qrels: Path to qrels file or valid ir_datasets id from which qrels will be taken, defaults to None :type qrels: str | None, optional