From 29907e43a818493f223c84ee406903357147aabd Mon Sep 17 00:00:00 2001 From: Thomas Piekarski Date: Fri, 3 Jul 2020 14:49:37 +0200 Subject: [PATCH 1/3] Adding type hints for source files by function annotations --- src/derl/checker.py | 13 +++++++------ src/derl/collector.py | 6 ++++-- src/derl/dispatcher.py | 2 +- src/derl/filterer.py | 2 +- src/derl/main.py | 4 ++-- src/derl/model/file.py | 10 +++++----- src/derl/model/stats.py | 16 ++++++++-------- src/derl/model/url.py | 14 +++++++------- src/derl/outputer.py | 2 +- src/derl/parser.py | 4 ++-- src/derl/processor.py | 11 ++++++----- src/derl/searcher.py | 2 +- src/derl/tracker.py | 18 +++++++++--------- 13 files changed, 54 insertions(+), 50 deletions(-) diff --git a/src/derl/checker.py b/src/derl/checker.py index 312092e..03b7f39 100644 --- a/src/derl/checker.py +++ b/src/derl/checker.py @@ -6,6 +6,7 @@ import sys +from argparse import Namespace from logging import getLogger from os import path from magic import from_file @@ -18,32 +19,32 @@ _logger = getLogger(__name__) -def is_directory(value): +def is_directory(value: str) -> bool: _logger.debug("Checking provided directory %s", value) return path.isdir(value) -def is_retry_value(value): +def is_retry_value(value: str) -> bool: return 0 < value <= 10 -def is_text_file(file): +def is_text_file(file: str) -> bool: _logger.debug("Checking file %s", file) mimetype = from_file(str(file), mime=True) return file.is_file() and mimetype[:4] == "text" -def is_timeout_value(value): +def is_timeout_value(value: int) -> bool: return value > 0 -def is_url(value): +def is_url(value: str) -> bool: return url(value) -def check_arguments(args): +def check_arguments(args: Namespace): if not is_timeout_value(args.timeout): _logger.error("Invalid timeout, timeout must be greater than 0") sys.exit(_INVALID_TIMEOUT) diff --git a/src/derl/collector.py b/src/derl/collector.py index c4e59ed..19b41b5 100644 --- a/src/derl/collector.py +++ b/src/derl/collector.py @@ -6,12 +6,14 @@ from logging import getLogger +from derl.model.url import URL + _logger = getLogger(__name__) _DEFAULT_ENCODING = "utf-8" -def _extract_context(url, lines): +def _extract_context(url: URL, lines: list) -> list: context = [] line_index = url.line_number - 1 @@ -26,7 +28,7 @@ def _extract_context(url, lines): return context -def collect_context(files): +def collect_context(files: list) -> list: for current_file in files: for current_url in current_file.urls: with open(current_file.filename, "r", encoding=_DEFAULT_ENCODING) as open_file: diff --git a/src/derl/dispatcher.py b/src/derl/dispatcher.py index 173ccd5..1eb23d0 100644 --- a/src/derl/dispatcher.py +++ b/src/derl/dispatcher.py @@ -19,7 +19,7 @@ _DEFAULT_TIMEOUT = 10 -def request(files, retry=_DEFAULT_RETRY, timeout=_DEFAULT_TIMEOUT): +def request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list: if len(files) == 0: _logger.debug("No matches for HTTP requests") return [] diff --git a/src/derl/filterer.py b/src/derl/filterer.py index 247e3c2..4e8e5af 100644 --- a/src/derl/filterer.py +++ b/src/derl/filterer.py @@ -9,7 +9,7 @@ _logger = getLogger(__name__) -def filter_not_matching(files): +def filter_not_matching(files: list) -> list: filtered_files = [] for current_file in files: diff --git a/src/derl/main.py b/src/derl/main.py index b04d3be..f3f56d9 100644 --- a/src/derl/main.py +++ b/src/derl/main.py @@ -27,7 +27,7 @@ _tracker = get_tracker() -def setup_logging(loglevel): +def setup_logging(loglevel: int): basicConfig( datefmt="%Y-%m-%d %H:%M:%S", format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s", @@ -36,7 +36,7 @@ def setup_logging(loglevel): ) -def main(args): +def main(args: list): args = parse_args(args) setup_logging(args.loglevel) check_arguments(args) diff --git a/src/derl/model/file.py b/src/derl/model/file.py index 5aadb1f..2bcb512 100644 --- a/src/derl/model/file.py +++ b/src/derl/model/file.py @@ -11,11 +11,11 @@ class File: filename = None urls: [] - def __init__(self, filename): + def __init__(self: "File", filename: str): self.filename = filename self.urls = [] - def __str__(self): + def __str__(self: "File") -> str: output = "" for current_url in self.urls: @@ -23,11 +23,11 @@ def __str__(self): return output - def __repr__(self): + def __repr__(self: "File") -> str: return self.__str__() - def append(self, url, line_number): + def append(self: "File", url: str, line_number: int): self.urls.append(URL(url, line_number)) - def contains_urls(self): + def contains_urls(self: "File") -> bool: return len(self.urls) > 0 diff --git a/src/derl/model/stats.py b/src/derl/model/stats.py index fd42055..6f87478 100644 --- a/src/derl/model/stats.py +++ b/src/derl/model/stats.py @@ -13,7 +13,7 @@ class Stats(): tokens = 0 urls = 0 - def __str__(self): + def __str__(self: "Stats") -> str: output = "Processed Directories/Files/Lines/Tokens/URLs: {0:d}/{1:d}/{2:d}/{3:d}/{4:d}".format( self.directories, self.files, self.lines, self.tokens, self.urls ) @@ -23,23 +23,23 @@ def __str__(self): return output - def __repr__(self): + def __repr__(self: "Stats") -> str: return self.__str__() - def inc_directories(self): + def inc_directories(self: "Stats"): self.directories += 1 - def inc_files(self): + def inc_files(self: "Stats"): self.files += 1 - def inc_lines(self): + def inc_lines(self: "Stats"): self.lines += 1 - def inc_tokens(self): + def inc_tokens(self: "Stats"): self.tokens += 1 - def inc_urls(self): + def inc_urls(self: "Stats"): self.urls += 1 - def inc_requests(self): + def inc_requests(self: "Stats"): self.requests += 1 diff --git a/src/derl/model/url.py b/src/derl/model/url.py index 472e11a..495322b 100644 --- a/src/derl/model/url.py +++ b/src/derl/model/url.py @@ -7,7 +7,7 @@ _CONTEXT_OUTPUT_MAP = {0: "", 1: " {}\n", 2: " {}\n {}\n", 3: " {}\n {}\n {}\n"} -def _normalize_context(context): +def _normalize_context(context: list) -> list: normalized_context = [] for current_line in context: @@ -22,11 +22,11 @@ class URL: line_number = None context = [] - def __init__(self, location, line_number): + def __init__(self: "URL", location: str, line_number: int): self.location = location self.line_number = line_number - def __str__(self): + def __str__(self: "URL") -> str: if self.status_code is None: output = "{}, {}".format(self.line_number, self.location) else: @@ -38,11 +38,11 @@ def __str__(self): return output - def __repr__(self): + def __repr__(self: "URL") -> str: return self.__str__() - def is_context_present(self): - return len(self.context) + def is_context_present(self: "URL") -> bool: + return len(self.context) > 0 - def set_context(self, context): + def set_context(self: "URL", context: list): self.context = _normalize_context(context) diff --git a/src/derl/outputer.py b/src/derl/outputer.py index 3bb196d..4762cfb 100644 --- a/src/derl/outputer.py +++ b/src/derl/outputer.py @@ -11,7 +11,7 @@ _tracker = get_tracker() -def output(files, stats=False): +def output(files: list, stats: bool = False): if len(files) == 0: _logger.debug("No matched files for output") return diff --git a/src/derl/parser.py b/src/derl/parser.py index 04f5d3e..050a61c 100644 --- a/src/derl/parser.py +++ b/src/derl/parser.py @@ -4,14 +4,14 @@ # Copyright 2020 Thomas Piekarski # -from argparse import ArgumentParser, HelpFormatter +from argparse import ArgumentParser, HelpFormatter, Namespace from logging import DEBUG, INFO from derl import __version__ from derl.dispatcher import _DEFAULT_RETRY, _DEFAULT_TIMEOUT -def parse_args(args): +def parse_args(args: list) -> Namespace: parser = ArgumentParser( prog="derl", formatter_class=lambda prog: HelpFormatter(prog, max_help_position=35, width=90), diff --git a/src/derl/processor.py b/src/derl/processor.py index 5d39afd..670e42f 100644 --- a/src/derl/processor.py +++ b/src/derl/processor.py @@ -7,6 +7,7 @@ from logging import getLogger from pathlib import Path from re import compile as rcompile, IGNORECASE +from typing import TextIO from derl.checker import is_text_file, is_url from derl.model.file import File @@ -20,7 +21,7 @@ _tracker = get_tracker() -def process_file(file): +def process_file(file: TextIO) -> list: _logger.debug("Spliting current file %s into lines...", file.name) _tracker.stats.inc_files() @@ -44,7 +45,7 @@ def process_file(file): return urls -def process_line(file, line, urls): +def process_line(file: TextIO, line: tuple, urls: list) -> list: _logger.debug("Splitting current line into tokens...") _tracker.stats.inc_lines() @@ -66,7 +67,7 @@ def process_line(file, line, urls): return urls -def process_token(file, token, line_number): +def process_token(file: TextIO, token: str, line_number: int) -> URL: _tracker.stats.inc_tokens() match = _pattern.match(token) @@ -80,7 +81,7 @@ def process_token(file, token, line_number): return url -def process_directory(directory, files): +def process_directory(directory: str, files: list) -> list: _logger.info("Starting to process directory '%s'", directory) _tracker.stats.inc_directories() @@ -89,7 +90,7 @@ def process_directory(directory, files): for current in path.iterdir(): if current.is_dir(): _logger.debug("'%s' is a directory, descending...", current.name) - files = process_directory(current, files) + files = process_directory(str(current), files) elif is_text_file(current): _logger.debug("Appending file '%s'", current.name) files.append(File(current)) diff --git a/src/derl/searcher.py b/src/derl/searcher.py index d33a1db..b9f3fb7 100644 --- a/src/derl/searcher.py +++ b/src/derl/searcher.py @@ -13,7 +13,7 @@ _DEFAULT_ENCODING = "utf-8" -def search_urls(files): +def search_urls(files: list) -> list: _logger.info("Starting to search for URLs in %i files...", len(files)) if len(files) == 0: diff --git a/src/derl/tracker.py b/src/derl/tracker.py index 1368f9b..1d7bf9f 100644 --- a/src/derl/tracker.py +++ b/src/derl/tracker.py @@ -12,7 +12,7 @@ class Singleton(type): _instances = {} - def __call__(cls, *args, **kwargs): + def __call__(cls: "Singleton", *args: tuple, **kwargs: dict) -> "Tracker": if cls not in cls._instances: cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) @@ -25,29 +25,29 @@ class Tracker(metaclass=Singleton): stats = Stats() test = False - def start(self): + def start(self: "Tracker"): if self.start_time is None: self.start_time = perf_counter() - def stop(self): + def stop(self: "Tracker"): if self.stop_time is None: self.stop_time = perf_counter() - def calc_time(self): + def calc_time(self: "Tracker") -> float: if self.test: return -1 return round(self.stop_time - self.start_time) - def reset(self): + def reset(self: "Tracker"): self.start_time = 0 self.stop_time = 0 self.stats = Stats() - def set_test(self): + def set_test(self: "Tracker"): self.test = True - def __str__(self): + def __str__(self: "Tracker") -> str: output = "" if self.start_time is not None and self.stop_time is not None: @@ -57,9 +57,9 @@ def __str__(self): return output - def __repr__(self): + def __repr__(self: "Tracker") -> str: return self.__str__() -def get_tracker(): +def get_tracker() -> "Tracker": return Tracker() From 94089f1721ca30e8bd8728900d3447fe8ccad740 Mon Sep 17 00:00:00 2001 From: Thomas Piekarski Date: Fri, 3 Jul 2020 14:49:50 +0200 Subject: [PATCH 2/3] Adding type hints for test files by function annotations --- tests/test_checker.py | 4 ++-- tests/test_collector.py | 8 ++++---- tests/test_dispatcher.py | 12 ++++++------ tests/test_filterer.py | 2 +- tests/test_main.py | 22 +++++++++++----------- tests/test_outputer.py | 2 +- tests/test_parser.py | 4 ++-- tests/test_processor.py | 8 ++++---- tests/test_searcher.py | 2 +- 9 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/test_checker.py b/tests/test_checker.py index 5a038be..735e810 100644 --- a/tests/test_checker.py +++ b/tests/test_checker.py @@ -12,10 +12,10 @@ class CheckerTest(TestCase): - def test_is_directory(self): + def test_is_directory(self: "CheckerTest"): self.assertTrue(is_directory("tests/test-directory")) self.assertFalse(is_directory("tests/not-existent-directory")) - def test_is_text_file(self): + def test_is_text_file(self: "CheckerTest"): self.assertTrue(is_text_file(Path("tests/test-files/plain-text"))) self.assertFalse(is_text_file(Path("tests/test-files/binary-data"))) diff --git a/tests/test_collector.py b/tests/test_collector.py index dd8dafe..6d2c270 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -10,7 +10,7 @@ from derl.model.file import File -def _build_test_files(filename, line_number): +def _build_test_files(filename: str, line_number: int) -> list: file = File(filename) file.append("http://www.python", line_number) files = [file] @@ -20,14 +20,14 @@ def _build_test_files(filename, line_number): class CollectorTest(TestCase): - def test_collect_context(self): + def test_collect_context(self: "CollectorTest"): files_with_context = collect_context(_build_test_files("tests/test-files/context.txt", 2)) self.assertEqual(len(files_with_context[0].urls[0].context), 3) - def test_collect_context_at_top_line(self): + def test_collect_context_at_top_line(self: "CollectorTest"): files_with_context = collect_context(_build_test_files("tests/test-files/context-at-top.txt", 1)) self.assertEqual(len(files_with_context[0].urls[0].context), 2) - def test_collect_context_at_bottom_line(self): + def test_collect_context_at_bottom_line(self: "CollectorTest"): files_with_context = collect_context(_build_test_files("tests/test-files/context-at-bottom.txt", 3)) self.assertEqual(len(files_with_context[0].urls[0].context), 2) diff --git a/tests/test_dispatcher.py b/tests/test_dispatcher.py index a31b3f7..38ff542 100644 --- a/tests/test_dispatcher.py +++ b/tests/test_dispatcher.py @@ -13,7 +13,7 @@ from derl.model.file import File -def _build_test_files(): +def _build_test_files() -> list: test_file = File("test-file.txt") test_file.append("http://www.python.org/", 14) @@ -22,30 +22,30 @@ def _build_test_files(): class DispatcherTest(TestCase): - def test_request(self): + def test_request(self: "DispatcherTest"): files = request(_build_test_files()) self.assertEqual(files[0].urls[0].status_code, 200) - def test_dispatcher_without_any_files(self): + def test_dispatcher_without_any_files(self: "DispatcherTest"): self.assertEqual(request([]), []) @patch("requests.get") - def test_timeout(self, mocked_get): + def test_timeout(self: "DispatcherTest", mocked_get: "Mock"): mocked_get.side_effect = Timeout files = request(_build_test_files()) self.assertEqual(files[0].urls[0].status_code, 0) @patch("requests.get") - def test_too_many_redirects(self, mocked_get): + def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"): mocked_get.side_effect = TooManyRedirects files = request(_build_test_files()) self.assertEqual(files[0].urls[0].status_code, 0) @patch("requests.get") - def test_connection_error(self, mocked_get): + def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"): mocked_get.side_effect = RequestConnectionError files = request(_build_test_files()) diff --git a/tests/test_filterer.py b/tests/test_filterer.py index 672bd65..1e51982 100644 --- a/tests/test_filterer.py +++ b/tests/test_filterer.py @@ -12,7 +12,7 @@ class FiltererTest(TestCase): - def test_filter_not_matching(self): + def test_filter_not_matching(self: "FiltererTest"): file_with_url = File("a.txt") file_with_url.append("http://www.somewhere.com/", 0) file_without_url = File("b.txt") diff --git a/tests/test_main.py b/tests/test_main.py index 880ce16..8e6c848 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -27,7 +27,7 @@ class MainTest(TestCase): maxDiff = None - def _reference_testing(self, arguments, reference): + def _reference_testing(self: "MainTest", arguments: list, reference: str): with patch("sys.stdout", new=StringIO()) as fake_stdout: with open(reference, "r") as opened_reference: with raises(SystemExit) as wrapped_exit: @@ -37,57 +37,57 @@ def _reference_testing(self, arguments, reference): self.assertEqual(wrapped_exit.value.code, 0) self.assertEqual(fake_stdout.getvalue(), opened_reference.read()) - def test_main_without_context_without_dispatch(self): + def test_main_without_context_without_dispatch(self: "MainTest"): self._reference_testing([_TEST_DIRECTORY], "tests/references/output-without-context-without-dispatch.out") - def test_main_with_context_without_dispatch(self): + def test_main_with_context_without_dispatch(self: "MainTest"): self._reference_testing([_TEST_DIRECTORY, "--context"], "tests/references/output-with-context-without-dispatch.out") - def test_main_without_context_with_dispatch(self): + def test_main_without_context_with_dispatch(self: "MainTest"): self._reference_testing([_TEST_DIRECTORY, "--dispatch"], "tests/references/output-without-context-with-dispatch.out") - def test_main_with_context_with_dispatch(self): + def test_main_with_context_with_dispatch(self: "MainTest"): self._reference_testing([_TEST_DIRECTORY, "--context", "--dispatch"], "tests/references/output-with-context-with-dispatch.out") - def test_main_with_stats_without_dispatch(self): + def test_main_with_stats_without_dispatch(self: "MainTest"): _tracker.set_test() _tracker.reset() self._reference_testing([_TEST_DIRECTORY, "--stats"], "tests/references/output-with-stats-without-dispatch.out") - def test_main_with_stats_with_dispatch(self): + def test_main_with_stats_with_dispatch(self: "MainTest"): _tracker.set_test() _tracker.reset() self._reference_testing([_TEST_DIRECTORY, "--stats", "--dispatch"], "tests/references/output-with-stats-with-dispatch.out") - def test_main_with_not_existing_directory(self): + def test_main_with_not_existing_directory(self: "MainTest"): with raises(SystemExit) as wrapped_exit: main(["tests/not-existing"]) self.assertEqual(wrapped_exit.type, SystemExit) self.assertEqual(wrapped_exit.value.code, _INVALID_DIRECTORY) - def test_main_with_invalid_timeout(self): + def test_main_with_invalid_timeout(self: "MainTest"): with raises(SystemExit) as wrapped_exit: main(["--dispatch", "--timeout", "-5", _TEST_DIRECTORY]) self.assertEqual(wrapped_exit.type, SystemExit) self.assertEqual(wrapped_exit.value.code, _INVALID_TIMEOUT) - def test_main_with_invalid_retry(self): + def test_main_with_invalid_retry(self: "MainTest"): with raises(SystemExit) as wrapped_exit: main(["--dispatch", "--retry", "1000", _TEST_DIRECTORY]) self.assertEqual(wrapped_exit.type, SystemExit) self.assertEqual(wrapped_exit.value.code, _INVALID_RETRY) - def test_run(self): + def test_run(self: "MainTest"): with patch("sys.stdout", new=StringIO()) as fake_stdout: with raises(SystemExit) as wrapped_exit: run() diff --git a/tests/test_outputer.py b/tests/test_outputer.py index 74785c6..8da12e8 100644 --- a/tests/test_outputer.py +++ b/tests/test_outputer.py @@ -13,7 +13,7 @@ class OutputerTest(TestCase): - def test_output_without_any_files(self): + def test_output_without_any_files(self: "OutputerTest"): with patch("sys.stdout", new=StringIO()) as fake_stdout: output([]) self.assertTrue(fake_stdout.getvalue() == "") diff --git a/tests/test_parser.py b/tests/test_parser.py index 9f375ad..437451a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -11,12 +11,12 @@ class ParserTest(TestCase): - def test_parse_directory(self): + def test_parse_directory(self: "ParserTest"): parsed_args = parse_args(["test-directory"]) self.assertEqual(parsed_args.directory, "test-directory") - def test_parse_dispatch(self): + def test_parse_dispatch(self: "ParserTest"): self.assertTrue(parse_args(["test-directory", "-d"]).dispatch) self.assertTrue(parse_args(["test-directory", "--dispatch"]).dispatch) self.assertFalse(parse_args(["test-directory"]).dispatch) diff --git a/tests/test_processor.py b/tests/test_processor.py index acad5a8..e0099e3 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -12,19 +12,19 @@ class ProcessorTest(TestCase): - def test_process_empty_line(self): + def test_process_empty_line(self: "ProcessorTest"): urls = [] self.assertEqual(process_line(None, [0, ""], urls), []) - def test_process_directory(self): + def test_process_directory(self: "ProcessorTest"): self.assertEqual(len(process_directory(_TEST_DIRECTORY, [])), 7) self.assertEqual(len(process_directory("not-existing-directory", [])), 0) - def test_unsupported_file_encoding(self): + def test_unsupported_file_encoding(self: "ProcessorTest"): with open("tests/test-files/binary-data", "r") as test_file: self.assertEqual(process_file(test_file), []) - def test_process_file_with_empty_lines(self): + def test_process_file_with_empty_lines(self: "ProcessorTest"): with open("tests/test-files/empty-file", "r") as test_file: self.assertEqual(process_file(test_file), []) diff --git a/tests/test_searcher.py b/tests/test_searcher.py index 56d1d61..4c3bdfe 100644 --- a/tests/test_searcher.py +++ b/tests/test_searcher.py @@ -11,5 +11,5 @@ class SearcherTest(TestCase): - def test_search_urls_without_any_files(self): + def test_search_urls_without_any_files(self: "SearcherTest"): self.assertEqual(search_urls([]), []) From a991e69931f41710d81834fb595d841dade5d209 Mon Sep 17 00:00:00 2001 From: Thomas Piekarski Date: Fri, 3 Jul 2020 14:50:07 +0200 Subject: [PATCH 3/3] Adding target for listing all function signatures --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 97c858a..1977167 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,10 @@ run: $(info Testing if derl runs with $(TEST_DIRECTORY) (use args="" to pass arguments)) derl $(TEST_DIRECTORY) $(args) +show-function-signatures: + $(info Showing all function (and method) signatures) + find src/ tests/ -name "*.py" | xargs grep -E "def.*\(.*\).*:" + sonar: $(info Running Sonar Scanner (only when automatic analysis is turned off)) sonar-scanner -Dsonar.login=${SONAR_KEY}