diff --git a/.pylintrc b/.pylintrc index 8d7d68d..0cd06f9 100644 --- a/.pylintrc +++ b/.pylintrc @@ -61,88 +61,10 @@ confidence= # no Warning level messages displayed, use "--disable=all --enable=classes # --disable=W". disable= - #apply-builtin, - #backtick, - #bad-inline-option, - #bad-python3-import, - #basestring-builtin, - #buffer-builtin, - #cmp-builtin, - #cmp-method, - #coerce-builtin, - #coerce-method, - #comprehension-escape, - #delslice-method, - #deprecated-itertools-function, - #deprecated-operator-function, - #deprecated-pragma, - #deprecated-str-translate-call, - #deprecated-string-function, - #deprecated-sys-function, - #deprecated-types-field, - #deprecated-urllib-function, - #dict-items-not-iterating, - #dict-iter-method, - #dict-keys-not-iterating, - #dict-values-not-iterating, - #dict-view-method, - #div-method, - #eq-without-hash, - #exception-escape, - #exception-message-attribute, - #execfile-builtin, - #file-builtin, - #file-ignored, - #filter-builtin-not-iterating, - #getslice-method, - #hex-method, - #idiv-method, - #import-star-module-level, - #indexing-exception, - #input-builtin, - #intern-builtin, - #invalid-str-codec, - #locally-disabled, - #long-builtin, - #long-suffix, - #map-builtin-not-iterating, - #metaclass-assignment, + fixme, missing-class-docstring, missing-function-docstring, - missing-module-docstring, - #next-method-called, - #next-method-defined, - #no-absolute-import, - #non-ascii-bytes-literal, - #nonzero-method, - #oct-method, - #old-division, - #old-ne-operator, - #old-octal-literal, - #old-raise-syntax, - #parameter-unpacking, - #print-statement, - #raising-string, - #range-builtin-not-iterating, - #raw_input-builtin, - #raw-checker-failed, - #rdiv-method, - #reduce-builtin, - #reload-builtin, - #round-builtin, - #setslice-method, - #standarderror-builtin, - #suppressed-message, - #sys-max-int, - #unichr-builtin, - #unicode-builtin, - #unpacking-in-except, - #use-symbolic-message-instead, - #useless-suppression, - #using-cmp-argument, - #xrange-builtin, - #xreadlines-attribute, - #zip-builtin-not-iterating, + missing-module-docstring # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/.vscode/settings.json b/.vscode/settings.json index 9ba93ba..b0a9a65 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -63,4 +63,5 @@ "ultricies", "vestibulum" ], + "python.pythonPath": ".venv/bin/python", } diff --git a/derl.code-workspace b/derl.code-workspace index dff687a..62b8a25 100644 --- a/derl.code-workspace +++ b/derl.code-workspace @@ -14,7 +14,10 @@ "Regexes", "Twilio", "Youtube", + "aiohttp", + "aiounittest", "argparse", + "asyncio", "cheatsheet", "conda", "coveragerc", diff --git a/requirements-dev.txt b/requirements-dev.txt index c4a10aa..3ad2202 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,9 @@ # Requirements for developing +aiounittest==1.4.0 +autopep8==1.5.3 +isort==4.3.21 pylint==2.5.3 pytest-cov==2.10.0 pytest==5.4.3 python-coveralls==2.9.3 +rope==0.17.0 diff --git a/requirements.txt b/requirements.txt index 08d1850..2c96e17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,33 @@ # Requirements for running +aiohttp==3.6.2 +aiohttp-retry==1.0 +astroid==2.4.2 +async-timeout==3.0.1 +asyncio==3.4.3 +attrs==19.3.0 certifi==2020.4.5.2 chardet==3.0.4 +coverage==5.2 decorator==4.4.2 idna==2.9 +importlib-metadata==1.7.0 +lazy-object-proxy==1.4.3 +mccabe==0.6.1 +more-itertools==8.4.0 +multidict==4.7.6 +packaging==20.4 +pluggy==0.13.1 +py==1.9.0 +pycodestyle==2.6.0 +pyparsing==2.4.7 python-magic==0.4.18 -requests==2.24.0 +pyyaml==5.3.1 six==1.15.0 +toml==0.10.1 +typed-ast==1.4.1 urllib3==1.25.9 validators==0.15.0 +wcwidth==0.2.5 +wrapt==1.12.1 +yarl==1.4.2 +zipp==3.1.0 diff --git a/setup.cfg b/setup.cfg index abf3dba..86e14ca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,15 +35,38 @@ package_dir = python_requires = >= 3.6 setup_requires = pyscaffold>=3.2a0,<3.3a0 install_requires = + aiohttp==3.6.2 + aiohttp-retry==1.0 + astroid==2.4.2 + async-timeout==3.0.1 + asyncio==3.4.3 + attrs==19.3.0 certifi==2020.4.5.2 chardet==3.0.4 + coverage==5.2 decorator==4.4.2 idna==2.9 + importlib-metadata==1.7.0 + lazy-object-proxy==1.4.3 + mccabe==0.6.1 + more-itertools==8.4.0 + multidict==4.7.6 + packaging==20.4 + pluggy==0.13.1 + py==1.9.0 + pycodestyle==2.6.0 + pyparsing==2.4.7 python-magic==0.4.18 - requests==2.24.0 + pyyaml==5.3.1 six==1.15.0 + toml==0.10.1 + typed-ast==1.4.1 urllib3==1.25.9 validators==0.15.0 + wcwidth==0.2.5 + wrapt==1.12.1 + yarl==1.4.2 + zipp==3.1.0 [options.packages.find] where = src @@ -52,6 +75,7 @@ exclude = [options.extras_require] testing = + aiounittest==1.4.0 pytest-cov==2.10.0 pytest==5.4.3 python-coveralls==2.9.3 diff --git a/src/derl/dispatcher.py b/src/derl/dispatcher.py index ec0a1f1..9c4eeec 100644 --- a/src/derl/dispatcher.py +++ b/src/derl/dispatcher.py @@ -4,11 +4,14 @@ # Copyright 2020 Thomas Piekarski # - +import asyncio import logging +import sys + +from aiohttp import ClientTimeout +from aiohttp.client_exceptions import ClientConnectionError, TooManyRedirects +from aiohttp_retry import RetryClient -from requests import ConnectionError as RequestConnectionError, Session, Timeout, TooManyRedirects -from requests.adapters import HTTPAdapter from derl.tracker import get_tracker from derl import __version__ @@ -22,39 +25,48 @@ _DEFAULT_ADDITIONAL_HEADER = {"user-agent": _DEFAULT_USER_AGENT} -def _get_status_code(location: str, session: Session, timeout: int) -> int: +async def _get_status_code(location: str, client: RetryClient, retry: int) -> int: _tracker.stats.inc_requests() status_code = 0 try: _logger.debug("Requesting status code for %s", location) - status_code = session.get(location, timeout=timeout, headers=_DEFAULT_ADDITIONAL_HEADER).status_code - except Timeout: - _logger.debug("Waited for %i seconds, giving up getting %s", timeout, location) + async with client.get(location, retry_attempts=retry) as response: + status_code = response.status except TooManyRedirects: _logger.debug("Redirection Tango, danced enough with %s", location) - except RequestConnectionError: + except ClientConnectionError: _logger.debug("Connection Error occurred while getting %s", location) return status_code -def request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list: - +async def _request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list: if len(files) == 0: _logger.debug("No matches for HTTP(S) requests") return [] _logger.debug("Timeout for all HTTP(S) requests is %i seconds", timeout) - with Session() as session: - adaptor = HTTPAdapter(max_retries=retry) - - session.mount("http://", adaptor) - session.mount("https://", adaptor) + client_timeout = ClientTimeout(total=timeout) + async with RetryClient(headers=_DEFAULT_ADDITIONAL_HEADER, timeout=client_timeout) as client: for current_file in files: for current_url in current_file.urls: - current_url.status_code = _get_status_code(current_url.location, session, timeout) + current_url.status_code = await _get_status_code(current_url.location, client, retry) return files + + +def run_loop(files: list, retry: int, timeout: int) -> list: + _logger.info("Starting async dispatcher...") + + if (sys.version_info.major == 3 and sys.version_info.minor >= 7): + # Running Event Loop, Python >= 3.7.x + files = asyncio.run(_request(files, retry, timeout)) + else: + # Running Event Loop, Python < 3.7.x + event_loop = asyncio.get_event_loop() + files = event_loop.run_until_complete(_request(files, retry, timeout)) + + return files diff --git a/src/derl/main.py b/src/derl/main.py index cc3c7ff..415c912 100644 --- a/src/derl/main.py +++ b/src/derl/main.py @@ -11,7 +11,7 @@ from derl.checker import check_arguments from derl.collector import collect_context -from derl.dispatcher import request +from derl.dispatcher import run_loop from derl.filterer import filter_not_matching from derl.outputer import output from derl.parser import parse_args @@ -50,8 +50,7 @@ def main(args: list): filtered_files = filter_not_matching(searched_files) if args.dispatch: - filtered_files = request(filtered_files, args.retry, args.timeout) - + filtered_files = run_loop(filtered_files, args.retry, args.timeout) if args.context: filtered_files = collect_context(filtered_files) diff --git a/src/derl/model/stats.py b/src/derl/model/stats.py index 6f87478..53eb16b 100644 --- a/src/derl/model/stats.py +++ b/src/derl/model/stats.py @@ -5,7 +5,7 @@ # -class Stats(): +class Stats: directories = 0 files = 0 lines = 0 diff --git a/src/derl/processor.py b/src/derl/processor.py index 670e42f..04a8846 100644 --- a/src/derl/processor.py +++ b/src/derl/processor.py @@ -22,7 +22,7 @@ def process_file(file: TextIO) -> list: - _logger.debug("Spliting current file %s into lines...", file.name) + _logger.debug("Splitting current file %s into lines...", file.name) _tracker.stats.inc_files() try: diff --git a/tests/test_dispatcher.py b/tests/test_dispatcher.py index b5d3a5f..fdf577d 100644 --- a/tests/test_dispatcher.py +++ b/tests/test_dispatcher.py @@ -4,12 +4,10 @@ # Copyright 2020 Thomas Piekarski # -from unittest import TestCase -from unittest.mock import patch -from requests.exceptions import ConnectionError as RequestConnectionError, Timeout, TooManyRedirects +from aiounittest import AsyncTestCase -from derl.dispatcher import request +from derl.dispatcher import _request from derl.model.file import File @@ -20,33 +18,43 @@ def _build_test_files() -> list: return [test_file] -class DispatcherTest(TestCase): +class DispatcherTest(AsyncTestCase): - def test_request(self: "DispatcherTest"): - files = request(_build_test_files()) + # todo: Rewrite all remaining tests to use run_loop and not _request + + async def test_request(self: "DispatcherTest"): + files = await _request(_build_test_files()) self.assertEqual(files[0].urls[0].status_code, 200) - def test_dispatcher_without_any_files(self: "DispatcherTest"): - self.assertEqual(request([]), []) + async def test_dispatcher_without_any_files(self: "DispatcherTest"): + files = await _request([]) + + self.assertEqual(files, []) - @patch("requests.Session.get") - def test_timeout(self: "DispatcherTest", mocked_get: "Mock"): - mocked_get.side_effect = Timeout + # Following tests "seem" to work, but they do not! Tests do not wait for coroutines, + # although AsyncTestCase is used and upper two tests are working. Tried solutions: + # - pytest-asyncio - It does not work inside classes at all + # (See Issue at GitHub: https://github.com/pytest-dev/pytest-asyncio/issues/77) + # + # - IsolatedAsyncioTestCase - Class will be available with Python > 3.8.x + # - aiounittest - Seems to work, but only without Mocks + # + # -> How to use Mocks and return an Exception ith aiounittest? (Question for StackOverflow) + # - files = request(_build_test_files()) - self.assertEqual(files[0].urls[0].status_code, 0) + # todo: Try to write two _working_ tests for too many redirects and connection errors - @patch("requests.Session.get") - def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"): - mocked_get.side_effect = TooManyRedirects + # @patch("aiohttp_retry.RetryClient.get") + # async def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"): + # mocked_get.side_effect = TooManyRedirects - files = request(_build_test_files()) - self.assertEqual(files[0].urls[0].status_code, 0) + # files = await _request(_build_test_files()) + # self.assertEqual(files[0].urls[0].status_code, 0) - @patch("requests.Session.get") - def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"): - mocked_get.side_effect = RequestConnectionError + # @patch("aiohttp_retry.RetryClient.get") + # async def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"): + # mocked_get.side_effect = ClientConnectionError - files = request(_build_test_files()) - self.assertEqual(files[0].urls[0].status_code, 0) + # files = await _request(_build_test_files()) + # self.assertEqual(files[0].urls[0].status_code, 0)