Skip to content

Commit

Permalink
Merge pull request #30 from tpiekarski/feature/migration-to-aiohttp
Browse files Browse the repository at this point in the history
Merging feature/migration-to-aiohttp, resolves #28
  • Loading branch information
tpiekarski authored Jul 15, 2020
2 parents 1350eb8 + 869ed56 commit 9b86d4b
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 127 deletions.
82 changes: 2 additions & 80 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -61,88 +61,10 @@ confidence=
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=
#apply-builtin,
#backtick,
#bad-inline-option,
#bad-python3-import,
#basestring-builtin,
#buffer-builtin,
#cmp-builtin,
#cmp-method,
#coerce-builtin,
#coerce-method,
#comprehension-escape,
#delslice-method,
#deprecated-itertools-function,
#deprecated-operator-function,
#deprecated-pragma,
#deprecated-str-translate-call,
#deprecated-string-function,
#deprecated-sys-function,
#deprecated-types-field,
#deprecated-urllib-function,
#dict-items-not-iterating,
#dict-iter-method,
#dict-keys-not-iterating,
#dict-values-not-iterating,
#dict-view-method,
#div-method,
#eq-without-hash,
#exception-escape,
#exception-message-attribute,
#execfile-builtin,
#file-builtin,
#file-ignored,
#filter-builtin-not-iterating,
#getslice-method,
#hex-method,
#idiv-method,
#import-star-module-level,
#indexing-exception,
#input-builtin,
#intern-builtin,
#invalid-str-codec,
#locally-disabled,
#long-builtin,
#long-suffix,
#map-builtin-not-iterating,
#metaclass-assignment,
fixme,
missing-class-docstring,
missing-function-docstring,
missing-module-docstring,
#next-method-called,
#next-method-defined,
#no-absolute-import,
#non-ascii-bytes-literal,
#nonzero-method,
#oct-method,
#old-division,
#old-ne-operator,
#old-octal-literal,
#old-raise-syntax,
#parameter-unpacking,
#print-statement,
#raising-string,
#range-builtin-not-iterating,
#raw_input-builtin,
#raw-checker-failed,
#rdiv-method,
#reduce-builtin,
#reload-builtin,
#round-builtin,
#setslice-method,
#standarderror-builtin,
#suppressed-message,
#sys-max-int,
#unichr-builtin,
#unicode-builtin,
#unpacking-in-except,
#use-symbolic-message-instead,
#useless-suppression,
#using-cmp-argument,
#xrange-builtin,
#xreadlines-attribute,
#zip-builtin-not-iterating,
missing-module-docstring

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@
"ultricies",
"vestibulum"
],
"python.pythonPath": ".venv/bin/python",
}
3 changes: 3 additions & 0 deletions derl.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
"Regexes",
"Twilio",
"Youtube",
"aiohttp",
"aiounittest",
"argparse",
"asyncio",
"cheatsheet",
"conda",
"coveragerc",
Expand Down
4 changes: 4 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Requirements for developing
aiounittest==1.4.0
autopep8==1.5.3
isort==4.3.21
pylint==2.5.3
pytest-cov==2.10.0
pytest==5.4.3
python-coveralls==2.9.3
rope==0.17.0
24 changes: 23 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,32 @@
# Requirements for running
aiohttp==3.6.2
aiohttp-retry==1.0
astroid==2.4.2
async-timeout==3.0.1
attrs==19.3.0
certifi==2020.4.5.2
chardet==3.0.4
coverage==5.2
decorator==4.4.2
idna==2.9
importlib-metadata==1.7.0
lazy-object-proxy==1.4.3
mccabe==0.6.1
more-itertools==8.4.0
multidict==4.7.6
packaging==20.4
pluggy==0.13.1
py==1.9.0
pycodestyle==2.6.0
pyparsing==2.4.7
python-magic==0.4.18
requests==2.24.0
pyyaml==5.3.1
six==1.15.0
toml==0.10.1
typed-ast==1.4.1
urllib3==1.25.9
validators==0.15.0
wcwidth==0.2.5
wrapt==1.12.1
yarl==1.4.2
zipp==3.1.0
25 changes: 24 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,37 @@ package_dir =
python_requires = >= 3.6
setup_requires = pyscaffold>=3.2a0,<3.3a0
install_requires =
aiohttp==3.6.2
aiohttp-retry==1.0
astroid==2.4.2
async-timeout==3.0.1
attrs==19.3.0
certifi==2020.4.5.2
chardet==3.0.4
coverage==5.2
decorator==4.4.2
idna==2.9
importlib-metadata==1.7.0
lazy-object-proxy==1.4.3
mccabe==0.6.1
more-itertools==8.4.0
multidict==4.7.6
packaging==20.4
pluggy==0.13.1
py==1.9.0
pycodestyle==2.6.0
pyparsing==2.4.7
python-magic==0.4.18
requests==2.24.0
pyyaml==5.3.1
six==1.15.0
toml==0.10.1
typed-ast==1.4.1
urllib3==1.25.9
validators==0.15.0
wcwidth==0.2.5
wrapt==1.12.1
yarl==1.4.2
zipp==3.1.0

[options.packages.find]
where = src
Expand All @@ -52,6 +74,7 @@ exclude =

[options.extras_require]
testing =
aiounittest==1.4.0
pytest-cov==2.10.0
pytest==5.4.3
python-coveralls==2.9.3
Expand Down
38 changes: 22 additions & 16 deletions src/derl/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
# Copyright 2020 Thomas Piekarski <[email protected]>
#


import asyncio
import logging

from requests import ConnectionError as RequestConnectionError, Session, Timeout, TooManyRedirects
from requests.adapters import HTTPAdapter
from aiohttp import ClientTimeout
from aiohttp.client_exceptions import ClientConnectionError, TooManyRedirects
from aiohttp_retry import RetryClient

from derl.tracker import get_tracker
from derl import __version__

Expand All @@ -22,39 +24,43 @@
_DEFAULT_ADDITIONAL_HEADER = {"user-agent": _DEFAULT_USER_AGENT}


def _get_status_code(location: str, session: Session, timeout: int) -> int:
async def _get_status_code(location: str, client: RetryClient, retry: int) -> int:
_tracker.stats.inc_requests()
status_code = 0

try:
_logger.debug("Requesting status code for %s", location)
status_code = session.get(location, timeout=timeout, headers=_DEFAULT_ADDITIONAL_HEADER).status_code
except Timeout:
_logger.debug("Waited for %i seconds, giving up getting %s", timeout, location)
async with client.get(location, retry_attempts=retry) as response:
status_code = response.status
except TooManyRedirects:
_logger.debug("Redirection Tango, danced enough with %s", location)
except RequestConnectionError:
except ClientConnectionError:
_logger.debug("Connection Error occurred while getting %s", location)

return status_code


def request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list:

async def _request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list:
if len(files) == 0:
_logger.debug("No matches for HTTP(S) requests")
return []

_logger.debug("Timeout for all HTTP(S) requests is %i seconds", timeout)

with Session() as session:
adaptor = HTTPAdapter(max_retries=retry)

session.mount("http://", adaptor)
session.mount("https://", adaptor)
client_timeout = ClientTimeout(total=timeout)

async with RetryClient(headers=_DEFAULT_ADDITIONAL_HEADER, timeout=client_timeout) as client:
for current_file in files:
for current_url in current_file.urls:
current_url.status_code = _get_status_code(current_url.location, session, timeout)
current_url.status_code = await _get_status_code(current_url.location, client, retry)

return files


def run_loop(files: list, retry: int, timeout: int) -> list:
    """Run the asynchronous dispatcher to completion from synchronous code.

    Logs a start message, obtains the event loop via asyncio.get_event_loop()
    and blocks until the _request coroutine for the given files has finished.

    :param files: files to hand over to _request
    :param retry: retry value forwarded to _request
    :param timeout: timeout value forwarded to _request
    :return: whatever the _request coroutine returns for these files
    """
    _logger.info("Starting async dispatcher...")

    # Acquire the loop first, then create and drive the coroutine on it.
    loop = asyncio.get_event_loop()

    return loop.run_until_complete(_request(files, retry, timeout))
5 changes: 2 additions & 3 deletions src/derl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from derl.checker import check_arguments
from derl.collector import collect_context
from derl.dispatcher import request
from derl.dispatcher import run_loop
from derl.filterer import filter_not_matching
from derl.outputer import output
from derl.parser import parse_args
Expand Down Expand Up @@ -50,8 +50,7 @@ def main(args: list):
filtered_files = filter_not_matching(searched_files)

if args.dispatch:
filtered_files = request(filtered_files, args.retry, args.timeout)

filtered_files = run_loop(filtered_files, args.retry, args.timeout)
if args.context:
filtered_files = collect_context(filtered_files)

Expand Down
2 changes: 1 addition & 1 deletion src/derl/model/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#


class Stats():
class Stats:
directories = 0
files = 0
lines = 0
Expand Down
2 changes: 1 addition & 1 deletion src/derl/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@


def process_file(file: TextIO) -> list:
_logger.debug("Spliting current file %s into lines...", file.name)
_logger.debug("Splitting current file %s into lines...", file.name)
_tracker.stats.inc_files()

try:
Expand Down
54 changes: 30 additions & 24 deletions tests/test_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
# Copyright 2020 Thomas Piekarski <[email protected]>
#

from unittest import TestCase

from unittest.mock import patch
from requests.exceptions import ConnectionError as RequestConnectionError, Timeout, TooManyRedirects
from aiounittest import AsyncTestCase

from derl.dispatcher import request
from derl.dispatcher import _request
from derl.model.file import File


Expand All @@ -20,33 +18,41 @@ def _build_test_files() -> list:
return [test_file]


class DispatcherTest(TestCase):
class DispatcherTest(AsyncTestCase):

def test_request(self: "DispatcherTest"):
files = request(_build_test_files())
# todo: Rewrite all remaining tests to use run_loop and not _request

async def test_request(self: "DispatcherTest"):
files = await _request(_build_test_files())

self.assertEqual(files[0].urls[0].status_code, 200)

def test_dispatcher_without_any_files(self: "DispatcherTest"):
self.assertEqual(request([]), [])
async def test_dispatcher_without_any_files(self: "DispatcherTest"):
files = await _request([])

self.assertEqual(files, [])

@patch("requests.Session.get")
def test_timeout(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = Timeout
# The following tests "seem" to work, but they do not! The tests do not wait for coroutines,
# although AsyncTestCase is used and the two tests above are working. Tried solutions:
# - pytest-asyncio - It does not work inside classes at all -> Question for StackOverflow
# - IsolatedAsyncioTestCase - Class is only available with Python >= 3.8
# - aiounittest - Seems to work, but only without Mocks
#
# -> How to use Mocks and return an Exception with aiounittest? (Question for StackOverflow)
#

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# todo: Try to write two _working_ tests for too many redirects and connection errors

@patch("requests.Session.get")
def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = TooManyRedirects
# @patch("aiohttp_retry.RetryClient.get")
# async def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"):
# mocked_get.side_effect = TooManyRedirects

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# files = await _request(_build_test_files())
# self.assertEqual(files[0].urls[0].status_code, 0)

@patch("requests.Session.get")
def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = RequestConnectionError
# @patch("aiohttp_retry.RetryClient.get")
# async def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"):
# mocked_get.side_effect = ClientConnectionError

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# files = await _request(_build_test_files())
# self.assertEqual(files[0].urls[0].status_code, 0)

0 comments on commit 9b86d4b

Please sign in to comment.