Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merging feature/migration-to-aiohttp, resolves #28 (Work-in-progress), #29

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 2 additions & 80 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -61,88 +61,10 @@ confidence=
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=
#apply-builtin,
#backtick,
#bad-inline-option,
#bad-python3-import,
#basestring-builtin,
#buffer-builtin,
#cmp-builtin,
#cmp-method,
#coerce-builtin,
#coerce-method,
#comprehension-escape,
#delslice-method,
#deprecated-itertools-function,
#deprecated-operator-function,
#deprecated-pragma,
#deprecated-str-translate-call,
#deprecated-string-function,
#deprecated-sys-function,
#deprecated-types-field,
#deprecated-urllib-function,
#dict-items-not-iterating,
#dict-iter-method,
#dict-keys-not-iterating,
#dict-values-not-iterating,
#dict-view-method,
#div-method,
#eq-without-hash,
#exception-escape,
#exception-message-attribute,
#execfile-builtin,
#file-builtin,
#file-ignored,
#filter-builtin-not-iterating,
#getslice-method,
#hex-method,
#idiv-method,
#import-star-module-level,
#indexing-exception,
#input-builtin,
#intern-builtin,
#invalid-str-codec,
#locally-disabled,
#long-builtin,
#long-suffix,
#map-builtin-not-iterating,
#metaclass-assignment,
fixme,
missing-class-docstring,
missing-function-docstring,
missing-module-docstring,
#next-method-called,
#next-method-defined,
#no-absolute-import,
#non-ascii-bytes-literal,
#nonzero-method,
#oct-method,
#old-division,
#old-ne-operator,
#old-octal-literal,
#old-raise-syntax,
#parameter-unpacking,
#print-statement,
#raising-string,
#range-builtin-not-iterating,
#raw_input-builtin,
#raw-checker-failed,
#rdiv-method,
#reduce-builtin,
#reload-builtin,
#round-builtin,
#setslice-method,
#standarderror-builtin,
#suppressed-message,
#sys-max-int,
#unichr-builtin,
#unicode-builtin,
#unpacking-in-except,
#use-symbolic-message-instead,
#useless-suppression,
#using-cmp-argument,
#xrange-builtin,
#xreadlines-attribute,
#zip-builtin-not-iterating,
missing-module-docstring

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@
"ultricies",
"vestibulum"
],
"python.pythonPath": ".venv/bin/python",
}
3 changes: 3 additions & 0 deletions derl.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
"Regexes",
"Twilio",
"Youtube",
"aiohttp",
"aiounittest",
"argparse",
"asyncio",
"cheatsheet",
"conda",
"coveragerc",
Expand Down
4 changes: 4 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Requirements for developing
aiounittest==1.4.0
autopep8==1.5.3
isort==4.3.21
pylint==2.5.3
pytest-cov==2.10.0
pytest==5.4.3
python-coveralls==2.9.3
rope==0.17.0
25 changes: 24 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,33 @@
# Requirements for running
aiohttp==3.6.2
aiohttp-retry==1.0
astroid==2.4.2
async-timeout==3.0.1
asyncio==3.4.3
attrs==19.3.0
certifi==2020.4.5.2
chardet==3.0.4
coverage==5.2
decorator==4.4.2
idna==2.9
importlib-metadata==1.7.0
lazy-object-proxy==1.4.3
mccabe==0.6.1
more-itertools==8.4.0
multidict==4.7.6
packaging==20.4
pluggy==0.13.1
py==1.9.0
pycodestyle==2.6.0
pyparsing==2.4.7
python-magic==0.4.18
requests==2.24.0
pyyaml==5.3.1
six==1.15.0
toml==0.10.1
typed-ast==1.4.1
urllib3==1.25.9
validators==0.15.0
wcwidth==0.2.5
wrapt==1.12.1
yarl==1.4.2
zipp==3.1.0
26 changes: 25 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,38 @@ package_dir =
python_requires = >= 3.6
setup_requires = pyscaffold>=3.2a0,<3.3a0
install_requires =
aiohttp==3.6.2
aiohttp-retry==1.0
astroid==2.4.2
async-timeout==3.0.1
asyncio==3.4.3
attrs==19.3.0
certifi==2020.4.5.2
chardet==3.0.4
coverage==5.2
decorator==4.4.2
idna==2.9
importlib-metadata==1.7.0
lazy-object-proxy==1.4.3
mccabe==0.6.1
more-itertools==8.4.0
multidict==4.7.6
packaging==20.4
pluggy==0.13.1
py==1.9.0
pycodestyle==2.6.0
pyparsing==2.4.7
python-magic==0.4.18
requests==2.24.0
pyyaml==5.3.1
six==1.15.0
toml==0.10.1
typed-ast==1.4.1
urllib3==1.25.9
validators==0.15.0
wcwidth==0.2.5
wrapt==1.12.1
yarl==1.4.2
zipp==3.1.0

[options.packages.find]
where = src
Expand All @@ -52,6 +75,7 @@ exclude =

[options.extras_require]
testing =
aiounittest==1.4.0
pytest-cov==2.10.0
pytest==5.4.3
python-coveralls==2.9.3
Expand Down
44 changes: 28 additions & 16 deletions src/derl/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
# Copyright 2020 Thomas Piekarski <[email protected]>
#


import asyncio
tpiekarski marked this conversation as resolved.
Show resolved Hide resolved
import logging
import sys

from aiohttp import ClientTimeout
from aiohttp.client_exceptions import ClientConnectionError, TooManyRedirects
from aiohttp_retry import RetryClient

from requests import ConnectionError as RequestConnectionError, Session, Timeout, TooManyRedirects
from requests.adapters import HTTPAdapter
from derl.tracker import get_tracker
from derl import __version__

Expand All @@ -22,39 +25,48 @@
_DEFAULT_ADDITIONAL_HEADER = {"user-agent": _DEFAULT_USER_AGENT}


def _get_status_code(location: str, session: Session, timeout: int) -> int:
async def _get_status_code(location: str, client: RetryClient, retry: int) -> int:
_tracker.stats.inc_requests()
status_code = 0

try:
_logger.debug("Requesting status code for %s", location)
status_code = session.get(location, timeout=timeout, headers=_DEFAULT_ADDITIONAL_HEADER).status_code
except Timeout:
_logger.debug("Waited for %i seconds, giving up getting %s", timeout, location)
async with client.get(location, retry_attempts=retry) as response:
status_code = response.status
except TooManyRedirects:
_logger.debug("Redirection Tango, danced enough with %s", location)
except RequestConnectionError:
except ClientConnectionError:
_logger.debug("Connection Error occurred while getting %s", location)

return status_code


def request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list:

async def _request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEFAULT_TIMEOUT) -> list:
if len(files) == 0:
_logger.debug("No matches for HTTP(S) requests")
return []

_logger.debug("Timeout for all HTTP(S) requests is %i seconds", timeout)

with Session() as session:
adaptor = HTTPAdapter(max_retries=retry)

session.mount("http://", adaptor)
session.mount("https://", adaptor)
client_timeout = ClientTimeout(total=timeout)

async with RetryClient(headers=_DEFAULT_ADDITIONAL_HEADER, timeout=client_timeout) as client:
for current_file in files:
for current_url in current_file.urls:
current_url.status_code = _get_status_code(current_url.location, session, timeout)
current_url.status_code = await _get_status_code(current_url.location, client, retry)

return files


def run_loop(files: list, retry: int, timeout: int) -> list:
_logger.info("Starting async dispatcher...")

if (sys.version_info.major == 3 and sys.version_info.minor >= 7):
# Running Event Loop, Python >= 3.7.x
files = asyncio.run(_request(files, retry, timeout))
tpiekarski marked this conversation as resolved.
Show resolved Hide resolved
else:
# Running Event Loop, Python < 3.7.x
event_loop = asyncio.get_event_loop()
files = event_loop.run_until_complete(_request(files, retry, timeout))

return files
5 changes: 2 additions & 3 deletions src/derl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from derl.checker import check_arguments
from derl.collector import collect_context
from derl.dispatcher import request
from derl.dispatcher import run_loop
from derl.filterer import filter_not_matching
from derl.outputer import output
from derl.parser import parse_args
Expand Down Expand Up @@ -50,8 +50,7 @@ def main(args: list):
filtered_files = filter_not_matching(searched_files)

if args.dispatch:
filtered_files = request(filtered_files, args.retry, args.timeout)

filtered_files = run_loop(filtered_files, args.retry, args.timeout)
if args.context:
filtered_files = collect_context(filtered_files)

Expand Down
2 changes: 1 addition & 1 deletion src/derl/model/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#


class Stats():
class Stats:
directories = 0
files = 0
lines = 0
Expand Down
2 changes: 1 addition & 1 deletion src/derl/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@


def process_file(file: TextIO) -> list:
_logger.debug("Spliting current file %s into lines...", file.name)
_logger.debug("Splitting current file %s into lines...", file.name)
_tracker.stats.inc_files()

try:
Expand Down
56 changes: 32 additions & 24 deletions tests/test_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
# Copyright 2020 Thomas Piekarski <[email protected]>
#

from unittest import TestCase

from unittest.mock import patch
from requests.exceptions import ConnectionError as RequestConnectionError, Timeout, TooManyRedirects
from aiounittest import AsyncTestCase

from derl.dispatcher import request
from derl.dispatcher import _request
from derl.model.file import File


Expand All @@ -20,33 +18,43 @@ def _build_test_files() -> list:
return [test_file]


class DispatcherTest(TestCase):
class DispatcherTest(AsyncTestCase):

def test_request(self: "DispatcherTest"):
files = request(_build_test_files())
# todo: Rewrite all remaining tests to use run_loop and not _request

async def test_request(self: "DispatcherTest"):
files = await _request(_build_test_files())

self.assertEqual(files[0].urls[0].status_code, 200)

def test_dispatcher_without_any_files(self: "DispatcherTest"):
self.assertEqual(request([]), [])
async def test_dispatcher_without_any_files(self: "DispatcherTest"):
files = await _request([])

self.assertEqual(files, [])

@patch("requests.Session.get")
def test_timeout(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = Timeout
# The following tests "seem" to work, but they do not! They do not wait for coroutines,
# although AsyncTestCase is used and the two tests above are working. Tried solutions:
# - pytest-asyncio - It does not work inside classes at all
# (See Issue at GitHub: https://github.com/pytest-dev/pytest-asyncio/issues/77)
#
# - IsolatedAsyncioTestCase - Class is only available with Python >= 3.8
# - aiounittest - Seems to work, but only without Mocks
#
# -> How to use Mocks and return an Exception with aiounittest? (Question for StackOverflow)
#

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# todo: Try to write two _working_ tests for too many redirects and connection errors

@patch("requests.Session.get")
def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = TooManyRedirects
# @patch("aiohttp_retry.RetryClient.get")
# async def test_too_many_redirects(self: "DispatcherTest", mocked_get: "Mock"):
# mocked_get.side_effect = TooManyRedirects

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# files = await _request(_build_test_files())
# self.assertEqual(files[0].urls[0].status_code, 0)

@patch("requests.Session.get")
def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"):
mocked_get.side_effect = RequestConnectionError
# @patch("aiohttp_retry.RetryClient.get")
# async def test_connection_error(self: "DispatcherTest", mocked_get: "Mock"):
# mocked_get.side_effect = ClientConnectionError

files = request(_build_test_files())
self.assertEqual(files[0].urls[0].status_code, 0)
# files = await _request(_build_test_files())
# self.assertEqual(files[0].urls[0].status_code, 0)