Skip to content

Commit

Permalink
Merge pull request #36 from tpiekarski/feature/streamlining-logging-output
Browse files Browse the repository at this point in the history

Streamlining logging output at level info and debug, resolves #12
  • Loading branch information
tpiekarski authored Jul 20, 2020
2 parents 956a4de + 62a2e7c commit 80d2530
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 19 deletions.
25 changes: 14 additions & 11 deletions src/derl/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,43 @@
_logger = getLogger(__name__)


def _is_directory(value: str) -> bool:
    """Return True when *value* names an existing directory on disk."""
    _logger.debug("Checking provided directory %s", value)

    return path.isdir(value)


def _is_retry_value(value: int) -> bool:
    """Return True when the retry count is in the supported range 1..10."""
    _logger.debug("Checking provided retry value %i", value)
    return 0 < value <= 10


def _is_timeout_value(value: int) -> bool:
    """Return True for a strictly positive timeout value."""
    _logger.debug("Checking provided timeout value %i", value)
    return 0 < value


def is_text_file(file: str) -> bool:
    """Return True when *file* is a regular file whose MIME type is text/*.

    NOTE(review): despite the ``str`` annotation, the body calls
    ``file.is_file()``, so callers presumably pass a ``pathlib.Path`` —
    confirm against call sites and tighten the annotation.
    """
    _logger.debug("Checking provided file %s", file)
    # from_file sniffs the file's MIME type (e.g. "text/plain").
    mimetype = from_file(str(file), mime=True)

    return file.is_file() and mimetype[:4] == "text"


def is_timeout_value(value: int) -> bool:
    """Return True for a strictly positive timeout value.

    NOTE(review): appears to duplicate the private ``_is_timeout_value``
    defined above (minus its debug logging) — confirm no external callers
    remain and remove this public duplicate.
    """
    return value > 0


def is_valid_url(value: str) -> bool:
    """Return a truthy result when *value* parses as a valid URL.

    Delegates to the module-level ``url`` validator; its return value is
    used in boolean context by callers.
    """
    _logger.debug("Checking provided URL %s", value)
    return url(value)


def check_arguments(args: Namespace):
    """Validate parsed command-line arguments, exiting on the first failure.

    Checks timeout, retry and directory in that order; on the first invalid
    value it logs an error and terminates via ``sys.exit`` with the matching
    module-level status code (``_INVALID_TIMEOUT``, ``_INVALID_RETRY`` or
    ``_INVALID_DIRECTORY``).
    """
    if not _is_timeout_value(args.timeout):
        _logger.error("Invalid timeout, timeout must be greater than 0")
        sys.exit(_INVALID_TIMEOUT)

    if not _is_retry_value(args.retry):
        _logger.error("Invalid retry, retry must be greater than 0 and less or equal than 10")
        sys.exit(_INVALID_RETRY)

    if not _is_directory(args.directory):
        _logger.error("Cannot access '%s': No such directory", args.directory)
        sys.exit(_INVALID_DIRECTORY)
3 changes: 3 additions & 0 deletions src/derl/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@


def _extract_context(url: URL, lines: list) -> list:
_logger.debug("Extracting context for the URL %s at line number %i", url.location, url.line_number)
context = []
line_index = url.line_number - 1

Expand All @@ -29,7 +30,9 @@ def _extract_context(url: URL, lines: list) -> list:


def collect_context(files: list) -> list:
_logger.debug("Collecting context")
for current_file in files:
_logger.debug("Collecting context for file %s", current_file.filename)
for current_url in current_file.urls:
with open(current_file.filename, "r", encoding=_DEFAULT_ENCODING) as open_file:
lines = open_file.readlines()
Expand Down
2 changes: 1 addition & 1 deletion src/derl/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ async def _request(files: list, retry: int = _DEFAULT_RETRY, timeout: int = _DEF
_logger.debug("No matches for HTTP(S) requests")
return []

_logger.debug("Timeout for all HTTP(S) requests is %i seconds", timeout)
_logger.debug("Timeout is %i seconds and retry is %i for all HTTP(S) requests", timeout, retry)

client_timeout = ClientTimeout(total=timeout)

Expand Down
1 change: 1 addition & 0 deletions src/derl/filterer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def filter_not_matching(files: list) -> list:

for current_file in files:
if current_file.contains_urls():
_logger.debug("%s contains URLs", current_file.filename)
filtered_files.append(current_file)

del files
Expand Down
6 changes: 5 additions & 1 deletion src/derl/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
#

from argparse import ArgumentParser, HelpFormatter, Namespace
from logging import DEBUG, INFO
from logging import DEBUG, INFO, getLogger

from derl import __version__
from derl.dispatcher import _DEFAULT_RETRY, _DEFAULT_TIMEOUT

_logger = getLogger(__name__)


def parse_args(args: list) -> Namespace:
_logger.debug("Parsing command line arguments")

parser = ArgumentParser(
prog="derl",
formatter_class=lambda prog: HelpFormatter(prog, max_help_position=35, width=90),
Expand Down
5 changes: 3 additions & 2 deletions src/derl/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from re import compile as rcompile
from typing import TextIO

from derl.checker import is_text_file, is_url
from derl.checker import is_text_file, is_valid_url
from derl.model.file import File
from derl.model.url import URL
from derl.tracker import get_tracker
Expand Down Expand Up @@ -69,12 +69,13 @@ def process_line(file: TextIO, line: tuple, urls: list) -> list:


def process_token(file: TextIO, token: str, line_number: int) -> URL:
_logger.debug("Processing token %s...", token)
_tracker.stats.inc_tokens()

match = _pattern.match(token)
url = None

if match and is_url(match.string):
if match and is_valid_url(match.string):
_logger.info("Found a match (%s) in file '%s'", match.string, file.name)
_tracker.stats.inc_urls()
url = URL(match.string, line_number)
Expand Down
2 changes: 1 addition & 1 deletion src/derl/searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ def search_urls(files: list) -> list:
with open(current_entry.filename, "r", encoding=_DEFAULT_ENCODING) as current_file:
current_entry.urls = process_file(current_file)

_logger.debug("Searched %i files", len(files))
_logger.info("Finished searching %i files", len(files))

return files
6 changes: 3 additions & 3 deletions tests/test_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from pathlib import Path
from unittest import TestCase

from derl.checker import is_directory, is_text_file
from derl.checker import _is_directory, is_text_file


class CheckerTest(TestCase):

def test_is_directory(self: "CheckerTest"):
self.assertTrue(is_directory("tests/test-directory"))
self.assertFalse(is_directory("tests/not-existent-directory"))
self.assertTrue(_is_directory("tests/test-directory"))
self.assertFalse(_is_directory("tests/not-existent-directory"))

def test_is_text_file(self: "CheckerTest"):
self.assertTrue(is_text_file(Path("tests/test-files/plain-text")))
Expand Down

0 comments on commit 80d2530

Please sign in to comment.