Merge pull request #13 from whomydee/enhancement/add-precommit-hook
Enhancement/add precommit hook
whomydee authored Apr 6, 2023
2 parents 56d3732 + 7b41735 commit 7e5918a
Showing 10 changed files with 368 additions and 87 deletions.
27 changes: 27 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,27 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-yaml
      - id: check-added-large-files
        args: [--maxkb=64]
      - id: check-ast
      - id: check-case-conflict
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]

  - repo: https://github.com/psf/black
    rev: 23.3.0
    hooks:
      - id: black
        language_version: python3.8
        default_stages: [commit, push]

  - repo: https://github.com/pycqa/flake8
    rev: 6.0.0
    hooks:
      - id: flake8
        args: ['--max-line-length=100', '--max-complexity=18']
        exclude: tests
        stages: [commit, push]
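
Once this configuration is committed, contributors would typically enable the hooks locally with pre-commit install (pre-commit itself is added to requirements.txt later in this diff) and can run the whole suite against the repository with pre-commit run --all-files; the black- and flake8-style reformatting in the remaining files is the kind of change such a run produces.
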
4 changes: 2 additions & 2 deletions README.md
@@ -45,7 +45,7 @@ Please enter the location of the file:

### The Highest Hits for a Specific Time Interval

This fetches the maximum number of hits that came to your site within the specified time range. Here I used --time-interval 10; it
means I want to see all the hits that came within that timeframe, grouped into 10-minute intervals. I also used --topk 3,
meaning it will bring me the top 3 results that match that criterion.

@@ -99,5 +99,5 @@ Here are some demos of nginx-log-analyzer in action:
## 🚀 About Me
I'm **Shad** Humydee, a Software Engineer in AI/ML who happens to love the DevOps and architecture side of applications.

I currently work at Infolytx (https://infolytx.ai) as a **Software Engineer II** in AI - ML - Architecture.

39 changes: 26 additions & 13 deletions app/service/insight_provider_service.py
@@ -9,36 +9,49 @@


class InsightProviderService:
    def get_timeframes_by_hit_count(self, logs: List[str], time_interval_in_minutes: int, top_k: int = 3) -> \
            List[TimeFrameWiseHitDto]:

    def get_timeframes_by_hit_count(
        self, logs: List[str], time_interval_in_minutes: int, top_k: int = 3
    ) -> List[TimeFrameWiseHitDto]:
        count_of_hits = 0
        timeframe_wise_hit = []

        for single_line_log in logs:

            if not count_of_hits:
                interval_start_time = LogFilterUtil.get_date_time_in_comparable_format(
                    InfoExtractionUtil.get_timestamp_from_single_line_text(single_line_log)[0]
                    InfoExtractionUtil.get_timestamp_from_single_line_text(
                        single_line_log
                    )[0]
                )

                interval_end_time = interval_start_time + datetime.timedelta(minutes=time_interval_in_minutes)
                interval_end_time = interval_start_time + datetime.timedelta(
                    minutes=time_interval_in_minutes
                )

            event_timestamp = LogFilterUtil.get_date_time_in_comparable_format(
                InfoExtractionUtil.get_timestamp_from_single_line_text(single_line_log)[0]
                InfoExtractionUtil.get_timestamp_from_single_line_text(single_line_log)[
                    0
                ]
            )
            if interval_start_time <= event_timestamp <= interval_end_time:
                count_of_hits += 1
            else:
                timeframe = TimeRangeDto(start_time=interval_start_time.strftime('%d/%b/%Y:%H:%M:%S'),
                                         end_time=interval_end_time.strftime('%d/%b/%Y:%H:%M:%S'))
                timeframe_wise_hit.append(TimeFrameWiseHitDto(timeframe=timeframe, hit_count=count_of_hits))
                timeframe = TimeRangeDto(
                    start_time=interval_start_time.strftime("%d/%b/%Y:%H:%M:%S"),
                    end_time=interval_end_time.strftime("%d/%b/%Y:%H:%M:%S"),
                )
                timeframe_wise_hit.append(
                    TimeFrameWiseHitDto(timeframe=timeframe, hit_count=count_of_hits)
                )

                interval_start_time = interval_end_time
                interval_end_time = interval_start_time + datetime.timedelta(minutes=time_interval_in_minutes)
                interval_end_time = interval_start_time + datetime.timedelta(
                    minutes=time_interval_in_minutes
                )

                count_of_hits = 1

        timeframe_wise_hit = sorted(timeframe_wise_hit, key= lambda x: x.hit_count, reverse=True)
        timeframe_wise_hit = sorted(
            timeframe_wise_hit, key=lambda x: x.hit_count, reverse=True
        )

        return timeframe_wise_hit[0: top_k]
        return timeframe_wise_hit[0:top_k]
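
For context, a minimal sketch of how the reformatted method might be called from the repository root; the import path mirrors the file location shown above, and the sample access-log lines are fabricated for illustration, not taken from this commit:

    from app.service.insight_provider_service import InsightProviderService

    # Three made-up nginx-style access-log lines spanning two 10-minute windows.
    logs = [
        '10.0.0.1 - - [06/Apr/2023:10:00:05 +0000] "GET / HTTP/1.1" 200 612',
        '10.0.0.2 - - [06/Apr/2023:10:04:41 +0000] "GET /health HTTP/1.1" 200 2',
        '10.0.0.3 - - [06/Apr/2023:10:12:09 +0000] "POST /login HTTP/1.1" 302 0',
    ]

    service = InsightProviderService()
    # --time-interval 10 and --topk 3 from the README map onto these two arguments.
    for bucket in service.get_timeframes_by_hit_count(
        logs, time_interval_in_minutes=10, top_k=3
    ):
        print(bucket.timeframe.start_time, bucket.timeframe.end_time, bucket.hit_count)
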
16 changes: 8 additions & 8 deletions app/util/info_extraction_util.py
@@ -7,7 +7,7 @@
class InfoExtractionUtil:
    @staticmethod
    def get_ip_from_single_line_text(single_line_text: str) -> List[str]:
        regex_pattern_for_ip = re.compile(r'^\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
        regex_pattern_for_ip = re.compile(r"^\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")
        ip_addresses = regex_pattern_for_ip.findall(single_line_text)

        logger.debug(f"Found IP: {ip_addresses}")
@@ -16,24 +16,24 @@ def get_ip_from_single_line_text(single_line_text: str) -> List[str]:

    @staticmethod
    def get_timestamp_from_single_line_text(single_line_text: str) -> List[str]:
        regex_pattern_for_timestamp = re.compile(r'\[(\d{2}\/\w{3}\/\d{4}:\d{2}:\d{2}:\d{2})')
        regex_pattern_for_timestamp = re.compile(
            r"\[(\d{2}\/\w{3}\/\d{4}:\d{2}:\d{2}:\d{2})"
        )
        timestamp = regex_pattern_for_timestamp.findall(single_line_text)

        logger.debug(f"Timestamp (UTC): {timestamp}")

        return timestamp

    @staticmethod
    def get_status_code_from_single_line_text(single_line_text: str) -> Union[str, None]:
        regex_pattern_for_timestamp = re.compile(r'HTTP/1.1*. ([0-9][0-9][0-9])')
    def get_status_code_from_single_line_text(
        single_line_text: str,
    ) -> Union[str, None]:
        regex_pattern_for_timestamp = re.compile(r"HTTP/1.1*. ([0-9][0-9][0-9])")
        status_code = regex_pattern_for_timestamp.findall(single_line_text)

        logger.debug(f"Status Code: {status_code}")

        if status_code:
            return status_code[0]
        return None
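
A quick sketch of what the three extractors return for a typical combined-format access-log line; the line below is fabricated, and the import path mirrors the file location shown above:

    from app.util.info_extraction_util import InfoExtractionUtil

    line = '203.0.113.7 - - [06/Apr/2023:09:59:59 +0000] "GET /index.html HTTP/1.1" 404 153'

    # The IP pattern is anchored to the start of the line, so at most one address is returned.
    ips = InfoExtractionUtil.get_ip_from_single_line_text(line)              # ['203.0.113.7']
    # The timestamp is captured without the timezone offset.
    stamps = InfoExtractionUtil.get_timestamp_from_single_line_text(line)    # ['06/Apr/2023:09:59:59']
    # The status code is the first three-digit group after the HTTP/1.1 token, or None if absent.
    status = InfoExtractionUtil.get_status_code_from_single_line_text(line)  # '404'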




1 change: 1 addition & 0 deletions app/util/log_file_handler_util.py
@@ -26,6 +26,7 @@ def get_new_file_location() -> str:

    return new_file_location


def set_new_file_location(access_log_location: str) -> str:
    new_file_location = access_log_location

22 changes: 17 additions & 5 deletions app/util/log_filter_util.py
@@ -7,21 +7,33 @@

class LogFilterUtil:
    @staticmethod
    def get_date_time_in_comparable_format(date_time_as_string: str, format: str = '%d/%b/%Y:%H:%M:%S'):
    def get_date_time_in_comparable_format(
        date_time_as_string: str, format: str = "%d/%b/%Y:%H:%M:%S"
    ):
        return datetime.strptime(date_time_as_string, format)

    @staticmethod
    def get_logs_within_timeframe(logs_to_filter: List[str], time_range: TimeRangeDto):
        logs_within_timeframe = []

        start_time = LogFilterUtil.get_date_time_in_comparable_format(time_range.start_time)
        start_time = LogFilterUtil.get_date_time_in_comparable_format(
            time_range.start_time
        )
        end_time = LogFilterUtil.get_date_time_in_comparable_format(time_range.end_time)

        for single_line_log in logs_to_filter:
            time_stamp = InfoExtractionUtil.get_timestamp_from_single_line_text(single_line_log)
            time_stamp = InfoExtractionUtil.get_timestamp_from_single_line_text(
                single_line_log
            )
            if time_stamp:
                time_stamp_as_string = InfoExtractionUtil.get_timestamp_from_single_line_text(single_line_log)[0]
                time_stamp = LogFilterUtil.get_date_time_in_comparable_format(time_stamp_as_string)
                time_stamp_as_string = (
                    InfoExtractionUtil.get_timestamp_from_single_line_text(
                        single_line_log
                    )[0]
                )
                time_stamp = LogFilterUtil.get_date_time_in_comparable_format(
                    time_stamp_as_string
                )

                if start_time <= time_stamp <= end_time:
                    logs_within_timeframe.append(single_line_log)
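
A small sketch of the two helpers together; the TimeRangeDto import path is an assumption (only the class name is visible in this hunk), the log lines are fabricated, and the method is assumed to return logs_within_timeframe, since the end of the function is not shown in this hunk:

    from app.util.log_filter_util import LogFilterUtil
    from app.dto.time_range_dto import TimeRangeDto  # assumed module path

    logs = [
        '10.0.0.1 - - [06/Apr/2023:10:00:05 +0000] "GET / HTTP/1.1" 200 612',
        '10.0.0.2 - - [06/Apr/2023:11:30:00 +0000] "GET / HTTP/1.1" 200 612',
    ]

    # Boundaries use the same '%d/%b/%Y:%H:%M:%S' format the parser expects.
    window = TimeRangeDto(
        start_time="06/Apr/2023:10:00:00", end_time="06/Apr/2023:10:30:00"
    )
    kept = LogFilterUtil.get_logs_within_timeframe(logs, window)  # only the 10:00:05 line
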
83 changes: 82 additions & 1 deletion requirements.txt
@@ -1,5 +1,86 @@
aiohttp==3.8.4
aiosignal==1.3.1
astroid==2.15.1
async-generator==1.10
async-timeout==4.0.2
attrs==22.2.0
autopep8==2.0.2
bleach==6.0.0
certifi==2022.12.7
cfgv==3.3.1
chardet==5.1.0
charset-normalizer==3.1.0
click==8.1.3
cssselect==1.2.0
decorator==5.1.1
dicttoxml==1.7.16
dill==0.3.6
distlib==0.3.6
docutils==0.19
exceptiongroup==1.1.1
filelock==3.11.0
frozenlist==1.3.3
future==0.18.3
geocoder==1.38.1
geoip2==4.6.0
h11==0.14.0
identify==2.5.22
idna==3.4
importlib-metadata==6.1.0
importlib-resources==5.12.0
ip2geotools==0.1.6
IP2Location==8.9.0
isort==5.12.0
jaraco.classes==3.2.3
keyring==23.13.1
lazy-object-proxy==1.9.0
loguru==0.6.0
lxml==4.9.2
markdown-it-py==2.2.0
maxminddb==2.2.0
mccabe==0.7.0
mdurl==0.1.2
more-itertools==9.1.0
multidict==6.0.4
nodeenv==1.7.0
outcome==1.2.0
packaging==23.0
parameterized==0.9.0
pkg_resources==0.0.0
pip-review==1.3.0
pkginfo==1.9.6
platformdirs==3.2.0
pre-commit==3.2.2
pycodestyle==2.10.0
pydantic==1.10.7
Pygments==2.14.0
pylint==2.17.1
pyparsing==3.0.9
pyquery==2.0.0
PySocks==1.7.1
PyYAML==6.0
ratelim==0.1.6
readme-renderer==37.3
requests==2.28.2
requests-toolbelt==0.10.1
rfc3986==2.0.0
rich==13.3.3
selenium==4.8.3
six==1.16.0
sniffio==1.3.0
sortedcontainers==2.4.0
tomli==2.0.1
tomlkit==0.11.7
tqdm==4.65.0
trio==0.22.0
trio-websocket==0.10.2
twine==4.0.2
typed-ast==1.5.4
typing==3.7.4.3
typing_extensions==4.5.0
urllib3==1.26.15
virtualenv==20.21.0
webencodings==0.5.1
wrapt==1.15.0
wsproto==1.2.0
yarl==1.8.2
zipp==3.15.0