Add a script to make review of pipeline logs easier #6

Open · wants to merge 8 commits into master

Changes from 1 commit
WIP: move LogViewer class into util module and add tests
mhidas committed Aug 1, 2018
commit 376946dc62ec0e00bbbf84b5bc3b1e4aa43747b6
90 changes: 1 addition & 89 deletions aodncore/bin/logview.py
@@ -5,99 +5,11 @@
"""

import argparse
from collections import OrderedDict
import os
import re
import sys


# location of logs
LOGDIR_BASE = '/sw/chef/src/tmp/p2_logs'
LOG_WATCH = LOGDIR_BASE + '/watchservice/pipeline_watchservice-stderr.log'
LOGDIR_CELERY = LOGDIR_BASE + '/celery'
LOGDIR_PROCESS = LOGDIR_BASE + '/process'

# regular expressions to match log format and define fields extracted from log
LOG_FIELDS = OrderedDict([
('time', r"(?P<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+"),
('level', r"(?P<level>[A-Z]+)\s+"),
    ('task_name', r"tasks\.(?P<task_name>\w+)"),
('task_id', r"\[(?P<task_id>[0-9a-f-]+)\]\s+"),
('message', r"(?P<message>.*)")
])
INPUT_REGEX = re.compile(''.join(LOG_FIELDS.values()))
DEFAULT_FORMAT = '{time:20} {level:>9} {message}\n'


class LogViewer(object):
"""
Class to parse logs written by pipelines and output various filtered or summary views.
"""

def __init__(self, logfile):
if not os.path.isfile(logfile):
raise ValueError('{logfile}: no such file!'.format(logfile=logfile))
self.logfile = logfile

def log_entries(self):
"""Parse the log and return a tuple (raw, data) for one log entry at a time, where
raw is te full text from the log, and data is a dictionary of extracted fields as
per INPUT_REGEX.

"""
# TODO: option to read from stdin
with open(self.logfile) as log:
for line in log:
line = line.strip()
m = INPUT_REGEX.match(line)
if m is None:
# TODO: deal with unformatted lines
continue
data = m.groupdict()

yield line, data

def filtered_entries(self, task_id=None, levels=None, pattern=None):
"""
Filter the tuples returned by log_entries according to the filters specified.

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
:param str pattern: only include log messages matching pattern (regular expression)
:return: tuple (raw, data) as for log_entries

"""
if pattern:
pattern = re.compile(pattern)

for raw, data in self.log_entries():
if task_id and data['task_id'] != task_id:
continue
if levels and data['level'] not in levels:
continue
if pattern and not pattern.search(data['message']):
continue
# TODO: filter by handler step?
yield raw, data

def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
"""
Print a filtered & re-formatted view of the log to stdout

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
:param str pattern: only include log messages matching pattern (regular expression)
:param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)

"""
for raw, data in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
line_out = fmt.format(**data)
try:
sys.stdout.write(line_out)
sys.stdout.flush()
except IOError:
# this can happen if output is piped to `head` or `less`
pass
from aodncore.util.logviewer import LOG_WATCH, LOGDIR_PROCESS, LogViewer


def find_log(input_file):
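
For reference, a minimal sketch of how a line in this log format is parsed by INPUT_REGEX; the sample line and task id are hypothetical, constructed to match the field patterns above:

from aodncore.util.logviewer import INPUT_REGEX

# hypothetical log line matching the format described by LOG_FIELDS
sample = ('2018-08-01 09:15:02,123 INFO '
          'tasks.ANMN_SA[0a1b2c3d-4e5f-6789-abcd-ef0123456789] '
          'Processing input file')

match = INPUT_REGEX.match(sample)
assert match is not None
print(match.groupdict())
# {'time': '2018-08-01 09:15:02,123', 'level': 'INFO', 'task_name': 'ANMN_SA',
#  'task_id': '0a1b2c3d-4e5f-6789-abcd-ef0123456789', 'message': 'Processing input file'}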
91 changes: 91 additions & 0 deletions aodncore/util/logviewer.py
@@ -0,0 +1,91 @@
import os
import re
import sys
from collections import OrderedDict

LOGDIR_BASE = '/sw/chef/src/tmp/p2_logs'
LOG_WATCH = LOGDIR_BASE + '/watchservice/pipeline_watchservice-stderr.log'
LOGDIR_CELERY = LOGDIR_BASE + '/celery'
LOGDIR_PROCESS = LOGDIR_BASE + '/process'

# regular expressions to match log format and define fields extracted from log
LOG_FIELDS = OrderedDict([
('time', r"(?P<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+"),
('level', r"(?P<level>[A-Z]+)\s+"),
    ('task_name', r"tasks\.(?P<task_name>\w+)"),
('task_id', r"\[(?P<task_id>[0-9a-f-]+)\]\s+"),
('message', r"(?P<message>.*)")
])
INPUT_REGEX = re.compile(''.join(LOG_FIELDS.values()))
DEFAULT_FORMAT = '{time:20} {level:>9} {message}\n'


class LogViewer(object):
"""
Class to parse logs written by pipelines and output various filtered or summary views.
"""

def __init__(self, logfile):
if not os.path.isfile(logfile):
raise ValueError('{logfile}: no such file!'.format(logfile=logfile))
self.logfile = logfile

def log_entries(self):
"""Parse the log and return a tuple (raw, data) for one log entry at a time, where
raw is te full text from the log, and data is a dictionary of extracted fields as
per INPUT_REGEX.

"""
# TODO: option to read from stdin
with open(self.logfile) as log:
for line in log:
line = line.strip()
m = INPUT_REGEX.match(line)
if m is None:
# TODO: deal with unformatted lines
continue
data = m.groupdict()

yield line, data

def filtered_entries(self, task_id=None, levels=None, pattern=None):
"""
Filter the tuples returned by log_entries according to the filters specified.

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
:param str pattern: only include log messages matching pattern (regular expression)
:return: tuple (raw, data) as for log_entries

"""
if pattern:
pattern = re.compile(pattern)

for raw, data in self.log_entries():
if task_id and data['task_id'] != task_id:
continue
if levels and data['level'] not in levels:
continue
if pattern and not pattern.search(data['message']):
continue
# TODO: filter by handler step?
yield raw, data

def show(self, task_id=None, levels=None, pattern=None, fmt=DEFAULT_FORMAT):
"""
Print a filtered & re-formatted view of the log to stdout

        :param str task_id: only include log entries for the given task uuid
        :param list levels: only include messages with the given logging levels
:param str pattern: only include log messages matching pattern (regular expression)
:param str fmt: output format (fmt.format() applied to dict of LOG_FIELDS extracted from log)

"""
for raw, data in self.filtered_entries(task_id=task_id, levels=levels, pattern=pattern):
line_out = fmt.format(**data)
try:
sys.stdout.write(line_out)
sys.stdout.flush()
except IOError:
# this can happen if output is piped to `head` or `less`
pass
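
As a usage sketch of the new module (the log file name and filter values are assumptions, following the LOGDIR_PROCESS convention defined above):

import os

from aodncore.util.logviewer import LOGDIR_PROCESS, LogViewer

# hypothetical per-task log file under the process log directory
logfile = os.path.join(LOGDIR_PROCESS, 'tasks.ANMN_SA.log')
viewer = LogViewer(logfile)

# print only WARNING/ERROR entries whose message mentions 'upload'
viewer.show(levels=['WARNING', 'ERROR'], pattern='upload')

# or iterate over the parsed entries directly
for raw, data in viewer.filtered_entries(levels=['ERROR']):
    print(data['time'], data['message'])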
22 changes: 22 additions & 0 deletions test_aodncore/util/test_logviewer.py
@@ -0,0 +1,22 @@
import os
import unittest

from aodncore.testlib import BaseTestCase
from aodncore.util.logviewer import LogViewer

from .test_misc import get_nonexistent_path


TEST_ROOT = os.path.dirname(__file__)
LOG_FILE = os.path.join(TEST_ROOT, 'tasks.ANMN_SA.log')


class TestLogViewer(BaseTestCase):
def test_init(self):
lv = LogViewer(LOG_FILE)
self.assertEqual(LOG_FILE, lv.logfile)
self.assertRaises(ValueError, LogViewer, get_nonexistent_path())


if __name__ == '__main__':
unittest.main()
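
A possible follow-up test for log_entries, sketched against a temporary file rather than the committed tasks.ANMN_SA.log fixture (the sample line is hypothetical but matches INPUT_REGEX):

import os
import tempfile
import unittest

from aodncore.util.logviewer import LogViewer


class TestLogEntriesParsing(unittest.TestCase):
    def test_parses_single_formatted_line(self):
        line = ('2018-08-01 09:15:02,123 INFO '
                'tasks.ANMN_SA[0a1b2c3d-4e5f-6789-abcd-ef0123456789] '
                'Processing input file\n')
        with tempfile.NamedTemporaryFile('w', suffix='.log', delete=False) as f:
            f.write(line)
        try:
            entries = list(LogViewer(f.name).log_entries())
            self.assertEqual(1, len(entries))
            raw, data = entries[0]
            self.assertEqual(line.strip(), raw)
            self.assertEqual('INFO', data['level'])
            self.assertEqual('ANMN_SA', data['task_name'])
            self.assertEqual('Processing input file', data['message'])
        finally:
            os.remove(f.name)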