Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Thane Thomson committed May 16, 2011
0 parents commit b6eb6cc
Show file tree
Hide file tree
Showing 11 changed files with 581 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.pyc
*.swp
*.egg-info/

14 changes: 14 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Copyright 2011 Thane Thomson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Django Web Log Parser
=====================

Copyright (c) 2011, Thane Thomson. See the LICENSE file for licensing information.

Documentation coming soon!

Empty file added weblogparser/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions weblogparser/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# django-weblog-parser v0.1
#
# Admin
#

from django.contrib import admin
from weblogparser.models import LogFilePath, LogFile, LogEntry


class LogFilePathAdmin(admin.ModelAdmin):
    """Admin options for ``LogFilePath`` objects."""

    # Single column on the change list: the stored base path.
    list_display = ['path']


# Make log file paths manageable through the Django admin site.
admin.site.register(LogFilePath, LogFilePathAdmin)


class LogFileAdmin(admin.ModelAdmin):
    """Admin options for ``LogFile`` objects."""

    # Columns shown on the change list, in display order.
    list_display = [
        'path',
        'filename',
        'created',
        'modified',
        'errors',
    ]


# Make log files manageable through the Django admin site.
admin.site.register(LogFile, LogFileAdmin)


class LogEntryAdmin(admin.ModelAdmin):
    """Admin options for ``LogEntry`` objects."""

    # Columns shown on the change list, in display order.
    list_display = [
        'timestamp',
        'log_file',
        'status',
        'bytes_returned',
    ]

    # Sidebar filter on the HTTP status code.
    list_filter = ['status']


# Make individual log entries browsable through the Django admin site.
admin.site.register(LogEntry, LogEntryAdmin)

60 changes: 60 additions & 0 deletions weblogparser/mod_analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Analytics for django-weblog-parser
#
# Requires panya-analytics
#

from datetime import timedelta
from django.db.models import Count
from analytics.basemetric import BaseMetric
from weblogparser.models import LogEntry


class PageImpressions(BaseMetric):
    """
    Page impressions metric.

    Counts the ``LogEntry`` rows that have HTTP status 200, are flagged
    as pages (``is_page=True``) and are not flagged as robot traffic
    (``is_robot=False``).

    NOTE: No further filtering is applied, so every row in the LogEntry
    table is assumed to be relevant to this instance.
    """

    uid = 'pageimpressions'
    title = 'Page Impressions'

    def calculate(self, start_dt, end_dt):
        """
        Return the number of page impressions whose timestamp is at or
        after ``start_dt`` and strictly before ``end_dt``.
        """
        impressions = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
            timestamp__gte=start_dt,
            timestamp__lt=end_dt,
        )
        return impressions.count()

    def get_earliest_data_datetime(self):
        """
        Return the timestamp of the oldest qualifying log entry, or
        ``None`` when no such entry exists.
        """
        oldest_first = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
        ).order_by('timestamp')

        # Indexing an empty queryset raises IndexError.
        try:
            earliest = oldest_first[0]
        except IndexError:
            return None
        return earliest.timestamp




class Sessions(BaseMetric):
    """
    Sessions metric.

    Counts the distinct ``session_id`` values found on ``LogEntry`` rows
    that have HTTP status 200, are flagged as pages and are not flagged
    as robot traffic.
    """

    uid = 'sessions'
    title = 'Sessions'

    def calculate(self, start_dt, end_dt):
        """
        Return the number of distinct session IDs seen on qualifying
        entries whose timestamp is at or after ``start_dt`` and strictly
        before ``end_dt``.
        """
        in_window = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
            timestamp__gte=start_dt,
            timestamp__lt=end_dt,
        )
        totals = in_window.aggregate(Count('session_id', distinct=True))
        # Django names the aggregate "<field>__count" by default.
        return totals['session_id__count']

    def get_earliest_data_datetime(self):
        """
        Return the timestamp of the oldest qualifying log entry, or
        ``None`` when no such entry exists.
        """
        oldest_first = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
        ).order_by('timestamp')

        # Indexing an empty queryset raises IndexError.
        try:
            earliest = oldest_first[0]
        except IndexError:
            return None
        return earliest.timestamp


160 changes: 160 additions & 0 deletions weblogparser/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#
# django-weblog-parser v0.1
#
# Models
#

import os

from django.db import models
from django.utils.translation import ugettext as _
from django.utils.translation import ugettext_lazy

from weblogparser import settings



class LogFilePath(models.Model):
    """
    A base path shared by one or more log files.

    Allows for multiple logs to be stored in the same path, saving on
    database usage and allowing for easy migration of the database from
    one server to another.
    """

    # Field options are evaluated once, when the class is defined at
    # import time, so the help text is marked with ugettext_lazy and is
    # translated when rendered rather than frozen at import.
    path = models.TextField(
        unique=True,
        help_text=ugettext_lazy("The base path for one or more log files."),
    )

    def __unicode__(self):
        """Return the stored base path."""
        return self.path




class LogFile(models.Model):
    """
    Representation of a log file.

    A log file is identified by a base path (``LogFilePath``) plus a
    file name, and carries bookkeeping about its format and parsing.
    """

    # NOTE: all user-visible field options below use ugettext_lazy.
    # They are evaluated once at import time, so the eager ugettext
    # would freeze them in whatever language was active during import.

    path = models.ForeignKey(
        LogFilePath,
        help_text=ugettext_lazy("The base path for this log file."),
    )
    filename = models.CharField(
        max_length=200,
        help_text=ugettext_lazy("The base file name of the log file within the base path."),
    )
    # Allowed values and the default come from weblogparser.settings.
    fmt = models.IntegerField(
        default=settings.LOGFILE_FORMAT_COMMON,
        choices=settings.LOGFILE_FORMAT_CHOICES,
        verbose_name=ugettext_lazy('format'),
        help_text=ugettext_lazy("The format to which this log file conforms."),
    )

    # Set automatically when the row is first created.
    created = models.DateTimeField(
        auto_now_add=True,
        help_text=ugettext_lazy("When this log file was created in the database."),
    )
    # Refreshed automatically every time the row is saved.
    modified = models.DateTimeField(
        auto_now=True,
        help_text=ugettext_lazy("The last date/time that this log file was modified."),
    )

    # Nullable, so a log file can exist without a recorded parse time.
    parsed = models.DateTimeField(
        blank=True,
        null=True,
        help_text=ugettext_lazy("The date/time at which this log file was completely parsed."),
    )
    errors = models.IntegerField(
        default=0,
        help_text=ugettext_lazy("The number of errors found while parsing this log file."),
    )

    def __unicode__(self):
        """Return the base path joined with the file name."""
        return os.path.join(u'%s' % self.path, u'%s' % self.filename)




class LogEntry(models.Model):
    """
    A single web log entry from a specific log file.

    The first group of fields stores values taken from the log line;
    each help text quotes, in parentheses, the log-format directive the
    value corresponds to. The second group (``path``, ``is_robot``,
    ``is_page``) holds metadata built up from the entry.
    """

    # NOTE: all user-visible field and Meta options below use
    # ugettext_lazy. They are evaluated once at import time, so the
    # eager ugettext would freeze them in whatever language was active
    # during import.

    log_file = models.ForeignKey(
        LogFile,
        help_text=ugettext_lazy("The log file in which this entry was found."),
        db_index=True,
        related_name='entries',
    )
    remote_host = models.CharField(
        max_length=100,
        help_text=ugettext_lazy("The remote host which made the request (%%h)."),
    )
    client_id = models.CharField(
        max_length=100,
        help_text=ugettext_lazy("RFC 1413 identity of the client (%%l)."),
    )
    user_id = models.CharField(
        max_length=100,
        help_text=ugettext_lazy("The user ID of the user requesting the document (%%u)."),
    )
    timestamp = models.DateTimeField(
        help_text=ugettext_lazy("The date/time that the request was received (%%t)."),
        db_index=True,
    )
    request = models.TextField(
        help_text=ugettext_lazy("The request line from the client (%%r)."),
    )
    status = models.IntegerField(
        help_text=ugettext_lazy("The status code that the server sent back to the client (%%>s)."),
        db_index=True,
    )
    bytes_returned = models.IntegerField(
        help_text=ugettext_lazy("The size of the object returned to the client (%%b)."),
    )
    referer = models.TextField(
        blank=True,
        null=True,
        help_text=ugettext_lazy("The \"Referer\" HTTP request header (\\\"%%{Referer}i\\\")."),
    )
    user_agent = models.TextField(
        blank=True,
        null=True,
        help_text=ugettext_lazy("The \"User-Agent\" HTTP request header (\\\"%%{User-Agent}i\\\")."),
        db_index=True,
    )
    session_id = models.CharField(
        max_length=100,
        blank=True,
        null=True,
        help_text=ugettext_lazy("The session ID, if any, depending on whether this is configured."),
        db_index=True,
    )

    # metadata parameters built up from entries
    path = models.TextField(
        blank=True,
        null=True,
        help_text=ugettext_lazy("The path requested from the web server."),
    )
    is_robot = models.BooleanField(
        default=False,
        help_text=ugettext_lazy("Was this request generated by some sort of robot?"),
    )
    is_page = models.BooleanField(
        default=True,
        help_text=ugettext_lazy("Was the response most likely an HTML document?"),
    )

    class Meta:
        verbose_name = ugettext_lazy('log entry')
        verbose_name_plural = ugettext_lazy('log entries')

    def __unicode__(self):
        """
        Return a short description built from the entry's timestamp and
        remote host.
        """
        # Evaluated per call, so the eager ugettext (``_``) is the right
        # choice here: it translates into the language active right now.
        return _("Request at %(timestamp)s from %(remote)s") % {
            'timestamp': self.timestamp.strftime("%Y-%m-%d %H:%M:%S %z"),
            'remote': self.remote_host,
        }



Loading

0 comments on commit b6eb6cc

Please sign in to comment.