-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Thane Thomson
committed
May 16, 2011
0 parents
commit b6eb6cc
Showing
11 changed files
with
581 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
*.pyc | ||
*.swp | ||
*.egg-info/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Copyright 2011 Thane Thomson | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Django Web Log Parser | ||
===================== | ||
|
||
Copyright (c) 2011, Thane Thomson. See the LICENSE file for licensing information. | ||
|
||
Documentation coming soon! | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# | ||
# django-weblog-parser v0.1 | ||
# | ||
# Admin | ||
# | ||
|
||
from django.contrib import admin | ||
from weblogparser.models import LogFilePath, LogFile, LogEntry | ||
|
||
|
||
class LogFilePathAdmin(admin.ModelAdmin):
    """Admin options for LogFilePath; the change list shows the path column."""

    list_display = [
        'path',
    ]


# Make LogFilePath available in the default admin site.
admin.site.register(LogFilePath, LogFilePathAdmin)
|
||
|
||
class LogFileAdmin(admin.ModelAdmin):
    """Admin options for LogFile; lists location, bookkeeping dates and error count."""

    list_display = [
        'path',
        'filename',
        'created',
        'modified',
        'errors',
    ]


# Make LogFile available in the default admin site.
admin.site.register(LogFile, LogFileAdmin)
|
||
|
||
class LogEntryAdmin(admin.ModelAdmin):
    """Admin options for LogEntry; entries can be filtered by HTTP status."""

    list_display = [
        'timestamp',
        'log_file',
        'status',
        'bytes_returned',
    ]
    list_filter = [
        'status',
    ]


# Make LogEntry available in the default admin site.
admin.site.register(LogEntry, LogEntryAdmin)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# | ||
# Analytics for django-weblog-parser | ||
# | ||
# Requires panya-analytics | ||
# | ||
|
||
from datetime import timedelta | ||
from django.db.models import Count | ||
from analytics.basemetric import BaseMetric | ||
from weblogparser.models import LogEntry | ||
|
||
|
||
class PageImpressions(BaseMetric):
    """
    Page impressions metric: counts LogEntry rows with HTTP status 200
    that are flagged as pages and not flagged as robots.

    NOTE: Every row in the LogEntry table is assumed to be relevant to
    this instance; no further filtering is applied here.
    """

    uid = 'pageimpressions'
    title = 'Page Impressions'

    def calculate(self, start_dt, end_dt):
        """
        Return the number of matching entries whose timestamp satisfies
        start_dt <= timestamp < end_dt.
        """
        impressions = LogEntry.objects.filter(
            status=200,
            timestamp__gte=start_dt,
            timestamp__lt=end_dt,
            is_robot=False,
            is_page=True,
        )
        return impressions.count()

    def get_earliest_data_datetime(self):
        """
        Return the timestamp of the oldest matching entry, or None when
        no matching entry exists.
        """
        oldest_first = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
        ).order_by('timestamp')
        try:
            # Indexing an empty queryset raises IndexError.
            first_entry = oldest_first[0]
        except IndexError:
            return None
        return first_entry.timestamp
|
||
|
||
|
||
|
||
class Sessions(BaseMetric):
    """
    Sessions metric: counts the distinct session IDs seen on successful
    (status 200), non-robot page requests.
    """

    uid = 'sessions'
    title = 'Sessions'

    def calculate(self, start_dt, end_dt):
        """
        Return the number of distinct session_id values among matching
        entries with start_dt <= timestamp < end_dt.
        """
        page_hits = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
            timestamp__gte=start_dt,
            timestamp__lt=end_dt,
        )
        totals = page_hits.aggregate(Count('session_id', distinct=True))
        # aggregate() names the result after the field and the function.
        return totals['session_id__count']

    def get_earliest_data_datetime(self):
        """
        Return the timestamp of the oldest matching entry, or None when
        no matching entry exists.
        """
        oldest_first = LogEntry.objects.filter(
            status=200,
            is_robot=False,
            is_page=True,
        ).order_by('timestamp')
        try:
            # Indexing an empty queryset raises IndexError.
            first_entry = oldest_first[0]
        except IndexError:
            return None
        return first_entry.timestamp
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
# | ||
# django-weblog-parser v0.1 | ||
# | ||
# Models | ||
# | ||
|
||
import os | ||
from django.db import models | ||
from django.utils.translation import ugettext as _ | ||
from weblogparser import settings | ||
|
||
|
||
|
||
class LogFilePath(models.Model):
    """
    A base path shared by one or more log files.

    Storing the path once lets multiple logs live under the same path,
    which saves on database usage and makes it easy to migrate the
    database from one server to another.
    """

    # NOTE(review): `_` is the non-lazy `ugettext` imported at the top of
    # this module, so this text is translated when the module is imported;
    # `ugettext_lazy` may be intended -- confirm.
    path = models.TextField(
        help_text=_("The base path for one or more log files."),
        unique=True,
    )

    def __unicode__(self):
        """Return the stored base path."""
        return self.path
|
||
|
||
|
||
|
||
class LogFile(models.Model):
    """
    A single log file, located by its base path and file name, together
    with its format and parsing bookkeeping.
    """

    # NOTE(review): `_` is the non-lazy `ugettext` imported at the top of
    # this module; `ugettext_lazy` may be intended for field text -- confirm.

    # Where the file lives and how it is formatted.
    path = models.ForeignKey(
        LogFilePath,
        help_text=_("The base path for this log file."),
    )
    filename = models.CharField(
        help_text=_("The base file name of the log file within the base path."),
        max_length=200,
    )
    fmt = models.IntegerField(
        verbose_name=_('format'),
        help_text=_("The format to which this log file conforms."),
        choices=settings.LOGFILE_FORMAT_CHOICES,
        default=settings.LOGFILE_FORMAT_COMMON,
    )

    # Database record bookkeeping, maintained automatically on save.
    created = models.DateTimeField(
        help_text=_("When this log file was created in the database."),
        auto_now_add=True,
    )
    modified = models.DateTimeField(
        help_text=_("The last date/time that this log file was modified."),
        auto_now=True,
    )

    # Parsing state; `parsed` is empty until it is set.
    parsed = models.DateTimeField(
        help_text=_("The date/time at which this log file was completely parsed."),
        null=True,
        blank=True,
    )
    errors = models.IntegerField(
        help_text=_("The number of errors found while parsing this log file."),
        default=0,
    )

    def __unicode__(self):
        """Return the base path joined with the file name."""
        base_dir = u'%s' % self.path
        file_name = u'%s' % self.filename
        return os.path.join(base_dir, file_name)
|
||
|
||
|
||
|
||
class LogEntry(models.Model):
    """
    One request line taken from a specific log file, plus a few derived
    flags used when querying entries.
    """

    # NOTE(review): `_` is the non-lazy `ugettext` imported at the top of
    # this module; `ugettext_lazy` may be intended for field text -- confirm.

    # Fields taken from the log line; the help text names the matching
    # log format directive for each.
    log_file = models.ForeignKey(
        LogFile,
        related_name='entries',
        db_index=True,
        help_text=_("The log file in which this entry was found."),
    )
    remote_host = models.CharField(
        help_text=_("The remote host which made the request (%%h)."),
        max_length=100,
    )
    client_id = models.CharField(
        help_text=_("RFC 1413 identity of the client (%%l)."),
        max_length=100,
    )
    user_id = models.CharField(
        help_text=_("The user ID of the user requesting the document (%%u)."),
        max_length=100,
    )
    timestamp = models.DateTimeField(
        db_index=True,
        help_text=_("The date/time that the request was received (%%t)."),
    )
    request = models.TextField(
        help_text=_("The request line from the client (%%r)."),
    )
    status = models.IntegerField(
        db_index=True,
        help_text=_("The status code that the server sent back to the client (%%>s)."),
    )
    bytes_returned = models.IntegerField(
        help_text=_("The size of the object returned to the client (%%b)."),
    )
    referer = models.TextField(
        help_text=_("The \"Referer\" HTTP request header (\\\"%%{Referer}i\\\")."),
        null=True,
        blank=True,
    )
    user_agent = models.TextField(
        db_index=True,
        help_text=_("The \"User-Agent\" HTTP request header (\\\"%%{User-Agent}i\\\")."),
        null=True,
        blank=True,
    )
    session_id = models.CharField(
        db_index=True,
        help_text=_("The session ID, if any, depending on whether this is configured."),
        max_length=100,
        null=True,
        blank=True,
    )

    # Metadata parameters built up from entries.
    path = models.TextField(
        help_text=_("The path requested from the web server."),
        null=True,
        blank=True,
    )
    is_robot = models.BooleanField(
        help_text=_("Was this request generated by some sort of robot?"),
        default=False,
    )
    is_page = models.BooleanField(
        help_text=_("Was the response most likely an HTML document?"),
        default=True,
    )

    class Meta:
        verbose_name = _('log entry')
        verbose_name_plural = _('log entries')

    def __unicode__(self):
        """Return a short description: request time and remote host."""
        details = {
            'timestamp': self.timestamp.strftime("%Y-%m-%d %H:%M:%S %z"),
            'remote': self.remote_host,
        }
        return _("Request at %(timestamp)s from %(remote)s") % details
|
||
|
||
|
Oops, something went wrong.