Skip to content

Commit

Permalink
Basic API listener works. Example works and runs on enwiki. 70% unit …
Browse files Browse the repository at this point in the history
…tests. Version 0.0.1. Can be registered with PyPI.
  • Loading branch information
halfak committed Aug 3, 2014
1 parent 15eae41 commit aafca6b
Show file tree
Hide file tree
Showing 44 changed files with 2,178 additions and 334 deletions.
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0.1
39 changes: 39 additions & 0 deletions examples/listen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
Listens to a wiki's recentchanges feed.
Usage:
listen <api_url>
Options:
<api_url> The url for the MediaWiki API to connect to.
"""
import pprint
import sys

from docopt import docopt

# Put the current working directory first on the import path so that the
# `mwevents` package found there is the one that gets imported.
sys.path.insert(0, ".")
from mwevents.sources import API

def main():
    """Parse the command line with docopt and listen to the requested API."""
    arguments = docopt(__doc__)
    api_url = arguments['<api_url>']
    run(api_url)

def run(api_url):
    """
    Pretty-print every event heard from the API at `api_url`.

    Runs until the listener stops or a KeyboardInterrupt is received, in
    which case a short shutdown message is printed instead of a traceback.
    """
    source = API.from_api_url(api_url)

    try:
        # The listener yields (event, state) pairs; only the event is shown.
        for event, _state in source.listen():
            json_doc = event.to_json()
            pprint.pprint(json_doc)
    except KeyboardInterrupt:
        print("Keyboard interrupt received. Shutting down.")


if __name__ == "__main__": main()
14 changes: 13 additions & 1 deletion mwevents/configuration.py
Original file line number Diff line number Diff line change
@@ -1 +1,13 @@
DEFAULTS = {}
import copy


def load(self, doc):
    """
    Build a configuration by layering `doc` over a deep copy of DEFAULT.

    DEFAULT itself is never modified.  `doc` is applied with `dict.update`,
    so its top-level keys replace the defaults.

    NOTE(review): `self` is not used and this is a module-level function --
    confirm whether the parameter is intentional.
    """
    merged = copy.deepcopy(DEFAULT)
    merged.update(doc)
    return merged

# Default configuration values.
# NOTE(review): `expiration_format` looks like a strftime/strptime pattern and
# `indefinite` like the marker text for a non-expiring entry -- confirm against
# the code that consumes them.
DEFAULT = dict(
    expiration_format="expires %H:%M, %d %B %Y (UTC)",
    indefinite="indefinite",
)
5 changes: 0 additions & 5 deletions mwevents/defaults.py
Original file line number Diff line number Diff line change
@@ -1,5 +0,0 @@



PARAMS_TIME_FORMAT = "expires %H:%M, %d %B %Y (UTC)"
PARAMS_INDEFINITE = "indefinite"
13 changes: 0 additions & 13 deletions mwevents/functions.py

This file was deleted.

1 change: 1 addition & 0 deletions mwevents/sources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .api import API
49 changes: 49 additions & 0 deletions mwevents/sources/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import time

from mw import api

from ..types import Event


class API:
    """
    Event source that polls the recent changes of a wiki through an API
    session.

    Example:
        .. code-block::python

            api_source = API.from_api_url(api_url)
            for event, state in api_source.listen():
                print(event.to_json())
    """
    RC_PROPS = {'user', 'userid', 'comment', 'timestamp', 'title', 'ids',
                'sizes', 'loginfo', 'sha1'}

    def __init__(self, session):
        # `session` must expose `recent_changes._query(...)`; see listen().
        self.session = session

    def listen(self, *args, min_wait=5, rcs_per_request=50,
               stop=lambda: False,
               direction="newer",
               properties=RC_PROPS, types=None, **kwargs):
        """
        Repeatedly query recent changes and yield the events they describe.

        :Parameters:
            *args, **kwargs
                Passed through to `session.recent_changes._query()`.
            min_wait : number
                Minimum number of seconds between the start of two requests
                when the previous request returned fewer than
                `rcs_per_request` changes.
            rcs_per_request : int
                Sent to the query as `limit`.
            stop : callable
                Checked before every request; listening ends once it
                returns a true value.
            direction : str
                Sent to the query as `direction`.
            properties : set
                Sent to the query as `properties`.
            types : collection of classes
                When given, only events whose exact type is in `types` are
                yielded.

        :Returns:
            A generator of `(event, state)` pairs where `state` is the
            change's timestamp and rcid joined by "|".
        """
        kwargs['limit'] = rcs_per_request
        kwargs['properties'] = properties
        kwargs['direction'] = direction

        while not stop():
            start = time.time()

            # The continuation value returned by each query is fed into the
            # next one through kwargs.
            rc_docs, kwargs['rccontinue'] = \
                self.session.recent_changes._query(*args, **kwargs)

            for rc_doc in rc_docs:
                state = rc_doc['timestamp'] + "|" + str(rc_doc['rcid'])
                for event in Event.from_rc_doc(rc_doc):
                    if types is None or type(event) in types:
                        yield event, state

            if len(rc_docs) < rcs_per_request:
                # A short batch means we caught up; wait out the rest of
                # min_wait.  time.sleep() rejects negative values, so skip
                # the sleep when the request already took longer than that.
                remaining = min_wait - (time.time() - start)
                if remaining > 0:
                    time.sleep(remaining)

    @classmethod
    def from_api_url(cls, url):
        """Construct an API source from the URL of a wiki's API."""
        return cls(api.Session(url))
5 changes: 5 additions & 0 deletions mwevents/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from .revision import Revision
from .user import User
from .block import Block
from .protection import Protection
from .timestamp import Timestamp
from .unavailable import Unavailable, UnavailableType
from .user import User
from .page import Page
from .events import Event
12 changes: 12 additions & 0 deletions mwevents/types/block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from jsonable import JSONable

from .timestamp import Timestamp
from .unavailable import Unavailable


class Block(JSONable):
    """
    JSONable record of a block: its flags, duration and expiration.
    """
    __slots__ = ('flags', 'duration', 'expiration')

    def initialize(self, flags, duration, expiration):
        # Every flag and the duration are stored as strings.
        self.flags = list(map(str, flags))
        self.duration = str(duration)
        # Conversion is delegated to Unavailable.otherwise() with Timestamp
        # as the target type.
        self.expiration = Unavailable.otherwise(expiration, Timestamp)
14 changes: 14 additions & 0 deletions mwevents/types/events/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .event import Event
from .page_created import PageCreated
from .page_deleted import PageDeleted
from .page_moved import PageMoved
from .page_protection_modified import PageProtectionModified
from .page_restored import PageRestored
from .revisions_deleted import RevisionsDeleted
#from .revision_restored import RevisionRestored
from .revision_saved import RevisionSaved
from .user_blocked import UserBlocked
from .user_groups_modified import UserGroupsModified
from .user_registered import UserRegistered
from .user_renamed import UserRenamed
from .user_unblocked import UserUnblocked
59 changes: 48 additions & 11 deletions mwevents/types/events/event.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,80 @@
import copy
from collections import defaultdict

from mw import Timestamp

from jsonable import JSONable

from ..types import User
from .. import User
from ... import configuration
from .match import Match


class Event(JSONable):
    """
    Base class for all events and registry of the concrete event classes.

    Concrete event classes list the `Match` keys they handle in `MATCHES`
    and add themselves with `Event.register()`.  The `from_*` constructors
    on this class then build a `Match` from a source record and yield one
    event per registered class in that match group.
    """
    __slots__ = ('timestamp', 'user', 'comment')
    MATCHES = NotImplemented
    # Registered event classes keyed by class name; used by from_json().
    EVENTS = {}
    # Registered event classes grouped by Match, kept sorted by PRIORITY.
    MATCH_GROUPS = defaultdict(list)
    # Position within a match group; lower values are yielded first.
    PRIORITY = 99

    def initialize(self, timestamp, user, comment):
        self.timestamp = Timestamp(timestamp)
        self.user = User(user)
        self.comment = str(comment)

    def to_json(self):
        """Return the JSON document with the class name added as 'event'."""
        doc = super().to_json()
        doc['event'] = self.__class__.__name__
        return doc

    @classmethod
    def from_json(cls, doc):
        """
        Construct an event from `doc`.

        When `doc` has an 'event' key, the class registered under that name
        (or `cls` if none is registered) is used and receives a copy of
        `doc` without the key; `doc` itself is left untouched.
        """
        if 'event' in doc:
            EventClass = cls.EVENTS.get(doc['event'], cls)
            new_doc = copy.copy(doc)
            del new_doc['event']
            return EventClass.from_json(new_doc)
        else:
            return cls._from_json(doc)

    @classmethod
    def register(cls, EventClass):
        """Add `EventClass` to the registry under each of its MATCHES."""
        for match in EventClass.MATCHES:
            cls.MATCH_GROUPS[match].append(EventClass)
            # Keep every group ordered so matches() returns priority order.
            cls.MATCH_GROUPS[match].sort(key=lambda e: e.PRIORITY)

        cls.EVENTS[EventClass.__name__] = EventClass

    @classmethod
    def matches(cls, match):
        """Return the list of event classes registered for `match`."""
        return cls.MATCH_GROUPS[match]

    @classmethod
    def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT):
        """Yield one event per class registered for the match of `rc_doc`."""
        match = Match.from_rc_doc(rc_doc)

        for EventClass in cls.matches(match):
            yield EventClass.from_rc_doc(rc_doc, config)

    @classmethod
    def from_rev_doc(cls, rev_doc, config=configuration.DEFAULT):
        """Yield one event per class registered for the match of `rev_doc`."""
        match = Match.from_rev_doc(rev_doc)

        for EventClass in cls.matches(match):
            yield EventClass.from_rev_doc(rev_doc, config)

    @classmethod
    def from_log_row(cls, log_row, config=configuration.DEFAULT):
        """Yield one event per class registered for the match of `log_row`."""
        match = Match.from_log_row(log_row)

        for EventClass in cls.matches(match):
            yield EventClass.from_log_row(log_row, config)

    @classmethod
    def from_rc_row(cls, rc_row, config=configuration.DEFAULT):
        """Yield one event per class registered for the match of `rc_row`."""
        match = Match.from_rc_row(rc_row)

        for EventClass in cls.matches(match):
            yield EventClass.from_rc_row(rc_row, config)
79 changes: 79 additions & 0 deletions mwevents/types/events/match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from jsonable import instance


class Match:
    """
    Hashable key that classifies a change record.

    A match is made of a log type, a log action, whether the record carries
    a revision id and the recent-changes type name.  Two matches are equal
    when all four fields are equal, so instances can be used as dictionary
    keys.  `type`, `action` and `rc_type` are stored as strings, which means
    that `None` is stored as the string "None".
    """

    # Type names for the integer `rc_type` values read by from_rc_row().
    RC_TYPES = {
        0: "edit",
        1: "new",
        2: "move",
        3: "log",
        4: "move_over_redirect",
        5: "external"
    }

    def __init__(self, type, action, has_rev_id, rc_type):
        self.type = str(type)
        self.action = str(action)
        self.has_rev_id = bool(has_rev_id)
        self.rc_type = str(rc_type)

    def __eq__(self, other):
        try:
            return (
                self.type == other.type and
                self.action == other.action and
                self.has_rev_id == other.has_rev_id and
                self.rc_type == other.rc_type
            )
        except AttributeError:
            # `other` lacks one of the fields, so it cannot be equal.
            return False

    def __hash__(self):
        return hash((self.type, self.action, self.has_rev_id, self.rc_type))

    def __repr__(self):
        return instance.simple_repr(self.__class__.__name__,
                                    self.type,
                                    self.action,
                                    self.has_rev_id,
                                    self.rc_type)

    @classmethod
    def from_rc_doc(cls, rc_doc):
        """
        Build a match from a recent-changes document.

        Reads the optional 'logtype', 'logaction' and 'revid' keys and the
        required 'type' key.
        """
        return cls(
            rc_doc.get('logtype'),
            rc_doc.get('logaction'),
            rc_doc.get('revid', 0) > 0,
            rc_doc['type']
        )

    @classmethod
    def from_rev_doc(cls, rev_doc):
        """
        Build a match from a revision document.

        The type is "edit" when the required 'parentid' is positive and
        "new" otherwise; log type and action are always None.
        """
        return cls(
            None,
            None,
            rev_doc.get('revid', 0) > 0,
            "edit" if rev_doc['parentid'] > 0 else "new"
        )

    @classmethod
    def from_log_row(cls, log_row):
        """
        Build a match from a log row.

        Reads the optional 'log_type' and 'log_action' keys.  Log rows
        never carry a revision id and always have the type "log".
        """
        return cls(
            log_row.get('log_type'),
            log_row.get('log_action'),
            False,
            "log"
        )

    @classmethod
    def from_rc_row(cls, rc_row):
        """
        Build a match from a recent-changes row.

        Reads the optional 'rc_log_type', 'rc_log_action' and
        'rc_this_oldid' keys and translates the required integer 'rc_type'
        through RC_TYPES (an unknown value raises KeyError).
        """
        return cls(
            rc_row.get('rc_log_type'),
            rc_row.get('rc_log_action'),
            rc_row.get('rc_this_oldid', 0) > 0,
            cls.RC_TYPES[rc_row['rc_type']]
        )
35 changes: 19 additions & 16 deletions mwevents/types/events/page_created.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
from .event import Event, Match
from .. import Page, Timestamp, User
from ... import configuration
from ...util import split_page_name
from .event import Event
from .match import Match


class PageCreated(Event):
MATCHES = [Match(None, None, True, "new", priority=25)]
MATCHES = [Match(None, None, True, "new")]
PRIORITY = 50 # Must happen before RevisionSaved
__slots__ = ('page',)
def __init__(self, timestamp, user, comment, page):
super().__init__(timestamp, user, comment)
def initialize(self, timestamp, user, comment, page):
    """Set the common event fields, then store `page` as a Page."""
    # timestamp, user and comment are handled by the parent initializer.
    super().initialize(timestamp, user, comment)
    self.page = Page(page)

@classmethod
def from_api_doc(cls, api_doc, config=configuration.DEFAULTS):
def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT):
"""
Example:
{
Expand All @@ -31,22 +36,20 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULTS):
}
"""

ns, title = config.title_parser.parse(api_doc['title'])
assert ns == api_doc['ns']
nsname, title = split_page_name(rc_doc['ns'], rc_doc['title'])

cls(
Timestamp(api_doc['timestamp']),
return cls(
Timestamp(rc_doc['timestamp']),
User(
int(api_doc['userid']),
api_doc['user']
rc_doc.get('userid'),
rc_doc.get('user')
),
api_doc['comment'],
rc_doc.get('comment'),
Page(
api_doc['page_id'],
ns,
rc_doc.get('pageid'),
rc_doc['ns'],
title
)
)

# Event.register(PageCreated)
# TODO: Uncomment when ready
Event.register(PageCreated)
Loading

0 comments on commit aafca6b

Please sign in to comment.