From aafca6b3a51ec4477b5633169b614d54cdba097d Mon Sep 17 00:00:00 2001 From: halfak Date: Sat, 2 Aug 2014 19:49:10 -0500 Subject: [PATCH] Basic API listener works. Example works and runs on enwiki. 70% unit tests. Version 0.0.1. Can be registered with PyPI. --- VERSION | 1 + examples/listen.py | 39 + mwevents/configuration.py | 14 +- mwevents/defaults.py | 5 - mwevents/functions.py | 13 - mwevents/sources/__init__.py | 1 + mwevents/sources/api.py | 49 + mwevents/types/__init__.py | 5 + mwevents/types/block.py | 12 + mwevents/types/events/__init__.py | 14 + mwevents/types/events/event.py | 59 +- mwevents/types/events/match.py | 79 ++ mwevents/types/events/page_created.py | 35 +- mwevents/types/events/page_deleted.py | 49 +- mwevents/types/events/page_moved.py | 74 +- .../types/events/page_protection_modified.py | 69 +- mwevents/types/events/page_restored.py | 34 +- mwevents/types/events/revision_deleted.py | 11 - mwevents/types/events/revision_saved.py | 196 ++-- mwevents/types/events/revisions_deleted.py | 58 ++ mwevents/types/events/tests/rc_docs.json | 875 ++++++++++++++++++ mwevents/types/events/tests/test_event.py | 77 ++ .../types/events/tests/test_page_restored.py | 53 ++ .../types/events/tests/test_revision_saved.py | 121 ++- mwevents/types/events/user_blocked.py | 43 +- mwevents/types/events/user_groups_modified.py | 72 ++ mwevents/types/events/user_registered.py | 44 +- mwevents/types/events/user_renamed.py | 27 +- mwevents/types/events/user_rights_modified.py | 11 - mwevents/types/events/user_unblocked.py | 27 +- mwevents/types/page.py | 11 + mwevents/types/protection.py | 8 +- mwevents/types/revision.py | 14 +- mwevents/types/tests/test_block.py | 19 + mwevents/types/tests/test_page.py | 18 + mwevents/types/tests/test_protection.py | 15 + mwevents/types/tests/test_revision.py | 64 ++ mwevents/types/tests/test_timestamp.py | 15 + mwevents/types/tests/test_unavailable.py | 10 + mwevents/types/tests/test_user.py | 30 + mwevents/types/unavailable.py | 85 ++ mwevents/types/user.py | 4 +- mwevents/util.py | 10 + setup.py | 42 + 44 files changed, 2178 insertions(+), 334 deletions(-) create mode 100644 VERSION create mode 100644 examples/listen.py delete mode 100644 mwevents/functions.py create mode 100644 mwevents/types/block.py create mode 100644 mwevents/types/events/__init__.py create mode 100644 mwevents/types/events/match.py delete mode 100644 mwevents/types/events/revision_deleted.py create mode 100644 mwevents/types/events/revisions_deleted.py create mode 100644 mwevents/types/events/tests/rc_docs.json create mode 100644 mwevents/types/events/tests/test_event.py create mode 100644 mwevents/types/events/tests/test_page_restored.py create mode 100644 mwevents/types/events/user_groups_modified.py delete mode 100644 mwevents/types/events/user_rights_modified.py create mode 100644 mwevents/types/page.py create mode 100644 mwevents/types/tests/test_block.py create mode 100644 mwevents/types/tests/test_page.py create mode 100644 mwevents/types/tests/test_revision.py create mode 100644 mwevents/types/tests/test_timestamp.py create mode 100644 mwevents/types/tests/test_unavailable.py create mode 100644 mwevents/types/tests/test_user.py create mode 100644 mwevents/types/unavailable.py create mode 100644 mwevents/util.py diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..8acdd82 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.0.1 diff --git a/examples/listen.py b/examples/listen.py new file mode 100644 index 0000000..89d3f34 --- /dev/null +++ b/examples/listen.py @@ -0,0 +1,39 @@ +""" +Listens to a wiki's recentchanges feed. + +Usage: + listen + +Options: + The url for the MediaWiki API to connect to. +""" +import pprint +import sys + +from docopt import docopt + +try: + sys.path.insert(0, ".") + from mwevents.sources import API +except: + raise + +def main(): + args = docopt(__doc__) + + run(args['']) + +def run(api_url): + + api_source = API.from_api_url(api_url) + + try: + for event, state in api_source.listen(): + + pprint.pprint(event.to_json()) + + except KeyboardInterrupt: + print("Keyboard interrupt received. Shutting down.") + + +if __name__ == "__main__": main() diff --git a/mwevents/configuration.py b/mwevents/configuration.py index 8251abd..8eacf2e 100644 --- a/mwevents/configuration.py +++ b/mwevents/configuration.py @@ -1 +1,13 @@ -DEFAULTS = {} +import copy + + +def load(self, doc): + config = copy.deepcopy(DEFAULT) + config.update(doc) + + return config + +DEFAULT = { + 'expiration_format': "expires %H:%M, %d %B %Y (UTC)", + 'indefinite': "indefinite" +} diff --git a/mwevents/defaults.py b/mwevents/defaults.py index cf2e57a..e69de29 100644 --- a/mwevents/defaults.py +++ b/mwevents/defaults.py @@ -1,5 +0,0 @@ - - - -PARAMS_TIME_FORMAT = "expires %H:%M, %d %B %Y (UTC)" -PARAMS_INDEFINITE = "indefinite" diff --git a/mwevents/functions.py b/mwevents/functions.py deleted file mode 100644 index 430e12f..0000000 --- a/mwevents/functions.py +++ /dev/null @@ -1,13 +0,0 @@ -from . import change - -def from_api(rc_docs): - - for rc_doc in rc_doc: - c = change.from_api(rc_row) - if c != None: yield c - -def from_db(rc_rows): - - for rc_row in rc_row: - c = change.from_db(rc_row) - if c != None: yield c diff --git a/mwevents/sources/__init__.py b/mwevents/sources/__init__.py index e69de29..ae40d11 100644 --- a/mwevents/sources/__init__.py +++ b/mwevents/sources/__init__.py @@ -0,0 +1 @@ +from .api import API diff --git a/mwevents/sources/api.py b/mwevents/sources/api.py index e69de29..40ab6c4 100644 --- a/mwevents/sources/api.py +++ b/mwevents/sources/api.py @@ -0,0 +1,49 @@ +import time + +from mw import api + +from ..types import Event + + +class API: + """ + Example: + + .. code-block::python + """ + RC_PROPS = {'user', 'userid', 'comment', 'timestamp', 'title', 'ids', + 'sizes', 'loginfo', 'sha1'} + + def __init__(self, session): + self.session = session + + def listen(self, *args, min_wait=5, rcs_per_request=50, + stop=lambda: False, + direction="newer", + properties=RC_PROPS, types=None, **kwargs): + + kwargs['limit'] = rcs_per_request + kwargs['properties'] = properties + kwargs['direction'] = direction + + while not stop(): + start = time.time() + + rc_docs, kwargs['rccontinue'] = \ + self.session.recent_changes._query(*args, **kwargs) + + for rc_doc in rc_docs: + print(rc_doc) + state = rc_doc['timestamp'] + "|" + str(rc_doc['rcid']) + for event in Event.from_rc_doc(rc_doc): + if types is None or type(event) in types: + yield event, state + + + if len(rc_docs) < rcs_per_request: + time.sleep(min_wait - (time.time() - start)) + + + @classmethod + def from_api_url(cls, url): + return cls(api.Session(url)) diff --git a/mwevents/types/__init__.py b/mwevents/types/__init__.py index f4ac132..4986247 100644 --- a/mwevents/types/__init__.py +++ b/mwevents/types/__init__.py @@ -1,4 +1,9 @@ from .revision import Revision from .user import User +from .block import Block from .protection import Protection from .timestamp import Timestamp +from .unavailable import Unavailable, UnavailableType +from .user import User +from .page import Page +from .events import Event diff --git a/mwevents/types/block.py b/mwevents/types/block.py new file mode 100644 index 0000000..08a7f06 --- /dev/null +++ b/mwevents/types/block.py @@ -0,0 +1,12 @@ +from jsonable import JSONable + +from .timestamp import Timestamp +from .unavailable import Unavailable + + +class Block(JSONable): + __slots__ = ('flags', 'duration', 'expiration') + def initialize(self, flags, duration, expiration): + self.flags = [str(flag) for flag in flags] + self.duration = str(duration) + self.expiration = Unavailable.otherwise(expiration, Timestamp) diff --git a/mwevents/types/events/__init__.py b/mwevents/types/events/__init__.py new file mode 100644 index 0000000..1a27f83 --- /dev/null +++ b/mwevents/types/events/__init__.py @@ -0,0 +1,14 @@ +from .event import Event +from .page_created import PageCreated +from .page_deleted import PageDeleted +from .page_moved import PageMoved +from .page_protection_modified import PageProtectionModified +from .page_restored import PageRestored +from .revisions_deleted import RevisionsDeleted +#from .revision_restored import RevisionRestored +from .revision_saved import RevisionSaved +from .user_blocked import UserBlocked +from .user_groups_modified import UserGroupsModified +from .user_registered import UserRegistered +from .user_renamed import UserRenamed +from .user_unblocked import UserUnblocked diff --git a/mwevents/types/events/event.py b/mwevents/types/events/event.py index 75275f7..8807ac8 100644 --- a/mwevents/types/events/event.py +++ b/mwevents/types/events/event.py @@ -1,16 +1,20 @@ +import copy from collections import defaultdict from mw import Timestamp from jsonable import JSONable -from ..types import User +from .. import User +from ... import configuration +from .match import Match class Event(JSONable): __slots__ = ('timestamp', 'user', 'comment') MATCHES = NotImplemented - EVENTS = defaultdict(lambda: []) + EVENTS = {} + MATCH_GROUPS = defaultdict(lambda: []) PRIORITY = 99 def initialize(self, timestamp, user, comment): @@ -18,26 +22,59 @@ def initialize(self, timestamp, user, comment): self.user = User(user) self.comment = str(comment) + def to_json(self): + doc = super().to_json() + doc['event'] = self.__class__.__name__ + return doc + + + @classmethod + def from_json(cls, doc): + if 'event' in doc: + EventClass = cls.EVENTS.get(doc['event'], cls) + new_doc = copy.copy(doc) + del new_doc['event'] + return EventClass.from_json(new_doc) + else: + return cls._from_json(doc) + @classmethod def register(cls, EventClass): for match in EventClass.MATCHES: - cls.EVENTS[match].append(EventClass) - cls.EVENTS[match].sort(key=lambda e:e.PRIORITY) + cls.MATCH_GROUPS[match].append(EventClass) + cls.MATCH_GROUPS[match].sort(key=lambda e:e.PRIORITY) + + cls.EVENTS[EventClass.__name__] = EventClass @classmethod def matches(cls, match): - return cls.EVENTS[match] + return cls.MATCH_GROUPS[match] + + @classmethod + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): + match = Match.from_rc_doc(rc_doc) + + for EventClass in cls.matches(match): + yield EventClass.from_rc_doc(rc_doc, config) + + @classmethod + def from_rev_doc(cls, rev_doc, config=configuration.DEFAULT): + match = Match.from_rev_doc(rev_doc) + + for EventClass in cls.matches(match): + yield EventClass.from_rev_doc(rev_doc, config) + @classmethod - def from_api_doc(cls, api_doc): - match = Match.from_api_doc(api_doc) + def from_log_row(cls, log_row, config=configuration.DEFAULT): + match = Match.from_log_row(log_row) for EventClass in cls.matches(match): - yield EventClass.from_api_doc(api_doc) + yield EventClass.from_log_row(log_row, config) @classmethod - def from_db_row(cls, db_row): - match = Match.from_db_row(db_row) + def from_rc_row(cls, rc_row, config=configuration.DEFAULT): + match = Match.from_rc_row(rc_row) for EventClass in cls.matches(match): - yield EventClass.from_db_row(api_doc) + yield EventClass.from_rc_row(rc_doc, config) diff --git a/mwevents/types/events/match.py b/mwevents/types/events/match.py new file mode 100644 index 0000000..624e983 --- /dev/null +++ b/mwevents/types/events/match.py @@ -0,0 +1,79 @@ +from jsonable import instance + + +class Match: + + RC_TYPES = { + 0: "edit", + 1: "new", + 2: "move", + 3: "log", + 4: "move_over_redirect", + 5: "external" + } + + def __init__(self, type, action, has_rev_id, rc_type): + self.type = str(type) + self.action = str(action) + self.has_rev_id = bool(has_rev_id) + self.rc_type = str(rc_type) + + def __eq__(self, other): + try: + return ( + self.type == other.type and + self.action == other.action and + self.has_rev_id == other.has_rev_id and + self.rc_type == other.rc_type + ) + except AttributeError: + return False + + def __hash__(self): + return hash((self.type, self.action, self.has_rev_id, self.rc_type)) + + def __repr__(self): + return instance.simple_repr(self.__class__.__name__, + self.type, + self.action, + self.has_rev_id, + self.rc_type) + + @classmethod + def from_rc_doc(cls, rc_doc): + + return cls( + rc_doc.get('logtype'), + rc_doc.get('logaction'), + rc_doc.get('revid', 0) > 0, + rc_doc['type'] + ) + + @classmethod + def from_rev_doc(cls, rev_doc): + + return cls( + None, + None, + rev_doc.get('revid', 0) > 0, + "edit" if rev_doc['parentid'] > 0 else "new" + ) + + + @classmethod + def from_log_row(cls, log_row): + return cls( + log_row.get('log_type'), + log_row.get('log_action'), + False, + "log" + ) + + @classmethod + def from_rc_row(cls, rc_row): + return cls( + db_row.get('rc_log_type'), + db_row.get('rc_log_action'), + rc_doc.get('rc_this_oldid', 0) > 0, + cls.RC_TYPES[db_row['rc_type']] + ) diff --git a/mwevents/types/events/page_created.py b/mwevents/types/events/page_created.py index adc00ef..e916f61 100644 --- a/mwevents/types/events/page_created.py +++ b/mwevents/types/events/page_created.py @@ -1,15 +1,20 @@ -from .event import Event, Match +from .. import Page, Timestamp, User +from ... import configuration +from ...util import split_page_name +from .event import Event +from .match import Match class PageCreated(Event): - MATCHES = [Match(None, None, True, "new", priority=25)] + MATCHES = [Match(None, None, True, "new")] + PRIORITY = 50 # Must happen before RevisionSaved __slots__ = ('page',) - def __init__(self, timestamp, user, comment, page): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, page): + super().initialize(timestamp, user, comment) self.page = Page(page) @classmethod - def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ Example: { @@ -31,22 +36,20 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): } """ - ns, title = config.title_parser.parse(api_doc['title']) - assert ns == api_doc['ns'] + nsname, title = split_page_name(rc_doc['ns'], rc_doc['title']) - cls( - Timestamp(api_doc['timestamp']), + return cls( + Timestamp(rc_doc['timestamp']), User( - int(api_doc['userid']), - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], + rc_doc.get('comment'), Page( - api_doc['page_id'], - ns, + rc_doc.get('pageid'), + rc_doc['ns'], title ) ) -# Event.register(PageCreated) -# TODO: Uncomment when ready +Event.register(PageCreated) diff --git a/mwevents/types/events/page_deleted.py b/mwevents/types/events/page_deleted.py index a3f2c5f..86fd2b3 100644 --- a/mwevents/types/events/page_deleted.py +++ b/mwevents/types/events/page_deleted.py @@ -1,3 +1,6 @@ +from .. import Page, Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name from .event import Event from .match import Match @@ -5,14 +8,14 @@ class PageDeleted(Event): MATCHES = [Match("delete", "delete", False, "log")] __slots__ = ('page',) - def __init__(self, timestamp, user, comment, page): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, page): + super().initialize(timestamp, user, comment) self.page = Page(page) @classmethod - def from_api_doc(cls, api_doc, config=DEFAULT_CONFIG): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ - :Example API doc:: + Example: { "type": "log", "ns": 15, @@ -34,41 +37,19 @@ def from_api_doc(cls, api_doc, config=DEFAULT_CONFIG): "tags": [] } """ - ns, title = config.title_parser.parse(api_doc['title']) - assert ns == api_doc['ns'] + nsname, title = split_page_name(rc_doc['ns'], rc_doc['title']) return cls( - Timestamp('rc_timestamp'), + Timestamp(rc_doc['timestamp']), User( - int(api_doc['userid']), - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], + rc_doc.get('comment'), Page( - api_doc['pageid'], - ns, - title - ) - ) - - @classmethod - def from_db_row(cls, db_row, config=DEFAULT_CONFIG): - """ - """ - ns, title = config.title_parser.parse(db_row['log_title']) - assert ns == db_row['ns'] - - return cls( - Timestamp(db_row['log_timestamp']), - User( - int(db_row['log_user']), - db_row['log_user_text'] - ), - db_row['log_comment'], - Page( - db_row['log_page'], # Note, this is set to zero for old deleted - # pages. - ns, + rc_doc.get('pageid') or Unavailable, # For old entries, + # this is set to zero + rc_doc['ns'], title ) ) diff --git a/mwevents/types/events/page_moved.py b/mwevents/types/events/page_moved.py index aab3df0..067e0e5 100644 --- a/mwevents/types/events/page_moved.py +++ b/mwevents/types/events/page_moved.py @@ -1,5 +1,8 @@ import phpserialize +from .. import Page, Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name from .event import Event from .match import Match @@ -8,14 +11,17 @@ class PageMoved(Event): MATCHES = [Match("move", "move", False, "log"), Match("move", "move_redir", False, "log")] __slots__ = ('old', 'new') - def __init__(self, timestamp, user, comment, action, old, new): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, + action, redirect_page_id, old, new): + super().initialize(timestamp, user, comment) self.action = str(action) + self.redirect_page_id = \ + Unavailable.otherwise(redirect_page_id, int) or 0 self.old = Page(old) self.new = Page(new) @classmethod - def from_api_doc(cls, api_doc): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ :Example API doc:: { @@ -43,65 +49,31 @@ def from_api_doc(cls, api_doc): "tags": [] } """ - old_ns, old_title = config.title_parser.parse(doc['title']) - assert old_ns == doc['ns'] + old_nsname, old_title = split_page_name(rc_doc['ns'], rc_doc['title']) - new_ns, new_title = config.title_parser.parse(doc['move']['new_title']) - assert new_ns == doc['move']['new_ns'] + new_nsname, new_title = split_page_name(rc_doc['move']['new_ns'], + rc_doc['move']['new_title']) return cls( - Timestamp(api_doc["timestamp"]), + Timestamp(rc_doc['timestamp']), User( - int(api_doc['userid']), - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], - api_doc['log_action'], + rc_doc.get('comment'), + rc_doc['logaction'], + rc_doc.get('pageid') or 0, # Note: Not the moved page_id. Page( - api_doc.get('movedpageid'), - old_ns, + Unavailable, + rc_doc['ns'], old_title ), Page( - api_doc.get('movedtitle'), - new_ns, - new_title - ) - ) - - @classmethod - def from_db_row(cls, db_row): - """ - """ - old_ns, old_title = \ - config.title_parser.parse(str(db_row['log_title'], 'utf-8')) - assert old_ns == db_row['log_namespace'] - - params_array = phpserialize.loads(db_row['log_params']) - to_page_name = str(params_array['4::target'], 'utf-8', 'replace') - new_ns, new_title = config.title_parser.parse() - assert new_ns == doc['move']['new_ns'] - - return cls( - Timestamp(api_doc["timestamp"]) - User( - int(api_doc['userid']), - api_doc['user'] - ), - api_doc['comment'], - api_doc['log_action'], - Page( - api_doc.get('movedpageid'), - old_ns, - old_title - ), - Page( - api_doc.get('movedtitle'), - new_ns, + Unavailable, + rc_doc['move']['new_ns'], new_title ) ) -# Event.register(PageMoved) -# TODO: Uncomment when ready +Event.register(PageMoved) diff --git a/mwevents/types/events/page_protection_modified.py b/mwevents/types/events/page_protection_modified.py index 5a20f8f..5404cca 100644 --- a/mwevents/types/events/page_protection_modified.py +++ b/mwevents/types/events/page_protection_modified.py @@ -1,3 +1,6 @@ +from .. import Page, Protection, Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name from .event import Event from .match import Match @@ -6,15 +9,17 @@ class PageProtectionModified(Event): MATCHES = [Match("protect", "protect", False, "log"), Match("protect", "modify", False, "log"), Match("protect", "unprotect", False, "log")] - __slots__ = ('page', 'action', 'protection') - def __init__(self, timestamp, user, comment, page, action, protections): - super().__init__(timestamp, user, comment) + __slots__ = ('page', 'action', 'protections') + def initialize(self, timestamp, user, comment, page, action, protections): + super().initialize(timestamp, user, comment) self.page = Page(page) - self.action = str(action) - self.protections = [Protection(p) for p in protections] + self.action = Unavailable.otherwise(action, str, none_ok=False) + self.protections = \ + Unavailable.otherwise(protections, + lambda ps: [Protection(p) for p in ps]) @classmethod - def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ Example: { @@ -38,6 +43,27 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): "1": "", "tags": [] } + { + "type": "log", + "ns": 4, + "title": "Wikipedia:Requests for page protection", + "pageid": 352651, + "revid": 0, + "old_revid": 0, + "rcid": 672604441, + "user": "NativeForeigner", + "userid": "964805", + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T05:51:34Z", + "comment": "Persistent [[WP:Vandalism|vandalism]]/[[WP:BLP|BLP Issues]]", + "logid": 57939003, + "logtype": "protect", + "logaction": "modify", + "0": "\u200e[edit=autoconfirmed] (expires 00:00, 31 August 2014 (UTC))\u200e[move=sysop] (indefinite)", + "1": "", + "tags": [] + } { "type": "log", "ns": 6, @@ -59,29 +85,34 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): "tags": [] } """ - ns, title = config.title_parser.parse(api_doc['title']) - assert ns == api_doc['ns'] + nsname, title = split_page_name(rc_doc['ns'], rc_doc['title']) - if api_doc['logaction'] in ("protect", "modify"): - protections = Protection.from_params(doc.get('0'), config) - elif api_doc['logaction'] == "unprotect": + if rc_doc['logaction'] in ("protect", "modify"): + protections = Protection.from_params( + rc_doc.get('0', ""), + expiration_format=config['expiration_format'], + indefinite=config['indefinite']) + + elif rc_doc['logaction'] == "unprotect": protections = [] + else: assert False, "Shouldn't happen." return cls( - Timestamp(api_doc['comment']), + Timestamp(rc_doc['timestamp']), User( - int(api_doc['userid']), - api_doc['user'] - ) + rc_doc.get('userid'), + rc_doc.get('user') + ), + rc_doc.get('comment'), Page( - api_doc['pageid'], - api_doc['ns'], + rc_doc.get('pageid'), + rc_doc.get('ns'), title ), + rc_doc['logaction'], protections ) -# Event.register(PageProtectionModified) -# TODO: Uncomment when ready +Event.register(PageProtectionModified) diff --git a/mwevents/types/events/page_restored.py b/mwevents/types/events/page_restored.py index c0bbf32..8cd8a61 100644 --- a/mwevents/types/events/page_restored.py +++ b/mwevents/types/events/page_restored.py @@ -1,3 +1,7 @@ + +from .. import Page, Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name from .event import Event from .match import Match @@ -5,15 +9,15 @@ class PageRestored(Event): MATCHES = [Match("delete", "restore", False, "log")] __slots__ = ('old_page_id', 'page') - def __init__(self, timestamp, user, comment, old_page_id, page): - super().__init__(timestamp, user, comment) - self.old_page_id = int(old_page_id) if old_page_id is not None else None + def initialize(self, timestamp, user, comment, old_page_id, page): + super().initialize(timestamp, user, comment) + self.old_page_id = Unavailable.otherwise(old_page_id, int) self.page = Page(page) @classmethod - def from_api_doc(cls, api_doc): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ - :Example API doc:: + Example: { "type": "log", "ns": 3, @@ -34,25 +38,23 @@ def from_api_doc(cls, api_doc): "tags": [] } """ - ns, title = Page.parse_title(api_doc['title']) - assert ns == api_doc['ns'] + nsname, title = split_page_name(rc_doc['ns'], rc_doc['title']) return cls( - Timestamp(api_doc['timestamp']) + Timestamp(rc_doc['timestamp']), User( - int(doc['userid']), - doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], - None, # Not available + rc_doc.get('comment'), + Unavailable, # Not available Page( - doc['pageid'], - ns, + rc_doc.get('pageid'), + rc_doc.get('ns'), title ) ) -# Event.register(RevisionSaved) -# TODO: Uncomment when ready +Event.register(PageRestored) diff --git a/mwevents/types/events/revision_deleted.py b/mwevents/types/events/revision_deleted.py deleted file mode 100644 index 7c10ab3..0000000 --- a/mwevents/types/events/revision_deleted.py +++ /dev/null @@ -1,11 +0,0 @@ -from .event import Event, Match - - -class RevisionDeleted(Event): - MATCHES = [Match("delete", "revision", True, "log")] - __slots__ = ('revision',) - def __init__(self, timestamp, user, comment, revision): - super().__init__(timestamp, user, comment) - self.revision = Revision(revision) - -Event.EVENTS[RevisionDeleted] = RevisionDeleted diff --git a/mwevents/types/events/revision_saved.py b/mwevents/types/events/revision_saved.py index 5194d0c..dbfd50d 100644 --- a/mwevents/types/events/revision_saved.py +++ b/mwevents/types/events/revision_saved.py @@ -1,18 +1,21 @@ -from .event import Event, Match +from .. import Revision, Timestamp, Unavailable, User +from ... import configuration +from .event import Event +from .match import Match class RevisionSaved(Event): - MATCHED = [Match(None, None, True, "edit"), - Match(None, None, True, "new", priority=50)] __slots__ = ('revision',) - def __init__(self, timestamp, user, comment, revision): - super().__init__(timestamp, user, comment) + MATCHES = [Match(None, None, True, "edit"), + Match(None, None, True, "new")] + def initialize(self, timestamp, user, comment, revision): + super().initialize(timestamp, user, comment) self.revision = Revision(revision) @classmethod - def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ - :Example API doc:: + Example: { "type": "edit", "ns": 1, @@ -31,91 +34,146 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULTS): "sha1": "8817b4efd42c936254dfb09ce5bbfd0e4f9b848a" } """ - ns, title = config.title_parser.parse(api_doc['title']) - assert ns == api_doc['ns'] return cls( - Timestamp(api_doc['timestamp']), + Timestamp(rc_doc['timestamp']), User( - int(api_doc['userid']), - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], + rc_doc.get('comment'), Revision( - api_doc['revid'], - api_doc['old_revid'], - api_doc['newlen'], - api_doc['sha1'], - api_doc['pageid'], - 'minor' in api_doc + rc_doc.get('revid'), + rc_doc.get('old_revid'), + rc_doc.get('newlen'), + rc_doc.get('sha1'), + rc_doc.get('pageid'), + 'minor' in rc_doc ) ) @classmethod - def from_rev_row(cls, rev_row): + def from_rev_doc(cls, rev_doc, config=configuration.DEFAULT): + """ + Example: + { + "revid": 619093743, + "parentid": 618899706, + "user": "Eduen", + "userid": 7527773, + "timestamp": "2014-07-30T07:26:05Z", + "size": 181115, + "sha1": "c6236e5ad7b6af7c353a43ded631298c2b7e95ea", + "contentmodel": "wikitext", + "comment": "another quote from a prominent anarchist theroy which talks againts the simplification of a libertarian society to simply the absence of a state", + "page": { + "pageid": 12, + "ns": 0, + "title": "Anarchism" + } + } + """ + return cls( + Timestamp(rev_doc['timestamp']), + User( + rev_doc.get('userid'), + rev_doc.get('user') + ), + rev_doc.get('comment'), + Revision( + rev_doc['revid'], + rev_doc['parentid'], + rev_doc['size'], + rev_doc['sha1'], + rev_doc['page']['pageid'], + "minor" in rev_doc + ) + ) + + @classmethod + def from_rev_row(cls, rev_row, config=configuration.DEFAULT): + """ + Example: + { + 'rev_id': 233192, + 'rev_page': 10, + 'rev_text_id': 233192, + 'rev_comment': "*", + 'rev_user': 99, + 'rev_user_text': "RoseParks", + 'rev_timestamp': "20010121021221", + 'rev_minor_edit': 0, + 'rev_deleted': 0, + 'rev_len': 124, + 'rev_parent_id': 0, + 'rev_sha1': "8kul9tlwjm9oxgvqzbwuegt9b2830vw" + } + """ return cls( - Timestamp(db_row['rev_timestamp']) + Timestamp(rev_row['rev_timestamp']), User( - row(db_row['rev_user']), - row['rev_user_text'] + rev_row['rev_user'], + rev_row['rev_user_text'] ), - rev['rev_comment'], + rev_row['rev_comment'], Revision( - db_row['rev_id'], - db_row['rev_parent_id'], - db_row['rev_len'], - db_row['rev_sha1'], - db_row['rev_page'], - db_row['rev_minor'] + rev_row['rev_id'], + rev_row['rev_parent_id'], + rev_row['rev_len'], + rev_row['rev_sha1'], + rev_row['rev_page'], + rev_row['rev_minor_edit'] ) ) @classmethod - def from_rc_row(cls, rc_row): + def from_rc_row(cls, rc_row, config=configuration.DEFAULT): """ - :Example DB row:: - rc_id: 624362534 - rc_timestamp: "20131219020516" - rc_cur_time: "" - rc_user: 16380370 - rc_user_text: "RedVanderwall" - rc_namespace: 0 - rc_title: "Narragansett_Race_Track" - rc_comment: "/* The Biscuit */" - rc_minor: 1 - rc_bot: 0 - rc_new: 0 - rc_cur_id: 10680758 - rc_this_oldid: 586726430 - rc_last_oldid: 586725822 - rc_type: 0 - rc_source: mw.edit - rc_moved_to_ns: 0 - rc_moved_to_title: "" - rc_patrolled: 0 - rc_ip: - rc_old_len: 24309 - rc_new_len: 24348 - rc_deleted: 0 - rc_logid: 0 - rc_log_type: NULL - rc_log_action: "" - rc_params: + Example: + { + rc_id: 624362534 + rc_timestamp: "20131219020516" + rc_cur_time: "" + rc_user: 16380370 + rc_user_text: "RedVanderwall" + rc_namespace: 0 + rc_title: "Narragansett_Race_Track" + rc_comment: "/* The Biscuit */" + rc_minor: 1 + rc_bot: 0 + rc_new: 0 + rc_cur_id: 10680758 + rc_this_oldid: 586726430 + rc_last_oldid: 586725822 + rc_type: 0 + rc_source: mw.edit + rc_moved_to_ns: 0 + rc_moved_to_title: "" + rc_patrolled: 0 + rc_ip: + rc_old_len: 24309 + rc_new_len: 24348 + rc_deleted: 0 + rc_logid: 0 + rc_log_type: NULL + rc_log_action: "" + rc_params: + } """ return cls( - Timestamp(db_row['rc_timestamp']) + Timestamp(rc_row['rc_timestamp']), User( - row(db_row['rc_user']), - row['rc_user_text'] + rc_row['rc_user'], + rc_row['rc_user_text'] ), - rev['rc_comment'], + rc_row['rc_comment'], Revision( - db_row['rc_this_oldid'], - db_row['rc_last_oldid'], - db_row['rc_new_len'], - None, # Not available - db_row['rc_cur_id'], - db_row['rc_minor'] + rc_row['rc_this_oldid'], + rc_row['rc_last_oldid'], + rc_row['rc_new_len'], + rc_row['rev_sha1'] if 'rev_sha1' in rc_row else Unavailable, + rc_row['rc_cur_id'], + rc_row['rc_minor'] ) ) diff --git a/mwevents/types/events/revisions_deleted.py b/mwevents/types/events/revisions_deleted.py new file mode 100644 index 0000000..beebceb --- /dev/null +++ b/mwevents/types/events/revisions_deleted.py @@ -0,0 +1,58 @@ +from ... import configuration +from .event import Event +from .match import Match + + +class RevisionsDeleted(Event): + MATCHES = [Match("delete", "revision", True, "log")] + __slots__ = ('revision',) + def initialize(self, timestamp, user, comment, rev_ids): + super().initialize(timestamp, user, comment) + self.revisions = \ + Unavailable.otherwise(rev_ids, lambda ids:[int(i) for i in ids]) + + @classmethod + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): + """ + Example: + { + "type": "log", + "ns": 3, + "title": "User talk:S205643", + "pageid": 39719564, + "revid": 0, + "old_revid": 0, + "rcid": 672703228, + "user": "Mojo Hand", + "userid": "1453997", + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T17:09:39Z", + "comment": "[[WP:RD2|RD2]]: Grossly insulting, degrading, or offensive material", + "logid": 57952428, + "logtype": "delete", + "logaction": "revision", + "0": "revision", + "1": "619569809", + "2": "ofield=0", + "3": "nfield=1", + "tags": [] + } + """ + + if len(rc_doc.get('1', "")) > 0: + rev_ids = [int(id) for id in rc_doc['1'].split(",")] + else: + rev_ids = [] + + return cls( + Timestamp(rc_doc['timestamp']), + User( + rc_doc.get('userid'), + rc_doc.get('user') + ), + rc_doc.get('comment'), + rev_ids + ) + +Event.register(RevisionsDeleted) diff --git a/mwevents/types/events/tests/rc_docs.json b/mwevents/types/events/tests/rc_docs.json new file mode 100644 index 0000000..8bfe686 --- /dev/null +++ b/mwevents/types/events/tests/rc_docs.json @@ -0,0 +1,875 @@ +[ + { + "type": "edit", + "ns": 2, + "title": "User:AlexNewArtBot/LawFeedSearchResult", + "pageid": 10143095, + "revid": 619586849, + "old_revid": 619465524, + "rcid": 672726010, + "user": "InceptionBot", + "userid": "20056178", + "oldlen": 290827, + "newlen": 292318, + "timestamp": "2014-08-02T19:53:15Z", + "comment": "most recent results, added 70, removed 59, total 1042", + "sha1": "d2b4128309528dbd91d294c639d77e6e4ff43bd6" + }, + { + "type": "edit", + "ns": 0, + "title": "Primrose Class", + "pageid": 43457326, + "revid": 619586848, + "old_revid": 619586418, + "rcid": 672726009, + "user": "Largoplazo", + "userid": "2766075", + "oldlen": 3232, + "newlen": 3265, + "timestamp": "2014-08-02T19:53:15Z", + "comment": "Verification failed: the source cited makes no mention of either of these groups nor of the album.", + "sha1": "7b8bd338d24fed5905364d5c3dd6bfe864ee5f28" + }, + { + "type": "log", + "ns": 2, + "title": "User:Maximilly", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672726008, + "user": "Maximilly", + "userid": 22021690, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:15Z", + "comment": "", + "logid": 57955770, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "Imphal International Airport", + "pageid": 9536168, + "revid": 619586847, + "old_revid": 619539511, + "rcid": 672726007, + "user": "Trinidade", + "userid": "12085680", + "oldlen": 5963, + "newlen": 5880, + "timestamp": "2014-08-02T19:53:13Z", + "comment": "Reverted to revision 619539160 by [[Special:Contributions/115.244.170.245|115.244.170.245]] ([[User talk:115.244.170.245|talk]]). ([[WP:TW|TW]])", + "sha1": "a8e02e6a0f51ba9fbfc842e4e0e363d7af57c08a" + }, + { + "type": "edit", + "ns": 0, + "title": "List of Royal Northumberland Fusiliers battalions in World War II", + "pageid": 24356306, + "revid": 619586846, + "old_revid": 619586599, + "rcid": 672726006, + "user": "Hamish59", + "userid": "15572676", + "oldlen": 35873, + "newlen": 35949, + "timestamp": "2014-08-02T19:53:13Z", + "comment": "/* {{anchor|8|3rd Recce}}8th Battalion */", + "sha1": "f593fe2e7ac24d74c304901c9c2f3e1716605f08" + }, + { + "type": "edit", + "ns": 4, + "title": "Wikipedia:Teahouse/Questions", + "pageid": 34745517, + "revid": 619586845, + "old_revid": 619583434, + "rcid": 672726005, + "user": "Eventhorizon51", + "userid": "20108326", + "oldlen": 96153, + "newlen": 97088, + "timestamp": "2014-08-02T19:53:12Z", + "comment": "/* Sparking discussion */ new section", + "sha1": "ee01baeac8987e669b0214de546439c6300faea9" + }, + { + "type": "edit", + "ns": 2, + "title": "User:Eagc7/List of Characters and Cast in the Marvel Cinematic Universe", + "pageid": 28196402, + "revid": 619586844, + "old_revid": 619518200, + "rcid": 672726004, + "user": "Eagc7", + "userid": "9572574", + "oldlen": 189746, + "newlen": 190887, + "timestamp": "2014-08-02T19:53:12Z", + "comment": "/* Fan stuff (MCU Unrelated) */", + "sha1": "8114f00d7383c0bdd7a0ab68640cc2a024abf706" + }, + { + "type": "edit", + "ns": 0, + "title": "Richard Boyle, 2nd Viscount Shannon", + "pageid": 407457, + "revid": 619586843, + "old_revid": 619586752, + "rcid": 672726003, + "user": "Dormskirk", + "userid": "3306290", + "oldlen": 10753, + "newlen": 10753, + "timestamp": "2014-08-02T19:53:11Z", + "comment": "/* Military career */ exp", + "sha1": "c3438cb2a0857cb2cb7ccf038be114b04b989ae4" + }, + { + "type": "edit", + "ns": 118, + "title": "Draft:BEDHEAD (Web Series)", + "pageid": 43452997, + "revid": 619586842, + "old_revid": 619503910, + "rcid": 672726002, + "user": "Revent", + "userid": "4179294", + "oldlen": 3166, + "newlen": 3118, + "timestamp": "2014-08-02T19:53:11Z", + "comment": "Cleaning up submission ([[WP:AFCHRW|afch-rewrite]] 0.8)", + "sha1": "5d4957c6a15aec7da57c13a4bf07368e72d887c2" + }, + { + "type": "edit", + "ns": 0, + "title": "2014 U.S. F2000 National Championship", + "pageid": 41352635, + "revid": 619586841, + "old_revid": 619480612, + "rcid": 672726001, + "user": "Drdisque", + "userid": "272143", + "oldlen": 17306, + "newlen": 17481, + "timestamp": "2014-08-02T19:53:10Z", + "comment": "/* Race calendar and results */ +race 2", + "sha1": "27550d31b9a259ecf717c6f57436dca1c81b62de" + }, + { + "type": "edit", + "ns": 0, + "title": "Orlando Executive Airport", + "pageid": 765458, + "revid": 619586840, + "old_revid": 619586760, + "rcid": 672726000, + "user": "68.62.253.185", + "userid": "0", + "anon": "", + "oldlen": 12155, + "newlen": 12145, + "timestamp": "2014-08-02T19:53:10Z", + "comment": "/* History */", + "sha1": "9ae9bc90ffbca089c4286151737cff6c49f1ba58" + }, + { + "type": "edit", + "ns": 0, + "title": "Norman Cornish", + "pageid": 43457314, + "revid": 619586839, + "old_revid": 619586682, + "rcid": 672725999, + "user": "The Anome", + "userid": "76", + "oldlen": 1068, + "newlen": 1107, + "timestamp": "2014-08-02T19:53:09Z", + "comment": "[[Category:People from County Durham]]", + "sha1": "c3e4a00e28da7e308f296639867eb7f9f744e80c" + }, + { + "type": "edit", + "ns": 0, + "title": "Glossop North End A.F.C.", + "pageid": 2239915, + "revid": 619586838, + "old_revid": 619578096, + "rcid": 672725998, + "user": "GNEbandit", + "userid": "14908874", + "oldlen": 56481, + "newlen": 56485, + "timestamp": "2014-08-02T19:53:08Z", + "comment": "/* Season Stats */", + "sha1": "380d4d62361a47ae1bb1cd550ef7484b3a6d5d04" + }, + { + "type": "edit", + "ns": 0, + "title": "Catherine Lacoste", + "pageid": 2202676, + "revid": 619586837, + "old_revid": 561933027, + "rcid": 672725997, + "user": "Editguy9", + "userid": "12763997", + "oldlen": 4862, + "newlen": 4893, + "timestamp": "2014-08-02T19:53:08Z", + "comment": "", + "sha1": "20c69c5a44a8b526d80de68dd2ba940fa6b37a2c" + }, + { + "type": "edit", + "ns": 0, + "title": "John Drew, Jr.", + "pageid": 744551, + "revid": 619586836, + "old_revid": 619586721, + "rcid": 672725996, + "user": "Koplimek", + "userid": "2360147", + "oldlen": 5818, + "newlen": 5818, + "timestamp": "2014-08-02T19:53:07Z", + "comment": "corrected spelling", + "sha1": "f1305902c13b69c6d88c6c2c4fb1e7c5c0efa4a7" + }, + { + "type": "edit", + "ns": 2, + "title": "User:Cyde/List of requests for help", + "pageid": 10432778, + "revid": 619586835, + "old_revid": 619572687, + "rcid": 672725995, + "user": "Cydebot", + "userid": "1215485", + "oldlen": 28, + "newlen": 0, + "timestamp": "2014-08-02T19:53:07Z", + "comment": "Bot: Listifying from Category:Wikipedians looking for help (0 entries)", + "sha1": "da39a3ee5e6b4b0d3255bfef95601890afd80709" + }, + { + "type": "edit", + "ns": 0, + "title": "Joseph Calleia", + "pageid": 2079078, + "revid": 619586834, + "old_revid": 619586334, + "rcid": 672725994, + "user": "WFinch", + "userid": "2477977", + "oldlen": 9549, + "newlen": 9548, + "timestamp": "2014-08-02T19:53:06Z", + "comment": "/* Hollywood */ um", + "sha1": "c45c8bd7a96be180a157012ddd6297742afe94bb" + }, + { + "type": "log", + "ns": 2, + "title": "User:Svgmcr", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725989, + "user": "Svgmcr", + "userid": 22021689, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:06Z", + "comment": "iOS App Account Creation", + "logid": 57955769, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "log", + "ns": 2, + "title": "User:Cgmarsh", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725980, + "user": "Cgmarsh", + "userid": 22021688, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:06Z", + "comment": "iOS App Account Creation", + "logid": 57955768, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "log", + "ns": 2, + "title": "User:Jackyinjun", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725979, + "user": "Jackyinjun", + "userid": 22021687, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:05Z", + "comment": "iOS App Account Creation", + "logid": 57955767, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "List of shipwrecks in August 1942", + "pageid": 33427178, + "revid": 619586833, + "old_revid": 619585917, + "rcid": 672725978, + "user": "George 1861", + "userid": "13603074", + "oldlen": 99394, + "newlen": 99511, + "timestamp": "2014-08-02T19:53:04Z", + "comment": "/* 25 August */", + "sha1": "de699e3aa88c06fe2b5e78c9b7f301a14a3fafc2" + }, + { + "type": "edit", + "ns": 4, + "title": "Wikipedia:Department of Fun/Word Association", + "pageid": 29440978, + "revid": 619586832, + "old_revid": 619586632, + "rcid": 672725977, + "user": "Cassie Hawthorne", + "userid": "19943550", + "oldlen": 23125, + "newlen": 23128, + "timestamp": "2014-08-02T19:53:04Z", + "comment": "/* Branch #4: */", + "sha1": "011e211f892a43b4d94a85278ff2950d3339bb61" + }, + { + "type": "log", + "ns": 2, + "title": "User:Sushantnd", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725976, + "user": "Sushantnd", + "userid": 22021685, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:03Z", + "comment": "", + "logid": 57955765, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "Mental retardation and microcephaly with pontine and cerebellar hypoplasia", + "pageid": 39398552, + "revid": 619586831, + "old_revid": 619586787, + "rcid": 672725975, + "user": "24.147.208.125", + "userid": "0", + "anon": "", + "oldlen": 1265, + "newlen": 1266, + "timestamp": "2014-08-02T19:53:01Z", + "comment": "/* See also */", + "sha1": "59d008d30d4a7475d9a34301fccf31bd8a99f434" + }, + { + "type": "log", + "ns": 2, + "title": "User:Patnalakiran", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725974, + "user": "Patnalakiran", + "userid": 22021684, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:53:01Z", + "comment": "iOS App Account Creation", + "logid": 57955764, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "Dive center", + "pageid": 4300016, + "revid": 619586830, + "old_revid": 606593344, + "rcid": 672725973, + "user": "BeyondD", + "userid": "21981020", + "oldlen": 1591, + "newlen": 1598, + "timestamp": "2014-08-02T19:53:00Z", + "comment": "/* External links */", + "sha1": "ea84aa27c98e8b537706151726718cf214a3b697" + }, + { + "type": "edit", + "ns": 0, + "title": "Whitney Miller", + "pageid": 32368852, + "revid": 619586829, + "old_revid": 619586793, + "rcid": 672725972, + "user": "Shinyandblue", + "userid": "15937519", + "oldlen": 5077, + "newlen": 5079, + "timestamp": "2014-08-02T19:53:00Z", + "comment": "", + "sha1": "ea723c2ef3f3b3fa91d92e19c63d7b703b2ef2d5" + }, + { + "type": "edit", + "ns": 2, + "title": "User:Made In Norfolk", + "pageid": 40345626, + "revid": 619586828, + "old_revid": 619586622, + "rcid": 672725971, + "user": "Made In Norfolk", + "userid": "19582464", + "oldlen": 304, + "newlen": 257, + "timestamp": "2014-08-02T19:52:59Z", + "comment": "", + "sha1": "04e4a550eb71df5ebf1a6f734fda256a247e0497" + }, + { + "type": "edit", + "ns": 0, + "title": "Port Vale F.C.", + "pageid": 203456, + "revid": 619586827, + "old_revid": 619585397, + "rcid": 672725970, + "user": "EchetusXe", + "userid": "7338423", + "oldlen": 56971, + "newlen": 57127, + "timestamp": "2014-08-02T19:52:58Z", + "comment": "/* Current squad */", + "sha1": "a1521eee356f07437b7797f0560fe1e375d6ef0f" + }, + { + "type": "edit", + "ns": 0, + "title": "Jimmy Keohane", + "pageid": 32862434, + "revid": 619586826, + "old_revid": 601775355, + "rcid": 672725969, + "user": "TomECFC", + "userid": "22021628", + "oldlen": 10209, + "newlen": 10307, + "timestamp": "2014-08-02T19:52:57Z", + "comment": "Added content", + "sha1": "044adb2c2130a3cc99e18777e1867f843aa07c56" + }, + { + "type": "edit", + "ns": 0, + "title": "Paxi", + "pageid": 309748, + "revid": 619586825, + "old_revid": 608095236, + "rcid": 672725968, + "user": "Anders Feder", + "userid": "7724", + "oldlen": 9168, + "newlen": 9167, + "timestamp": "2014-08-02T19:52:56Z", + "comment": "*not* Pontikonisi. That's at Kerkyra.", + "sha1": "ffbba010752bfb2b2ca1a17582ae1273dc9c842d" + }, + { + "type": "log", + "ns": 2, + "title": "User:Glburt", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725967, + "user": "Glburt", + "userid": 22021683, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:52:56Z", + "comment": "iOS App Account Creation", + "logid": 57955763, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "Yu-Gi-Oh! Duel Monsters", + "pageid": 40573410, + "revid": 619586824, + "old_revid": 619585943, + "rcid": 672725966, + "user": "38.114.66.184", + "userid": "0", + "anon": "", + "oldlen": 28425, + "newlen": 28550, + "timestamp": "2014-08-02T19:52:55Z", + "comment": "/* English Cast */", + "sha1": "bf25e3e7e6cf483876a721f42ad656868b7a8381" + }, + { + "type": "edit", + "ns": 0, + "title": "List of New York City Ballet repertory", + "pageid": 14437896, + "revid": 619586823, + "old_revid": 614230186, + "rcid": 672725965, + "user": "DavidBrooks", + "userid": "55225", + "oldlen": 20609, + "newlen": 20609, + "timestamp": "2014-08-02T19:52:55Z", + "comment": "/* P */ correct capitalization", + "sha1": "a414c79884b65ec1426d4a30d288804c3c8e026a" + }, + { + "type": "edit", + "ns": 0, + "title": "Emory University Hospital", + "pageid": 19236104, + "revid": 619586822, + "old_revid": 619578696, + "rcid": 672725964, + "user": "MaddiKadafy", + "userid": "21891220", + "oldlen": 6781, + "newlen": 6821, + "timestamp": "2014-08-02T19:52:54Z", + "comment": "/* 2014 Ebola Virus Outbreak */", + "sha1": "ce1f8134f1f99c8e6b34badfaa28beb8e5191f1e" + }, + { + "type": "edit", + "ns": 1, + "title": "Talk:Fertilizer", + "pageid": 696496, + "revid": 619586821, + "old_revid": 619562018, + "rcid": 672725963, + "user": "Smokefoot", + "userid": "698909", + "oldlen": 49405, + "newlen": 49595, + "timestamp": "2014-08-02T19:52:54Z", + "comment": "/* 18% recovery rate */looking for fert-to-food conversion efficiencies vs run-off", + "sha1": "20d6dfc60c032d969c62021d5a2b27ec1634ea4e" + }, + { + "type": "log", + "ns": 2, + "title": "User:Gregleightaylor87", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725962, + "user": "Gregleightaylor87", + "userid": 22021682, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:52:51Z", + "comment": "iOS App Account Creation", + "logid": 57955762, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "Penalty fare", + "pageid": 14198533, + "revid": 619586819, + "old_revid": 619586730, + "rcid": 672725961, + "user": "JaJaWa", + "userid": "10026808", + "oldlen": 8788, + "newlen": 8809, + "timestamp": "2014-08-02T19:52:51Z", + "comment": "/* History and legal status */ add Tramlink", + "sha1": "bfe35989da2aa579fc72cdd6d11d2913e1056e56" + }, + { + "type": "edit", + "ns": 0, + "title": "Bolnhurst and Keysoe", + "pageid": 3284380, + "revid": 619586817, + "old_revid": 544216645, + "rcid": 672725960, + "user": "Bleaney", + "userid": "3441262", + "oldlen": 4109, + "newlen": 4110, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "Borough of Bedford", + "sha1": "68e28e301dcea3870d069c3718303f3081a34c63" + }, + { + "type": "edit", + "ns": 0, + "title": "Jason Taumalolo", + "pageid": 32892045, + "revid": 619586818, + "old_revid": 618525428, + "rcid": 672725959, + "user": "AddyAddy1", + "userid": "21316051", + "oldlen": 7735, + "newlen": 7735, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "", + "sha1": "0ddb2ad113f4d73357dc13ba40407a24f739cd74" + }, + { + "type": "edit", + "ns": 2, + "title": "User:Ryan shell/translatons", + "pageid": 18254376, + "revid": 619586816, + "old_revid": 619586584, + "rcid": 672725958, + "user": "Ryan shell", + "userid": "3277244", + "oldlen": 4494, + "newlen": 4491, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "/* See also */ tr", + "sha1": "eb8ef779f17b7e1d9823a037f97f3fb05b880f95" + }, + { + "type": "edit", + "ns": 3, + "title": "User talk:87.139.10.57", + "pageid": 43457339, + "revid": 619586815, + "old_revid": 619586580, + "rcid": 672725957, + "user": "SNUGGUMS", + "userid": "19269270", + "oldlen": 2056, + "newlen": 2929, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "Warning: Addition of unsourced or improperly cited material. ([[WP:TW|TW]])", + "sha1": "77f6a62aa80bab220a323955a8458c577cb02429" + }, + { + "type": "edit", + "ns": 0, + "title": "Liz Lemon", + "pageid": 8340321, + "revid": 619586814, + "old_revid": 618739058, + "rcid": 672725956, + "user": "198.244.107.69", + "userid": "0", + "anon": "", + "oldlen": 43766, + "newlen": 43767, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "/* Personality */ missing comma", + "sha1": "bf8acc169a420772a0a4fa16524c97ecca87ce15" + }, + { + "type": "log", + "ns": 2, + "title": "User:Dmhernandez228", + "pageid": 0, + "revid": 0, + "old_revid": 0, + "rcid": 672725955, + "user": "Dmhernandez228", + "userid": 22021681, + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "iOS App Account Creation", + "logid": 57955761, + "logtype": "newusers", + "logaction": "create" + }, + { + "type": "edit", + "ns": 0, + "title": "List of current champions in TNA Wrestling", + "pageid": 27484248, + "revid": 619586813, + "old_revid": 619577259, + "rcid": 672725954, + "user": "200.42.244.88", + "userid": "0", + "anon": "", + "oldlen": 5211, + "newlen": 5223, + "timestamp": "2014-08-02T19:52:50Z", + "comment": "", + "sha1": "cd6a1f98b1121b7f1c6db420e18186d6d44177be" + }, + { + "type": "log", + "ns": 15, + "title": "Category talk:Joe songs", + "rcid": 616263547, + "pageid": 0, + "revid": 0, + "old_revid": 0, + "user": "Cydebot", + "userid": "1215485", + "bot": "", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-12T01:29:06Z", + "comment": "Robot - Speedily moving category Joe songs to [[:Category:Joe (singer) songs]] per [[WP:CFDS|CFDS]].", + "logid": 52556251, + "logtype": "delete", + "logaction": "delete", + "tags": [] + }, + { + "type": "log", + "ns": 3, + "title": "User talk:Envisage Drawn", + "rcid": 616228397, + "pageid": 41053035, + "revid": 0, + "old_revid": 0, + "user": "Peridon", + "userid": "7128128", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-11T22:01:52Z", + "comment": "1 revision restored: wrong button!", + "logid": 52553202, + "logtype": "delete", + "logaction": "restore", + "tags": [] + }, + { + "type": "log", + "ns": 15, + "title": "Category talk:Joe 90 albums", + "rcid": 616263570, + "pageid": 41054940, + "revid": 0, + "old_revid": 0, + "user": "Cydebot", + "userid": "1215485", + "bot": "", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-12T01:29:15Z", + "comment": "Robot - Speedily moving category Joe 90 albums to [[:Category:Joe 90 (band) albums]] per [[WP:CFDS|CFDS]].", + "logid": 52556255, + "logtype": "move", + "logaction": "move", + "move": { + "new_ns": 15, + "new_title": "Category talk:Joe 90 (band) albums" + }, + "tags": [] + }, + { + "type": "log", + "ns": 2, + "title": "User:190.203.41.111", + "rcid": 616287367, + "pageid": 0, + "revid": 0, + "old_revid": 0, + "user": "ProcseeBot", + "userid": "8760229", + "bot": "", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-12T04:21:18Z", + "comment": "{{blocked proxy}} ", + "logid": 52558623, + "logtype": "block", + "logaction": "block", + "block": { + "flags": "nocreate", + "duration": "60 days", + "expiry": "2014-01-11T04:21:18Z" + }, + "tags": [] + }, + { + "type": "log", + "ns": 2, + "title": "User:Tuhin Karmakar", + "rcid": 615891880, + "pageid": 0, + "revid": 0, + "old_revid": 0, + "user": "Andrevan", + "userid": "13732", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-10T12:04:41Z", + "comment": "WP:CHU", + "logid": 52520596, + "logtype": "renameuser", + "logaction": "renameuser", + "olduser": "Tuhin Karmakar", + "newuser": "Anonymous23648762289", + "edits": 19, + "tags": [] + }, + { + "type": "log", + "ns": 0, + "old_revid": 0, + "title": "Tuheitia Paki", + "0": "\u200e[edit=autoconfirmed] (expires 00:39, 11 July 2014 (UTC))\u200e[move=autoconfirmed] (expires 00:39, 11 July 2014 (UTC))", + "newlen": 0, + "logaction": "protect", + "1": "", + "revid": 0, + "logtype": "protect", + "userid": "54381", + "logid": 57392538, + "user": "Gadfium", + "timestamp": "2014-07-04T00:39:03Z", + "oldlen": 0, + "pageid": 6581987, + "comment": "repeated addition of pov material", + "rcid": 666584163 + } +] diff --git a/mwevents/types/events/tests/test_event.py b/mwevents/types/events/tests/test_event.py new file mode 100644 index 0000000..df75ded --- /dev/null +++ b/mwevents/types/events/tests/test_event.py @@ -0,0 +1,77 @@ +import json +import os.path + +from nose.tools import eq_ + +from .. import Event, PageCreated, PageRestored, RevisionSaved +from ... import Page, Revision, Timestamp, Unavailable, User + + +def test_construction_and_values(): + timestamp = Timestamp(1234567890) + user = User(10, "foobar") + comment = "A sample comment that says some things." + + event = Event(timestamp, user, comment) + + eq_(event.timestamp, timestamp) + eq_(event.user, user) + eq_(event.comment, comment) + + eq_(event, Event(event)) + eq_(event, Event(event.to_json())) + +def test_json_of_subclasses(): + user = User(10, "Foo") + page = Page(12, 2, "Bar") + timestamp = Timestamp(1234567890) + revision = Revision(457863, 7328, 23423, + "1234567890123457890123457890ab", 12, False) + events = [ + PageRestored(timestamp, user, "?", Unavailable, page), + RevisionSaved(timestamp, user, "!", revision) + ] + + docs = [e.to_json() for e in events] + + new_events = [Event(d) for d in docs] + + eq_(events, new_events) + +def test_from_rc_doc_order(): + rc_doc = { + "type": "new", + "ns": 14, + "title": "Category:Buildings and structures under " + \ + "construction in Belgium", + "pageid": 43457411, + "revid": 619588219, + "old_revid": 0, + "rcid": 672727804, + "user": "Vegaswikian", + "userid": "214427", + "oldlen": 0, + "newlen": 137, + "timestamp": "2014-08-02T20:07:22Z", + "comment": "Add a series category", + "sha1": "30f240252ca93e5830bef7f831fb8cb251cc4d72" + } + + events = list(Event.from_rc_doc(rc_doc)) + + eq_(len(events), 2) + assert isinstance(events[0], PageCreated) + assert isinstance(events[1], RevisionSaved) + + eq_(events[0].page.id, events[1].revision.page_id) + + +def test_from_rc_docs(): + f = open(os.path.join(os.path.dirname(__file__), "rc_docs.json")) + + rc_docs = json.load(f) + + events = list(event for rc_doc in rc_docs \ + for event in Event.from_rc_doc(rc_doc)) + + eq_(len(events), 51) diff --git a/mwevents/types/events/tests/test_page_restored.py b/mwevents/types/events/tests/test_page_restored.py new file mode 100644 index 0000000..cf6d70c --- /dev/null +++ b/mwevents/types/events/tests/test_page_restored.py @@ -0,0 +1,53 @@ +from nose.tools import eq_ + +from jsonable import JSONable + +from ... import Page, Timestamp, Unavailable, User +from ..page_restored import PageRestored + + +def test_construction_and_values(): + timestamp = Timestamp(1234567890) + user = User(10, "Foobar!") + comment = "This is a comment!" + + old_page_id = 10 + page = Page(10, 2, "Foobar!") + page_restored = PageRestored(timestamp, user, comment, old_page_id, page) + + eq_(page_restored.timestamp, timestamp) + eq_(page_restored.user, user) + eq_(page_restored.comment, comment) + eq_(page_restored.old_page_id, old_page_id) + eq_(page_restored.page, page) + + +def test_from_rc_doc(): + rc_doc = { + "type": "log", + "ns": 3, + "title": "User talk:Envisage Drawn", + "rcid": 616228397, + "pageid": 41053035, + "revid": 0, + "old_revid": 0, + "user": "Peridon", + "userid": "7128128", + "oldlen": 0, + "newlen": 0, + "timestamp": "2013-11-11T22:01:52Z", + "comment": "1 revision restored: wrong button!", + "logid": 52553202, + "logtype": "delete", + "logaction": "restore", + "tags": [] + } + + page_restored = PageRestored.from_rc_doc(rc_doc) + + eq_(page_restored.timestamp, Timestamp(rc_doc['timestamp'])) + eq_(page_restored.user, User(rc_doc['userid'], rc_doc['user'])) + eq_(page_restored.comment, rc_doc['comment']) + eq_(page_restored.old_page_id, Unavailable) + eq_(page_restored.page.id, rc_doc['pageid']) + eq_(page_restored.page.namespace, rc_doc['ns']) diff --git a/mwevents/types/events/tests/test_revision_saved.py b/mwevents/types/events/tests/test_revision_saved.py index 988c498..fa1ff3a 100644 --- a/mwevents/types/events/tests/test_revision_saved.py +++ b/mwevents/types/events/tests/test_revision_saved.py @@ -2,7 +2,7 @@ from jsonable import JSONable -from ...types import Revision, Timestamp, User +from ... import Revision, Timestamp, Unavailable, User from ..revision_saved import RevisionSaved @@ -19,3 +19,122 @@ def test_construction_and_values(): eq_(revision_saved.user, user) eq_(revision_saved.comment, comment) eq_(revision_saved.revision, revision) + +def test_from_rc_doc(): + rc_doc = { + "type": "edit", + "ns": 1, + "title": "Talk:Neutral mutation", + "rcid": 616266829, + "pageid": 5555386, + "revid": 581269873, + "old_revid": 581268750, + "user": "Grabriggs", + "userid": "19701352", + "oldlen": 23767, + "newlen": 24046, + "timestamp": "2013-11-12T01:48:22Z", + "comment": "/* Neutral theory */", + "tags": [], + "sha1": "8817b4efd42c936254dfb09ce5bbfd0e4f9b848a" + } + + revision_saved = RevisionSaved.from_rc_doc(rc_doc) + + eq_(revision_saved.timestamp, Timestamp(rc_doc['timestamp'])) + eq_(revision_saved.user, User(rc_doc['userid'], rc_doc['user'])) + eq_(revision_saved.comment, rc_doc['comment']) + eq_(revision_saved.revision.bytes, rc_doc['newlen']) + eq_(revision_saved.revision.sha1, rc_doc['sha1']) + eq_(revision_saved.revision.page_id, rc_doc['pageid']) + eq_(revision_saved.revision.minor, 'minor' in rc_doc) + +def test_from_rev_doc(): + rev_doc = { + "revid": 619093743, + "parentid": 618899706, + "user": "Eduen", + "userid": 7527773, + "timestamp": "2014-07-30T07:26:05Z", + "size": 181115, + "sha1": "c6236e5ad7b6af7c353a43ded631298c2b7e95ea", + "contentmodel": "wikitext", + "comment": "another quote from a prominent anarchist theroy which talks againts the simplification of a libertarian society to simply the absence of a state", + "page": { + "pageid": 12, + "ns": 0, + "title": "Anarchism" + } + } + + revision_saved = RevisionSaved.from_rev_doc(rev_doc) + + eq_(revision_saved.timestamp, Timestamp(rev_doc['timestamp'])) + eq_(revision_saved.user, User(rev_doc['userid'], rev_doc['user'])) + eq_(revision_saved.comment, rev_doc['comment']) + eq_(revision_saved.revision.sha1, rev_doc['sha1']) + eq_(revision_saved.revision.page_id, rev_doc['page']['pageid']) + +def test_from_rc_row(): + rc_row = { + 'rc_id': 624362534, + 'rc_timestamp': "20131219020516", + 'rc_cur_time': "", + 'rc_user': 16380370, + 'rc_user_text': "RedVanderwall", + 'rc_namespace': 0, + 'rc_title': "Narragansett_Race_Track", + 'rc_comment': "/* The Biscuit */", + 'rc_minor': 1, + 'rc_bot': 0, + 'rc_new': 0, + 'rc_cur_id': 10680758, + 'rc_this_oldid': 586726430, + 'rc_last_oldid': 586725822, + 'rc_type': 0, + 'rc_source': "mw.edit", + 'rc_moved_to_ns': 0, + 'rc_moved_to_title': "", + 'rc_patrolled': 0, + 'rc_ip': "", + 'rc_old_len': 24309, + 'rc_new_len': 24348, + 'rc_deleted': 0, + 'rc_logid': 0, + 'rc_log_type': None, + 'rc_log_action': "", + 'rc_params': "" + } + + revision_saved = RevisionSaved.from_rc_row(rc_row) + + eq_(revision_saved.timestamp, Timestamp(rc_row['rc_timestamp'])) + eq_(revision_saved.user, User(rc_row['rc_user'], rc_row['rc_user_text'])) + eq_(revision_saved.comment, rc_row['rc_comment']) + eq_(revision_saved.revision.sha1, Unavailable) + eq_(revision_saved.revision.page_id, rc_row['rc_cur_id']) + +def test_from_rev_row(): + rev_row = { + 'rev_id': 233192, + 'rev_page': 10, + 'rev_text_id': 233192, + 'rev_comment': "*", + 'rev_user': 99, + 'rev_user_text': "RoseParks", + 'rev_timestamp': "20010121021221", + 'rev_minor_edit': 0, + 'rev_deleted': 0, + 'rev_len': 124, + 'rev_parent_id': 0, + 'rev_sha1': "8kul9tlwjm9oxgvqzbwuegt9b2830vw" + } + + revision_saved = RevisionSaved.from_rev_row(rev_row) + + eq_(revision_saved.timestamp, Timestamp(rev_row['rev_timestamp'])) + eq_(revision_saved.user, + User(rev_row['rev_user'], rev_row['rev_user_text'])) + eq_(revision_saved.comment, rev_row['rev_comment']) + eq_(revision_saved.revision.sha1, rev_row['rev_sha1']) + eq_(revision_saved.revision.page_id, rev_row['rev_page']) diff --git a/mwevents/types/events/user_blocked.py b/mwevents/types/events/user_blocked.py index f24574f..8b0d7f4 100644 --- a/mwevents/types/events/user_blocked.py +++ b/mwevents/types/events/user_blocked.py @@ -1,17 +1,21 @@ -from .event import Event, Match +from .. import Block, Timestamp, User +from ... import configuration +from ...util import split_page_name +from .event import Event +from .match import Match class UserBlocked(Event): - MATCHES = [MATCH("block", "block", False, "log"), - MATCH("block", "reblock", False, "log")] + MATCHES = [Match("block", "block", False, "log"), + Match("block", "reblock", False, "log")] __slots__ = ('blocked', 'block',) - def __init__(self, timestamp, user, comment, blocked, block): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, blocked, block): + super().initialize(timestamp, user, comment) self.blocked = User(blocked) self.block = Block(block) @classmethod - def from_api_doc(cls, api_doc): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ { "type": "log", @@ -39,29 +43,30 @@ def from_api_doc(cls, api_doc): "tags": [] } """ - ns, title = Page.parse_title(api_doc['title']) - ns, blocked_name = config.title_parser.parse(title) - assert ns == 2 - blocked_name = blocked_name.replace("_", " ") + nsname, blocked_name = split_page_name(rc_doc['ns'], rc_doc['title']) + + if len(rc_doc['block'].get('flags', "")) > 0: + flags = rc_doc['block']['flags'].split(",") + else: + flags = [] return cls( - Timestamp(api_doc['timestamp']) + Timestamp(rc_doc['timestamp']), User( - api_doc['userid'], - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], + rc_doc.get('comment'), User( None, # Not available blocked_name ), Block( - doc['block']['flags'], - doc['block']['duration'], - doc['block']['expiry'], + flags, + rc_doc['block']['duration'], + rc_doc['block'].get('expiry'), ) ) -# Event.register(UserBlocked) -# TODO: Uncomment when ready +Event.register(UserBlocked) diff --git a/mwevents/types/events/user_groups_modified.py b/mwevents/types/events/user_groups_modified.py new file mode 100644 index 0000000..6c21695 --- /dev/null +++ b/mwevents/types/events/user_groups_modified.py @@ -0,0 +1,72 @@ +from .. import Unavailable +from ... import configuration +from ...util import split_page_name +from .event import Event +from .match import Match + + +class UserGroupsModified(Event): + MATCHES = [Match('rights', 'rights', False, "log")] + __slots__ = ('modified', 'old', 'new') + def initialize(self, timestamp, user, comment, modified, old, new): + super().initialize(timestamp, user, comment) + + self.modified = User(modified) + self.old = [str(group) for group in old] + self.new = [str(group) for group in new] + + @classmethod + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): + """ + Example: + { + "type": "log", + "ns": 2, + "title": "User:1980na", + "pageid": 43436336, + "revid": 0, + "old_revid": 0, + "rcid": 672700514, + "user": "Pharos", + "userid": "111996", + "oldlen": 0, + "newlen": 0, + "timestamp": "2014-08-02T16:52:37Z", + "comment": "[[Wikipedia:Education noticeboard#Request for course instructor right: Ninafundisha (talk) (course page draft)]]", + "logid": 57952057, + "logtype": "rights", + "logaction": "rights", + "rights": { + "new": "epinstructor", + "old": "" + }, + "tags": [] + } + """ + nsname, modified_name = split_page_name(rc_doc['ns'], rc_doc['title']) + + if len(rc_doc['rights']['old']) > 0: + old_groups = rc_doc['rights']['old'].split(",") + else: + old_groups = [] + + if len(rc_doc['rights']['new']) > 0: + new_groups = rc_doc['rights']['new'].split(",") + else: + new_groups = [] + + + return cls( + Timestamp(rc_doc['timestamp']), + User( + rc_doc.get('userid'), + rc_doc.get('user') + ), + rc_doc.get('comment'), + User( + Unavailable, + modified_name + ), + old_groups, + new_groups + ) diff --git a/mwevents/types/events/user_registered.py b/mwevents/types/events/user_registered.py index 72ec5dc..898d5c0 100644 --- a/mwevents/types/events/user_registered.py +++ b/mwevents/types/events/user_registered.py @@ -1,4 +1,8 @@ -from .event import Event, Match +from .. import Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name +from .event import Event +from .match import Match class UserRegistered(Event): @@ -7,14 +11,14 @@ class UserRegistered(Event): Match("newusers", "create2", False, "log"), Match("newusers", "autocreate", False, "log"), Match("newusers", "byemail", False, "log")] - __slots__ = ('action', 'newuser') - def __init__(self, timestamp, user, comment, action, new): - super().__init__(timestamp, user, comment) + __slots__ = ('action', 'new') + def initialize(self, timestamp, user, comment, action, new): + super().initialize(timestamp, user, comment) self.action = str(action) self.new = User(new) @classmethod - def from_api_doc(cls, api_doc, config=configuration.DEFAULT): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ Example: { @@ -75,30 +79,28 @@ def from_api_doc(cls, api_doc, config=configuration.DEFAULT): "tags": [] } """ - if api_doc['logaction'] == "create": - user_name = api_doc['user'] - user_id + if rc_doc['logaction'] == "create": + registered_name = rc_doc['user'] + registered_id = rc_doc['userid'] else: #doc['logaction'] in ("create2","byemail") - ns, name_title = config.title_parser.parse(doc['title']) - assert ns == 2 - user_name = name_title - user_id = None # Not available + nsname, registered_name = split_page_name(rc_doc['ns'], + rc_doc['title']) + registered_id = Unavailable # Not available return cls( - Timestamp(api_doc['timestamp']) + Timestamp(rc_doc['timestamp']), User( - api_doc['userid'], - api_doc['name'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], - api_doc['logaction'], + rc_doc.get('comment'), + rc_doc['logaction'], User( - user_id, - user_name + registered_id, + registered_name ) ) -# Event.register(UserRegistered) -# TODO: Uncomment when ready +Event.register(UserRegistered) diff --git a/mwevents/types/events/user_renamed.py b/mwevents/types/events/user_renamed.py index 68cb4f0..2ed845b 100644 --- a/mwevents/types/events/user_renamed.py +++ b/mwevents/types/events/user_renamed.py @@ -1,20 +1,22 @@ -from .event import Event, Match +from ... import configuration +from .event import Event +from .match import Match class UserRenamed(Event): """ TODO: Figure out what to do with centralauth stuff. """ - MATCHES = [MATCH("renameuser", "renameuser", False, "log")] + MATCHES = [Match("renameuser", "renameuser", False, "log")] __slots__ = ('old', 'new') - def __init__(self, timestamp, user, comment, old, new): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, old, new): + super().initialize(timestamp, user, comment) self.old = User(old) self.new = User(new) @classmethod - def from_api_doc(cls, api_doc): + def from_api_doc(cls, api_doc, config=configuration.DEFAULT): """ Example: { @@ -41,21 +43,20 @@ def from_api_doc(cls, api_doc): } """ return cls( - Timestamp(doc['timestamp']), + Timestamp(rc_doc['timestamp']), User( - int(doc['userid']), - doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - doc['comment'], + rc_doc.get('comment'), User( - None, #Not available + Unavailable, #Not available doc['olduser'] ), User( - None, #Not available + Unavailable, #Not available doc['newuser'] ) ) -# Event.register(UserRenamed) -# TODO: Uncomment when ready +Event.register(UserRenamed) diff --git a/mwevents/types/events/user_rights_modified.py b/mwevents/types/events/user_rights_modified.py deleted file mode 100644 index 5ec382f..0000000 --- a/mwevents/types/events/user_rights_modified.py +++ /dev/null @@ -1,11 +0,0 @@ -from .event import Event - - -class UserRightsModified(Event): - __slots__ = ('modified', 'rights') - def initialize(self, timestamp, user, comment, modified_user, old, new): - super().initialize(timestamp, user, comment) - - self.modified_user = User(modified_user) - self.old = Rights(old) - self.new = Rights(new) diff --git a/mwevents/types/events/user_unblocked.py b/mwevents/types/events/user_unblocked.py index 3688ae3..1751826 100644 --- a/mwevents/types/events/user_unblocked.py +++ b/mwevents/types/events/user_unblocked.py @@ -1,16 +1,20 @@ -from .event import Event, Match +from .. import Timestamp, Unavailable, User +from ... import configuration +from ...util import split_page_name +from .event import Event +from .match import Match class UserUnblocked(Event): - MATCHES = [MATCH("block", "unblock", False, "log")] + MATCHES = [Match("block", "unblock", False, "log")] __slots__ = ('unblocked',) - def __init__(self, timestamp, user, comment, unblocked): - super().__init__(timestamp, user, comment) + def initialize(self, timestamp, user, comment, unblocked): + super().initialize(timestamp, user, comment) self.unblocked = User(unblocked) @classmethod - def from_api_doc(cls, api_doc, config): + def from_rc_doc(cls, rc_doc, config=configuration.DEFAULT): """ Example: { @@ -33,18 +37,17 @@ def from_api_doc(cls, api_doc, config): "tags": [] } """ - ns, unblocked_name = config.title_parser.parse(api_doc['title']) - assert ns == 2 + nsname, unblocked_name = split_page_name(rc_doc['ns'], rc_doc['title']) return cls( - Timestamp(api_doc['timestamp']) + Timestamp(rc_doc['timestamp']), User( - api_doc['userid'], - api_doc['user'] + rc_doc.get('userid'), + rc_doc.get('user') ), - api_doc['comment'], + rc_doc.get('comment'), User( - None, # Not available + Unavailable, # Not available unblocked_name ) ) diff --git a/mwevents/types/page.py b/mwevents/types/page.py new file mode 100644 index 0000000..7cf484e --- /dev/null +++ b/mwevents/types/page.py @@ -0,0 +1,11 @@ +from jsonable import JSONable + +from .unavailable import Unavailable + + +class Page(JSONable): + __slots__ = ('id', 'namespace', 'title') + def initialize(self, id, namespace, title): + self.id = Unavailable.otherwise(id, int) + self.namespace = int(namespace) + self.title = str(title) diff --git a/mwevents/types/protection.py b/mwevents/types/protection.py index 25192f6..1d459d6 100644 --- a/mwevents/types/protection.py +++ b/mwevents/types/protection.py @@ -2,7 +2,7 @@ from jsonable import instance, JSONable -from .. import defaults +from .. import configuration from .timestamp import Timestamp @@ -21,8 +21,8 @@ def initialize(self, action=None, group=None, expiration=None): @classmethod def from_params(cls, params, - indefinite=defaults.PARAMS_INDEFINITE, - time_format=defaults.PARAMS_TIME_FORMAT): + indefinite=configuration.DEFAULT['indefinite'], + expiration_format=configuration.DEFAULT['expiration_format']): for match in cls.LOG_PARAMS_RE.finditer(params): action, group, expiration, _ = match.groups() @@ -30,6 +30,6 @@ def from_params(cls, params, if expiration == indefinite: expiration = None else: - expiration = Timestamp.strptime(expiration, time_format) + expiration = Timestamp.strptime(expiration, expiration_format) yield cls(action, group, expiration) diff --git a/mwevents/types/revision.py b/mwevents/types/revision.py index 039e642..8ff4545 100644 --- a/mwevents/types/revision.py +++ b/mwevents/types/revision.py @@ -1,12 +1,14 @@ from jsonable import JSONable +from .unavailable import Unavailable + class Revision(JSONable): __slots__ = ('id', 'parent_id', 'bytes', 'sha1', 'page_id', 'minor') def initialize(self, id, parent_id, bytes, sha1, page_id, minor): - self.id = int(id) - self.parent_id = int(parent_id) if parent_id is not None else 0 - self.bytes = int(bytes) - self.sha1 = str(sha1) - self.page_id = int(page_id) - self.minor = bool(minor) + self.id = Unavailable.otherwise(id, int) + self.parent_id = Unavailable.otherwise(parent_id, int) or 0 + self.bytes = Unavailable.otherwise(bytes, int) + self.sha1 = Unavailable.otherwise(sha1, str) + self.page_id = Unavailable.otherwise(page_id, int) + self.minor = Unavailable.otherwise(minor, bool) diff --git a/mwevents/types/tests/test_block.py b/mwevents/types/tests/test_block.py new file mode 100644 index 0000000..49feb7b --- /dev/null +++ b/mwevents/types/tests/test_block.py @@ -0,0 +1,19 @@ +from nose.tools import eq_ + +from ..block import Block +from ..timestamp import Timestamp + + +def test_construction_and_values(): + flags = ["nocreate", "noedit"] + duration = "24 years" + expiration = Timestamp(1234567890) + + block = Block(flags, duration, expiration) + + eq_(block.flags, flags) + eq_(block.duration, duration) + eq_(block.expiration, expiration) + + eq_(block, Block(block)) + eq_(block, Block(block.to_json())) diff --git a/mwevents/types/tests/test_page.py b/mwevents/types/tests/test_page.py new file mode 100644 index 0000000..fb7fd3f --- /dev/null +++ b/mwevents/types/tests/test_page.py @@ -0,0 +1,18 @@ +from nose.tools import eq_ + +from ..page import Page + + +def test_construction_and_values(): + id = 129 + namespace = 272 + title = "foobar!" + + page = Page(id, namespace, title) + + eq_(page.id, id) + eq_(page.namespace, namespace) + eq_(page.title, title) + + eq_(page, Page(page)) + eq_(page, Page(page.to_json())) diff --git a/mwevents/types/tests/test_protection.py b/mwevents/types/tests/test_protection.py index 8250f96..dfdf22c 100644 --- a/mwevents/types/tests/test_protection.py +++ b/mwevents/types/tests/test_protection.py @@ -2,6 +2,7 @@ from ..protection import Protection from ..timestamp import Timestamp +from ..unavailable import Unavailable def test_construction_and_values(): @@ -32,3 +33,17 @@ def test_from_params(): ], list(Protection.from_params(log_params)) ) + + +def test_unavailable_values(): + action = "edit" + group = Unavailable + expiration = None + + protection = Protection(action, group, expiration) + + eq_(protection.action, action) + eq_(protection.group, group) + eq_(protection.expiration, expiration) + + eq_(protection, Protection(protection.to_json())) diff --git a/mwevents/types/tests/test_revision.py b/mwevents/types/tests/test_revision.py new file mode 100644 index 0000000..dead33d --- /dev/null +++ b/mwevents/types/tests/test_revision.py @@ -0,0 +1,64 @@ +from nose.tools import eq_ + +from ..revision import Revision +from ..unavailable import Unavailable + + +def test_construction_and_values(): + id = 129 + parent_id = 105 + bytes = 2324 + sha1 = "1234567890123457890123457890ab" + page_id = 12 + minor = False + + revision = Revision(id, parent_id, bytes, sha1, page_id, minor) + + eq_(revision.id, id) + eq_(revision.parent_id, parent_id) + eq_(revision.bytes, bytes) + eq_(revision.sha1, sha1) + eq_(revision.page_id, page_id) + eq_(revision.minor, minor) + + eq_(revision, Revision(revision)) + eq_(revision, Revision(revision.to_json())) + +def test_unavailable_values(): + id = 129 + parent_id = None + bytes = 2324 + sha1 = Unavailable + page_id = 12 + minor = Unavailable + + revision = Revision(id, parent_id, bytes, sha1, page_id, minor) + + eq_(revision.id, id) + eq_(revision.parent_id, 0) # Should convert None to zero + eq_(revision.bytes, bytes) + eq_(revision.sha1, sha1) + eq_(revision.page_id, page_id) + eq_(revision.minor, minor) + + eq_(revision, Revision(revision)) + eq_(revision, Revision(revision.to_json())) + + id = None + parent_id = 105 + bytes = Unavailable + sha1 = "1234567890123457890123457890ab" + page_id = Unavailable + minor = False + + revision = Revision(id, parent_id, bytes, sha1, page_id, minor) + + eq_(revision.id, id) + eq_(revision.parent_id, parent_id) + eq_(revision.bytes, bytes) + eq_(revision.sha1, sha1) + eq_(revision.page_id, page_id) + eq_(revision.minor, minor) + + eq_(revision, Revision(revision)) + eq_(revision, Revision(revision.to_json())) diff --git a/mwevents/types/tests/test_timestamp.py b/mwevents/types/tests/test_timestamp.py new file mode 100644 index 0000000..e868986 --- /dev/null +++ b/mwevents/types/tests/test_timestamp.py @@ -0,0 +1,15 @@ +from nose.tools import eq_ + +from ..timestamp import Timestamp + + +def test_construction_and_values(): + unix = 1234567890 + expected_json = "20090213233130" + timestamp = Timestamp(unix) + + eq_(timestamp.unix(), unix) + eq_(timestamp.to_json(), expected_json) + + eq_(timestamp, Timestamp(timestamp)) + eq_(timestamp, Timestamp(timestamp.to_json())) diff --git a/mwevents/types/tests/test_unavailable.py b/mwevents/types/tests/test_unavailable.py new file mode 100644 index 0000000..838eeb3 --- /dev/null +++ b/mwevents/types/tests/test_unavailable.py @@ -0,0 +1,10 @@ +from nose.tools import eq_ + +from ..unavailable import Unavailable + + +def test_otherwise(): + eq_(Unavailable, Unavailable.otherwise(Unavailable, int)) + eq_(None, Unavailable.otherwise(None, int)) + eq_(10, Unavailable.otherwise(10, int)) + eq_(10, Unavailable.otherwise("10", int)) diff --git a/mwevents/types/tests/test_user.py b/mwevents/types/tests/test_user.py new file mode 100644 index 0000000..b18790b --- /dev/null +++ b/mwevents/types/tests/test_user.py @@ -0,0 +1,30 @@ +from nose.tools import eq_ + +from ..unavailable import Unavailable +from ..user import User + + +def test_construction_and_values(): + id = 129 + text = "I am a username" + + user = User(id, text) + + eq_(user.id, id) + eq_(user.text, text) + + eq_(user, User(user)) + eq_(user, User(user.to_json())) + +def test_unavailable_id(): + + id = Unavailable + text = "I am a username" + + user = User(id, text) + + eq_(user.id, id) + eq_(user.text, text) + + eq_(user, User(user)) + eq_(user, User(user.to_json())) diff --git a/mwevents/types/unavailable.py b/mwevents/types/unavailable.py new file mode 100644 index 0000000..799c4dc --- /dev/null +++ b/mwevents/types/unavailable.py @@ -0,0 +1,85 @@ +from jsonable import JSONable + + +class UnavailableType(JSONable): + """ + A NoneType-like singleton instance that represents missing data because it + is unavailable -- as opposed to real None/NULL-type values. + + Example: + >>> from mwevents.types import UnavailableType + >>> Unavailable = UnavailableType() + >>> Unavailable.to_json() + {'Unavailable': True} + >>> Unavailable(Unavailable.to_json()) + Unavailable + >>> Unavailable is None + False + + + """ + NAME = "Unavailable" + _instance = None + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __call__(self, doc_or_inst): + if isinstance(doc_or_inst, self.__class__): + return doc_or_inst + elif isinstance(doc_or_inst, dict): + return self.from_json(doc_or_inst) + else: + raise TypeError("Expected {0}, ".format(self) + \ + "got {0}".format(repr(doc_or_inst))) + + def __init__(self, *args, **kwargs): pass + def initialize(self): pass + + def __str__(self): + return self.__repr__() + + def __repr__(self): + return self.NAME + + @classmethod + def otherwise(cls, val, func, none_ok=True): + if none_ok and val is None: + return val + else: + try: + return cls._instance(val) + except TypeError as e: + return func(val) + + + @classmethod + def to_json(cls): + return {cls.NAME: True} + + @classmethod + def from_json(cls, doc): + if isinstance(doc, dict) and cls.NAME in doc: + return cls() + else: + raise TypeError("doc is not of the right type. " + \ + "Expected: {0} ".format(cls.to_json()) + \ + "Got: {0}".format(doc)) + + +Unavailable = UnavailableType() +""" +A NoneType-like singleton instance that represents missing data because it is +unavailable -- as opposed to real NULL values. + +Example: + >>> from mwevents.types import Unavailable + >>> Unavailable.to_json() + {'Unavailable': True} + >>> Unavailable(Unavailable.to_json()) + Unavailable + >>> Unavailable is None + False + +""" diff --git a/mwevents/types/user.py b/mwevents/types/user.py index fe050e5..fc691cb 100644 --- a/mwevents/types/user.py +++ b/mwevents/types/user.py @@ -1,8 +1,10 @@ from jsonable import JSONable +from .unavailable import Unavailable + class User(JSONable): __slots__ = ('id', 'text') def initialize(self, id, text): - self.id = int(id) if id is not None else None + self.id = self.id = Unavailable.otherwise(id, int) self.text = str(text) diff --git a/mwevents/util.py b/mwevents/util.py new file mode 100644 index 0000000..1b051a9 --- /dev/null +++ b/mwevents/util.py @@ -0,0 +1,10 @@ +from mw.lib.title import normalize + + +def split_page_name(ns, page_name): + if ns == 0: + return "", normalize(page_name) + else: + nsname, title = page_name.split(":", 1) + + return normalize(nsname), normalize(title) diff --git a/setup.py b/setup.py index e69de29..fc41a2a 100644 --- a/setup.py +++ b/setup.py @@ -0,0 +1,42 @@ +import os +from distutils.core import setup + +from setuptools import find_packages, setup + + +def read(fname): + return open(os.path.join(os.path.dirname(__file__), fname)).read() + + +def requirements(fname): + for line in open(os.path.join(os.path.dirname(__file__), fname)): + yield line.strip() + + +setup( + name='mwevents', + version=read('VERSION').strip(), + author='Aaron Halfaker', + author_email='aaron.halfaker@gmail.com', + packages=find_packages(), + scripts=[], + url='http://pypi.python.org/pypi/mwevents', + license=open('LICENSE').read(), + description='Standardized public MediaWiki events for tools and Science.', + long_description=read('README.rst'), + requirements=requirements('requirments.txt'), + test_suite='nose.collector', + classifiers=[ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Environment :: Other Environment", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules" + "Topic :: Text Processing :: General", + "Topic :: Utilities", + "Topic :: Scientific/Engineering" + ], +)