Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for importing Indico Event JSONs #45

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions demo/indico.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Example config for the Indico import handler, using the real XDC2021 schedule
# Run from the repo root, like so: ./schedule.py -vvd -c demo/indico.ini

# Download the event export with this URL (replace the event ID):
# https://indico.example.com/export/event/ID.json?occ=yes&detail=contributions&pretty=yes

# self.global_config
[conference]
title = X Developers Conference 2021
acronym = xdc2021
day_count = 3
start = 2021-09-15
end = 2021-09-17
time_slot_duration = 00:30
license = CC-BY 4.0

# Indico-specific fields:
language = en
fallback_speaker = XDC 2021 Organizers

[track2room]
Main Track = Conference
Demos / Lightning talks I = Conference
Demos / Lightning talks II = Conference
Workshop = Workshops


[import]
active = indico

# self.config
[import:indico]
type = indico
path = demo/indico.json


[export]
active = basic

# self.config
[export:basic]
type = xml-basic
path = xdc2021.xml
2,051 changes: 2,051 additions & 0 deletions demo/indico.json

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions fahrplan/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,19 @@
TIME_FORMAT = "%H:%M"


def format_datetime(datetime: dt.datetime):
def format_datetime(datetime: dt.datetime) -> str:
    """Return *datetime* rendered by its own ``isoformat()`` method."""
    iso_string = datetime.isoformat()
    return iso_string


def parse_datetime(date_string: str):
def parse_datetime(date_string: str) -> dt.datetime:
    """Parse *date_string* with ``datetime.fromisoformat`` and return the result."""
    parsed = dt.datetime.fromisoformat(date_string)
    return parsed


def format_time(time: dt.time):
return time.strftime(TIME_FORMAT)


def parse_time(time_string: str):
try:
hours, _, minutes = time_string.partition(":")
hours = int(hours, 10)
minutes = int(minutes, 10)
return dt.time(hours, minutes)
except ValueError:
raise FahrplanError(f"{time_string} is not in required format %H:%M")


def format_date(date: dt.date):
def format_date(date: dt.date) -> str:
    """Return the ``str()`` representation of *date*."""
    text = str(date)
    return text


def parse_date(date_string: str):
def parse_date(date_string: str) -> dt.date:
try:
items = [int(i, 10) for i in date_string.split("-")]
return dt.date(*items)
Expand All @@ -40,16 +26,30 @@ def parse_date(date_string: str):
raise FahrplanError(f"{date_string} is not in required format %Y-%m-%d")


def format_duration(duration: dt.timedelta):
def _parse_time_items(time_string: str) -> list[int]:
    """Split a colon-separated time string into its integer components.

    Anything after the first ``.`` (fractional seconds) is discarded before
    splitting, so ``"10:30:15.250"`` yields ``[10, 30, 15]``.

    Raises:
        FahrplanError: if any component is not a base-10 integer. The message
            quotes the string exactly as the caller passed it.
    """
    # Just drop the fractional part (milliseconds); keep `time_string` intact
    # so the error message below shows the original input.
    whole_part, _, _ = time_string.partition('.')

    try:
        return [int(item, 10) for item in whole_part.split(":")]
    except ValueError as err:
        raise FahrplanError(
            f"{time_string} is not in required format %H:%M[:%S]"
        ) from err


def format_time(time: dt.time) -> str:
    """Return *time* formatted with the module-level ``TIME_FORMAT`` pattern."""
    formatted = time.strftime(TIME_FORMAT)
    return formatted


def parse_time(time_string: str) -> dt.time:
    """Parse a ``%H:%M[:%S]`` string into a ``datetime.time``.

    Fractional seconds are dropped by ``_parse_time_items``.

    Raises:
        FahrplanError: if the string has fewer than two components, has
            non-numeric components, or the components do not form a valid
            time (for example an hour of 25).
    """
    items = _parse_time_items(time_string)

    try:
        # At least hours and minutes are required by the documented format.
        if len(items) < 2:
            raise ValueError("hours and minutes are both required")
        # ValueError: component out of range; TypeError: too many components.
        return dt.time(*items)
    except (TypeError, ValueError) as err:
        raise FahrplanError(
            f"{time_string} is not in required format %H:%M[:%S]"
        ) from err


def format_duration(duration: dt.timedelta) -> str:
    """Format *duration* as ``H:MM``, discarding seconds and microseconds.

    The value is computed from the total number of seconds instead of slicing
    ``str(duration)``, because that string gains a ``.ffffff`` suffix when
    microseconds are present and an ``N day(s), `` prefix from 24 hours on.
    Durations of a day or more are therefore rendered with an hour count of
    24 or above (e.g. ``24:30``).
    """
    total_seconds = int(duration.total_seconds())
    total_minutes = abs(total_seconds) // 60
    hours, minutes = divmod(total_minutes, 60)

    # Only show a sign when something non-zero remains after truncation.
    sign = "-" if total_seconds < 0 and total_minutes else ""
    return f"{sign}{hours}:{minutes:02d}"


def parse_duration(duration_string: str):
try:
hours, _, minutes = duration_string.partition(":")
hours = int(hours, 10)
minutes = int(minutes, 10)
return dt.timedelta(hours=hours, minutes=minutes)
except ValueError:
raise FahrplanError(f"{duration_string} is not in required format %H:%M")
def parse_duration(duration_string: str) -> dt.timedelta:
    """Parse a ``%H:%M[:%S]`` string into a ``datetime.timedelta``.

    Fractional seconds are dropped by ``_parse_time_items``. Components past
    the third one are ignored.

    Raises:
        FahrplanError: if the string has non-numeric components or lacks the
            minutes component.
    """
    items = _parse_time_items(duration_string)

    # Without this guard a string such as "5" would fail with a bare
    # IndexError instead of the project's own error type.
    if len(items) < 2:
        raise FahrplanError(
            f"{duration_string} is not in required format %H:%M[:%S]"
        )

    seconds = items[2] if len(items) >= 3 else 0
    return dt.timedelta(hours=items[0], minutes=items[1], seconds=seconds)
3 changes: 2 additions & 1 deletion handlers/directory.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from typing import Type

from .base import ImportHandler, ExportHandler
from .import_handlers import FakeImportHandler, CSVImportHandler, JSONImportHandler, ProyektorImportHandler
from .import_handlers import FakeImportHandler, CSVImportHandler, JSONImportHandler, ProyektorImportHandler, IndicoImportHandler
from .export_handlers import BasicXMLExportHandler, ExtendedXMLExportHandler, FrabJsonExportHandler

# Registry of available import handlers, keyed by handler type name.
import_handlers: dict[str, Type[ImportHandler]] = {
    "csv": CSVImportHandler,
    "fake": FakeImportHandler,
    "json": JSONImportHandler,
    "proyektor": ProyektorImportHandler,
    "indico": IndicoImportHandler,
}

export_handlers = {
Expand Down
1 change: 1 addition & 0 deletions handlers/import_handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .fake import FakeImportHandler
from .json import JSONImportHandler
from .proyektor import ProyektorImportHandler
from .indico import IndicoImportHandler
121 changes: 121 additions & 0 deletions handlers/import_handlers/indico.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import json
import logging
import datetime as dt

from pytz import timezone

from ..base import ImportHandler
from fahrplan.datetime import parse_date, parse_time, parse_duration
from fahrplan.exception import FahrplanError
from fahrplan.model.conference import Conference
from fahrplan.model.event import Event
from fahrplan.model.schedule import Schedule
from fahrplan.slug import StandardSlugGenerator
from hacks import noexcept


log = logging.getLogger(__name__)


class IndicoImportHandler(ImportHandler):
    """Imports data from Indico JSON exports.

    See https://docs.getindico.io/en/latest/http-api/ for more info.

    The export file named by ``path`` in the handler config is read and each
    entry of its ``contributions`` list becomes one ``Event``. Conference
    metadata, the event language, the recording license, the fallback speaker
    name and the optional ``[track2room]`` session-to-room mapping are taken
    from the global config.
    """

    @noexcept(log)
    def run(self) -> Schedule:
        """Build a ``Schedule`` from the configured Indico JSON export.

        Raises:
            FahrplanError: if ``path`` is missing from the handler config,
                the export contains no results, or a contribution has an
                invalid start date.

        NOTE(review): the method is wrapped by ``noexcept(log)``; how raised
        exceptions are ultimately surfaced depends on that decorator.
        """
        if 'path' not in self.config:
            raise FahrplanError('Path to exported Indico JSON must be provided in the config file.')

        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(self.config['path'], encoding='utf-8') as f:
            results = json.load(f)['results']

        if not results:
            raise FahrplanError(f"{self.config['path']} does not contain any Indico event.")
        indico_json = results[0]
        contributions = indico_json['contributions']

        conference = Conference(
            title=self.global_config.get('conference', 'title'),
            acronym=self.global_config.get('conference', 'acronym'),
            day_count=int(self.global_config.get('conference', 'day_count')),
            start=parse_date(self.global_config.get('conference', 'start')),
            end=parse_date(self.global_config.get('conference', 'end')),
            time_slot_duration=parse_duration(self.global_config.get('conference', 'time_slot_duration')),
        )

        schedule = Schedule(conference=conference)
        language = self.global_config.get('conference', 'language')
        license = self.global_config.get('conference', 'license')

        slugifier = StandardSlugGenerator(conference)

        # Collect every person ID in use so that the fallback speaker gets an
        # ID that cannot collide with a real one.
        speaker_ids = set()
        for co in contributions:
            people = co['speakers'] + co['primaryauthors'] + co['coauthors']
            speaker_ids.update(person['person_id'] for person in people)

        # `default=0` keeps this working when no contribution lists anybody.
        fallback_speaker_id = max(speaker_ids, default=0) + 1
        fallback_speakers = {
            fallback_speaker_id: self.global_config.get('conference', 'fallback_speaker')
        }

        for co in contributions:
            start_dt = self.parse_indico_date(co['startDate'])
            day = (start_dt.date() - conference.start).days + 1

            # For talks with no proper room attached, try to get a valid
            # name from the config [track2room], session -> room name.
            # The session may be empty/None, so normalise it before stripping.
            session = (co['session'] or "").strip()
            room = co['roomFullname'] or co['room'] or co['location'] or \
                self.global_config.get('track2room', session, fallback="Unknown")

            event = Event(
                uid=co['friendly_id'],
                date=start_dt,
                start=start_dt.time(),
                duration=dt.timedelta(minutes=co['duration']),
                title=co['title'].strip(),
                language=language,
                slug=slugifier,
                persons=self.collect_speakers(
                    fallback_speakers,  # In case there's nobody defined here...

                    # Coauthors are generally not speakers. Event metadata is often
                    # broken and has primary authors only, no speakers, so combine
                    # these and force uniqueness:
                    *co['speakers'],
                    *co['primaryauthors'],
                ),

                description=co['description'].strip(),
                links={"Indico Contribution Page": co['url']},
                recording_license=license,

                # Indico has this swapped around:
                track=co['session'] or "",
                event_type=co['track'] or "",
            )

            schedule.add_room(room)
            schedule.add_event(day, room, event)

        return schedule

    @staticmethod
    def parse_indico_date(indico_date: dict) -> dt.datetime:
        """Convert an Indico date object into a timezone-aware ``datetime``.

        Args:
            indico_date: mapping with the keys ``date``, ``time`` and ``tz``
                as read by this method.

        Raises:
            FahrplanError: if a key is missing, the timezone name is unknown,
                or the date/time values are invalid.
        """
        try:
            date = parse_date(indico_date['date'])
            time = parse_time(indico_date['time'])
            tz = timezone(indico_date['tz'])
            # pytz zones must be attached with localize(); passing them as
            # `tzinfo=` picks the zone's first (local mean time) offset.
            return tz.localize(dt.datetime.combine(date, time))
        except (KeyError, ValueError) as err:
            # KeyError also covers pytz's UnknownTimeZoneError (a subclass).
            raise FahrplanError(f"{indico_date} is not a valid Indico date") from err

    @staticmethod
    def collect_speakers(fallback_speakers, *speakers) -> dict[int, str]:
        """Map person IDs to full names, keeping first-seen order.

        Args:
            fallback_speakers: mapping returned unchanged when *speakers* is
                empty.
            *speakers: person records providing ``person_id``,
                ``first_name`` and ``last_name``.

        Returns:
            A dict of ``person_id`` to ``"<first name> <last name>"`` with
            duplicates (same ``person_id``) removed, or *fallback_speakers*
            if no speaker was given.
        """
        # A dict is used instead of a set because the insertion order of the
        # speakers has to be preserved while duplicates are dropped.
        parsed = {}

        for s in speakers:
            if s['person_id'] not in parsed:
                parsed[s['person_id']] = f"{s['first_name'].strip()} {s['last_name'].strip()}"

        return parsed or fallback_speakers
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
translitcodec
pytz
translitcodec