Skip to content

Commit

Permalink
Add helpers to scrape missing Bern events
Browse files Browse the repository at this point in the history
  • Loading branch information
nicoSWD committed May 10, 2023
1 parent a2e727a commit 19603d5
Show file tree
Hide file tree
Showing 12 changed files with 1,165 additions and 48 deletions.
13 changes: 13 additions & 0 deletions app/config/services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ services:
- '@nicoSWD\IfscCalendar\Infrastructure\Events\IFSCGuzzleEventsFetcher'
- '@nicoSWD\IfscCalendar\Domain\YouTube\YouTubeLinkFetcher'
- '@nicoSWD\IfscCalendar\Domain\YouTube\YouTubeLinkMatcher'
- '@nicoSWD\IfscCalendar\Domain\Calendar\IFSCCalendarPostFix'

nicoSWD\IfscCalendar\Infrastructure\Calendar\JsonCalendar:
class: nicoSWD\IfscCalendar\Infrastructure\Calendar\JsonCalendar
Expand Down Expand Up @@ -74,6 +75,18 @@ services:
arguments:
- '@GuzzleHttp\Client'

nicoSWD\IfscCalendar\Domain\Calendar\IFSCCalendarPostFix:
class: nicoSWD\IfscCalendar\Domain\Calendar\IFSCCalendarPostFix
arguments:
- '@nicoSWD\IfscCalendar\Domain\Calendar\Fixes\SeasonFix2023'

nicoSWD\IfscCalendar\Domain\Calendar\Fixes\SeasonFix2023:
class: nicoSWD\IfscCalendar\Domain\Calendar\Fixes\SeasonFix2023
arguments:
- '@nicoSWD\IfscCalendar\Infrastructure\HttpClient\HttpGuzzleClient'
- '@nicoSWD\IfscCalendar\Domain\Event\IFSCEventFactory'
- '@nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer'

# Events
nicoSWD\IfscCalendar\Infrastructure\Events\IFSCGuzzleEventsFetcher:
class: nicoSWD\IfscCalendar\Infrastructure\Events\IFSCGuzzleEventsFetcher
Expand Down
10 changes: 5 additions & 5 deletions src/Application/Command/BuildCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$leaguesByName = [];

foreach ($seasons[$selectedSeason]->leagues as $league) {
$leaguesByName[$league->name] = $league;
$leaguesByName[$league->name] = $league->id;
}

if (!$selectedLeague) {
Expand All @@ -75,7 +75,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$pathInfo = pathinfo($fileName);
$fileName = "{$pathInfo['dirname']}/{$pathInfo['filename']}.{$calFormat}";

$response = $this->buildCalendar($selectedSeason, [$league], $calFormat, $output, $fetchYouTubeUrls);
$response = $this->buildCalendar($selectedSeason, $league, $calFormat, $output, $fetchYouTubeUrls);
$this->saveCalendar($fileName, $response->calendarContents, $output);
}

Expand All @@ -86,7 +86,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int

public function buildCalendar(
int $selectedSeason,
array $leagues,
int $league,
string $format,
OutputInterface $output,
bool $fetchYouTubeUrls
Expand All @@ -96,7 +96,7 @@ public function buildCalendar(
return $this->buildCalendarUseCase->execute(
new BuildCalendarRequest(
season: $selectedSeason,
leagues: $leagues,
league: $league,
format: $format,
fetchYouTubeUrls: $fetchYouTubeUrls,
)
Expand Down Expand Up @@ -133,7 +133,7 @@ public function getSelectedSeason(array $seasons, Helper $helper, InputInterface
public function getSelectedLeague(array $leaguesByName, Helper $helper, InputInterface $input, OutputInterface $output): string
{
$question = new ChoiceQuestion(
'Please select a or multiple leagues (defaults to "' . key($leaguesByName) . '")',
'Please select a league (defaults to "' . key($leaguesByName) . '")',
array_keys($leaguesByName),
0
);
Expand Down
20 changes: 6 additions & 14 deletions src/Application/UseCase/BuildCalendar/BuildCalendarRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,13 @@
*/
namespace nicoSWD\IfscCalendar\Application\UseCase\BuildCalendar;

use nicoSWD\IfscCalendar\Domain\League\IFSCLeague;

final readonly class BuildCalendarRequest
{
/** @var IFSCLeague[] */
public array $leagues;
public int $season;
public string $format;
public bool $fetchYouTubeUrls;

public function __construct(int $season, array $leagues, string $format, bool $fetchYouTubeUrls)
{
$this->season = $season;
$this->leagues = $leagues;
$this->format = $format;
$this->fetchYouTubeUrls = $fetchYouTubeUrls;
public function __construct(
public int $season,
public int $league,
public string $format,
public bool $fetchYouTubeUrls
) {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ public function execute(BuildCalendarRequest $buildCalendarRequest): BuildCalend

public function buildCalendar(BuildCalendarRequest $buildCalendarRequest): string
{
return $this->calendarBuilder->generateForLeagues(
return $this->calendarBuilder->generateForLeague(
season: $buildCalendarRequest->season,
leagues: $buildCalendarRequest->leagues,
league: $buildCalendarRequest->league,
format: $buildCalendarRequest->format,
fetchYouTubeUrls: $buildCalendarRequest->fetchYouTubeUrls,
);
Expand Down
101 changes: 101 additions & 0 deletions src/Domain/Calendar/Fixes/SeasonFix2023.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php declare(strict_types=1);

/**
* @license http://opensource.org/licenses/mit-license.php MIT
* @link https://github.com/nicoSWD
* @author Nicolas Oelgart <[email protected]>
*/
namespace nicoSWD\IfscCalendar\Domain\Calendar\Fixes;

use Closure;
use DateTime;
use DateTimeImmutable;
use DateTimeZone;
use nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEvent;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEventFactory;
use nicoSWD\IfscCalendar\Domain\Event\Month;
use nicoSWD\IfscCalendar\Domain\HttpClient\HttpClientInterface;

/**
 * Post-processing fix for the 2023 season calendar.
 *
 * When the given event list contains no event whose name includes
 * "Bern (SUI)", the Bern schedule page is downloaded, scraped with a regular
 * expression, and the events found there are appended to the list.
 */
final readonly class SeasonFix2023
{
    private const BERN_SCHEDULE_URL = 'https://www.ifsc-climbing.org/bern-2023/schedule';

    /**
     * Captures a "<day> AUGUST || <hh:mm>" date string and the following <h3>
     * title for each schedule entry. Only entries in August are matched.
     *
     * Original author's note: "Use DOM/XPath" (presumably a TODO to replace
     * this regex with a proper HTML parser).
     */
    private const BERN_SCHEDULE_REGEX = '~<div data-tag=[^>]+>\s*(?:<div[^>]+>\s*){2}\s*(?<date>\d{1,2}\sAUGUST\s\|\|\s\d{1,2}:\d{2})\s*</div>\s*<h3[^>]+>(?<name>[^<]+)~s';

    /** Substring of an event name that marks it as a Bern event. */
    private const BERN_NAME_MARKER = 'Bern (SUI)';

    /** ID passed to the event factory for every scraped Bern event. */
    private const BERN_EVENT_ID = 1301;

    /** Description passed to the event factory for every scraped Bern event. */
    private const BERN_EVENT_DESCRIPTION = 'IFSC - Climbing World Championships (B,L,S,B&L) - Bern (SUI) 2023';

    /** Time zone in which the scraped start times are interpreted. */
    private const BERN_TIME_ZONE = 'Europe/Zurich';

    /** The scraped date strings carry no year, so it is fixed here. */
    private const BERN_YEAR = 2023;

    /**
     * The regex captures a start time only; every scraped event is given an
     * end time this far after its start.
     */
    private const EVENT_DURATION_MODIFIER = '+3 hours';

    public function __construct(
        private HttpClientInterface $httpClient,
        private IFSCEventFactory $eventFactory,
        private Normalizer $normalizer,
    ) {
    }

    /**
     * Appends the scraped Bern events unless the list already contains one.
     *
     * @param IFSCEvent[] $events
     * @return IFSCEvent[] The input list, merged with the scraped Bern events
     *                     when no Bern event was present.
     */
    public function fix(array $events): array
    {
        // Add missing Bern events, which are listed on a separate page in an
        // entirely different format. Thanks, y'all.
        $bernEvents = array_filter($events, $this->isBernEvent());

        if ($bernEvents !== []) {
            return $events;
        }

        return array_merge($events, $this->fetchBernEvents());
    }

    /**
     * Downloads the Bern schedule page and builds one event per regex match.
     *
     * @return IFSCEvent[] Empty when the page yields no match.
     */
    private function fetchBernEvents(): array
    {
        $html = $this->httpClient->get(self::BERN_SCHEDULE_URL);

        if (!preg_match_all(self::BERN_SCHEDULE_REGEX, $html, $matches)) {
            return [];
        }

        $events = [];

        foreach ($matches['date'] as $key => $date) {
            $startDateTime = $this->createStartDate($date);

            $events[] = $this->eventFactory->create(
                name: $this->normalizer->cupName($matches['name'][$key]),
                id: self::BERN_EVENT_ID,
                description: self::BERN_EVENT_DESCRIPTION,
                streamUrl: '',
                poster: '',
                startTime: $startDateTime,
                endTime: $this->getEndDateTime($startDateTime),
            );
        }

        return $events;
    }

    /**
     * Converts a scraped "<day> <MONTH> || <hh:mm>" string into a start time.
     *
     * The input shape is guaranteed by the "date" capture group of
     * BERN_SCHEDULE_REGEX, so the sscanf() result is not validated again.
     */
    private function createStartDate(string $date): DateTimeImmutable
    {
        [$day, $month, $hour, $minute] = sscanf(trim($date), '%d %s || %d:%d');

        // setTime() with two arguments resets seconds and microseconds to 0,
        // so nothing of the initial "now" value survives in the result.
        return (new DateTimeImmutable('now', new DateTimeZone(self::BERN_TIME_ZONE)))
            ->setDate(self::BERN_YEAR, Month::fromName($month)->value, $day)
            ->setTime($hour, $minute);
    }

    /**
     * Returns the end time for an event starting at $date.
     */
    private function getEndDateTime(DateTimeImmutable $date): DateTimeImmutable
    {
        return $date->modify(self::EVENT_DURATION_MODIFIER);
    }

    /**
     * Builds the predicate used to detect Bern events by name.
     */
    private function isBernEvent(): Closure
    {
        return static fn (IFSCEvent $event): bool => str_contains($event->name, self::BERN_NAME_MARKER);
    }
}
27 changes: 8 additions & 19 deletions src/Domain/Calendar/IFSCCalendarBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
use Exception;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEvent;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEventFetcherInterface;
use nicoSWD\IfscCalendar\Domain\League\IFSCLeague;
use nicoSWD\IfscCalendar\Domain\YouTube\YouTubeLinkFetcher;
use nicoSWD\IfscCalendar\Domain\YouTube\YouTubeLinkMatcher;

Expand All @@ -21,35 +20,25 @@ public function __construct(
private IFSCEventFetcherInterface $eventFetcher,
private YouTubeLinkFetcher $linkFetcher,
private YouTubeLinkMatcher $linkMatcher,
private IFSCCalendarPostFix $calendarPostFix,
) {
}

/**
* @param int $season
* @param IFSCLeague[] $leagues
* @param string $format
* @param bool $fetchYouTubeUrls
* @return string
* @throws Exception
*/
public function generateForLeagues(int $season, array $leagues, string $format, bool $fetchYouTubeUrls): string
/** @throws Exception */
public function generateForLeague(int $season, int $league, string $format, bool $fetchYouTubeUrls): string
{
$events = [];
$events = $this->eventFetcher->fetchEventsForLeague($season, $league);

foreach ($leagues as $league) {
$leagueEvents = $this->eventFetcher->fetchEventsForLeague($season, $league);

if (empty($leagueEvents)) {
throw new Exception("No events found for league '{$league->name}'");
}

$events += $leagueEvents;
if (empty($events)) {
throw new Exception("No events found for league '{$league}'");
}

if ($fetchYouTubeUrls) {
$this->fetchEventStreamUrls($events);
}

$events = $this->calendarPostFix->fix($season, $events);

return $this->calendarBuilderFactory->generateForFormat($format, $events);
}

Expand Down
35 changes: 35 additions & 0 deletions src/Domain/Calendar/IFSCCalendarPostFix.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php declare(strict_types=1);

/**
* @license http://opensource.org/licenses/mit-license.php MIT
* @link https://github.com/nicoSWD
* @author Nicolas Oelgart <[email protected]>
*/
namespace nicoSWD\IfscCalendar\Domain\Calendar;

use nicoSWD\IfscCalendar\Domain\Calendar\Fixes\SeasonFix2023;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEvent;

/**
 * Dispatches a list of events to the fix registered for its season.
 *
 * Seasons without a registered fix get their events back unchanged.
 */
final readonly class IFSCCalendarPostFix
{
    public function __construct(
        private SeasonFix2023 $seasonFix2023,
    ) {
    }

    /**
     * Runs the season-specific fix, if there is one, over the given events.
     *
     * @param int $season Season (year) the events belong to.
     * @param IFSCEvent[] $events
     * @return IFSCEvent[]
     */
    public function fix(int $season, array $events): array
    {
        return match ($season) {
            2023 => $this->seasonFix2023->fix($events),
            default => $events,
        };
    }
}
14 changes: 12 additions & 2 deletions src/Domain/Event/Helpers/Normalizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@

final readonly class Normalizer
{
public function cupName(string $league): string
public function cupName(string $cupName): string
{
return ucwords(strtolower(trim($league)));
$cupName = trim($cupName);
$cupName = strtolower($cupName);
$cupName = $this->removeNewLines($cupName);
$cupName = preg_replace('~\s+-\s+(lead|boulder)\s+round$~', '', $cupName);

return ucwords($cupName);
}

public function normalizeTime(string $time): string
Expand Down Expand Up @@ -46,4 +51,9 @@ public function firstUrl(string $urls): string
{
return preg_split('~\s+~', $urls, flags: PREG_SPLIT_NO_EMPTY)[0] ?? '';
}

private function removeNewLines(string $string): string
{
return preg_replace('~[\r\n]+~', ' ', $string);
}
}
4 changes: 1 addition & 3 deletions src/Domain/Event/IFSCEventFetcherInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
*/
namespace nicoSWD\IfscCalendar\Domain\Event;

use nicoSWD\IfscCalendar\Domain\League\IFSCLeague;

interface IFSCEventFetcherInterface
{
/** @return IFSCEvent[] */
public function fetchEventsForLeague(int $season, IFSCLeague $league): array;
public function fetchEventsForLeague(int $season, int $league): array;
}
6 changes: 3 additions & 3 deletions src/Infrastructure/Events/IFSCGuzzleEventsFetcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public function __construct(
* @inheritdoc
* @throws IFSCEventsScraperException
*/
public function fetchEventsForLeague(int $season, IFSCLeague $league): array
public function fetchEventsForLeague(int $season, int $league): array
{
$response = $this->fetchHtmlForLeague($league);
$events = [];
Expand Down Expand Up @@ -60,10 +60,10 @@ public function buildLeagueUri(int $leagueId): string
}

/** @throws IFSCEventsScraperException */
public function fetchHtmlForLeague(IFSCLeague $league): object
public function fetchHtmlForLeague(int $league): object
{
try {
$response = $this->client->get($this->buildLeagueUri($league->id))->getBody()->getContents();
$response = $this->client->get($this->buildLeagueUri($league))->getBody()->getContents();

return @json_decode($response, flags: JSON_THROW_ON_ERROR);
} catch (GuzzleException $e) {
Expand Down
Loading

0 comments on commit 19603d5

Please sign in to comment.