Skip to content

Commit

Permalink
Refactor, and make 2022 scrapable
Browse files Browse the repository at this point in the history
  • Loading branch information
nicoSWD committed May 10, 2023
1 parent f2b5f02 commit a2e727a
Show file tree
Hide file tree
Showing 13 changed files with 1,018 additions and 31 deletions.
7 changes: 7 additions & 0 deletions app/config/services.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
parameters:
ifsc_youtube_channel_id: 'UC2MGuhIaOP6YLpUx106kTQw'
calendar_product_identifier: '-//ifsc/ical//2.0/EN'
site_event_url: 'https://ifsc.stream/#/season/%%d/event/%%d'

services:
nicoSWD\IfscCalendar\Application\Command\:
Expand Down Expand Up @@ -84,6 +85,7 @@ services:
class: nicoSWD\IfscCalendar\Domain\Event\IFSCEventsScraper
arguments:
- '@nicoSWD\IfscCalendar\Infrastructure\HttpClient\HttpGuzzleClient'
- '@nicoSWD\IfscCalendar\Domain\Event\IFSCEventFactory'
- '@nicoSWD\IfscCalendar\Domain\Event\Helpers\DOMHelper'
- '@nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer'

Expand All @@ -93,6 +95,11 @@ services:
nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer:
class: nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer

nicoSWD\IfscCalendar\Domain\Event\IFSCEventFactory:
class: nicoSWD\IfscCalendar\Domain\Event\IFSCEventFactory
arguments:
- '%site_event_url%'

# HTTP Client
nicoSWD\IfscCalendar\Infrastructure\HttpClient\HttpGuzzleClient:
class: nicoSWD\IfscCalendar\Infrastructure\HttpClient\HttpGuzzleClient
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* @link https://github.com/nicoSWD
* @author Nicolas Oelgart <[email protected]>
*/
namespace nicoSWD\IfscCalendar\Domain\Event;
namespace nicoSWD\IfscCalendar\Domain\Event\Exceptions;

use Exception;

Expand Down
14 changes: 14 additions & 0 deletions src/Domain/Event/Exceptions/InvalidURLException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php declare(strict_types=1);

/**
* @license http://opensource.org/licenses/mit-license.php MIT
* @link https://github.com/nicoSWD
* @author Nicolas Oelgart <[email protected]>
*/
namespace nicoSWD\IfscCalendar\Domain\Event\Exceptions;

use Exception;

/**
 * Signals that a stream URL failed validation.
 *
 * Thrown by IFSCSchedule::assertValidUrl() when a non-empty stream URL
 * does not pass FILTER_VALIDATE_URL.
 */
final class InvalidURLException extends Exception
{
}
16 changes: 14 additions & 2 deletions src/Domain/Event/Helpers/Normalizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
{
public function cupName(string $league): string
{
return ucwords(strtolower($league));
return ucwords(strtolower(trim($league)));
}

public function normalizeTime(string $time): string
Expand All @@ -20,6 +20,9 @@ public function normalizeTime(string $time): string
// We don't know the exact time yet. We'll set it to 8:00 for now
// as it will automatically update once IFSC sets it
$time = '8:00';
} else {
// Convert 12-hour format to 24-hour
$time = date('H:i', strtotime($time));
}

return $time;
Expand All @@ -32,6 +35,15 @@ public function nonEmptyLines(string $matches): array

public function removeNonAsciiCharacters(string $text): string
{
return preg_replace('~[^\w\s\'\r\n:,-./?=]+~', ' ', $text);
// This fixes a parsing issue for season 2022
// This is fun
$text = preg_replace('~\n\s{10,}~', ' ', $text);

return preg_replace('~[^\w\s\r\n\':,-./?=&]+~', ' ', $text);
}

/**
 * Returns the first whitespace-delimited token of the given string.
 *
 * Leading/trailing whitespace is ignored. An empty string is returned
 * when the input holds no token at all.
 */
public function firstUrl(string $urls): string
{
    $candidates = preg_split('~\s+~', $urls, flags: PREG_SPLIT_NO_EMPTY);

    // "??" also covers the case where preg_split() itself failed.
    return $candidates[0] ?? '';
}
}
7 changes: 2 additions & 5 deletions src/Domain/Event/IFSCEvent.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,16 @@

final readonly class IFSCEvent
{
private const EVENT_INFO_URL = 'https://ifsc.stream/#/event/%d';

public string $siteUrl;

public function __construct(
public string $name,
public int $id,
public string $description,
public string $streamUrl,
public string $siteUrl,
public string $poster,
public DateTimeImmutable $startTime,
public DateTimeImmutable $endTime,
) {
$this->siteUrl = sprintf(self::EVENT_INFO_URL, $id);
}

public function updateStreamUrl(string $streamUrl): self
Expand All @@ -34,6 +30,7 @@ public function updateStreamUrl(string $streamUrl): self
id: $this->id,
description: $this->description,
streamUrl: $streamUrl,
siteUrl: $this->siteUrl,
poster: $this->poster,
startTime: $this->startTime,
endTime: $this->endTime,
Expand Down
39 changes: 39 additions & 0 deletions src/Domain/Event/IFSCEventFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php declare(strict_types=1);

/**
* @license http://opensource.org/licenses/mit-license.php MIT
* @link https://github.com/nicoSWD
* @author Nicolas Oelgart <[email protected]>
*/
namespace nicoSWD\IfscCalendar\Domain\Event;

use DateTimeImmutable;

/**
 * Builds IFSCEvent instances and derives each event's site URL from a
 * configurable sprintf() template.
 */
final readonly class IFSCEventFactory
{
    /**
     * @param string $siteUrl sprintf() template that is filled with the
     *                        event's start year and the event ID, in that order
     */
    public function __construct(
        private string $siteUrl,
    ) {
    }

    /**
     * Creates an event from the given values; the site URL is rendered from
     * the template using the start time's year and the event ID.
     */
    public function create(
        string $name,
        int $id,
        string $description,
        string $streamUrl,
        string $poster,
        DateTimeImmutable $startTime,
        DateTimeImmutable $endTime,
    ): IFSCEvent {
        $eventPageUrl = $this->renderSiteUrl($startTime, $id);

        return new IFSCEvent(
            name: $name,
            id: $id,
            description: $description,
            streamUrl: $streamUrl,
            siteUrl: $eventPageUrl,
            poster: $poster,
            startTime: $startTime,
            endTime: $endTime,
        );
    }

    /** Fills the URL template with the four-digit start year and the event ID. */
    private function renderSiteUrl(DateTimeImmutable $startTime, int $id): string
    {
        $year = $startTime->format('Y');

        return sprintf($this->siteUrl, $year, $id);
    }
}
40 changes: 25 additions & 15 deletions src/Domain/Event/IFSCEventsScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
use DateTimeZone;
use DOMElement;
use DOMXPath;
use nicoSWD\IfscCalendar\Domain\Event\Exceptions\IFSCEventsScraperException;
use nicoSWD\IfscCalendar\Domain\Event\Exceptions\InvalidURLException;
use nicoSWD\IfscCalendar\Domain\Event\Helpers\DOMHelper;
use nicoSWD\IfscCalendar\Domain\Event\Helpers\Normalizer;
use nicoSWD\IfscCalendar\Domain\HttpClient\HttpClientInterface;
Expand All @@ -22,12 +24,17 @@

/**
 * Receives the scraper's collaborators via constructor promotion.
 *
 * @param HttpClientInterface $client       HTTP client — NOTE(review): its usage is not visible in this hunk
 * @param IFSCEventFactory    $eventFactory Creates the IFSCEvent objects collected in fetchEventsForLeague()
 * @param DOMHelper           $domHelper    DOM helper — NOTE(review): its usage is not visible in this hunk
 * @param Normalizer          $normalizer   Normalizes the time, cup name and stream URL parsed from a schedule line
 */
public function __construct(
private HttpClientInterface $client,
private IFSCEventFactory $eventFactory,
private DOMHelper $domHelper,
private Normalizer $normalizer,
) {
}

/** @throws IFSCEventsScraperException */
/**
* @throws IFSCEventsScraperException
* @throws InvalidURLException
* @return IFSCEvent[]
*/
public function fetchEventsForLeague(int $season, int $eventId, string $timezone, string $eventName): array
{
$xpath = $this->getXPathForEventsWithId($eventId);
Expand All @@ -47,9 +54,9 @@ public function fetchEventsForLeague(int $season, int $eventId, string $timezone
$schedules[] = IFSCSchedule::create(
day: (int) $matches['day'][$key],
month: Month::fromName($matches['month'][$key]),
time: $this->normalizer->normalizeTime($eventTime),
time: $eventTime,
season: $season,
cupName: $this->normalizer->cupName($cupName),
cupName: $cupName,
streamUrl: $streamUrl,
);
}
Expand All @@ -63,7 +70,7 @@ public function fetchEventsForLeague(int $season, int $eventId, string $timezone
$startDateTime = $this->getStartDateTime($schedule, $timezone);
$endDateTime = $this->getEndDateTime($startDateTime);

$events[] = new IFSCEvent(
$events[] = $this->eventFactory->create(
name: $schedule->cupName,
id: $eventId,
description: $eventName,
Expand Down Expand Up @@ -116,21 +123,24 @@ private function buildDateRegex(): string
~xsi";
}

/** @throws IFSCEventsScraperException */
private function parseEventDetails(string $line): array
{
$parts = preg_split('~(\s{2,}|\s\W+\s)~', $line, flags: PREG_SPLIT_NO_EMPTY);

if (count($parts) >= 3) {
[$time, $eventName, $streamUrl] = $parts;
} else {
[$time, $eventName] = $parts;
$regex = '~^
(?<time>(\d{1,2}:\d{1,2}(?:\s+(?:AM|PM))?|TBC|TBD))\s+
(?<name>[\w\'\-&\s]+)
(?<url>\s*(http[^\s]+\s*)*)
$~x';

if (!preg_match($regex, trim($line), $match)) {
throw new IFSCEventsScraperException("No event found in line: {$line}");
}

return [
$time,
$eventName,
$streamUrl ?? '',
];
$startTime = $this->normalizer->normalizeTime($match['time']);
$eventName = $this->normalizer->cupName($match['name']);
$streamUrl = $this->normalizer->firstUrl($match['url'] ?? '');

return [$startTime, $eventName, $streamUrl];
}

private function buildLeagueUri(int $id): string
Expand Down
13 changes: 13 additions & 0 deletions src/Domain/Event/IFSCSchedule.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
*/
namespace nicoSWD\IfscCalendar\Domain\Event;

use nicoSWD\IfscCalendar\Domain\Event\Exceptions\InvalidURLException;

final readonly class IFSCSchedule
{
/** @throws InvalidURLException */
private function __construct(
public int $day,
public Month $month,
Expand All @@ -17,8 +20,10 @@ private function __construct(
public string $cupName,
public string $streamUrl,
) {
$this->assertValidUrl($streamUrl);
}

/** @throws InvalidURLException */
public static function create(
int $day,
Month $month,
Expand All @@ -36,4 +41,12 @@ public static function create(
$streamUrl,
);
}

/**
 * Ensures the stream URL is either the empty string or a syntactically
 * valid URL.
 *
 * @throws InvalidURLException when a non-empty value fails FILTER_VALIDATE_URL
 */
public function assertValidUrl(string $streamUrl): void
{
    // Only the empty string is exempt from validation. Compare strictly:
    // empty() would also treat the string '0' as "empty" and let it through.
    if ($streamUrl === '') {
        return;
    }

    if (filter_var($streamUrl, FILTER_VALIDATE_URL) === false) {
        throw new InvalidURLException("Invalid URL: {$streamUrl}");
    }
}
}
2 changes: 2 additions & 0 deletions src/Domain/Event/Month.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ enum Month: int
/** @throws InvalidArgumentException */
public static function fromName(string $name): self
{
$name = strtoupper($name);

foreach (self::cases() as $case) {
if ($case->name === $name) {
return $case;
Expand Down
26 changes: 18 additions & 8 deletions src/Infrastructure/Events/IFSCGuzzleEventsFetcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
namespace nicoSWD\IfscCalendar\Infrastructure\Events;

use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use JsonException;
use nicoSWD\IfscCalendar\Domain\Event\Exceptions\IFSCEventsScraperException;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEventFetcherInterface;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEventsScraper;
use nicoSWD\IfscCalendar\Domain\Event\IFSCEventsScraperException;
use nicoSWD\IfscCalendar\Domain\League\IFSCLeague;

final readonly class IFSCGuzzleEventsFetcher implements IFSCEventFetcherInterface
Expand All @@ -29,13 +31,7 @@ public function __construct(
*/
public function fetchEventsForLeague(int $season, IFSCLeague $league): array
{
$response = $this->client->get($this->buildLeagueUri($league->id))->getBody()->getContents();
$response = @json_decode($response);

if (json_last_error()) {
throw new \Exception(json_last_error_msg());
}

$response = $this->fetchHtmlForLeague($league);
$events = [];

foreach ($response->events as $event) {
Expand All @@ -62,4 +58,18 @@ public function buildLeagueUri(int $leagueId): string
{
return sprintf(self::IFSC_LEAGUE_API_ENDPOINT, $leagueId);
}

/**
 * Requests the league endpoint and returns its decoded JSON body.
 *
 * NOTE(review): despite the method name, the response is decoded as JSON,
 * not treated as HTML.
 *
 * @throws IFSCEventsScraperException when the request fails, the body is not
 *                                    valid JSON, or the JSON document is not an object
 */
public function fetchHtmlForLeague(IFSCLeague $league): object
{
    try {
        $body = $this->client->get($this->buildLeagueUri($league->id))->getBody()->getContents();

        // JSON_THROW_ON_ERROR reports failures as JsonException, so no "@"
        // error suppression is needed here.
        $decoded = json_decode($body, flags: JSON_THROW_ON_ERROR);
    } catch (GuzzleException $e) {
        // Keep the original exception attached for debugging.
        throw new IFSCEventsScraperException("Unable to retrieve HTML: {$e->getMessage()}", previous: $e);
    } catch (JsonException $e) {
        throw new IFSCEventsScraperException("Unable to parse JSON: {$e->getMessage()}", previous: $e);
    }

    // Valid JSON may still be an array or scalar; without this check the
    // "object" return type would surface that as a TypeError instead of the
    // documented scraper exception.
    if (!is_object($decoded)) {
        throw new IFSCEventsScraperException(
            'Unable to parse JSON: expected an object, got ' . get_debug_type($decoded)
        );
    }

    return $decoded;
}
}
Loading

0 comments on commit a2e727a

Please sign in to comment.