diff --git a/src/Infrastructure/Schedule/HTMLNormalizer.php b/src/Infrastructure/Schedule/HTMLNormalizer.php
index 00341df..c2344f6 100644
--- a/src/Infrastructure/Schedule/HTMLNormalizer.php
+++ b/src/Infrastructure/Schedule/HTMLNormalizer.php
@@ -24,7 +24,8 @@ public function normalize(string $html): string
             $html,
         );
         $html = html_entity_decode($html);
-        $html = substr($html, strpos($html, 'PROGRAMME'));
+        $offset = $this->cutOffOffset($html);
+        $html = substr($html, $offset);
         $lines = preg_split('~\n~', $html, -1, PREG_SPLIT_NO_EMPTY);
         $lines = array_map('trim', $lines);
         $lines = array_filter($lines);
@@ -32,4 +33,22 @@ public function normalize(string $html): string
 
         return strip_tags($html);
     }
+
+    /**
+     * Locates the byte offset from which normalize() keeps the markup.
+     *
+     * The 'PROGRAMME' marker is tried first, then 'Schedule'; when neither
+     * occurs the offset is 0, so the input is kept from its beginning.
+     */
+    private function cutOffOffset(string $html): int
+    {
+        foreach (['PROGRAMME', 'Schedule'] as $marker) {
+            $found = strpos($html, $marker);
+            if ($found !== false) {
+                return $found;
+            }
+        }
+
+        return 0;
+    }
 }