Merge pull request #213 from Boardfy/test/max-depth-bug-when-respecting-robots

Test for issue #181
brendt authored Apr 4, 2019
2 parents e431654 + 188e049 commit 7822cbd
Showing 2 changed files with 26 additions and 2 deletions.
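In short: issue #181 reported that the maximum-depth setting was ignored whenever the crawler was respecting robots.txt. The first file fixes that; the second adds regression tests.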
4 changes: 2 additions & 2 deletions src/LinkAdder.php
@@ -87,8 +87,8 @@ protected function normalizeUrl(UriInterface $url): UriInterface

 protected function shouldCrawl(Node $node): bool
 {
-    if ($this->crawler->mustRespectRobots()) {
-        return $this->crawler->getRobotsTxt()->allows($node->getValue());
+    if ($this->crawler->mustRespectRobots() && ! $this->crawler->getRobotsTxt()->allows($node->getValue())) {
+        return false;
     }

     $maximumDepth = $this->crawler->getMaximumDepth();
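Why the old code misbehaved: when mustRespectRobots() was true, shouldCrawl() returned the robots.txt verdict immediately, so a URL that robots.txt allowed was crawled even beyond the configured maximum depth. The fix turns the early return into a guard clause that can only veto a URL. For context, a minimal sketch of the whole method after the fix; the body after the guard is an assumption based on the getMaximumDepth() call visible at the end of the hunk, not code shown in this diff:

protected function shouldCrawl(Node $node): bool
{
    // Guard clause from the fix: robots.txt can only forbid a URL.
    // An allowed URL falls through to the depth check instead of
    // returning early.
    if ($this->crawler->mustRespectRobots() && ! $this->crawler->getRobotsTxt()->allows($node->getValue())) {
        return false;
    }

    $maximumDepth = $this->crawler->getMaximumDepth();

    // Assumed continuation (not part of this diff): no configured
    // maximum means crawl everything; otherwise enforce the limit.
    if (is_null($maximumDepth)) {
        return true;
    }

    return $node->getDepth() <= $maximumDepth;
}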
24 changes: 24 additions & 0 deletions tests/CrawlerRobotsTest.php
@@ -112,4 +112,28 @@ private function createCrawler(): Crawler
         ->setMaximumDepth(3)
         ->setCrawlObserver(new CrawlLogger());
 }
+
+/** @test */
+public function it_should_check_depth_when_respecting_robots()
+{
+    Crawler::create()
+        ->respectRobots()
+        ->setMaximumDepth(1)
+        ->setCrawlObserver(new CrawlLogger())
+        ->startCrawling('http://localhost:8080');
+
+    $this->assertNotCrawled([['url' => 'http://localhost:8080/link3', 'foundOn' => 'http://localhost:8080/link2']]);
+}
+
+/** @test */
+public function it_should_check_depth_when_ignoring_robots()
+{
+    Crawler::create()
+        ->ignoreRobots()
+        ->setMaximumDepth(1)
+        ->setCrawlObserver(new CrawlLogger())
+        ->startCrawling('http://localhost:8080');
+
+    $this->assertNotCrawled([['url' => 'http://localhost:8080/link3', 'foundOn' => 'http://localhost:8080/link2']]);
+}
 }
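Both tests crawl the local test server with a maximum depth of 1 and assert that http://localhost:8080/link3, found on the depth-1 page /link2, is never visited. The respectRobots() variant exercises the buggy branch fixed above; the ignoreRobots() variant acts as a control for the path that already honored the depth limit.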
