Skip to content

Commit

Permalink
page crawler update
Browse files Browse the repository at this point in the history
  • Loading branch information
1dson committed Aug 2, 2022
1 parent ddb587a commit ec8a6e6
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
<nexus.releases>https://nexus.olcs.dev-dvsacloud.uk/repository/maven-releases</nexus.releases>
<nexus.snapshots>https://nexus.olcs.dev-dvsacloud.uk/repository/maven-snapshots</nexus.snapshots>
<maven.compiler.version>3.8.1</maven.compiler.version>
<active-support.version>1.6.3</active-support.version>
<active-support.version>1.6.5</active-support.version>
<slf4j-log4j12.version>2.17.0</slf4j-log4j12.version>
<java.version>1.8</java.version>
<junit.jupiter.version>5.6.0</junit.jupiter.version>
Expand Down
9 changes: 4 additions & 5 deletions src/main/java/SpiderCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class SpiderCrawler{
private static final Logger LOGGER = LogManager.getLogger(SpiderCrawler.class);

public static Document request(String url, ArrayList<String> visitedURL) {
Document doc = null;
Document doc;
try {
if (isUrlValid(url)) {
doc = Jsoup.connect(url).get();
Expand All @@ -27,21 +27,20 @@ public static Document request(String url, ArrayList<String> visitedURL) {
return doc;
}
} catch (IOException e) {
return doc;
return null;
}
return null;
}


public static void crawler(int level, String url, ArrayList<String> visited) {
if (level <= 5) {
if (level <= 3) {
Document doc = request(url, visited);
if (doc != null) {
for (Element link : doc.select("a[href]")) {
String formattedLink = link.absUrl("href");
if (!visited.contains(formattedLink)) {
if (!visited.contains(formattedLink))
crawler(level++, formattedLink, visited);
}
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions src/test/java/PageCrawlerTest.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import activesupport.driver.Browser;
import org.junit.Before;
import org.junit.Test;
import org.openqa.selenium.By;

import java.util.ArrayList;


public class PageCrawlerTest {

String baseURL = System.getProperty("url");
String baseURL = "https://ssweb.qa.olcs.dev-dvsacloud.uk/auth/login/";

@Before
public void setUp() {
System.setProperty("browser", "chrome");
System.setProperty("browser", "headless");
}

@Test
public void someTest(){
public void someTest() {
SpiderCrawler.crawler(1, baseURL, new ArrayList<>());
}
}

0 comments on commit ec8a6e6

Please sign in to comment.