Use chromedriver for kleinanzeigen #511

Merged
1 commit merged on Dec 16, 2023
flathunter/chrome_wrapper.py (3 additions, 0 deletions)
@@ -59,11 +59,14 @@
logger.info('Initializing Chrome WebDriver for crawler...')
chrome_options = uc.ChromeOptions() # pylint: disable=no-member
if platform == "darwin":
chrome_options.add_argument("--headless")

[Codecov: added line flathunter/chrome_wrapper.py#L62 was not covered by tests]
if driver_arguments is not None:
    for driver_argument in driver_arguments:
        chrome_options.add_argument(driver_argument)
chrome_version = get_chrome_version()
# something is weird with the patched driver version (maybe only in python3.11), I had to patch
# the chrome options to make it work
setattr(chrome_options, "headless", True)
driver = uc.Chrome(version_main=chrome_version, options=chrome_options) # pylint: disable=no-member

driver.execute_cdp_cmd('Network.setBlockedURLs',
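For context, a minimal sketch of the driver construction this hunk modifies, using the same undetected_chromedriver calls the wrapper already relies on. build_driver is a hypothetical stand-in for the module's get_chrome_driver(), and passing version_main=None lets the library detect the installed Chrome version instead of calling get_chrome_version().

from sys import platform

import undetected_chromedriver as uc


def build_driver(driver_arguments=None, chrome_major_version=None):
    """Hypothetical stand-in for get_chrome_driver(), showing the option handling above."""
    chrome_options = uc.ChromeOptions()
    if platform == "darwin":
        # The change in this PR: pass --headless explicitly on macOS
        chrome_options.add_argument("--headless")
    if driver_arguments is not None:
        for driver_argument in driver_arguments:
            chrome_options.add_argument(driver_argument)
    # Workaround kept from the existing code: the patched driver also expects
    # the `headless` attribute to be set on the options object
    setattr(chrome_options, "headless", True)
    return uc.Chrome(version_main=chrome_major_version, options=chrome_options)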
flathunter/crawler/kleinanzeigen.py (24 additions, 3 deletions)
@@ -1,11 +1,15 @@
"""Expose crawler for Ebay Kleinanzeigen"""
import re
import datetime
from typing import Optional

from bs4 import Tag
from selenium.webdriver import Chrome

from flathunter.logging import logger
from flathunter.abstract_crawler import Crawler
from flathunter.chrome_wrapper import get_chrome_driver
from flathunter.exceptions import DriverLoadException
from flathunter.logging import logger

class Kleinanzeigen(Crawler):
"""Implementation of Crawler interface for Ebay Kleinanzeigen"""
@@ -29,13 +33,30 @@
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.driver = None

    def get_driver(self) -> Optional[Chrome]:
        """Lazy method to fetch the driver as required at runtime"""
        if self.driver is not None:
            return self.driver
        driver_arguments = self.config.captcha_driver_arguments()
        self.driver = get_chrome_driver(driver_arguments)
        return self.driver

    def get_driver_force(self) -> Chrome:
        """Fetch the driver, and throw an exception if it is not configured or available"""
        res = self.get_driver()

[Codecov: added line flathunter/crawler/kleinanzeigen.py#L49 was not covered by tests]
        if res is None:
            raise DriverLoadException("Unable to load chrome driver when expected")
        return res

[Codecov: added lines flathunter/crawler/kleinanzeigen.py#L51-L52 were not covered by tests]

    def get_page(self, search_url, driver=None, page_no=None):
        """Applies a page number to a formatted search URL and fetches the exposes at that page"""
        return self.get_soup_from_url(search_url)
        return self.get_soup_from_url(search_url, driver=self.get_driver())

    def get_expose_details(self, expose):
        soup = self.get_page(expose['url'])
        soup = self.get_page(expose['url'], self.get_driver())
        for detail in soup.find_all('li', {"class": "addetailslist--detail"}):
            if re.match(r'Verfügbar ab', detail.text):
                date_string = re.match(r'(\w+) (\d{4})', detail.text)
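To summarise the pattern this file introduces: the Kleinanzeigen crawler now uses a Chrome driver that is created lazily on first use, cached on the instance, and fetched via get_driver_force() when a missing driver should raise DriverLoadException. A small, self-contained sketch of that lazy-initialisation pattern follows; LazyDriverHolder and DriverLoadError are hypothetical stand-ins for the crawler class and flathunter's DriverLoadException, not flathunter APIs.

from typing import Callable, Optional


class DriverLoadError(Exception):
    """Stand-in for flathunter's DriverLoadException."""


class LazyDriverHolder:
    """Illustrates the get_driver / get_driver_force pattern added in this PR."""

    def __init__(self, driver_factory: Callable[[], Optional[object]]):
        self._driver_factory = driver_factory
        self._driver: Optional[object] = None

    def get_driver(self) -> Optional[object]:
        # Build the driver only when it is first needed, then reuse it
        if self._driver is None:
            self._driver = self._driver_factory()
        return self._driver

    def get_driver_force(self) -> object:
        # Same as get_driver(), but fail loudly instead of returning None
        driver = self.get_driver()
        if driver is None:
            raise DriverLoadError("Unable to load chrome driver when expected")
        return driver

In the PR itself the factory role is played by get_chrome_driver(self.config.captcha_driver_arguments()), so configurations that never crawl Kleinanzeigen never pay the cost of starting Chrome.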