Skip to content

Commit

Permalink
Use chromedriver for kleinanzeigen
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitrij Vinokour committed Dec 15, 2023
1 parent 4ffcde9 commit 674d12f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
3 changes: 3 additions & 0 deletions flathunter/chrome_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def get_chrome_driver(driver_arguments):
for driver_argument in driver_arguments:
chrome_options.add_argument(driver_argument)
chrome_version = get_chrome_version()
# Workaround: the patched driver version misbehaves (possibly only on Python 3.11), so the
# `headless` attribute has to be set on the Chrome options explicitly to make it work.
setattr(chrome_options, "headless", True)
driver = uc.Chrome(version_main=chrome_version, options=chrome_options) # pylint: disable=no-member

driver.execute_cdp_cmd('Network.setBlockedURLs',
Expand Down
27 changes: 24 additions & 3 deletions flathunter/crawler/kleinanzeigen.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""Expose crawler for Ebay Kleinanzeigen"""
import re
import datetime
from typing import Optional

from bs4 import Tag
from selenium.webdriver import Chrome

from flathunter.logging import logger
from flathunter.abstract_crawler import Crawler
from flathunter.chrome_wrapper import get_chrome_driver
from flathunter.exceptions import DriverLoadException
from flathunter.logging import logger

class Kleinanzeigen(Crawler):
"""Implementation of Crawler interface for Ebay Kleinanzeigen"""
Expand All @@ -29,13 +33,30 @@ class Kleinanzeigen(Crawler):
def __init__(self, config):
    """Initialise the crawler with its configuration; no driver is created yet"""
    super().__init__(config)
    self.config = config
    # Stays None until get_driver() creates the Chrome driver on first use.
    self.driver = None


def get_driver(self) -> Optional[Chrome]:
    """Return the Chrome driver, creating and caching it on the first call"""
    if self.driver is None:
        # First use: build the driver from the configured captcha driver arguments
        # and keep it on the instance so later calls reuse it.
        arguments = self.config.captcha_driver_arguments()
        self.driver = get_chrome_driver(arguments)
    return self.driver

def get_driver_force(self) -> Chrome:
    """Return the driver from get_driver(), raising DriverLoadException if it is None"""
    # NOTE: the Codecov patch check on this commit reported the get_driver() call,
    # the raise and the return of this method as not covered by tests.
    driver = self.get_driver()
    if driver is not None:
        return driver
    raise DriverLoadException("Unable to load chrome driver when expected")

Check warning on line 52 in flathunter/crawler/kleinanzeigen.py

View check run for this annotation

Codecov / codecov/patch

flathunter/crawler/kleinanzeigen.py#L51-L52

Added lines #L51 - L52 were not covered by tests

def get_page(self, search_url, driver=None, page_no=None):
"""Applies a page number to a formatted search URL and fetches the exposes at that page"""
return self.get_soup_from_url(search_url)
return self.get_soup_from_url(search_url, driver=self.get_driver())

def get_expose_details(self, expose):
soup = self.get_page(expose['url'])
soup = self.get_page(expose['url'], self.get_driver())
for detail in soup.find_all('li', {"class": "addetailslist--detail"}):
if re.match(r'Verfügbar ab', detail.text):
date_string = re.match(r'(\w+) (\d{4})', detail.text)
Expand Down

0 comments on commit 674d12f

Please sign in to comment.