diff --git a/haystack/nodes/connector/crawler.py b/haystack/nodes/connector/crawler.py index 16f703d5e1..f89fad0beb 100644 --- a/haystack/nodes/connector/crawler.py +++ b/haystack/nodes/connector/crawler.py @@ -13,7 +13,7 @@ from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By - from selenium.common.exceptions import StaleElementReferenceException + from selenium.common.exceptions import StaleElementReferenceException, WebDriverException from selenium import webdriver except (ImportError, ModuleNotFoundError) as ie: from haystack.utils.import_utils import _optional_component_not_installed @@ -22,6 +22,7 @@ from haystack.nodes.base import BaseComponent from haystack.schema import Document +from haystack.errors import NodeError logger = logging.getLogger(__name__) @@ -94,15 +95,15 @@ def __init__( options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options) - except: - raise Exception( + except WebDriverException as exc: + raise NodeError( """ \'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands: !apt-get update !apt install chromium-driver !cp /usr/lib/chromium-browser/chromedriver /usr/bin If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'""" - ) + ) from exc else: logger.info("'chrome-driver' will be automatically installed.") self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) diff --git a/setup.cfg b/setup.cfg index 68d78abac1..32e7e19e74 100644 --- a/setup.cfg +++ b/setup.cfg @@ -165,7 +165,7 @@ audio = beir = beir; platform_system != 'Windows' crawler = - selenium !=4.1.4 # due to https://github.com/SeleniumHQ/selenium/issues/10612 + selenium>=4.0.0,!=4.1.4 # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612 webdriver-manager preprocessing = beautifulsoup4 @@ -188,7 +188,8 @@ ray = aiorwlock>=1.3.0,<2 colab = - grpcio==1.43.0 + grpcio==1.47.0 + requests>=2.25 # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921 dev = pre-commit # Type check