Skip to content

Commit

Permalink
Update minimum selenium version supported for crawler (deepset-ai#2921)
Browse files Browse the repository at this point in the history
* Update the minimum selenium version required to use the crawler

* Update the grpcio pin to match the default version in Google Colab

* Add requests requirement
  • Loading branch information
sjrl authored Aug 3, 2022
1 parent 2c56305 commit bde3261
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
9 changes: 5 additions & 4 deletions haystack/nodes/connector/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
from selenium import webdriver
except (ImportError, ModuleNotFoundError) as ie:
from haystack.utils.import_utils import _optional_component_not_installed
Expand All @@ -22,6 +22,7 @@

from haystack.nodes.base import BaseComponent
from haystack.schema import Document
from haystack.errors import NodeError


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -94,15 +95,15 @@ def __init__(
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
self.driver = webdriver.Chrome(service=Service("chromedriver"), options=options)
except:
raise Exception(
except WebDriverException as exc:
raise NodeError(
"""
\'chromium-driver\' needs to be installed manually when running colab. Follow the below given commands:
!apt-get update
!apt install chromium-driver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
If it has already been installed, please check if it has been copied to the right directory i.e. to \'/usr/bin\'"""
)
) from exc
else:
logger.info("'chrome-driver' will be automatically installed.")
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ audio =
beir =
beir; platform_system != 'Windows'
crawler =
selenium !=4.1.4 # due to https://github.com/SeleniumHQ/selenium/issues/10612
selenium>=4.0.0,!=4.1.4 # Avoid 4.1.4 due to https://github.com/SeleniumHQ/selenium/issues/10612
webdriver-manager
preprocessing =
beautifulsoup4
Expand All @@ -188,7 +188,8 @@ ray =
aiorwlock>=1.3.0,<2

colab =
grpcio==1.43.0
grpcio==1.47.0
requests>=2.25 # Needed to avoid dependency conflict with crawler https://github.com/deepset-ai/haystack/pull/2921
dev =
pre-commit
# Type check
Expand Down

0 comments on commit bde3261

Please sign in to comment.