diff --git a/lazyops/libs/abcs/clients/http.py b/lazyops/libs/abcs/clients/http.py index b036e50..79412dd 100644 --- a/lazyops/libs/abcs/clients/http.py +++ b/lazyops/libs/abcs/clients/http.py @@ -83,6 +83,7 @@ def cachify_get_exclude_keys(self, func: str, **kwargs) -> List[str]: 'callback', 'retryable', 'retry_limit', + 'validate_url', ] def configure_api_client(self, *args, **kwargs) -> aiohttpx.Client: @@ -351,7 +352,7 @@ def _validate_url(self, url: str) -> Union[bool, str]: Quickly validates a URL """ try: - response = self.api.head(url) + response = self.api.head(url, follow_redirects = True) try: response.raise_for_status() return True @@ -365,7 +366,7 @@ def _fetch_content_type(self, url: str) -> Optional[str]: Fetches the content type """ try: - response = self.api.head(url) + response = self.api.head(url, follow_redirects = True) return response.headers.get('content-type') except Exception as e: return None @@ -387,7 +388,7 @@ async def _avalidate_url(self, url: str) -> Union[bool, str]: Quickly validates a URL """ try: - response = await self.api.async_head(url) + response = await self.api.async_head(url, follow_redirects = True) try: response.raise_for_status() return True @@ -401,7 +402,7 @@ async def _afetch_content_type(self, url: str) -> Optional[str]: Fetches the content type """ try: - response = await self.api.async_head(url) + response = await self.api.async_head(url, follow_redirects = True) return response.headers.get('content-type') except Exception as e: return None @@ -578,17 +579,19 @@ def cachify_get_name_builder_kwargs(self, func: str, **kwargs) -> Dict[str, Any] def __get_pdftotext( self, url: str, + validate_url: Optional[bool] = False, raise_errors: Optional[bool] = None, **kwargs ) -> Optional[str]: """ Transform a PDF File to Text directly from URL """ - validate_result = self._validate_url(url) - if validate_result != True: - if raise_errors: raise ValueError(f'Invalid URL: {url}. {validate_result}') - self.logger.error(f'Invalid URL: {url}. {validate_result}') - return None + if validate_url: + validate_result = self._validate_url(url) + if validate_result != True: + if raise_errors: raise ValueError(f'Invalid URL: {url}. {validate_result}') + self.logger.error(f'Invalid URL: {url}. {validate_result}') + return None cmd = f'curl -s {url} | pdftotext -layout -nopgbrk -eol unix -colspacing 0.7 -y 58 -x 0 -H 741 -W 596 - -' process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -605,6 +608,7 @@ def __get_pdftotext( def _get_pdftotext( self, url: str, + validate_url: Optional[bool] = False, retryable: Optional[bool] = False, retry_limit: Optional[int] = 3, raise_errors: Optional[bool] = None, @@ -623,17 +627,19 @@ def _get_pdftotext( async def __aget_pdftotext( self, url: str, + validate_url: Optional[bool] = False, raise_errors: Optional[bool] = None, **kwargs ) -> Optional[str]: """ Transform a PDF File to Text directly from URL """ - validate_result = await self._avalidate_url(url) - if validate_result != True: - if raise_errors: raise ValueError(f'Invalid URL: {url}. {validate_result}') - self.logger.error(f'Invalid URL: {url}. {validate_result}') - return None + if validate_url: + validate_result = await self._avalidate_url(url) + if validate_result != True: + if raise_errors: raise ValueError(f'Invalid URL: {url}. {validate_result}') + self.logger.error(f'Invalid URL: {url}. {validate_result}') + return None cmd = f'curl -s {url} | pdftotext -layout -nopgbrk -eol unix -colspacing 0.7 -y 58 -x 0 -H 741 -W 596 - -' process = await asyncio.subprocess.create_subprocess_shell(cmd, shell=True, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE) try: diff --git a/lazyops/libs/abcs/sql/database/base.py b/lazyops/libs/abcs/sql/database/base.py index 5bb9a1b..e0a749d 100644 --- a/lazyops/libs/abcs/sql/database/base.py +++ b/lazyops/libs/abcs/sql/database/base.py @@ -696,4 +696,22 @@ async def index_exists( ) async with self.session() as session: result = await session.execute(statement, {'index_name': index_name}) - return result.scalar_one() \ No newline at end of file + return result.scalar_one() + + + async def database_exists( + self, + db_name: str, + ) -> bool: + """ + Checks if the database exists + """ + statement = self.text( + f"SELECT 1 FROM pg_database WHERE datname = '{db_name}';" + ) + async with self.session() as session: + result = await session.execute(statement) + output = result.scalar_one() + logger.info(f'Database Exists: {output}') + return output + \ No newline at end of file