Skip to content

Commit

Permalink
Merge pull request #1231 from swirlai/revert-1224-DS-1723
Browse files Browse the repository at this point in the history
Revert "Bump cryptography and Django, remove snowflake."
  • Loading branch information
erikspears authored Feb 28, 2024
2 parents 3b8dca0 + ff5741a commit 80ffd16
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/urls-checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
retry_count: 3

# A comma-separated list of patterns to exclude during URL checks
exclude_patterns: localhost,api,apis,rss,etc,xx,googleapis,hostname,graph.microsoft.com,login.microsoftonline.com,my-host.com
exclude_patterns: localhost,api,apis,rss,etc,xx,googleapis,hostname,snowflake,graph.microsoft.com,login.microsoftonline.com,my-host.com

# Exclude these files from the checker
exclude_files: Swirl.postman_collection.json,docs/googlec95caf0bd4a8c5df.html,docs/Gemfile,docs/Gemfile.lock,docs/_config.yml,tests/,SearchProviders/
30 changes: 30 additions & 0 deletions SearchProviders/company_snowflake.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "Free Company Records - Snowflake",
"active": false,
"default": false,
"authenticator": "",
"connector": "Snowflake",
"url": "<snowflake-instance-address>",
"query_template": "SELECT {fields} FROM {table} WHERE {field1} ILIKE '%{query_string}%' AND NULLIF(TRIM(founded), '') IS NOT NULL ORDER BY TRY_TO_NUMBER(REGEXP_REPLACE(SPLIT_PART(size, '-', 1), '[^0-9]', '')) DESC;",
"post_query_template": {},
"http_request_headers": {},
"page_fetch_config_json": {},
"query_processors": [
"AdaptiveQueryProcessor"
],
"query_mappings": "fields=*,sort_by_date=founded,table=FREECOMPANYDATASET,field1=name",
"result_grouping_field": "",
"result_processors": [
"MappingResultProcessor",
"CosineRelevancyResultProcessor"
],
"response_mappings": "",
"result_mappings": "title='{name} ({founded})',body='{name} was founded in {founded} in {country}. It has {size} employees and operates in the {industry} industry.',url='https://{linkedin_url}',date_published=founded,NO_PAYLOAD",
"results_per_query": 10,
"credentials": "<username>:<password>:FREE_COMPANY_DATASET:COMPUTE_WH",
"eval_credentials": "",
"tags": [
"Company",
"Snowflake"
]
}
30 changes: 30 additions & 0 deletions SearchProviders/preloaded.json
Original file line number Diff line number Diff line change
Expand Up @@ -1390,6 +1390,36 @@
"MongoDB"
]
},
{
"name": "Free Company Records - Snowflake",
"active": false,
"default": false,
"authenticator": "",
"connector": "Snowflake",
"url": "<snowflake-instance-address>",
"query_template": "SELECT {fields} FROM {table} WHERE {field1} ILIKE '%{query_string}%' AND NULLIF(TRIM(founded), '') IS NOT NULL ORDER BY TRY_TO_NUMBER(REGEXP_REPLACE(SPLIT_PART(size, '-', 1), '[^0-9]', '')) DESC;",
"post_query_template": {},
"http_request_headers": {},
"page_fetch_config_json": {},
"query_processors": [
"AdaptiveQueryProcessor"
],
"query_mappings": "fields=*,sort_by_date=founded,table=FREECOMPANYDATASET,field1=name",
"result_grouping_field": "",
"result_processors": [
"MappingResultProcessor",
"CosineRelevancyResultProcessor"
],
"response_mappings": "",
"result_mappings": "title='{name} ({founded})',body='{name} was founded in {founded} in {country}. It has {size} employees and operates in the {industry} industry.',url='https://{linkedin_url}',date_published=founded,NO_PAYLOAD",
"results_per_query": 10,
"credentials": "<username>:<password>:FREE_COMPANY_DATASET:COMPUTE_WH",
"eval_credentials": "",
"tags": [
"Company",
"Snowflake"
]
},
{
"name": "Entities - LittleSis.org",
"active": false,
Expand Down
165 changes: 103 additions & 62 deletions docs/Developer-Reference.md

Large diffs are not rendered by default.

65 changes: 33 additions & 32 deletions docs/User-Guide.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ amqp==5.2.0
annotated-types==0.6.0
anyio==4.3.0
asgiref==3.7.2
asn1crypto==1.5.1
attrs==23.2.0
autobahn==23.6.2
Automat==22.10.0
Expand Down Expand Up @@ -44,6 +45,7 @@ docutils==0.20.1
drf-yasg==1.21.7
elastic-transport==8.12.0
elasticsearch==8.12.0
filelock==3.13.1
google-api-core==2.17.1
google-auth==2.28.1
google-cloud-bigquery==3.17.2
Expand Down Expand Up @@ -78,6 +80,7 @@ opensearch-py==2.4.2
oracledb==2.0.1
packaging==23.2
pika==1.3.2
platformdirs==3.11.0
ply==3.11
preshed==3.0.9
prompt-toolkit==3.0.43
Expand Down Expand Up @@ -107,6 +110,8 @@ simplejson==3.19.2
six==1.16.0
smart-open==6.4.0
sniffio==1.3.0
snowflake-connector-python==3.7.1
sortedcontainers==2.4.0
soupsieve==2.5
spacy==3.7.4
spacy-legacy==3.0.12
Expand All @@ -118,6 +123,7 @@ textblob==0.18.0.post0
thinc==8.2.3
tika==2.6.0
tiktoken==0.6.0
tomlkit==0.12.4
tqdm==4.66.2
Twisted==23.10.0
txaio==23.1.1
Expand Down
1 change: 1 addition & 0 deletions swirl/connectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from swirl.connectors.microsoft_graph import M365SharePointSites
from swirl.connectors.microsoft_graph import MicrosoftTeams
from swirl.connectors.mongodb import MongoDB
from swirl.connectors.snowflake import Snowflake
from swirl.connectors.oracle import Oracle

# uncomment the line below to enable PostgreSQL
Expand Down
7 changes: 4 additions & 3 deletions swirl/connectors/db_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def validate_query(self, session=None):
return False

return True

########################################

def normalize_response(self):
Expand All @@ -136,7 +136,7 @@ def normalize_response(self):
if not self.response:
# assume the connector took care of it
return

rows = self.response

trimmed_rows = []
Expand All @@ -146,7 +146,7 @@ def normalize_response(self):
n_field = 0
if self.column_names:
for field in column_names:
# to handle None columns
# to handle None columns e.g. Snowflake
if row[n_field]:
dict_row[field] = row[n_field]
else:
Expand All @@ -168,3 +168,4 @@ def normalize_response(self):
self.retrieved = retrieved
self.results = trimmed_rows
return

92 changes: 92 additions & 0 deletions swirl/connectors/snowflake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
'''
@author: Sid Probstein
@contact: [email protected]
'''

from sys import path
from os import environ

import snowflake.connector
from snowflake.connector import ProgrammingError

import json

import django

from swirl.utils import swirl_setdir
# Make the directory returned by swirl_setdir() importable (it holds settings.py).
path.append(swirl_setdir()) # path to settings.py file
# Point Django at the project settings unless the environment already names a module.
environ.setdefault('DJANGO_SETTINGS_MODULE', 'swirl_server.settings')
# Initialize Django before the swirl connector imports further down in this module.
# NOTE(review): presumably those imports need configured settings/apps - confirm.
django.setup()

from celery.utils.log import get_task_logger
from logging import DEBUG
logger = get_task_logger(__name__)
# logger.setLevel(DEBUG)

from swirl.connectors.db_connector import DBConnector
from swirl.connectors.utils import bind_query_mappings

########################################
########################################

class Snowflake(DBConnector):
    """Connector that runs a SearchProvider's SQL queries against Snowflake.

    The provider supplies the Snowflake account identifier in ``url`` and the
    login details in ``credentials``, formatted as
    ``username:password:database:warehouse``.
    """

    type = "Snowflake"

    ########################################

    def execute_search(self, session=None):
        """Run the count query and the search query, storing the raw rows.

        Reads ``self.count_query`` and ``self.query_to_provider`` (presumably
        prepared by the DBConnector base class before this method is called -
        verify against the base class) and, on success, sets
        ``self.column_names``, ``self.response``, ``self.found`` and
        ``self.retrieved``.

        On missing/invalid credentials or a Snowflake ``ProgrammingError`` the
        problem is reported, ``self.status`` is set to ``'ERR'`` and the
        method returns without populating ``self.response``.

        Args:
            session: Accepted for interface compatibility; not used here.

        Returns:
            None. Results are communicated through instance attributes.
        """

        logger.debug(f"{self}: execute_search()")

        credentials = self.provider.credentials
        if not credentials:
            # Without credentials there is nothing to connect with; stop here
            # rather than failing later on unbound names.
            self.warning("No credentials!")
            self.status = 'ERR'
            return

        credlist = credentials.split(':')
        if len(credlist) != 4:
            # Also covers credentials that contain no ':' at all.
            self.warning("Invalid credentials, should be: username:password:database:warehouse")
            self.status = 'ERR'
            return

        username, password, database, warehouse = credlist
        account = self.provider.url

        # Initialized up front so the finally clause can tell what was opened,
        # even when connect() itself raises.
        conn = None
        cursor = None
        try:
            # Create a new connection
            conn = snowflake.connector.connect(user=username, password=password, account=account)
            cursor = conn.cursor()
            # warehouse/database come from the provider configuration, not
            # from the end user's query string.
            cursor.execute(f"USE WAREHOUSE {warehouse}")
            cursor.execute(f"USE DATABASE {database}")

            cursor.execute(self.count_query)
            count_result = cursor.fetchone()
            found = count_result[0] if count_result else 0
            if found == 0:
                self.message(f"Retrieved 0 of 0 results from: {self.provider.name}")
                self.status = 'READY'
                self.found = 0
                self.retrieved = 0
                return

            cursor.execute(self.query_to_provider)
            # Lower-case the column names reported by the cursor.
            self.column_names = [col[0].lower() for col in cursor.description]
            results = cursor.fetchall()

        except ProgrammingError as err:
            self.error(f"{err} querying {self.type}")
            self.status = 'ERR'
            return

        finally:
            # Always release the cursor and connection: on success, on the
            # zero-result early return, and on any exception.
            if cursor is not None:
                cursor.close()
            if conn is not None:
                conn.close()

        self.response = list(results)

        self.found = found
        self.retrieved = self.provider.results_per_query
        return

3 changes: 2 additions & 1 deletion swirl/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ class SearchProvider(models.Model):
('M365SharePointSites', 'M365 SharePoint Sites'),
('MicrosoftTeams', 'Microsoft Teams'),
('MongoDB', 'MongoDB'),
('Oracle','Oracle')
('Oracle','Oracle'),
('Snowflake','Snowflake')
]
connector = models.CharField(max_length=200, default='RequestsGet', choices=CONNECTOR_CHOICES)
url = models.CharField(max_length=2048, default=str, blank=True)
Expand Down

0 comments on commit 80ffd16

Please sign in to comment.