Skip to content

Commit

Permalink
feat: escape dangerous strings in data explorer downloads. (#2686)
Browse files Browse the repository at this point in the history
This reduces the risk of CSV/XLS formula injection.
  • Loading branch information
niross authored Jul 4, 2023
1 parent d5ca72f commit 63617c2
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
17 changes: 15 additions & 2 deletions dataworkspace/dataworkspace/apps/explorer/exporters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import csv
import json
import re
import string
import uuid
from datetime import datetime
from io import BytesIO, StringIO
from numbers import Number

import waffle
from django.conf import settings
from django.core.serializers.json import DjangoJSONEncoder
from django.utils.module_loading import import_string
Expand All @@ -28,6 +31,16 @@ def __init__(self, querylog, request):
self.request = request
self.user = request.user

@staticmethod
def _escape_field(field):
    """Neutralise a cell value that a spreadsheet could interpret as a formula.

    When the ``EXPLORER_CSV_INJECTION_PROTECTION_FLAG`` waffle switch is
    active, a string starting with ``=``, ``+``, ``-`` or ``@`` is returned
    with a ``'`` inserted as its first character. All other values are
    returned unchanged.

    Args:
        field: A single cell value taken from a result row. May be any type.

    Returns:
        ``field`` itself, or - for a string with a dangerous leading
        character - a new string prefixed with ``'``.
    """
    if not waffle.switch_is_active(settings.EXPLORER_CSV_INJECTION_PROTECTION_FLAG):
        return field
    # Only text can carry a formula. Numbers, None (SQL NULL), dates, UUIDs
    # etc. are passed through untouched; running a regex over them would
    # raise TypeError.
    if not isinstance(field, str):
        return field
    # A leading "-" is only safe when the *whole* value is a plain negative
    # number (e.g. "-1", "-.5", "-1.25e3"). Checking just a numeric-looking
    # prefix would let payloads such as "-1+cmd|' /C calc'!A0" through.
    if field.startswith("-") and re.fullmatch(
        r"-(?:\d+\.?\d*|\.\d+)(?:[eE][+\-]?\d+)?", field
    ):
        return field
    # Insert a ' as the first char if the string starts with =, +, - or @
    if field.startswith(("=", "+", "-", "@")):
        return "'" + field
    return field

def get_output(self, **kwargs):
    """Return the complete contents of the buffer built by ``get_file_output``.

    All keyword arguments are forwarded to ``get_file_output`` unchanged.
    """
    buffer = self.get_file_output(**kwargs)
    return buffer.getvalue()
Expand Down Expand Up @@ -66,7 +79,7 @@ def _get_output(self, headers, data, **kwargs):
writer = csv.writer(csv_data, delimiter=delim)
writer.writerow(headers)
for row in data:
writer.writerow(row)
writer.writerow([self._escape_field(field) for field in row])
return csv_data


Expand Down Expand Up @@ -121,7 +134,7 @@ def _get_output(self, headers, data, **kwargs):
# JSON and Array fields
if isinstance(data_row, (dict, list)):
data_row = json.dumps(data_row)
ws.write(row, col, data_row)
ws.write(row, col, self._escape_field(data_row))
col += 1
row += 1
col = 0
Expand Down
1 change: 1 addition & 0 deletions dataworkspace/dataworkspace/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ def sort_database_config(database_list):
ALLOW_USER_ACCESS_TO_DASHBOARD_IN_BULK = "ALLOW_USER_ACCESS_TO_DASHBOARD_IN_BULK"
SECURITY_CLASSIFICATION_FLAG = "SECURITY_CLASSIFICATION_FLAG"
REFERENCE_DATASET_PIPELINE_SYNC = "REFERENCE_DATASET_PIPELINE_SYNC"
# Name of the waffle switch that the explorer exporters check (via
# waffle.switch_is_active) before escaping cells against CSV/XLS injection.
EXPLORER_CSV_INJECTION_PROTECTION_FLAG = "EXPLORER_CSV_INJECTION_PROTECTION_FLAG"

DATASET_FINDER_SEARCH_RESULTS_PER_PAGE = 200

Expand Down

0 comments on commit 63617c2

Please sign in to comment.