Skip to content

Commit

Permalink
Merge pull request #218 from KennaSecurity/user_audit_with_api
Browse files Browse the repository at this point in the history
User audit with api
  • Loading branch information
ishgaror authored Feb 28, 2024
2 parents 2288d75 + c0c3d94 commit e4457b4
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 1 deletion.
24 changes: 23 additions & 1 deletion User Audit/README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Kenna User Audit

This Python script will output all of the users and roles for the client into a single Excel spreadsheet. Users which have never logged in are highlighted in red, while users who have not logged in over the past 30 days are highlighted in yellow.
Python script - `useraudit.py` will output all of the users and roles for the client into a single Excel spreadsheet. Users which have never logged in are highlighted in red, while users who have not logged in over the past 30 days are highlighted in yellow.

The script assumes standard US date formatting for use in Excel (i.e. m/d/YYYY). Users with European or other default date formats will want to modify the date format in the script to their locale.

Python script - `audit_test_full.py` will perform all the actions listed in the 'useraudit.py' script above in addition to providing the details on users that have "source" as "API" in "Audit Logs" and used their key in the stipulated audit period listed as step #3 in the *updates/edits needed to execute the script* section below.

Tested on Python version 3.9.6. Please see requirements.txt for full dependency list.

## Installation
Expand All @@ -13,3 +15,23 @@ Tested on Python version 3.9.6. Please see requirements.txt for full dependency
## Usage

`python useraudit.py <API_TOKEN>`

`python audit_test_full.py`

## Updates/Edits needed to execute the "audit_test_full.py" script

### 1: Update the base_url
By default, https://api.kennasecurity.com/ is being used. Update it to w.r.t your environment.

### 2: API Key Token
Set an environment variable named API_KEY with your actual API key as its value. The way you do this can vary depending on your operating system and the interface you're using (command line, graphical interface, etc.).
#### Windows:
You can set an environment variable in Windows using the setx command in the command prompt:
*setx API_KEY "your-api-key"*

#### Mac OS or Linux:
In macOS or Linux, you can set an environment variable in the terminal using the export command:
*export API_KEY=your-api-key*

### 3: Fetch Audit Logs
Update the start_date (Line #70) and end_date (Line #71) in the script as per the time period you want to pull the logs for. Based on this time period, the script will check which user used their API keys in that time frame.
162 changes: 162 additions & 0 deletions User Audit/audit_test_full.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import requests
import pandas as pd
import json
import sys
import io
import os
import datetime
import gzip
import jsonlines
from tqdm import tqdm
from pandas import json_normalize
from openpyxl import load_workbook
from openpyxl.styles import PatternFill

def flatten_json(nested_json, exclude=['roles']):
out = {}
def flatten(x, name='', exclude=exclude):
if type(x) is dict:
for a in x:
if a not in exclude:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x

flatten(nested_json)
return out

token = os.environ.get('API_KEY')
base_url = "https://api.kennasecurity.com"
users_url= base_url + "/users"
roles_url = base_url + "/roles"
audit_logs_url = base_url + "/audit_logs/"

headers = {"Accept": "application/json", "X-Risk-Token":token}

users_response = requests.get(users_url, headers=headers).json()

users_df = pd.DataFrame(json_normalize([flatten_json(x) for x in users_response['users']]))
users_df = users_df.rename(columns={"id":"user_id","created_at":"user_created_at","updated_at":"user_updated_at"})

users_df['user_created_at'] = pd.to_datetime(users_df['user_created_at'], format='%Y-%m-%d', errors='coerce').dt.date
users_df['last_sign_in_at'] = pd.to_datetime(users_df['last_sign_in_at'], format='%Y-%m-%d', errors='coerce').dt.date

roles_response = requests.get(roles_url, headers=headers).json()

roles_df = pd.DataFrame(json_normalize([flatten_json(x) for x in roles_response['roles']]))
roles_df = roles_df.rename(columns={"id": "role_id","created_at":"role_created_at","updated_at":"role_updated_at","name":"role_name"})

# Save 'Users' and 'Roles' data to Excel and apply conditional formatting
with pd.ExcelWriter('cvm_user_audit.xlsx', engine='xlsxwriter', date_format='m/d/yyyy') as writer:
users_df.to_excel(writer, sheet_name='Users')
roles_df.to_excel(writer, sheet_name='Roles')

workbook = writer.book
red_format = workbook.add_format({'bg_color': '#FFC7CE','font_color': '#9C0006'})
yellow_format = workbook.add_format({'bg_color': '#FFEB9C','font_color': '#9C6500'})

users_sheet = writer.sheets['Users']

users_sheet.conditional_format('$J$2:$J$99999', {'type': 'blanks', 'format': red_format})
users_sheet.conditional_format('$J$2:$J$99999', {'type': 'formula', 'criteria': '=J2<TODAY()-30', 'format': yellow_format})

# Fetch audit logs data
params = {
"start_date": "2024-01-03T00:00:00",
"end_date": "2024-01-06T00:00:00",
}

# Update headers for audit_logs_url
headers = {
"Accept": "application/gzip",
"X-Risk-Token": token,
"User-Agent": 'user_audit/1.0.0 (Kenna Security)'
}

print('Fetching audit logs data...')
response = requests.get(audit_logs_url, headers=headers, params=params, timeout=60, stream=True)

print('Content-Encoding:', response.headers.get('Content-Encoding'))
print('Receiving data...')

gzip_content = b""
for chunk in tqdm(response.iter_content(chunk_size=8192)): # Wrap the iterable with tqdm()
if chunk: # filter out keep-alive new chunks
gzip_content += chunk

gzip_file = io.BytesIO(gzip_content)

print('Request sent. Decompressing response...')
with gzip.GzipFile(fileobj=gzip_file) as f:
audit_logs_data = f.read().decode()

# Create a StringIO object from the string
audit_logs_io = io.StringIO(audit_logs_data)

# Use the jsonlines library to read the data
print('Response decompressed. Parsing JSON...')
audit_logs = []
with jsonlines.Reader(audit_logs_io) as reader:
for i, obj in enumerate(reader.iter(type=dict, skip_invalid=True), start=1):
#print(f"Line {i}: {obj}") # print line number and JSON object
audit_logs.append(obj)

print('JSON parsed. Processing data...')
audit_logs_processed = []
for log in audit_logs:
details = log.get("audit_log_event", {}).get("details", {})
url = details.get("url")
source = details.get("source")
# Ignore this entry if 'url' is '{base_url}/reports/sla_adherences' and 'source' is 'API'
if url == f"{base_url}/reports/sla_adherences" and source == 'API':
continue
event_data = {
"kenna_user_id": log.get("audit_log_event", {}).get("kenna_user_id"),
"user_email": log.get("audit_log_event", {}).get("user_email"),
"source": source,
"http_method": details.get("http_method"),
"url": url,
"name": log.get("audit_log_event", {}).get("name"),
}
audit_logs_processed.append(event_data)

audit_df = pd.DataFrame(audit_logs_processed)

# Open the existing workbook with 'openpyxl'
wb = load_workbook('cvm_user_audit.xlsx')

# Write the 'Audit Logs' DataFrame to the workbook
with pd.ExcelWriter('cvm_user_audit.xlsx', engine='openpyxl') as writer:
writer.book = wb
merged_df = pd.merge(users_df, audit_df, left_on="user_id", right_on="kenna_user_id", how="inner")
merged_df.to_excel(writer, sheet_name='Audit Logs')

# Get the 'Users' sheet
users_sheet = wb['Users']

# Define a fill for highlighting cells
green_fill = PatternFill(start_color='00FF00', end_color='00FF00', fill_type='solid')

# Get emails from audit logs data with 'source' as 'API'
api_emails = [log['user_email'] for log in audit_logs_processed if log['source'] == 'API']

# Iterate over the rows in the 'Users' DataFrame
for i, row in users_df.iterrows():
email = row['email'] # Assuming 'email' is a column in your DataFrame
# Check if the email is in api_emails
if email in api_emails:
# If it is, highlight the entire row
for j in range(1, len(row) + 1):
users_sheet.cell(row=i+2, column=j).fill = green_fill # i+2 because DataFrame is 0-indexed and Worksheet is 1-indexed, and we have a header row

# Save the workbook
wb.save('cvm_user_audit.xlsx')

print('User, role, and audit log data has been saved to the file cvm_user_audit.xlsx.')
print('Users that have never logged in are highlighted in red. Users that have not logged in for over 30 days are highlighted in yellow.')
print('Users that have "source" as "API" in "Audit Logs" and used their key in the stipulated audit period are highlighted in green.')

0 comments on commit e4457b4

Please sign in to comment.