Skip to content

Commit

Permalink
Script exists.
Browse files Browse the repository at this point in the history
  • Loading branch information
augustjohnson committed Oct 7, 2024
1 parent 677d6fa commit 0454d5c
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions scripts/data_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
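# The purpose of this file is to:
#   - Go through the .json fixtures under the directory given with --dir
#     (normally the /data directory), including all subdirectories.
#   - Review their contents, read as UTF-8, for explicit "\u" unicode
#     escape sequences.
#   - Fail if it finds any, printing the file, line number and escape so
#     that each one is easy to locate.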

import argparse
import requests
from pathlib import Path

def main():
    """Scan every ``.json`` file under ``--dir`` for explicit ``\\u`` escapes.

    Command-line arguments:
        -d / --dir: directory that is searched recursively for ``*.json``
            files (required).

    Every occurrence of a literal backslash-u sequence is printed together
    with the file path and the 1-based line number it was found on.  A file
    that cannot be decoded as UTF-8 is reported as well instead of aborting
    the whole run with a traceback.

    Raises:
        SystemExit: with the message "Found Unicode Characters" when at
            least one problem was reported, so the process exits non-zero.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-d', '--dir', required=True, type=Path,
                        help='Directory to recursively check files for validation.')

    args = parser.parse_args()
    print("For a given input directory, this goes and checks each .json")
    print("file in all subdirectories for valid/invalid characters.")

    error = False

    # argparse already converted --dir to a Path.  Sorting makes the report
    # order reproducible regardless of filesystem enumeration order.
    for path in sorted(args.dir.rglob('*.json')):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                for line_no, line in enumerate(f, start=1):
                    # Each piece after the first follows one "\u" marker, so
                    # every escape on the line is reported, not just the
                    # first one.
                    for tail in line.split("\\u")[1:]:
                        error = True
                        char = "\\u{}".format(tail[0:4])
                        # This should detect if any explicit unicode
                        # characters are being specified as part of
                        # descriptions.
                        print("UNICODE ESCAPE CHARS FOUND: {}:{} {}".format(path, line_no, char))
        except UnicodeDecodeError as exc:
            # Report which file is not valid UTF-8 and keep checking the
            # remaining files; the run still fails at the end.
            error = True
            print("INVALID UTF-8 ENCODING: {} ({})".format(path, exc))

    if error:
        # SystemExit with a string prints it to stderr and exits with status
        # 1, without depending on the site-provided exit() helper.
        raise SystemExit("Found Unicode Characters")

# Run the check only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 0454d5c

Please sign in to comment.