-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate-data-quality-report.py
40 lines (35 loc) · 1.14 KB
/
generate-data-quality-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# -- Active: 1671480993031@@127.0.0.1@3306
import os
import psycopg2
import pandas as pd
from tabulate import tabulate
import mytests
# import the data quality checks
from dataqualitychecks import check_for_nulls
from dataqualitychecks import check_for_min_max
from dataqualitychecks import check_for_valid_values
from dataqualitychecks import check_for_duplicates
from dataqualitychecks import run_data_quality_check
# Data quality report: connect to the billingDW warehouse, run every test
# declared in ``mytests`` (module-level names starting with "test"), print
# the outcomes as a table, and always release the connection afterwards.

# Connect to the database. The password comes from the POSTGRES_PASSWORD
# environment variable so that it is not hard-coded in the script.
pgpassword = os.environ.get('POSTGRES_PASSWORD')
conn = psycopg2.connect(
    user="postgres",
    password=pgpassword,
    host="localhost",
    port="5432",
    database="billingDW",
)
print("Connected to data warehouse")

try:
    # Start of data quality checks
    results = []
    tests = {
        key: value
        for key, value in vars(mytests).items()
        if key.startswith('test')
    }
    for test in tests.values():
        # Build a fresh kwargs mapping instead of writing 'conn' into the
        # test definition, so the dicts owned by ``mytests`` stay unmodified.
        # An existing 'conn' entry is still overridden, as before.
        results.append(run_data_quality_check(**{**test, 'conn': conn}))

    report_columns = ['Test Name', 'Table', 'Column', 'Test Passed']
    if results:
        # NOTE(review): presumably each result is a 4-field record matching
        # report_columns -- confirm against run_data_quality_check.
        df = pd.DataFrame(results)
        df.index += 1  # number the report rows starting from 1
        df.columns = report_columns
    else:
        # With no tests the frame would have zero columns and the column
        # assignment above would raise; emit an empty, labelled report.
        df = pd.DataFrame(columns=report_columns)
    print(tabulate(df, headers='keys', tablefmt='psql'))
    # End of data quality checks
finally:
    # Close the connection even when a check or the report step fails.
    conn.close()
    print("Disconnected from data warehouse")