-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
158 lines (134 loc) · 4.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
from dbfread import DBF
from tqdm import tqdm
import pandas as pd
import sqlite3
import time
import csv
import os
load_text="""
#########################################################################
# #
# #
# ..::DBF2SQL::.. #
# Easily Convert Multiple DBF Files To a single SQL Database #
# #
#########################################################################
"""
print(load_text)
print('Selecting y will rebuild the database from dbf files.\n')
print('Load Database?')
load = input('-->')
start_time = time.process_time()
db_filename = 'new_database.db'
fileCheck = os.path.isfile(db_filename)
if fileCheck == True and load.lower() =='y':
os.remove(db_filename)
conn = sqlite3.connect(db_filename)
conn.text_factory = str
def writeCsv(sql_file,file_name='temp',split_count=0):
#This function will export a csv from the sqlite db
#sql_file is for a query string
#file_name is the file name that will be output, don't add csv
#split_count will split the file exporting by the integer passed
#so if you pass it 10 for a 100 record file you will end up with 12 files
#if total row count is less than split count only one file will be exported
name = file_name
cursor = conn.execute(sql_file)
results = cursor.fetchall()
names = [description[0] for description in cursor.description]
ret_count = len(results)
if split_count>0 and split_count<ret_count:
count = 0
file = 1
ret = []
over_count = 0
for r in results:
ret.append(r)
count+=1
over_count+=1
if count==split_count or ret_count==over_count:
if file in [1,2,3,4,5,6,7,8,9]:
l_zero='0'
else:
l_zero=''
split_filename = name+'_'+l_zero+str(file)
print(f"{split_filename.capitalize()} created with {count} rows...\n")
with open('\nexport/'+split_filename+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(names)
writer.writerows(ret)
count=0
file+=1
ret=[]
if split_count==0 or split_count>=ret_count:
print(f"\n{file_name.capitalize()} created with {ret_count} rows...\n")
with open('export/'+name+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(names)
writer.writerows(results)
def dbIndex(table_name,field):
#creates indexes on single table for 1 or more column
#table must be a string, field can be string or list
if type(field) is not list: field = [field]
i_query = ""
for i in field:
idxQuery = "CREATE INDEX idx_"+i+"_"+table_name+" ON "+table_name+" ("+i+" ASC);"
i_query = i_query + idxQuery+'\n'
i_query = i_query[:-1]
conn.execute(i_query)
return i_query
if load.lower()=='y':
files = os.listdir('./data')
files = [x for x in files if not x.startswith('.')]
files = [x for x in files if x.lower().endswith('.dbf')]
files.sort()
print(f"\nA total of {len(files)} tables will be loaded...\n")
time.sleep(2)
#loading tables into database
for table in tqdm(files):
#print(f'Loading {table}...')
dbf = DBF(f'data/{table}')
df = pd.DataFrame(iter(dbf))
if not df.empty:
df.to_sql(name=table[:-4], con=conn, if_exists = 'replace', index=False)
#creating table that gives you an over view of the database
table_overview_ct="""
CREATE TABLE IF NOT EXISTS TABLE_OVERVIEW (
TABLE_NAME TEXT,
COLUMN_NAME TEXT,
DATA_TYPE TEXT,
ROW_COUNT INT
);
"""
conn.execute(table_overview_ct)
print('\nCreating Stats...')
for file in tqdm(files):
table =file[:-4]
#print(f'Creating Stats for {table}...')
cur = conn.execute(f'PRAGMA TABLE_INFO({table})')
data = cur.fetchall()
for d in data:
column_name = d[1]
data_type = d[2]
row_count_q = f"SELECT COUNT(*) FROM {table}"
insert = f"""
INSERT INTO TABLE_OVERVIEW(TABLE_NAME,COLUMN_NAME,DATA_TYPE,ROW_COUNT)
VALUES('{table}','{column_name}','{data_type}',({row_count_q}))
"""
conn.execute(insert)
#example index
#table name can be a single column in as string
#or multiple columns in a list can be passed
dbIndex('TABLE_OVERVIEW','TABLE_NAME')
#clear out previous export files
contents = os.listdir('./export')
contents = [x for x in contents if not x.startswith('.')]
for file in contents:
os.remove('export/'+file)
#example csv export
#Add ,split_count=50 to create multiple files at 50 lines per file,
#or what ever number you like
writeCsv("SELECT * FROM TABLE_OVERVIEW ORDER BY ROW_COUNT DESC",'TABLE_OVERVIEW')
time_took= str(round(time.process_time() - start_time,3))
print(f"Seconds taken... {time_took}\n")
conn.commit()