#!/usr/bin/env python
# bcp.py -- forked from kanedata/find-that-charity
import codecs
import csv
import os
import sys
def convert(bcpdata, lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', escapechar='\\', newline='\n'):
    """
    Rewrite a string of BCP data as quoted, delimited text (CSV by default).

    Every field in the result is wrapped in `quote`. Fields are separated by
    `newdelimiter` and rows by `newline`. Inside the data, each `escapechar`
    is doubled and each `quote` is prefixed with `escapechar`.

    Because the whole result is wrapped in quotes, data that ends with
    `lineterminator` produces a final row holding one empty quoted field.

    Returns the converted string.
    """
    # Order matters: the escape character is doubled before quotes are
    # escaped, so the escape characters added for quotes are not doubled.
    # The separators are swapped in last.
    substitutions = (
        (escapechar, escapechar * 2),
        (quote, escapechar + quote),
        (delimiter, quote + newdelimiter + quote),
        (lineterminator, quote + newline + quote),
    )
    converted = bcpdata
    for old, new in substitutions:
        converted = converted.replace(old, new)
    return ''.join((quote, converted, quote))
def stream(file, lineterminator='*@@*', delimiter='@**@', encoding='utf-8'):
    """
    Returns a generator for parsing a BCP file. The input is a file object, and
    the output is a list of field strings for each row in the data.

    The file may be opened in binary mode, in which case the bytes are decoded
    using `encoding`, or in text mode, in which case the text is used as read
    and `encoding` is ignored.

    A last row that is not followed by `lineterminator` is still yielded. No
    row is yielded for the empty remainder after a final line terminator, so
    an empty file yields nothing.

    Raises UnicodeDecodeError if binary input is not valid in `encoding`.

    Usage:

        with open("bcp_file.bcp", 'rb') as bcpfile:
            for bcpfields in stream(bcpfile):
                print(bcpfields)

    More pythonic, but slower than using the `convert()` function above and then
    parsing the CSV file, as the python CSV utilities are written in pure C.
    """
    chunk_size = 8192
    # An incremental decoder keeps an incomplete multi-byte sequence until the
    # next read supplies the rest, so characters split across two reads
    # decode correctly.
    decoder = codecs.getincrementaldecoder(encoding)()
    delimiter_len = len(delimiter)
    lineterminator_len = len(lineterminator)

    pending = ''
    fields = []
    while True:
        chunk = file.read(chunk_size)
        at_eof = not chunk
        if isinstance(chunk, bytes):
            # final=True at end of file makes a truncated sequence an error
            # instead of silently discarding it
            chunk = decoder.decode(chunk, final=at_eof)

        for char in chunk:
            pending += char

            # check if we've seen a field delimiter
            if pending[-delimiter_len:] == delimiter:
                fields.append(pending[:-delimiter_len])
                pending = ''

            # check if we've seen a line delimiter
            if pending[-lineterminator_len:] == lineterminator:
                fields.append(pending[:-lineterminator_len])
                pending = ''
                yield fields
                fields = []

        if at_eof:
            break

    # flush a last row that was not closed by a line terminator
    if pending or fields:
        fields.append(pending)
        yield fields
def main():
    """
    Command-line entry point: convert a BCP file into a CSV file.

    Usage:

        python bcp.py BCP_FILE [CSV_FILE]

    When CSV_FILE is omitted it is derived from BCP_FILE: a trailing `.bcp`
    extension is swapped for `.csv`, and any other name simply has `.csv`
    appended. The derived name therefore never equals the input name.

    Exits with a message if BCP_FILE is missing, or if CSV_FILE names the same
    path as BCP_FILE (opening it for writing would truncate the input).
    """
    # get arguments
    if len(sys.argv) < 2:
        sys.exit('Usage: python bcp.py BCP_FILE [CSV_FILE]')
    bcp_filename = sys.argv[1]

    if len(sys.argv) > 2:
        csv_filename = sys.argv[2]
    else:
        # only the file extension is rewritten, never a '.bcp' that appears
        # earlier in the path
        root, extension = os.path.splitext(bcp_filename)
        if extension.lower() == '.bcp':
            csv_filename = root + '.csv'
        else:
            csv_filename = bcp_filename + '.csv'

    if os.path.abspath(csv_filename) == os.path.abspath(bcp_filename):
        sys.exit('Output file must differ from input file: ' + bcp_filename)

    # have to check system version annoyingly: the csv module wants text files
    # opened with newline='' on python 3, but binary files on python 2
    if sys.version_info >= (3, 0):
        read_mode, write_mode, write_options = 'r', 'w', {'newline': ''}
    else:
        read_mode, write_mode, write_options = 'rb', 'wb', {}

    with open(bcp_filename, read_mode) as bcpfile:
        with open(csv_filename, write_mode, **write_options) as csvfile:
            writer = csv.writer(csvfile)
            for bcpfields in stream(bcpfile):
                writer.writerow(bcpfields)


if __name__ == '__main__':
    main()