Skip to content

Commit

Permalink
Python code style changes (started using a linter!)
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed Jun 27, 2017
1 parent 8b31fe5 commit d40201f
Show file tree
Hide file tree
Showing 7 changed files with 361 additions and 274 deletions.
44 changes: 23 additions & 21 deletions data_import/bcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import csv


def convert(bcpdata, lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', escapechar='\\', newline='\n'):
"""
returns data from a string of BCP data. Default is to present as CSV data.
Expand All @@ -13,50 +14,52 @@ def convert(bcpdata, lineterminator='*@@*', delimiter='@**@', quote='"', newdeli
bcpdata = quote + bcpdata + quote
return bcpdata


def stream(file, lineterminator='*@@*', delimiter='@**@', encoding='utf-8'):
    """
    Return a generator parsing a BCP file into rows.

    The input is a file object (text or binary mode; binary content is
    decoded with `encoding`), and the output is a list of field strings
    for each row in the data.

    Usage:
    with open("bcp_file.bcp", 'rb') as bcpfile:
        for bcpfields in stream(bcpfile):
            print(bcpfields)

    More pythonic, but slower than using the `convert()` function above and then
    parsing the CSV file, as the python CSV utilities are written in pure C.

    Parameters:
        file: file-like object read one character/byte at a time.
        lineterminator: string marking the end of a row.
        delimiter: string marking the end of a field.
        encoding: used to decode bytes when `file` is opened in binary mode.

    Yields:
        list of str: the fields of each row. As in the original
        implementation, a final (possibly empty) row is always yielded;
        unlike the original, trailing data not followed by a terminator
        is no longer silently dropped — it is emitted as a last field.
    """
    pending = ''   # characters seen since the last delimiter/terminator
    fields = []    # fields accumulated for the current row

    while True:
        ch = file.read(1)
        try:
            # Binary-mode files return bytes; decode them. Text-mode
            # files return str, which has no .decode — ignore.
            ch = ch.decode(encoding)
        except AttributeError:
            pass

        if not ch:
            break

        pending += ch

        # check if we've seen a field delimiter
        if pending.endswith(delimiter):
            fields.append(pending[:-len(delimiter)])
            pending = ''

        # check if we've seen a line delimiter
        if pending.endswith(lineterminator):
            fields.append(pending[:-len(lineterminator)])
            pending = ''
            yield fields
            fields = []

    # Bug fix: input that does not end with `lineterminator` used to lose
    # whatever was left in the buffer; flush it as a final field instead.
    if pending:
        fields.append(pending)
    yield fields


def main():

# get arguments
Expand All @@ -65,20 +68,19 @@ def main():
csv_filename = sys.argv[2]
except IndexError:
csv_filename = bcp_filename.replace('.bcp', '.csv')

# have to check system version annoyingly
# for python 3 >
if sys.version_info >= (3,0):
if sys.version_info >= (3, 0):

with open(bcp_filename, 'r') as bcpfile:
with open(csv_filename, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
for bcpfields in stream(bcpfile):
writer.writerow(bcpfields)

# for python 2
else:

with open(bcp_filename, 'rb') as bcpfile:
with open(csv_filename, 'wb') as csvfile:
writer = csv.writer(csvfile)
Expand Down
17 changes: 9 additions & 8 deletions data_import/create_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
}
]


def main():

parser = argparse.ArgumentParser(description='Setup elasticsearch indexes.')
Expand All @@ -51,17 +52,17 @@ def main():
INDEXES[0]["mapping"][0] = args.es_type

for i in INDEXES:
if es.indices.exists( i["name"] ) and args.reset:
print("[elasticsearch] deleting '%s' index..." % ( i["name"] ))
res = es.indices.delete(index = i["name"] )
if es.indices.exists(i["name"]) and args.reset:
print("[elasticsearch] deleting '%s' index..." % (i["name"]))
res = es.indices.delete(index=i["name"])
print("[elasticsearch] response: '%s'" % (res))
if not es.indices.exists( i["name"] ):
print("[elasticsearch] creating '%s' index..." % ( i["name"] ))
res = es.indices.create(index = i["name"] )
if not es.indices.exists(i["name"]):
print("[elasticsearch] creating '%s' index..." % (i["name"]))
res = es.indices.create(index=i["name"])

if "mapping" in i:
res = es.indices.put_mapping(i["mapping"][0], i["mapping"][1], index= i["name"] )
print("[elasticsearch] set mapping on %s index" % ( i["name"] ))
res = es.indices.put_mapping(i["mapping"][0], i["mapping"][1], index=i["name"])
print("[elasticsearch] set mapping on %s index" % (i["name"]))

if __name__ == '__main__':
main()
7 changes: 4 additions & 3 deletions data_import/fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
if not os.path.exists("data"):
os.makedirs("data")


def main():
parser = argparse.ArgumentParser(description='Fetch needed data sources.')
parser.add_argument('--dual', type=str,
Expand Down Expand Up @@ -37,7 +38,7 @@ def main():
if args.oscr:
with zipfile.ZipFile(args.oscr) as oscrzip:
files = oscrzip.infolist()
if len(files)!=1:
if len(files) != 1:
raise ValueError("More than one file in OSCR zip")
with open("data/oscr.csv", "wb") as oscrcsv:
oscrcsv.write(oscrzip.read(files[0]))
Expand All @@ -47,7 +48,7 @@ def main():
ccew_html = urllib.request.urlopen(args.ccew)
ccew_out = "data\ccew.zip"
ccew_folder = "data\ccew"
if ccew_html.status!=200:
if ccew_html.status != 200:
raise ValueError("[CCEW] Could not find Charity Commission data page. Status %s %s" % (ccew_data.status, ccew_data.reason))
ccew_html = ccew_html.read()
ccew_soup = BeautifulSoup(ccew_html, 'html.parser')
Expand All @@ -64,7 +65,7 @@ def main():
bcp_content = ccew_zip.read(f)
csv_content = bcp.convert(bcp_content.decode("latin1"))
csv_filename = f.filename.replace(".bcp", ".csv")
with open( os.path.join(ccew_folder, csv_filename), "w", encoding="latin1" ) as a:
with open(os.path.join(ccew_folder, csv_filename), "w", encoding="latin1") as a:
a.write(csv_content.replace('\x00', ''))
print("[CCEW] write %s" % csv_filename)

Expand Down
Loading

0 comments on commit d40201f

Please sign in to comment.