-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
53 lines (43 loc) · 1.76 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import argparse
import json
import os
import re
from scraper import yield_iherb_nutrients
def save(json_obj, directory, category):
    """Write one product record to ``<directory>/<category>/iherb_<prod_cd>.json``.

    Args:
        json_obj: Mapping holding the product data. Its ``'prod_cd'`` value
            names the output file; an empty string is used when the key is
            missing.
        directory: Root output directory.
        category: Name of the sub-directory under ``directory``.

    Raises:
        OSError: If the target folder cannot be created or the file cannot
            be written.
        TypeError: If ``json_obj`` contains a value ``json`` cannot serialize.
    """
    prod_cd = json_obj.get('prod_cd', '')
    target_dir = '{}/{}'.format(directory, category)
    # Create the category folder on demand so the function does not depend on
    # the caller having prepared it beforehand.
    os.makedirs(target_dir, exist_ok=True)
    filepath = '{}/iherb_{}.json'.format(target_dir, prod_cd)
    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(filepath, 'w', encoding='utf-8-sig') as fp:
        json.dump(json_obj, fp, indent=2, ensure_ascii=False)
def main():
    """Scrape every category listed in ``categories.txt`` and save the products.

    Command-line options:
        --directory  Output directory (default ``./output``).
        --sleep      Float forwarded to ``yield_iherb_nutrients`` as its
                     second argument (default ``1``).
        --verbose    Print the error behind each product that failed to save.

    Each product yielded for a category is written through ``save``. A failed
    save is counted and skipped so one bad record does not stop the run; the
    count is reported after the category finishes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--directory', type=str,
                        default='./output',
                        help='Output directory')
    parser.add_argument('--sleep', type=float, default=1,
                        help='Sleep time for each submission (post)')
    parser.add_argument('--verbose', dest='VERBOSE', action='store_true')
    args = parser.parse_args()

    directory = args.directory
    sleep = args.sleep
    verbose = args.VERBOSE

    # exist_ok avoids the check-then-create race and makes re-runs safe.
    os.makedirs(directory, exist_ok=True)

    with open('categories.txt', 'r', encoding='utf-8-sig') as f:
        # Strip surrounding whitespace and drop blank lines so stray spaces
        # do not turn into distinct (or empty) category names.
        categories = sorted({line.strip() for line in f if line.strip()})
    print(categories)

    for category in categories:
        # The folder may already exist from a previous run.
        os.makedirs('{}/{}'.format(directory, category), exist_ok=True)
        print('Starts to scrap {} data...'.format(category))
        n_exceptions = 0
        for prod_data in yield_iherb_nutrients(category, sleep):
            try:
                save(prod_data, directory, category)
            except Exception as e:
                # Best effort: keep scraping, but make the failure visible
                # on request instead of discarding it.
                n_exceptions += 1
                if verbose:
                    print('Failed to save a {} product: {!r}'.format(category, e))
        if n_exceptions > 0:
            print('Exist %d nutrient exceptions' % n_exceptions)
# Start the scraper only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()