forked from aradhakrishnanGFDL/CatalogBuilder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen_intake_s3.py
executable file
·38 lines (36 loc) · 1.71 KB
/
gen_intake_s3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python3
import os
from intakebuilder import getinfo, s3crawler, CSVwriter
import logging
# Module-level logger named 'local'. Messages at INFO level and above are
# written to the log file below through a FileHandler.
logger = logging.getLogger('local')
logger.setLevel(logging.INFO)
hdlr = logging.FileHandler('/Users/ar46/logs/local.log')
logger.addHandler(hdlr)
def main():
    """Crawl the configured S3 project root and write the result to a CSV file.

    All inputs are hard-coded below; the function takes no arguments.  It
    calls ``s3crawler.sss_crawler`` with the project root and the search
    filters, prints the project root and the value the crawler returned, and
    passes that value together with ``CSVwriter.getHeader()`` to
    ``CSVwriter.listdict_to_csv``.  The parent directory of the CSV file is
    created first when it does not exist.
    """
    # ---- Inputs: edit here, or read them from a config file later ----
    project_root = 's3://esgf-world/CMIP6/'  # DRS-compliant bucket
    csvfile = "/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_s3.csv"
    # NOTE: no bucket region is configured here; the crawler call below does
    # not take one.

    # ---- Search filters ----
    # "source_prefix" must be set, at least to the project level; a longer
    # prefix such as 'CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/' may be given instead.
    # Optional keys: "miptable" (e.g. "Amon") and "varname" (e.g. "tas");
    # leave them out when no filtering by MIP table / variable name is wanted.
    search_filters = {"source_prefix": 'CMIP6/'}

    # The root is printed as configured, then the trailing slash is dropped
    # before it is handed to the crawler.
    print(project_root)
    project_root = project_root.rstrip("/")

    logger.info("Running s3crawler.sss_crawler")
    # NOTE(review): project_root is passed as both the first and the third
    # argument, exactly as before -- confirm against sss_crawler's signature
    # whether the first argument should be the bucket name instead.
    list_files = s3crawler.sss_crawler(project_root, search_filters, project_root, logger)
    print(list_files)
    # TODO: make search strings a dict for later

    headers = CSVwriter.getHeader()
    # exist_ok=True already tolerates an existing directory, so no separate
    # existence check is needed; the guard skips makedirs('') (which raises)
    # when csvfile has no directory component.
    csv_dir = os.path.dirname(csvfile)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)
    CSVwriter.listdict_to_csv(list_files, headers, csvfile)
    # Lazy %-style argument; also restores the separator that was missing
    # between the message text and the path.
    logger.info("CSV generated at %s", os.path.abspath(csvfile))
# Run the catalog generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()