-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathvariable_used_stats.py
49 lines (37 loc) · 1.18 KB
/
variable_used_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Run this script from the "semcog_urbansim" folder (config paths below are relative to it).
# Extracts all variables used in the model YAML files and counts how often each appears.
# In[ ]:
import pandas as pd
import yaml
###
# extract all variables used and save the counts
# %%
from os import listdir
from os.path import isfile, join
from collections import Counter
# Directory holding the model YAML configs to scan.
# NOTE: each assignment below overrides the previous one, so only the last
# `fpath` takes effect; reorder or comment out lines to switch targets.
fpath = "./configs/repm"  # repm
fpath = "./configs/hlcm_calib"  # hlcm calib
fpath = "./configs/hlcm/large_area_income_quartile"  # income quartile
fpath = "./configs/elcm/large_area_sector"  # elcm large area sector
# Regular files only; sub-directories are not descended into.
onlyfiles = [f for f in listdir(fpath) if isfile(join(fpath, f))]
# %%
# Collect every right-hand-side term from every config into one flat list.
lstv = []
for f in onlyfiles:
    # YAML streams are Unicode; pin UTF-8 rather than the platform default.
    with open(join(fpath, f), "r", encoding="utf-8") as file:
        ym = yaml.safe_load(file)
    # "model_expression" is either a mapping carrying a "right_side" entry
    # or the expression itself.
    expss = ym["model_expression"]
    if isinstance(expss, dict):
        expss = expss["right_side"]
    if isinstance(expss, list):
        lstv += expss
    else:
        # String form: terms separated by "+", e.g. "a + b + c".
        lstv += expss.replace(" ", "").split("+")
# Tally how often each term occurs and write the table, highest count first.
# The CSV is named after the config path, e.g. "configs_elcm_large_area_sector.csv".
dfc = pd.DataFrame(list(Counter(lstv).items()), columns=["variables", "counts"])
dfc.sort_values(by="counts", ascending=False).to_csv(
    fpath.replace("./", "").replace("/", "_") + ".csv"
)
#%%
# Scratch cell: does the most recently loaded config use the mapping form
# with a "right_side" entry? The isinstance guard makes a plain string or
# list expression evaluate to False instead of raising AttributeError.
isinstance(ym["model_expression"], dict) and "right_side" in ym["model_expression"]
# %%
# Scratch cell: display the most recently loaded config.
ym
# %%