-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
222 lines (170 loc) · 9.76 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import subprocess
import os
import streamlit as st
import pandas as pd
from PIL import Image
import re
def run_r_script(script_name, input_file=None, output_file=None, apply_mice=None, upsampling=None, m_estimator=None, custom_models=None, m_est_weight=None, n_loops=None,
                 correlation_cutoff=None, RMSE_cutoff=None, supmodel=None, glmb=None, cutoff=None, wcutoff=None):
    """
    Invoke ``Rscript`` on ``script_name``, forwarding the given options as positional arguments.

    Every option left as ``None`` is omitted. The remaining ones are appended
    after the script name in this fixed order: output_file, input_file,
    apply_mice, upsampling, m_estimator, custom_models, m_est_weight, n_loops,
    correlation_cutoff, RMSE_cutoff, cutoff, wcutoff. The two file arguments
    are passed through unchanged; all other values are converted with ``str()``.

    ``supmodel`` and ``glmb`` are accepted but are NOT forwarded to the script.

    Raises:
    - subprocess.CalledProcessError: if the R process exits with a non-zero
      status (the call uses ``check=True``).
    """
    # `Rscript` is looked up on PATH. The original author noted that on macOS the
    # full path /Library/Frameworks/R.framework/Resources/bin/Rscript was used.
    command = ["Rscript", script_name]

    # File arguments come first (output before input) and are not stringified.
    command.extend(path for path in (output_file, input_file) if path is not None)

    # The remaining options keep their relative order; the R side receives them
    # purely by position, so the order here must not change.
    ordered_options = (
        apply_mice,
        upsampling,
        m_estimator,
        custom_models,
        m_est_weight,
        n_loops,
        correlation_cutoff,
        RMSE_cutoff,
        cutoff,
        wcutoff,
    )
    command.extend(str(option) for option in ordered_options if option is not None)

    subprocess.run(command, check=True)
def clean_up(*file_paths):
    """
    Delete temporary files, silently skipping any that do not exist.

    Parameters:
    - file_paths: str, zero or more paths of files to remove

    Errors other than a missing file (for example a path that is a directory,
    or insufficient permissions) are still raised by ``os.remove``.
    """
    for file_path in file_paths:
        # Attempt the removal directly instead of checking existence first:
        # a file that disappears between the check and the removal would
        # otherwise raise FileNotFoundError.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            continue
def convert_rmd_to_r(rmd_file, r_file):
    """
    Convert an R Markdown (.rmd) file to an R script (.r) file by extracting R code chunks.

    Only fenced chunks whose header is ``{r}`` or ``{r ...}`` / ``{r, ...}`` are
    extracted; chunks for other engines (e.g. ``{ruby}``) are skipped. The chunk
    bodies are concatenated in document order, separated by a newline, and
    written to ``r_file`` (overwriting it). Chunk options such as ``eval=FALSE``
    are not interpreted: every R chunk is copied.

    Parameters:
    - rmd_file: str, path to the .rmd file
    - r_file: str, path to the output .r file
    """
    # Read and write as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(rmd_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # `\b` after the "r" keeps engines that merely start with "r" (such as
    # {ruby}) from being treated as R code; `[^}]*` consumes the chunk label
    # and options up to the closing brace of the header.
    code_chunks = re.findall(r'```\{r\b[^}]*\}(.*?)```', content, re.DOTALL)

    # Join all extracted code chunks
    r_code = "\n".join(code_chunks)

    with open(r_file, 'w', encoding='utf-8') as file:
        file.write(r_code)

    print(f"Converted {rmd_file} to {r_file}")
# Example of how you might call these functions:
# convert_rmd_to_r("your_script.Rmd", "your_script.R")
# run_r_script("your_script.R", output_file="output.html", apply_mice=True)
#subprocess.run(["Rscript", "install_r2u.R"], check=True) # leave this commented out if you are running on a local computer
# ---- Page header -----------------------------------------------------------
st.title("**Redshift Classifier**")

# The MICE and SMOTE selectors are only rendered when the preceding option is
# "Yes"; these defaults stand in for them when they are hidden.
use_mice = "No"
apply_smote = "No"
yes_no_options = ["No", "Yes"]

st.write("**Please provide Raw X-ray data**")

# ---- Pre-processing choices (each one unlocks the next) ---------------------
use_m_estimator = st.selectbox("Use M-Estimator?", yes_no_options)
if use_m_estimator == "Yes":
    # weight_cutoff exists only on this path; it is read later only when the
    # M-estimator is enabled.
    weight_cutoff = st.slider("Set Weight Cutoff:", 0.0, 1.0, 0.65, step=0.05)
    st.write(f"Weight cutoff is set to: {weight_cutoff}")
    use_mice = st.selectbox("Apply MICE?", yes_no_options)
    if use_mice == "Yes":
        apply_smote = st.selectbox("Apply SMOTE Balancing?", yes_no_options)

# Summarise the selection with one message, shown below the last visible selector.
if use_m_estimator != "Yes":
    selection_summary = "You chose not to use M-Estimator."
elif use_mice != "Yes":
    selection_summary = "You chose to use M-Estimator but not apply MICE."
elif apply_smote == "Yes":
    selection_summary = "You chose to use M-Estimator, apply MICE, and apply SMOTE balancing."
else:
    selection_summary = "You chose to use M-Estimator, apply MICE, but not apply SMOTE balancing."
st.write(selection_summary)

# ---- Classification threshold and data upload -------------------------------
redshift_cutoff = st.slider(
    'Choose Redshift Cutoff',
    min_value=2.0,
    max_value=4.0,
    value=3.0,
    step=0.1,
)
uploaded_file = st.file_uploader("**Choose a file (.csv or .txt)**", type=['csv', 'txt'])
process_file = st.button("**Submit**")
# PNG files whose names start with one of these prefixes are not displayed.
_HIDDEN_PLOT_PREFIXES = ("AlgoRiskHisto", "AlgoWeightHisto")

# Maps the (use_m_estimator, use_mice, apply_smote) selection to the directory
# that is scanned for plots and the ordered list of R Markdown steps to run.
# Each step is (file stem, pass the uploaded file as input?, cutoff keyword or None).
_M_ESTIMATOR_STEP = ("M-estimator_XRAY_ON_RAW_DATA", True, "wcutoff")
_MICE_STEP = ("MICE-Imputation_RAW-with_M-estimator", True, None)
_PIPELINES = {
    ("No", "No", "No"): (
        "CURRENT_ANALYSIS/Graphics/XRAY/RAW_WITHOUT_M-estimator/",
        [
            ("data-XRAY-Classification_ON_RAW-WITHOUT-M-estimator", True, "cutoff"),
        ],
    ),
    ("Yes", "No", "No"): (
        "CURRENT_ANALYSIS/Graphics/XRAY/RAW_WITH_M-estimator/",
        [
            _M_ESTIMATOR_STEP,
            ("data-XRAY-Classification_ON_RAW-WITH-M-estimator", False, "cutoff"),
        ],
    ),
    ("Yes", "Yes", "No"): (
        "CURRENT_ANALYSIS/Graphics/XRAY/MICE_WITH_M-estimator/",
        [
            _M_ESTIMATOR_STEP,
            _MICE_STEP,
            ("data-XRAY-Classification_ON_MICE-RAW-WITH-M-estimator", False, "cutoff"),
        ],
    ),
    ("Yes", "Yes", "Yes"): (
        "CURRENT_ANALYSIS/Graphics/XRAY/BALANCING/MICE_WITH_M-estimator",
        [
            _M_ESTIMATOR_STEP,
            _MICE_STEP,
            ("balancing_ubSMOTE_XRAY_MICE", False, None),
            ("data-XRAY-Classification_ON_ubBALANCE-MICE-RAW_WITH-M-estimator", False, "cutoff"),
        ],
    ),
}


def _run_steps(steps, input_file, cutoffs):
    """Convert each step's .Rmd file to an .R script and run it, in order.

    ``cutoffs`` maps the keyword names "cutoff" and "wcutoff" to the values
    forwarded to ``run_r_script`` for steps that request them.
    """
    for stem, needs_input, cutoff_name in steps:
        options = {}
        if needs_input:
            options["input_file"] = input_file
        if cutoff_name is not None:
            options[cutoff_name] = cutoffs[cutoff_name]
        r_script = f"{stem}.R"
        convert_rmd_to_r(f"{stem}.Rmd", r_script)
        run_r_script(r_script, **options)


def _show_plots(plot_dir):
    """Display every PNG in ``plot_dir`` except those with a hidden prefix."""
    if not os.path.isdir(plot_dir):
        # Without this guard os.listdir would raise FileNotFoundError.
        st.warning(f"No plots found: directory '{plot_dir}' does not exist.")
        return
    # os.listdir returns names in arbitrary order; sort so the plots appear
    # in the same order on every run.
    for filename in sorted(os.listdir(plot_dir)):
        if not filename.endswith(".png") or filename.startswith(_HIDDEN_PLOT_PREFIXES):
            continue
        plot_file = os.path.join(plot_dir, filename)
        if os.path.exists(plot_file):
            st.image(Image.open(plot_file))


# Run the selected pipeline once a file has been uploaded and Submit was pressed.
if uploaded_file is not None and process_file:
    pipeline = _PIPELINES.get((use_m_estimator, use_mice, apply_smote))
    if pipeline is not None:
        plot_dir, steps = pipeline

        # Persist the upload under the fixed name passed to the R scripts.
        temp_input_file = "training_data.csv"
        with open(temp_input_file, "wb") as f:
            f.write(uploaded_file.getbuffer())

        cutoffs = {
            "cutoff": redshift_cutoff,
            # weight_cutoff is only defined when the M-estimator is enabled.
            "wcutoff": weight_cutoff if use_m_estimator == "Yes" else None,
        }
        _run_steps(steps, temp_input_file, cutoffs)
        _show_plots(plot_dir)