-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path01_0_transform_data_to_wide_format.py
85 lines (66 loc) · 1.93 KB
/
01_0_transform_data_to_wide_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# ---
# jupyter:
# jupytext:
# cell_metadata_filter: title,tags,-all
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.2
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# %% [markdown]
# # Transform data to wide format for NAGuideR
#
# %% tags=["hide-input"]
import pandas as pd
import pimmslearn
import pimmslearn.models
from pimmslearn.io import datasplits
# %% tags=["hide-input"]
# Snapshot the names defined so far, before the parameters cell below runs.
# `args` is bound to None first so that the name `args` already exists in
# globals() when the snapshot is taken and is therefore part of the key set.
# NOTE(review): presumably pimmslearn.nb.get_params uses this key set to tell
# pre-existing names apart from the parameters defined afterwards - confirm.
args = None
# keys of a *copy* of globals(): names bound later do not appear in this view
args = dict(globals()).keys()
# %% [markdown]
# Papermill script parameters:
# %% tags=["parameters"]
# files and folders
# Datasplit folder with data for experiment
folder_experiment: str = 'runs/example'
folder_data: str = '' # specify data directory if needed
file_format_in: str = 'csv' # file format of the original splits to read (csv here)
# NOTE(review): file_format_out is not read anywhere else in the visible script;
# both output files are written with DataFrame.to_csv - confirm whether it is needed.
file_format_out: str = 'csv' # file format of transformed splits, default csv
# %% tags=["hide-input"]
# Hand the earlier name snapshot and the current globals to get_params.
# NOTE(review): presumably this returns a dict holding only the parameters
# defined after the snapshot - confirm against pimmslearn.nb.get_params.
args = pimmslearn.nb.get_params(args, globals=globals())
# bare expression: shown as the cell output when run as a notebook
args
# %% tags=["hide-input"]
# Turn the collected parameters into the `params` object used below via
# attribute access (`params.data`, `params.file_format_in`).
params = pimmslearn.nb.args_from_dict(args)
# alternative kept for reference (not used): params = OmegaConf.create(args)
params
# %% tags=["hide-input"]
# Load the data splits from the `params.data` folder in the input file format.
# NOTE(review): `data` is not one of the parameters defined in this script;
# presumably args_from_dict derives it from the folder parameters - confirm.
splits = datasplits.DataSplits.from_folder(params.data, file_format=params.file_format_in)
# %% tags=["hide-input"]
# Pivot the training split with unstack().
# NOTE(review): assumes splits.train_X is a Series with a (sample, feature)
# MultiIndex, so that the result has one row per sample - confirm.
train_data = splits.train_X.unstack()
train_data
# %% [markdown]
# Save placeholder sample annotation for use in NAGuideR app which requires such a file
# %% tags=["hide-input"]
# One-column frame 'group' with the constant value 'test' for every entry of
# train_data.index.
annotation = pd.Series('test', train_data.index).to_frame('group')
# NOTE(review): this sets the name on the Index object itself; if that object is
# shared with train_data, train_data.index is renamed as well - confirm.
annotation.index.name = 'Samples'
annotation
# %% tags=["hide-input"]
# Write the placeholder annotation into the params.data folder.
fname = params.data / 'sample_annotation_placeholder.csv'
annotation.to_csv(fname)
fname
# %% [markdown]
# Save with samples in columns
# %% tags=["hide-input"]
# Write the transposed table, so the entries of train_data's index become the
# columns of the CSV file. No fillna (e.g. fillna('Filtered')) is applied
# before writing.
fname = params.data / 'data_wide_sample_cols.csv'
train_data.transpose().to_csv(path_or_buf=fname)
fname
# %% tags=["hide-input"]
# 'data_wide_sample_cols.csv'