-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_format.py
70 lines (65 loc) · 3.33 KB
/
data_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import dataset
from pathlib import Path
import numpy as np
import torch
from CNN import CNN
##### Old code, no longer used.
## path_annotations = Path("annotations/original_train_labels.pkl")
## path_samples = Path("samples/original_train")
## audio_data = dataset.MagnaTagATune(path_annotations, path_samples)
## audio_data_test = dataset.MagnaTagATune(Path("annotations/original_test_labels.pkl"), Path("samples/test"))
## # df_oldtrain = audio_data.dataset
## #df_test = audio_data_test.dataset
## # df_oldtrain["file_path"] = df_oldtrain["file_path"].str.replace('train/', '')
## # df_val = df_oldtrain.groupby("part").get_group("c")
## # df_train = df_oldtrain[df_oldtrain['part'] != 'c']
## #df_test["file_path"] = df_test["file_path"].str.replace('val/', '')
# ## print(df_test)
# ## print(df_val)
# ## print(df_train)
# ## df_test.to_pickle(Path("annotations/test_labels.pkl"))
# ## df_val.to_pickle(Path("annotations/val_labels.pkl"))
# ## df_train.to_pickle(Path("annotations/train_labels.pkl"))
## Uncomment to pickle new annotation files with the updated split:
## part "c" of the original training set becomes the validation set.
# path_annotations = Path("annotations/original_train_labels.pkl")
# path_samples = Path("samples/train")
# audio_data_train = dataset.MagnaTagATune(path_annotations, path_samples)
# audio_data_test = dataset.MagnaTagATune(Path("annotations/original_val_labels.pkl"), Path("samples/test"))
# df_oldtrain = audio_data_train.dataset
# df_test = audio_data_test.dataset
# df_val = df_oldtrain.groupby("part").get_group("c")
# df_train = df_oldtrain[df_oldtrain['part'] != 'c']
# df_test.to_pickle(Path("annotations/test_labels.pkl"))
# df_val.to_pickle(Path("annotations/val_labels.pkl"))
# df_train.to_pickle(Path("annotations/train_labels.pkl"))
# Exploration: print test samples tagged with both labels of a pair (label indices 37/22, 19/22, 19/37).
# for i in range(len(list(df_test['label']))):
# #if df_test['label'].iloc[i][30] == 1 and df_test['label'].iloc[i][31] == 1:
# if df_test['label'].iloc[i][37] == 1 and df_test['label'].iloc[i][22] == 1:
# print(df_test.iloc[i]['file_path'])
# print(df_test.iloc[i]['label'])
# if df_test['label'].iloc[i][19] == 1 and df_test['label'].iloc[i][22] == 1:
# print(df_test.iloc[i]['file_path'])
# print(df_test.iloc[i]['label'])
# if df_test['label'].iloc[i][19] == 1 and df_test['label'].iloc[i][37] == 1:
# print(df_test.iloc[i]['file_path'])
# print(df_test.iloc[i]['label'])
## Exploration: print a saved model's thresholded predictions next to the true labels for one test sample.
# path_annotations_test = Path("annotations/test_labels.pkl")
# test_dataset = dataset.MagnaTagATune(path_annotations_test, Path("samples/"))
# test_loader = torch.utils.data.DataLoader(
# test_dataset,
# shuffle=False,
# pin_memory=True,
# )
# best_state = torch.load(Path("models/CNN_batch_size=10_epochs=30_learning_rate=0.0075_momentum=0.95_dropout=0_length_conv=256_stride_conv=256_normalisation=minmax_outchannel_stride=32_model=Basic_inner_norm=None_run_27"))
# best_model = CNN(**best_state["kwargs"])
# best_model.load_state_dict(best_state["model"])
# best_model = best_model.to(torch.device("cuda"))
# best_model.eval()
# with torch.no_grad():
# for filename, batch, labels in test_loader:
# if 'val/d/beth_quist-shall_we_dance-02-monarch_dance-204-233.npy' in filename:
# logits = best_model(batch.to(torch.device("cuda")))
# print(torch.where(logits>0.1, 1,0))
# print(labels)