-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzzz_rename&move_pics.R
160 lines (139 loc) · 6.53 KB
/
zzz_rename&move_pics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# TOOLS & DATA
# library() stops with an error when 'here' is not installed; require() would
# only warn and return FALSE, letting the script fail later with a less clear message.
library(here)
# NOTE(review): R/tools.R presumably attaches data.table and glue, which the
# code below uses without loading them itself - confirm.
source(here::here('R/tools.R'))
# April_MAY_2021
# Lists the photos under April_May_2021/Photos_PRE_sample_ID_MAY, builds a new
# file name for each ("Ruff <sample_ID>_<rest of original name>") and copies
# them into the project's 'all_photos' folder.
photo_dir <- here::here('April_May_2021/Photos_PRE_sample_ID_MAY/')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' match (a bare '.jpg' also matches names such as
# 'x.jpg_metadata.xml').
jpg_pattern <- '\\.jpg$'
d <- data.table(
  f = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
  f2 = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
)
# drop anything whose relative path mentions 'metadata'
d <- d[!grepl('metadata', f2, fixed = TRUE)]
# sample ID = characters 6-8 of the path relative to photo_dir
d[, sample_ID := substring(f2, 6, 8)]
# file name without any leading folders
d[, file_name := sub(".*/", "", f2)]
#d[nchar(file_name)<32]
# Insert a date into the file names of samples 400-402.
# NOTE(review): the character offsets differ per sample, presumably because the
# raw names differ - verify against the actual files.
d[sample_ID %in% c('400'), file_name := paste(substring(file_name,1,8), '2021-04-19', substring(file_name,10))]
d[sample_ID %in% c('401'), file_name := paste(substring(file_name,1,9), '2021-04-19', substring(file_name,11))]
d[sample_ID %in% c('402'), file_name := paste(substring(file_name,1,8), '2021-04-20', substring(file_name,11))]
#paste(substring('Ruff 320 Snap-591.jpg',1,8), '2021-04-20', substring('Ruff 320 Snap-591.jpg',10))
#dd = d[sample_ID %in% c('400')]
d[, new_name := paste0("Ruff ", sample_ID, "_", substring(file_name, 6))]
# names where "p-" sits 8-7 characters before the end (i.e. "p-" followed by
# three characters and ".jpg") get a "0" inserted after "p-"
d[substring(new_name, nchar(new_name)-8, nchar(new_name)-7) == "p-", new_name := gsub("p-", "p-0", new_name)]
#d$new_name
# rename & copy
# file.copy() is vectorised and returns one logical per file; it also copes with
# zero files, where `1:nrow(d)` would have iterated over c(1, 0).
# The target is anchored at the project root, matching the folder the later
# check reads (here::here('all_photos')), and built with file.path() so that
# braces in a file name are not interpreted by glue().
copied <- file.copy(from = d$f, to = file.path(here::here('all_photos'), d$new_name))
if (!all(copied)) {
  warning(sum(!copied), ' of ', length(copied),
          ' photos were not copied to all_photos (missing source or target folder, or target already exists)',
          call. = FALSE)
}
# June_2021
# Lists the photos under June_2021/Photos_PRE_sample_ID_no, builds a new file
# name for each ("Ruff <sample_ID>_<rest of original name>") and copies them
# into the project's 'all_photos' folder.
photo_dir <- here::here('June_2021/Photos_PRE_sample_ID_no')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' match (a bare '.jpg' also matches names such as
# 'x.jpg_metadata.xml').
jpg_pattern <- '\\.jpg$'
d <- data.table(
  f = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
  f2 = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
)
# drop anything whose relative path mentions 'metadata'
d <- d[!grepl('metadata', f2, fixed = TRUE)]
# sample ID = characters 6-8 of the path relative to photo_dir
d[, sample_ID := substring(f2, 6, 8)]
# file name without any leading folders
d[, file_name := sub(".*/", "", f2)]
#d[nchar(file_name)<32]
d[, new_name := paste0("Ruff ", sample_ID, "_", substring(file_name, 6))]
#d[substring(new_name, nchar(new_name)-8, nchar(new_name)-7) == "p-", new_name := gsub("p-", "p-0", new_name)]
# rename & copy
# file.copy() is vectorised and returns one logical per file; it also copes with
# zero files, where `1:nrow(d)` would have iterated over c(1, 0).
# The target is anchored at the project root, matching the folder the later
# check reads (here::here('all_photos')), and built with file.path() so that
# braces in a file name are not interpreted by glue().
copied <- file.copy(from = d$f, to = file.path(here::here('all_photos'), d$new_name))
if (!all(copied)) {
  warning(sum(!copied), ' of ', length(copied),
          ' photos were not copied to all_photos (missing source or target folder, or target already exists)',
          call. = FALSE)
}
# test
# Interactive check of what is in 'all_photos': number of photos and the
# sample IDs present.
photo_dir <- here::here('all_photos')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' are counted.
jpg_pattern <- '\\.jpg$'
d <- data.table(
  f = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
  f2 = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
)
# sample ID = characters 6-8 of the path relative to photo_dir
d[, sample_ID := substring(f2, 6, 8)]
nrow(d)
unique(d$sample_ID)
length(unique(d$sample_ID))
# REST with correct names
# Lists the photos under June_2021/Photos_WITH_sample_ID_no and copies them
# into the project's 'all_photos' folder under their existing file names.
photo_dir <- here::here('June_2021/Photos_WITH_sample_ID_no')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' match (a bare '.jpg' also matches names such as
# 'x.jpg_metadata.xml').
jpg_pattern <- '\\.jpg$'
d <- data.table(
  f = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
  f2 = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
)
# drop anything whose relative path mentions 'metadata'
d <- d[!grepl('metadata', f2, fixed = TRUE)]
# sample ID = characters 6-8 of the path relative to photo_dir
d[, sample_ID := substring(f2, 6, 8)]
# file name without any leading folders
d[, file_name := sub(".*/", "", f2)]
#d[nchar(file_name)<32]
# test whether abnormal sperm sample present
d[grepl('abn', f2, fixed = TRUE)]
#d[, new_name := paste0("Ruff ", sample_ID, "_", substring(file_name, 6))]
#d[substring(new_name, nchar(new_name)-8, nchar(new_name)-7) == "p-", new_name := gsub("p-", "p-0", new_name)]
# copy
# file.copy() is vectorised and returns one logical per file; it also copes with
# zero files, where `1:nrow(d)` would have iterated over c(1, 0).
# The target is anchored at the project root, matching the folder the later
# check reads (here::here('all_photos')), and built with file.path() so that
# braces in a file name are not interpreted by glue().
copied <- file.copy(from = d$f, to = file.path(here::here('all_photos'), d$file_name))
if (!all(copied)) {
  warning(sum(!copied), ' of ', length(copied),
          ' photos were not copied to all_photos (missing source or target folder, or target already exists)',
          call. = FALSE)
}
# test
# Interactive check of what is in 'all_photos': number of photos, the sample
# IDs present, photos per sample, and samples with fewer than 10 photos.
photo_dir <- here::here('all_photos')
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' are counted.
jpg_pattern <- '\\.jpg$'
d <- data.table(
  f = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
  f2 = list.files(path = photo_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
)
# sample ID = characters 6-8 of the path relative to photo_dir
d[, sample_ID := substring(f2, 6, 8)]
nrow(d)
unique(d$sample_ID)
length(unique(d$sample_ID))
summary(factor(d$sample_ID))
# number of photos per sample, then the samples with fewer than 10
dd <- d[, .N, by = sample_ID]
dd[N < 10]
# RANDOMIZE and RENAME
# Lists the photos in the numbered sub-folders all_photos/1 ... all_photos/7,
# derives sample and bird IDs from the file names, assigns every photo a random
# ID, saves the mapping as CSV and copies each photo to random_inv/<id>.jpg.

# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in '.jpg' match.
jpg_pattern <- '\\.jpg$'
# one listing per sub-folder, stacked in folder order 1..7
d <- rbindlist(lapply(1:7, function(folder) {
  folder_dir <- here::here(paste0('all_photos/', folder))
  data.table(
    f = list.files(path = folder_dir, pattern = jpg_pattern, recursive = TRUE, full.names = TRUE),
    f2 = list.files(path = folder_dir, pattern = jpg_pattern, recursive = TRUE, full.names = FALSE)
  )
}))
# sample ID = characters 6-8 of the path relative to the sub-folder
d[, sample_ID := substring(f2, 6, 8)]
# file name without any leading folders
d[, file_name := sub(".*/", "", f2)]
# bird ID = everything from character 10 up to 29 characters before the end;
# when that is empty, fall back to cutting 25 characters from the end instead
d[, bird_ID := substring(f2, 10, nchar(f2) - 29)]
d[bird_ID == "", bird_ID := substring(f2, 10, nchar(f2) - 25)]
#substring('Ruff 018_1307 2021-11-10 Snap-3277.jpg', 10,nchar('Ruff 018_1307 2021-11-10 Snap-3277.jpg')-25)
d[bird_ID == 'G200067 2', bird_ID := 'G200067']
length(unique(d$bird_ID)) # 1 - 14, 2 - 16, 3 - 26, 4 - 16, 5 - 23
#d[nchar(file_name)<32]
# TEST
# samples with fewer than 10 photos
dd <- d[, .N, by = sample_ID]
#dd
dd[N < 10]
# birds with fewer / more than 10 photos
dd <- d[, .N, by = bird_ID]
#dd
dd[N < 10]
dd[N > 10]
# for birds with more than 10 photos: photo counts per bird and sample
d[bird_ID %in% dd$bird_ID[dd$N > 10], .N, by = list(bird_ID, sample_ID)]
# add randomization
# random permutation of 1..N, zero-padded to at least three digits
# (same result as padding one- and two-digit numbers by hand)
d[, id := sprintf('%03d', sample.int(n = .N, size = .N))]
d <- d[order(id)]
# Paths are anchored at the project root, like the inputs above.
fwrite(d, file = here::here('R/all_randomized_2022-03-21.csv'))
random_dir <- here::here('random_inv')
# seq_len() skips the loop for an empty table, where 1:nrow(d) would run on c(1, 0)
copied <- logical(nrow(d))
for (i in seq_len(nrow(d))) {
  #i = 1
  copied[i] <- file.copy(from = d$f[i], to = file.path(random_dir, paste0(d$id[i], '.jpg')))
  print(i)
}
# file.copy() does not overwrite existing files and only reports failure through
# its return value; make that visible, because the CSV written above then no
# longer describes the files in random_inv.
if (!all(copied)) {
  warning(sum(!copied), ' of ', length(copied),
          ' photos were not copied to random_inv; the saved CSV mapping may not match the files there',
          call. = FALSE)
}