-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranslator.R
93 lines (67 loc) · 4.73 KB
/
translator.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#' ---
#' title: Script to automatically translate a CSV file
#' author: Damian Oswald
#' date: 2023-11-23
#' ---
#' Print the opening banner to the console: the organisation name and year,
#' followed by a multi-line logo drawn with block characters
cat("Federal Office for Agriculture (2023)\n\n █████ ████ █████ ███████████ \n ░░███ ░░███ ░░███ ░░███░░░░░███ \n ███████ ████████ ██████ ████████ █████ ░███ ██████ ███████ ██████ ░███ ░███ \n░░░███░ ░░███░░███ ░░░░░███ ░░███░░███ ███░░ ░███ ░░░░░███ ░░░███░ ███░░███ ░██████████ \n ░███ ░███ ░░░ ███████ ░███ ░███ ░░█████ ░███ ███████ ░███ ░███ ░███ ░███░░░░░███ \n ░███ ███ ░███ ███░░███ ░███ ░███ ░░░░███ ░███ ███░░███ ░███ ███░███ ░███ ░███ ░███ \n ░░█████ █████ ░░████████ ████ █████ ██████ █████░░████████ ░░█████ ░░██████ █████ █████\n ░░░░░ ░░░░░ ░░░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░░░░ ░░░░░ ░░░░░░ ░░░░░ ░░░░░\n\n")
#' Print a single-line text progress bar to the console
#'
#' Writes a carriage return followed by `message`, a bar made of `=`, `>` and
#' `-` characters, and the percentage completed. Because the output starts
#' with a carriage return and contains no newline, repeated calls overwrite
#' the same console line.
#'
#' @param i Number of steps completed so far.
#' @param n Total number of steps.
#' @param message Text shown in front of the bar.
#' @return `NULL`, invisibly (the return value of `cat()`); called for its
#'   side effect of printing.
progressbar <- function (i, n, message = "") {
  # Number of bar characters per step. The available width is clamped at zero
  # so that a message longer than the console width yields an empty bar rather
  # than passing a negative repeat count to strrep(), which is an error.
  w <- max(0, getOption("width") - 6 - nchar(message)) / n
  cat("\r", message, " [", strrep("=", ceiling(i * w)), ">",
      strrep("-", floor((n - i) * w)), "] ",
      paste0(format(round(i / n * 100, 1), nsmall = 1), "% "), sep = "")
}
#' Define all languages to which we want to translate the (English) master data
languages <- c("French", "German", "Italian")

#' Seed R's random number generator
set.seed(1)

#' Attach packages to search path.
library(openai)

#' Set secret OpenAI API key
#' Only the first line of the key file is used, with surrounding whitespace
#' removed: Sys.setenv() needs exactly one value per variable, so a trailing
#' blank line in the file would otherwise make it fail.
local({
  api_key <- trimws(readLines("my-secret-API-key", n = 1L, warn = FALSE))
  if (length(api_key) != 1L || !nzchar(api_key)) {
    stop("The file 'my-secret-API-key' does not contain an API key.",
         call. = FALSE)
  }
  Sys.setenv(OPENAI_API_KEY = api_key)
})

#' Read English data (semicolon-separated)
data <- read.csv(file.path("English", "data-English.csv"), sep = ";")

#' Define the columns we want to translate (by column position)
relevant_columns <- c(1, 4, 6)

#' Define which variables are factors (by column name)
is_factor <- c("Sector", "Status")

#' Read the prepared translations for factor levels (comma-separated, the
#' read.csv default)
factor_names <- read.csv(file.path("resources", "factor-translations.csv"))
#' Loop through every chosen language
#' For each language: recode the factor columns with the prepared translations,
#' send every non-empty cell of the relevant columns to the chat completion
#' API, and write the result to "<language>/data-<language>.csv".
n_rows <- nrow(data)
n_steps <- length(relevant_columns) * n_rows
for (language in languages) {
  # start from a fresh copy of the English data
  X <- data
  # assign factors and change their names: the lower-cased English entries of
  # `factor_names` are the levels, the lower-cased entries for the current
  # language (same variable) are the labels
  for (i in is_factor) {
    of_variable <- factor_names[, "Variable"] == i
    X[, i] <- factor(tolower(X[, i]),
                     levels = tolower(factor_names[factor_names[, "Language"] == "English" & of_variable, "Translation"]),
                     labels = tolower(factor_names[factor_names[, "Language"] == language & of_variable, "Translation"]))
  }
  # loop over columns to be translated; k is the position within
  # relevant_columns and is only needed for the progress bar
  for (k in seq_along(relevant_columns)) {
    j <- relevant_columns[k]
    # loop over every row (seq_len() performs no iterations for an empty table,
    # whereas 1:0 would visit the non-existent rows 1 and 0)
    for (i in seq_len(n_rows)) {
      # only ask for translation if the current string is non-empty
      if (!is.na(X[i, j]) && X[i, j] != "") {
        # save prompt
        prompt <- paste0("Please loosely translate the following expression to ", language, " such that it reads nicely. Make sure to only answer with the valid translation and nothing else. Keep words that are uncommon in ", language, " in the original, but make them italic.\n\n", X[i, j])
        # ask gpt-4 for translation
        translation <- openai::create_chat_completion(
          model = "gpt-4",
          messages = list(list(role = "user",
                               content = prompt))
        )
        # save translation in the data frame
        X[i, j] <- translation$choices$message.content
      }
      # print out progress
      progressbar((k - 1) * n_rows + i, n_steps, paste("Translating to", language))
    }
  }
  # create a directory for the specific language
  if (!dir.exists(language)) dir.create(language)
  # write translated data frame as a semicolon-separated csv
  write.table(X, file = file.path(language, paste0("data-", language, ".csv")), row.names = FALSE, sep = ";")
  # start new line for new language
  cat("\n")
}