This repository has been archived by the owner on Dec 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy path0_fund_processing.R
76 lines (50 loc) · 2.56 KB
/
0_fund_processing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Load the package under development from the current working directory.
devtools::load_all()
# NOTE(review): presumably a project helper that attaches the required
# packages (dplyr, readr, xml2, ...) -- it is not defined in this script.
use_r_packages()
# FIXME: These paths throw "ERROR 1: ... unexpected INCOMPLETE_STRING".
# NOTE(review): the "2?" in "Dropbox (2? Investing)" looks like a mis-encoded
# character (possibly a degree sign) -- TODO confirm against the real folder name.
# Directory containing the per-fund XML universe files.
path <- "/Users/vincentjerosch-herold/Dropbox (2? Investing)/PortCheck/00_Data/03_FundData/Morningstar_RawData/XML_files/FundPool_2019Q4/UniverseFiles/"
# Fund list read from 2dii_fundlist.csv; used below via its `Fundid` and `id` columns.
index <- read_csv("/Users/vincentjerosch-herold/Dropbox (2? Investing)/PortCheck/00_Data/03_FundData/Morningstar_RawData/Administration/GlobalUniverse/2dii_fundlist.csv")
# Global fund pool for 2019Q4; joined to `index` by `id` further down.
master <- read_csv("/Users/vincentjerosch-herold/Dropbox (2? Investing)/PortCheck/00_Data/03_FundData/Morningstar_RawData/Administration/GlobalUniverse/Global_FundPool_2019Q4.csv")
# NOTE(review): the two bare names below are not assigned anywhere in this
# script; they appear to be placeholders for inputs that must already exist
# in the session (evaluating them errors if they do not) -- TODO confirm.
master_fund_list
portfolio
# First cross-reference which ISINs in the portfolio belong to funds that are
# present in `master_fund_list`.
# The pipe after `distinct(isin)` is required: without it the pipeline ends
# there and `inner_join()` is evaluated as a separate call with no `y` table.
project_fund_list <- portfolio %>%
  distinct(isin) %>%
  inner_join(master_fund_list, by = "isin")
# Funds we do not cover yet: keep rows whose `covered` flag is not TRUE and
# reduce them to their unique ids. (`filter()` also drops rows where the
# comparison evaluates to NA.)
missing_fund_list <- distinct(
  filter(project_fund_list, covered != TRUE),
  id
)

# Index of the unique fund ids whose holdings must be loaded for this project.
project_fund_list <- distinct(project_fund_list, id)

# Append this project's missing funds to the master list of missing funds and
# keep each id once. From here the python script that pulls the missing funds
# (and does the funds-of-funds looping) would be run.
# NOTE(review): `global_missing_fund_list` must already exist in the session;
# it is not created anywhere in this script.
global_missing_fund_list <- distinct(
  bind_rows(missing_fund_list, global_missing_fund_list),
  id
)
# Each fund's holdings are stored as a separate .csv, so only the files that
# are relevant for this project are loaded.
# Each fund is stored with the columns: Fund.id, Holding.isin, Holding.Weight
#
# `[["id"]]` extracts the id column as a plain vector (row/column indexing on
# a tibble would return a 1x1 tibble), and `lapply()` builds the result list
# in one step, which also handles an empty `project_fund_list` safely.
extracted_fund_list <- lapply(
  as.character(project_fund_list[["id"]]),
  function(id) read_csv(paste0(ANALYSIS.INPUTS.PATH, id, ".csv"))
)

# Bind all of the per-fund files together. This must read the same list that
# was filled above (the original referenced an undefined `extracted_data_list`).
fund_data <- bind_rows(extracted_fund_list)
# Each fund file is keyed by `id`, so the ids are matched to ISINs through
# `master_fund_list`. The right join keeps every row of `fund_data`; `id` is
# then dropped so that `isin` serves as the primary index, in line with the
# existing process.
fund_data <- select(
  right_join(select(master_fund_list, id, isin), fund_data, by = "id"),
  -id
)
# Re-save each fund's XML file under its `id`: read `<Fundid>.xml` from `path`
# and write the document back to the same directory as `<id>.xml`.
# NOTE(review): iteration starts at row 2, so row 1 of `index` is never
# processed -- confirm whether that is intentional.
# `seq_len(nrow(index))[-1]` equals `2:nrow(index)` when there are at least two
# rows, but is empty (instead of counting backwards) for zero or one row.
for (i in seq_len(nrow(index))[-1]) {
  # `[[` yields scalar values; `index[i, "col"]` would return a 1x1 tibble.
  Fund_id <- index[["Fundid"]][[i]]
  id <- index[["id"]][[i]]
  xml <- read_xml(paste0(path, Fund_id, ".xml"))
  write_xml(xml, paste0(path, id, ".xml"))
  # Lightweight progress indicator.
  cat(paste0("#", i, "; "))
}
# Read the 2019Q4 global fund pool into `master`.
master <- read_csv("/Users/vincentjerosch-herold/Dropbox (2? Investing)/PortCheck/00_Data/03_FundData/Morningstar_RawData/Administration/GlobalUniverse/Global_FundPool_2019Q4.csv")

# Unique ids present in the fund pool.
test <- distinct(master, id)

# Fund pool rows that have a match in `index`, i.e. a non-missing `Fundid`
# after the left join on `id`.
temp <- filter(left_join(master, index, by = "id"), !is.na(Fundid))