Commit e6bacdf: upload-02
crjhonor committed Jun 13, 2023 (1 parent: 12d79ed)
Showing 388 changed files with 746,541 additions and 2,595 deletions.
106 changes: 106 additions & 0 deletions ASSA/AUTO_CORRELATION.Rmd
@@ -0,0 +1,106 @@
---
title: "AUTO_CORRELATION"
author: "CHEN RUJIE"
date: "2020/1/12"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
warning = FALSE)
AC <- new.env() #Environment for autocorrelation analysis
#Global Environment Setup------------------------
AC$globalWD <- getwd()
#Loading Libraries.------------------------------
library(readxl)
library(timeDate)
library(timeSeries)
library(fBasics)
```

## Load Monthly Trading Data

With an interest in trading soymeal, monthly trading data for DCE Soymeal and CBOT Soybean futures are loaded, together with COMEX and SHFE gold futures for reference.

```{r Load Trading Data}
#Initialize Functions.===========================
#Functions to Read Trading Data From Excel File
AC$read_excel_TD <- function(contract_name){
#Read Excel File Data.
TD.read <- read_excel(file.path(AC$globalWD, "DATA", paste0(contract_name, ".xls")),
col_types = c("text", "text", "date",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric"),
col_names = TRUE
)
colnames(TD.read) <- c(paste0(contract_name, "_Code"),
paste0(contract_name, "_Contract"),
paste0(contract_name, "_Date"),
paste0(contract_name, "_Open"),
paste0(contract_name, "_High"),
paste0(contract_name, "_Low"),
paste0(contract_name, "_Close"),
paste0(contract_name, "_Chg"),
paste0(contract_name, "_RateofChg"),
paste0(contract_name, "_Vol"),
paste0(contract_name, "_Amn"),
paste0(contract_name, "_Settle"))
#The most recent row has no settle price yet; drop the Settle column so na.omit() keeps the latest data
TD.read <- TD.read[, -12]
TD.read.na <- na.omit(TD.read)
return(TD.read.na)
}
#Executing the Loading Process.==================
##Setting up
setwd(AC$globalWD)
AC$ref_TD <- c("M0_Monthly", "ZS00Y_Monthly", "GC00Y_Monthly", "AU0_Monthly")
AC$ref_TD.text <- c("DCE Soymeal Futures",
"CBOT Soybean Futures",
"COMEX Gold Futures",
"SHFE Gold Futures")
AC$MonthlyTD <- list(NULL)
##Loading Monthly Trading Data
for(i in seq_along(AC$ref_TD)){
  AC$MonthlyTD[[i]] <- AC$read_excel_TD(AC$ref_TD[i])
  names(AC$MonthlyTD)[i] <- paste0(AC$ref_TD[i], "TD") #e.g. "M0_MonthlyTD"
}
```
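
As a quick sanity check on the load, the list names and a peek at one table can be inspected; a minimal sketch (the exact output depends on the Excel files in DATA/):

```{r Quick Load Check}
#Each element is named after its source file, e.g. "M0_MonthlyTD".
names(AC$MonthlyTD)
head(AC$MonthlyTD$M0_MonthlyTD[, 1:5])
```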

```{r Experimental Part I}
#Obtain the monthly return of the CBOT Soybean monthly close. Convert the close data into a timeSeries object for convenience.
x.ts <- timeSeries(AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Close,
charvec = AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Date,
units = "ZS00Y_Monthly_Close"
)
#Calculate the simple return. No logarithm is used.
x.rt <- x.ts[2:length(x.ts)]/x.ts[1:(length(x.ts) - 1)] - 1
#Convert the simple return into a timeSeries object.
x.rt.ts <- timeSeries(x.rt,
charvec = AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Date[2:length(x.ts)],
units = "Simple_Return")
basicStats(x.rt.ts)
```
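
As an aside, log returns are often used instead of simple returns, since they add up across periods; a minimal sketch of the alternative, reusing `x.ts` from above:

```{r Log Return Sketch}
#Log return: log(P_t) - log(P_{t-1}); for small returns this is close to the simple return.
x.lrt <- diff(log(x.ts), trim = TRUE)
colnames(x.lrt) <- "Log_Return"
basicStats(x.lrt)
```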

```{r Experimental Part II - 1}
plot(x.ts)
plot(x.rt.ts)
acf(x.ts, lag.max = 106)
acf(x.rt.ts, lag.max = 106)
```

```{r Experimental Part II - 2}
#Assume an 18-month seasonality.
AC$eighteen.dummy <- c(0, 0, 0, 1, rep(0, 14))
#Create the seasonal dummy and trim it to align with the return series.
x.dummy <- rep(AC$eighteen.dummy, ceiling(length(x.ts)/18))
x.dummy <- x.dummy[2:length(x.ts)]
model1 <- lm(x.rt.ts$Simple_Return ~ x.dummy)
model1
model2 <- arima(x.rt.ts, order = c(1, 0, 0), seasonal = list(order = c(1, 0, 1),
                period = 18), include.mean = F)
model2
```
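
Whether model2 has absorbed the serial dependence can be checked on its residuals; a minimal illustrative sketch (the lag counts here are arbitrary choices, not from the original analysis):

```{r Residual Diagnostics Sketch}
#Ljung-Box test: a large p-value suggests no remaining autocorrelation.
Box.test(residuals(model2), lag = 36, type = "Ljung-Box")
acf(as.numeric(residuals(model2)), lag.max = 106)
```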

158 changes: 158 additions & 0 deletions ASSA/DeepLearn_FOR_Cycles_Note.Rmd
@@ -0,0 +1,158 @@
---
title: "R Notebook"
output: html_notebook
---

```{r setup, include=FALSE}
#Rmarkdown Global Setup--------------------------------------------------
library(knitr)
knitr::opts_chunk$set(echo = FALSE,
warning = FALSE,
fig.width = 14,
fig.height = 10)
DLC <- new.env()
DLC$Time.start <- Sys.time()
```

RCNN for CYCLES
==================================================

Data Preparation Process

```{r Data Preparation}
library(imputeTS)
library(zoo)
library(xts)
##Functions.-------------------------------------
#Function to obtain return of dataframe.
DLC$return_DF <- function(dataset = as.data.frame(NULL),
                          k = 1,              #Order of difference
                          percentage = TRUE){ #Multiply by 100 or not
lag0 <- dataset[-1:-k, ]
lagk <- dataset[1:(dim(dataset)[1]-k), ]
if(percentage == TRUE){
return.R <- (lag0 - lagk) / lagk
return.R <- return.R * 100
}else{
return.R <- (lag0 - lagk) / lagk
}
return(return.R)
}
##Loading Data into Memory.----------------------
#Import the trends dataset from csv files.
DLC$Data_Loaded <- read.csv("DEEPLEARN/Cycles_Reengineered_Target1.csv", header = TRUE)
#Removing Missing Values.
#Subset the dataset from 2015-03-25 onward, as the China 10yr Treasury future started trading around that date.
DLC$Data_Loaded.na <- subset.data.frame(DLC$Data_Loaded, as.Date(X) >= "2015-03-25")
DLC$Data_Loaded.na <- na.interpolation(DLC$Data_Loaded.na, option = "linear") #na_interpolation() in newer imputeTS
#Dataset Preparation
DLC$Model_Data.date <- as.Date(DLC$Data_Loaded.na[, 1])
DLC$Model_Data.nodate <- DLC$Data_Loaded.na[, -1]
#Return of cycles, percentage = TRUE
DLC$Model_Data.R <- DLC$return_DF(DLC$Model_Data.nodate, k = 1)
##Plotting results and outputs.------------------
plot(DLC$Model_Data.R[, 1], type = "l", main = "Return of Lag 1 of cycles.", ylim = c(-500, 500))
```
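
The helper `DLC$return_DF()` computes the k-th order percentage change column by column; a toy check of its behavior (the values are illustrative only):

```{r return_DF Toy Check}
#With k = 1 and percentage = TRUE, row i of the output is 100 * (x[i+1] - x[i]) / x[i].
toy <- data.frame(a = c(100, 110, 99), b = c(50, 50, 55))
DLC$return_DF(toy, k = 1) #a: 10, -10; b: 0, 10
```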

To begin, a simple network model is applied to the very simple cycle dataset.

First, a look at a neural network model.

Inspired by the iris classification example: the target cycle, which is continuous, can be converted to a discrete, class-like variable.

```{r First Look at a Neural Network Model}
##A 3-column dataset: target, yield, and currency, respectively.
DLC$Sim_Net.Dataset <- DLC$Model_Data.nodate[, 1:3]
DLC$Sim_Net.Dataset.date <- DLC$Model_Data.date
##Obtaining training and testing dataset.
#Make testing dataset as the data of last 21 daily cycles.
DLC$Sim_Net.test.x <- DLC$Sim_Net.Dataset[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1], 2:3]
DLC$Sim_Net.test.y <- DLC$Sim_Net.Dataset[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1], 1]
DLC$Sim_Net.test.date <- DLC$Sim_Net.Dataset.date[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1]]
DLC$Sim_Net.test.label <- round((DLC$Sim_Net.test.y/100), digits = 0)
#And daily cycles before the last 21 days as the Training dataset.
DLC$Sim_Net.train.x <- DLC$Sim_Net.Dataset[1:(dim(DLC$Sim_Net.Dataset)[1] - 21), 2:3]
DLC$Sim_Net.train.y <- DLC$Sim_Net.Dataset[1:(dim(DLC$Sim_Net.Dataset)[1] - 21), 1]
DLC$Sim_Net.train.date <- DLC$Sim_Net.Dataset.date[1:(dim(DLC$Sim_Net.Dataset)[1] - 21)]
DLC$Sim_Net.train.label <- round((DLC$Sim_Net.train.y/100), digits = 0)
plot(DLC$Sim_Net.train.label,
type = "h", main = "Target Plot", xlab = "Date", ylab = "Discrete")
```
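
Before fitting, the distribution of the discretized labels is worth a look, since heavy class imbalance alone can cap accuracy; a quick illustrative check:

```{r Label Distribution Check}
#Counts per discrete class; the largest class sets the trivial baseline accuracy.
table(DLC$Sim_Net.train.label)
```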

```{r}
##Creating Network
##Loading Libraries.
library(keras)
DLC$label.min <- min(DLC$Sim_Net.train.label)
DLC$units.c <- max(DLC$Sim_Net.train.label) - DLC$label.min + 1
##Network Architecture
DLC$Sim_Net.network <- keras_model_sequential() %>%
layer_dense(units = 512, activation = "relu", input_shape = c(2)) %>%
layer_dense(units = DLC$units.c, activation = "softmax")
##Compiling Step
DLC$Sim_Net.network %>% compile(
optimizer = "rmsprop",
loss = "categorical_crossentropy",
metrics = c("accuracy")
)
##Reshaping the input dataset
DLC$Sim_Net.test.x <- array_reshape(as.matrix.data.frame(DLC$Sim_Net.test.x), c(dim(DLC$Sim_Net.test.x)[1], 2))
DLC$Sim_Net.train.x <- array_reshape(as.matrix.data.frame(DLC$Sim_Net.train.x), c(dim(DLC$Sim_Net.train.x)[1], 2))
##One-hot encode the labels. to_categorical() expects classes in 0..(num_classes - 1), so shift by the minimum label first.
DLC$Sim_Net.train.label <- to_categorical(DLC$Sim_Net.train.label - DLC$label.min, num_classes = DLC$units.c)
DLC$Sim_Net.test.label <- to_categorical(DLC$Sim_Net.test.label - DLC$label.min, num_classes = DLC$units.c)
##Fitting process.
DLC$Sim_Net.network %>% fit(DLC$Sim_Net.train.x, DLC$Sim_Net.train.label, epochs = 5, batch_size = 128)
##Prediction. Add DLC$label.min back to recover the original label scale.
DLC$Sim_Net.network %>% predict_classes(DLC$Sim_Net.test.x) + DLC$label.min
```
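
Accuracy on the held-out 21-day window can also be computed directly; a minimal sketch using the standard keras evaluate helper:

```{r Evaluate Sketch}
#Returns the test loss and accuracy for the dense network above.
DLC$Sim_Net.network %>% evaluate(DLC$Sim_Net.test.x, DLC$Sim_Net.test.label)
```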

But the accuracy achieved is very low. In the following step, I will try the concept of a 3D tensor and implement a model on 3D tensor input. The 3D-tensor view of a time series is: the first axis is samples, i.e. the commodities I am trading, with each commodity being one sample; the second axis is the timesteps, the trading dates (daily or hourly); the third axis is the features, such as trading price, volume, or others.
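
To make the axis layout concrete, a minimal sketch of packing such a (samples, timesteps, features) array in R; all dimension sizes and names here are illustrative:

```{r 3D Tensor Layout Sketch}
#2 commodities (samples), 5 trading days (timesteps), 3 features.
tensor3d <- array(rnorm(2 * 5 * 3),
                  dim = c(2, 5, 3),
                  dimnames = list(c("soymeal", "soybean"),
                                  paste0("day", 1:5),
                                  c("close", "volume", "amount")))
dim(tensor3d)   #2 5 3
tensor3d[1, , ] #all timesteps and features of the first sample
```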

A convolutional network is then considered as the model for this dataset.

```{r Simple Convolutional Network}
##Network Architecture
#With only two features per day, treat each observation as a tiny 2 x 1 "image" with 1 channel.
DLC$Convnet <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(1, 1), activation = "relu", input_shape = c(2, 1, 1))
##Dataset Preparation
##A 3-column dataset: target, yield, and currency, respectively.
DLC$Convnet.Dataset <- DLC$Model_Data.nodate[, 1:3]
DLC$Convnet.Dataset.date <- DLC$Model_Data.date
##Obtaining training and testing dataset.
#Make the testing dataset the last 21 daily cycles.
DLC$n.obs <- dim(DLC$Convnet.Dataset)[1]
DLC$test.idx <- (DLC$n.obs - 21 + 1):DLC$n.obs
DLC$Convnet.test.date <- DLC$Convnet.Dataset.date[DLC$test.idx]
DLC$Convnet.test.x <- as.matrix(DLC$Convnet.Dataset[DLC$test.idx, 2:3])
DLC$Convnet.test.y <- DLC$Convnet.Dataset[DLC$test.idx, 1]
DLC$Convnet.test.label <- round((DLC$Convnet.test.y/100), digits = 0)
#And the daily cycles before the last 21 days as the training dataset.
DLC$Convnet.train.date <- DLC$Convnet.Dataset.date[-DLC$test.idx]
DLC$Convnet.train.x <- as.matrix(DLC$Convnet.Dataset[-DLC$test.idx, 2:3])
DLC$Convnet.train.y <- DLC$Convnet.Dataset[-DLC$test.idx, 1]
DLC$Convnet.train.label <- round((DLC$Convnet.train.y/100), digits = 0)
##Reshaping to the 4D shape the conv layer expects: (samples, height, width, channels).
DLC$Convnet.test.x <- array_reshape(DLC$Convnet.test.x, c(dim(DLC$Convnet.test.x)[1], 2, 1, 1))
DLC$Convnet.train.x <- array_reshape(DLC$Convnet.train.x, c(dim(DLC$Convnet.train.x)[1], 2, 1, 1))
```
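
The chunk above stops after the first convolutional layer. Purely as a sketch, one way to finish and fit the model, reusing the label-shifting idea from the dense network (the layer sizes are illustrative assumptions, not the original design):

```{r Convnet Completion Sketch}
DLC$Convnet.label.min <- min(DLC$Convnet.train.label)
DLC$Convnet.units.c <- max(DLC$Convnet.train.label) - DLC$Convnet.label.min + 1
#Flatten the conv features and classify with a softmax head.
DLC$Convnet <- DLC$Convnet %>%
  layer_flatten() %>%
  layer_dense(units = 32, activation = "relu") %>%
  layer_dense(units = DLC$Convnet.units.c, activation = "softmax")
DLC$Convnet %>% compile(optimizer = "rmsprop",
                        loss = "categorical_crossentropy",
                        metrics = c("accuracy"))
DLC$Convnet %>% fit(DLC$Convnet.train.x,
                    to_categorical(DLC$Convnet.train.label - DLC$Convnet.label.min,
                                   num_classes = DLC$Convnet.units.c),
                    epochs = 5, batch_size = 128)
```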

114 changes: 114 additions & 0 deletions ASSA/Script01_Garch.Rmd
@@ -0,0 +1,114 @@
---
title: "BAYESIAN DCC GARCH REPORT"
output:
html_document:
df_print: paged
---

```{r Setup, include=FALSE}
#DESTINATION.====================================
#This script implements the GARCH model, separated from the morning main rmarkdown script. This should smooth
#the data analysis process by reducing the computing time of the main script. Bayesian DCC GARCH is applied to
#trade data, currencies and yields respectively.
#Libraries.======================================
library(fGarch)
library(timeSeries) #timeSeries(), time() and fapply() are used below
library(ggplot2)
library(knitr)
library(scales)
#Using environment for clearness.
S01 <- new.env()
S01$Time.start <- Sys.time()
knitr::opts_chunk$set(echo = FALSE,
warning = FALSE,
fig.width = 16,
fig.height = 6)
```

```{r Functions and Preparations}
#Functions.--------------------------------------
#Function to fit a univariate t-GARCH model and return the one-step-ahead expected shortfall.
S01$Garch_fGarch <- function(Garch_dataset, #univariate data
tprob = 0.99){ #Probability level
#t-GARCH(1, 1) with Student-t innovations
gfit <- garchFit(formula = ~garch(1, 1), data = Garch_dataset,
cond.dist = "std", trace = FALSE)
sigma <- predict(gfit, n.ahead = 1)[3] #one-step-ahead standard deviation forecast
df <- coef(gfit)["shape"]              #estimated degrees of freedom
ES <- sigma * (dt(qt(tprob, df), df)/(1 - tprob)) * ((df + (qt(tprob, df)) ^ 2)/(df - 1))
#Return the Expected Shortfall
return(ES)
}
#Function to Restructure.------------------------
#Reshape a wide dataframe into long (Date, HPFC, LEGENDS) format for plotting.
S01$ReStr.Fun <- function(dataset){
for(ir in 2:dim(dataset)[2]){
if(ir == 2){
ReStr.Dataset <- dataset[, c(1,ir)]
colnames(ReStr.Dataset) <- c("Date", "HPFC")
ReStr.Dataset$LEGENDS <- names(dataset)[ir]
return.ReStr <- ReStr.Dataset
}else{
ReStr.Dataset <- dataset[, c(1, ir)]
colnames(ReStr.Dataset) <- c("Date", "HPFC")
ReStr.Dataset$LEGENDS <- names(dataset)[ir]
return.ReStr <- rbind(return.ReStr,
ReStr.Dataset)
}
}
return(return.ReStr)
}
```
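
For reference, the expected-shortfall line inside `Garch_fGarch()` is the closed-form tail expectation of a Student-t distribution: with $q_p$ the $p$-quantile and $f_\nu$ the density of a t distribution with $\nu$ degrees of freedom (the fitted `shape`), the code computes

$$
\mathrm{ES}_p = \sigma \cdot \frac{f_\nu(q_p)}{1 - p} \cdot \frac{\nu + q_p^2}{\nu - 1},
$$

where $\sigma$ is the one-step-ahead volatility forecast.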

```{r t-GARCH to yields, include=FALSE}
#Functions.======================================
#Function to align the lagged ES forecasts with the matching slice of the original series and bind them.
S01$tGarch_Bind <- function(dataset1, dataset2){
bind.begin <- dim(dataset1)[1] - dim(dataset2)[1] + 1 + 1 #+1 to align lengths, +1 more for the lag
bind.end <- dim(dataset1)[1]
##convert to dataframe
bind.a <- as.data.frame(dataset1)[bind.begin:bind.end, 1]
bind.b <- as.data.frame(dataset2)[-1, 1]
return.bind <- cbind(bind.a, bind.b)
##convert to timeSeries again and return the result
return.bind.ts <- timeSeries(return.bind,
time(dataset1)[bind.begin:bind.end])
return(return.bind.ts)
}
#Processing.=====================================
S01$Time.start <- Sys.time()
#Reading Dataset.--------------------------------
S01$yields.Garch.dataset <- read.csv("GARCH/yields_GARCH_Dataset.csv", header = TRUE)
S01$tGarch.yields.dataset <- timeSeries(S01$yields.Garch.dataset[["CN_10yry_hpfc"]],
as.Date(S01$yields.Garch.dataset[["X"]]))
#Implementing the univariate t-GARCH MODEL.----------
#Using 256 days as the rolling window for the time span
S01$w <- 256
#Rolling-window endpoints: 'from' holds the window start dates, 'to' the matching end dates.
S01$from <- time(S01$tGarch.yields.dataset)[-c((nrow(S01$tGarch.yields.dataset) -
S01$w):nrow(S01$tGarch.yields.dataset))]
S01$to <- time(S01$tGarch.yields.dataset)[-c(1:S01$w)]
S01$tGarch.yields.ES <- fapply(S01$tGarch.yields.dataset, from = S01$from, to = S01$to, FUN = S01$Garch_fGarch)
S01$tGarch.yields.ES.L1 <- lag(S01$tGarch.yields.ES, 1)
S01$tGarch.yields.res <- S01$tGarch_Bind(S01$tGarch.yields.dataset, S01$tGarch.yields.ES.L1)
colnames(S01$tGarch.yields.res) <- c("CYCLE", "ES")
#Plotting out the results.
plot(S01$tGarch.yields.res[, 2], col = "blue", ylim = range(S01$tGarch.yields.res),
main = "CN_10yry: CYCLE and ES 99%", ylab = "Distance from the mean")
points(S01$tGarch.yields.res[, 1], type = "p", cex = 0.2, pch = 19, col = "black")
legend("topleft", legend = c("CYCLE", "ES"), col = c("black", "blue"), lty = c(NA, 1), pch = c(19, NA))
#Saving Output to CSV files.---------------------
write.csv(S01$tGarch.yields.res, "GARCH/t-GARCH_yields_Output.csv", row.names = TRUE)
S01$Time.end <- Sys.time()
S01$Time.used <- difftime(S01$Time.end, S01$Time.start, units = "mins") #force minutes so the report text below is correct
```

PART I: t-GARCH applied to yields.

Report of t-GARCH applied to yields, generated at: `r Sys.time()`.

Time used for modeling: `r round(as.numeric(S01$Time.used), 2)` minutes.