Commit e6bacdf: upload-02
crjhonor committed Jun 13, 2023 (1 parent: 12d79ed)
Showing 388 changed files with 746,541 additions and 2,595 deletions.
106 changes: 106 additions & 0 deletions ASSA/AUTO_CORRELATION.Rmd
@@ -0,0 +1,106 @@
---
title: "AUTO_CORRELATION"
author: "CHEN RUJIE"
date: "2020/1/12"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,
warning = FALSE)
AC <- new.env() #Environment for autocorrelation analysis
#Global Environment Setup------------------------
AC$globalWD <- getwd()
#Loading Libraries.------------------------------
library(readxl)
library(timeDate)
library(timeSeries)
library(fBasics)
```

## Load Monthly Trading Data

With an interest in trading soymeal, monthly trading data for DCE Soymeal and CBOT Soybean futures are loaded, together with COMEX and SHFE gold futures for reference.

```{r Load Trading Data}
#Initialize Functions.===========================
#Functions to Read Trading Data From Excel File
AC$read_excel_TD <- function(contract_name){
#Read Excel File Data.
TD.read <- read_excel(file.path(AC$globalWD, "DATA", paste0(contract_name, ".xls")),
col_types = c("text", "text", "date",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric"),
col_names = TRUE
)
colnames(TD.read) <- c(paste0(contract_name, "_Code"),
paste0(contract_name, "_Contract"),
paste0(contract_name, "_Date"),
paste0(contract_name, "_Open"),
paste0(contract_name, "_High"),
paste0(contract_name, "_Low"),
paste0(contract_name, "_Close"),
paste0(contract_name, "_Chg"),
paste0(contract_name, "_RateofChg"),
paste0(contract_name, "_Vol"),
paste0(contract_name, "_Amn"),
paste0(contract_name, "_Settle"))
#The most recent row has no settle price yet; drop the Settle column so na.omit() keeps the latest data
TD.read <- TD.read[, -12]
TD.read.na <- na.omit(TD.read)
return(TD.read.na)
}
#Executing the Loading Process.==================
##Setting up
setwd(AC$globalWD)
AC$ref_TD <- c("M0_Monthly", "ZS00Y_Monthly", "GC00Y_Monthly", "AU0_Monthly")
AC$ref_TD.text <- c("DCE Soymeal Futures",
"CBOT Soybean Futures",
"COMEX Gold Futures",
"SHFE Gold Futures")
AC$MonthlyTD <- list(NULL)
##Loading Monthly Trading Data
for(i in seq_along(AC$ref_TD)){
  AC$MonthlyTD[[i]] <- AC$read_excel_TD(AC$ref_TD[i])
  names(AC$MonthlyTD)[i] <- paste0(AC$ref_TD[i], "TD") #e.g. "M0_MonthlyTD"
}
```
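
As a quick sanity check on the load, the list names and a peek at one table can be inspected; a minimal sketch (the exact output depends on the Excel files in DATA/):

```{r Quick Load Check}
#Each element is named after its source file, e.g. "M0_MonthlyTD".
names(AC$MonthlyTD)
head(AC$MonthlyTD$M0_MonthlyTD[, 1:5])
```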

```{r Experimental Part I}
#Obtain the monthly return of the CBOT Soybean monthly close. Convert the close data into a timeSeries object for convenience.
x.ts <- timeSeries(AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Close,
charvec = AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Date,
units = "ZS00Y_Monthly_Close"
)
#Calculate the simple return. No logarithm is used.
x.rt <- x.ts[2:length(x.ts)]/x.ts[1:(length(x.ts) - 1)] - 1
#Convert the simple return into a timeSeries object.
x.rt.ts <- timeSeries(x.rt,
charvec = AC$MonthlyTD$ZS00Y_MonthlyTD$ZS00Y_Monthly_Date[2:length(x.ts)],
units = "Simple_Return")
basicStats(x.rt.ts)
```
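
As an aside, log returns are often used instead of simple returns, since they add up across periods; a minimal sketch of the alternative, reusing `x.ts` from above:

```{r Log Return Sketch}
#Log return: log(P_t) - log(P_{t-1}); for small returns this is close to the simple return.
x.lrt <- diff(log(x.ts), trim = TRUE)
colnames(x.lrt) <- "Log_Return"
basicStats(x.lrt)
```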

```{r Experimental Part II - 1}
plot(x.ts)
plot(x.rt.ts)
acf(x.ts, lag.max = 106)
acf(x.rt.ts, lag.max = 106)
```

```{r Experimental Part II - 2}
#Assume an 18-month seasonality.
AC$eighteen.dummy <- c(0, 0, 0, 1, rep(0, 14))
#Create the seasonal dummy and trim it to align with the return series.
x.dummy <- rep(AC$eighteen.dummy, ceiling(length(x.ts)/18))
x.dummy <- x.dummy[2:length(x.ts)]
model1 <- lm(x.rt.ts$Simple_Return ~ x.dummy)
model1
model2 <- arima(x.rt.ts, order = c(1, 0, 0), seasonal = list(order = c(1, 0, 1),
                period = 18), include.mean = F)
model2
```
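
Whether model2 has absorbed the serial dependence can be checked on its residuals; a minimal illustrative sketch (the lag counts here are arbitrary choices, not from the original analysis):

```{r Residual Diagnostics Sketch}
#Ljung-Box test: a large p-value suggests no remaining autocorrelation.
Box.test(residuals(model2), lag = 36, type = "Ljung-Box")
acf(as.numeric(residuals(model2)), lag.max = 106)
```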

158 changes: 158 additions & 0 deletions ASSA/DeepLearn_FOR_Cycles_Note.Rmd
@@ -0,0 +1,158 @@
---
title: "R Notebook"
output: html_notebook
---

```{r setup, include=FALSE}
#Rmarkdown Global Setup--------------------------------------------------
library(knitr)
knitr::opts_chunk$set(echo = FALSE,
warning = FALSE,
fig.width = 14,
fig.height = 10)
DLC <- new.env()
DLC$Time.start <- Sys.time()
```

RCNN for CYCLES
==================================================

Data Preparation Process

```{r Data Preparation}
library(imputeTS)
library(zoo)
library(xts)
##Functions.-------------------------------------
#Function to obtain return of dataframe.
DLC$return_DF <- function(dataset = as.data.frame(NULL),
                          k = 1,              #Order of difference
                          percentage = TRUE){ #Multiply by 100 or not
lag0 <- dataset[-1:-k, ]
lagk <- dataset[1:(dim(dataset)[1]-k), ]
if(percentage == TRUE){
return.R <- (lag0 - lagk) / lagk
return.R <- return.R * 100
}else{
return.R <- (lag0 - lagk) / lagk
}
return(return.R)
}
##Loading Data into Memory.----------------------
#Import the trends dataset from csv files.
DLC$Data_Loaded <- read.csv("DEEPLEARN/Cycles_Reengineered_Target1.csv", header = TRUE)
#Removing Missing Values.
#Subset the dataset from 2015-03-25 onward, as the China 10yr Treasury future started trading around that date.
DLC$Data_Loaded.na <- subset.data.frame(DLC$Data_Loaded, as.Date(X) >= "2015-03-25")
DLC$Data_Loaded.na <- na.interpolation(DLC$Data_Loaded.na, option = "linear") #na_interpolation() in newer imputeTS
#Dataset Preparation
DLC$Model_Data.date <- as.Date(DLC$Data_Loaded.na[, 1])
DLC$Model_Data.nodate <- DLC$Data_Loaded.na[, -1]
#Return of cycles, percentage = TRUE
DLC$Model_Data.R <- DLC$return_DF(DLC$Model_Data.nodate, k = 1)
##Plotting results and outputs.------------------
plot(DLC$Model_Data.R[, 1], type = "l", main = "Return of Lag 1 of cycles.", ylim = c(-500, 500))
```
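
The helper `DLC$return_DF()` computes the k-th order percentage change column by column; a toy check of its behavior (the values are illustrative only):

```{r return_DF Toy Check}
#With k = 1 and percentage = TRUE, row i of the output is 100 * (x[i+1] - x[i]) / x[i].
toy <- data.frame(a = c(100, 110, 99), b = c(50, 50, 55))
DLC$return_DF(toy, k = 1) #a: 10, -10; b: 0, 10
```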

To begin, a simple network model is applied to the very simple cycle dataset.

First, a look at a neural network model.

Inspired by the iris classification example: the target cycle, which is continuous, can be converted to a discrete, class-like variable.

```{r First Look at a Neural Network Model}
##A 3-column dataset: target, yield, and currency, respectively.
DLC$Sim_Net.Dataset <- DLC$Model_Data.nodate[, 1:3]
DLC$Sim_Net.Dataset.date <- DLC$Model_Data.date
##Obtaining training and testing dataset.
#Make testing dataset as the data of last 21 daily cycles.
DLC$Sim_Net.test.x <- DLC$Sim_Net.Dataset[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1], 2:3]
DLC$Sim_Net.test.y <- DLC$Sim_Net.Dataset[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1], 1]
DLC$Sim_Net.test.date <- DLC$Sim_Net.Dataset.date[(dim(DLC$Sim_Net.Dataset)[1] - 21 + 1):dim(DLC$Sim_Net.Dataset)[1]]
DLC$Sim_Net.test.label <- round((DLC$Sim_Net.test.y/100), digits = 0)
#And daily cycles before the last 21 days as the Training dataset.
DLC$Sim_Net.train.x <- DLC$Sim_Net.Dataset[1:(dim(DLC$Sim_Net.Dataset)[1] - 21), 2:3]
DLC$Sim_Net.train.y <- DLC$Sim_Net.Dataset[1:(dim(DLC$Sim_Net.Dataset)[1] - 21), 1]
DLC$Sim_Net.train.date <- DLC$Sim_Net.Dataset.date[1:(dim(DLC$Sim_Net.Dataset)[1] - 21)]
DLC$Sim_Net.train.label <- round((DLC$Sim_Net.train.y/100), digits = 0)
plot(DLC$Sim_Net.train.label,
type = "h", main = "Target Plot", xlab = "Date", ylab = "Discrete")
```
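
Before fitting, the distribution of the discretized labels is worth a look, since heavy class imbalance alone can cap accuracy; a quick illustrative check:

```{r Label Distribution Check}
#Counts per discrete class; the largest class sets the trivial baseline accuracy.
table(DLC$Sim_Net.train.label)
```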

```{r}
##Creating Network
##Loading Libraries.
library(keras)
DLC$label.min <- min(DLC$Sim_Net.train.label)
DLC$units.c <- max(DLC$Sim_Net.train.label) - DLC$label.min + 1
##Network Architecture
DLC$Sim_Net.network <- keras_model_sequential() %>%
layer_dense(units = 512, activation = "relu", input_shape = c(2)) %>%
layer_dense(units = DLC$units.c, activation = "softmax")
##Compiling Step
DLC$Sim_Net.network %>% compile(
optimizer = "rmsprop",
loss = "categorical_crossentropy",
metrics = c("accuracy")
)
##Reshaping the input dataset
DLC$Sim_Net.test.x <- array_reshape(as.matrix.data.frame(DLC$Sim_Net.test.x), c(dim(DLC$Sim_Net.test.x)[1], 2))
DLC$Sim_Net.train.x <- array_reshape(as.matrix.data.frame(DLC$Sim_Net.train.x), c(dim(DLC$Sim_Net.train.x)[1], 2))
##One-hot encode the labels. to_categorical() expects classes in 0..(num_classes - 1), so shift by the minimum label first.
DLC$Sim_Net.train.label <- to_categorical(DLC$Sim_Net.train.label - DLC$label.min, num_classes = DLC$units.c)
DLC$Sim_Net.test.label <- to_categorical(DLC$Sim_Net.test.label - DLC$label.min, num_classes = DLC$units.c)
##Fitting process.
DLC$Sim_Net.network %>% fit(DLC$Sim_Net.train.x, DLC$Sim_Net.train.label, epochs = 5, batch_size = 128)
##Prediction. Add DLC$label.min back to recover the original label scale.
DLC$Sim_Net.network %>% predict_classes(DLC$Sim_Net.test.x) + DLC$label.min
```
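
Accuracy on the held-out 21-day window can also be computed directly; a minimal sketch using the standard keras evaluate helper:

```{r Evaluate Sketch}
#Returns the test loss and accuracy for the dense network above.
DLC$Sim_Net.network %>% evaluate(DLC$Sim_Net.test.x, DLC$Sim_Net.test.label)
```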

But the accuracy achieved is very low. In the following step, I will try the concept of a 3D tensor and implement a model on 3D tensor input. The 3D-tensor view of a time series is: the first axis is samples, i.e. the commodities I am trading, with each commodity being one sample; the second axis is the timesteps, the trading dates (daily or hourly); the third axis is the features, such as trading price, volume, or others.
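
To make the axis layout concrete, a minimal sketch of packing such a (samples, timesteps, features) array in R; all dimension sizes and names here are illustrative:

```{r 3D Tensor Layout Sketch}
#2 commodities (samples), 5 trading days (timesteps), 3 features.
tensor3d <- array(rnorm(2 * 5 * 3),
                  dim = c(2, 5, 3),
                  dimnames = list(c("soymeal", "soybean"),
                                  paste0("day", 1:5),
                                  c("close", "volume", "amount")))
dim(tensor3d)   #2 5 3
tensor3d[1, , ] #all timesteps and features of the first sample
```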

A convolutional network is then considered as the model for this dataset.

```{r Simple Convolutional Network}
##Network Architecture
#With only two features per day, treat each observation as a tiny 2 x 1 "image" with 1 channel.
DLC$Convnet <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(1, 1), activation = "relu", input_shape = c(2, 1, 1))
##Dataset Preparation
##A 3-column dataset: target, yield, and currency, respectively.
DLC$Convnet.Dataset <- DLC$Model_Data.nodate[, 1:3]
DLC$Convnet.Dataset.date <- DLC$Model_Data.date
##Obtaining training and testing dataset.
#Make the testing dataset the last 21 daily cycles.
DLC$n.obs <- dim(DLC$Convnet.Dataset)[1]
DLC$test.idx <- (DLC$n.obs - 21 + 1):DLC$n.obs
DLC$Convnet.test.date <- DLC$Convnet.Dataset.date[DLC$test.idx]
DLC$Convnet.test.x <- as.matrix(DLC$Convnet.Dataset[DLC$test.idx, 2:3])
DLC$Convnet.test.y <- DLC$Convnet.Dataset[DLC$test.idx, 1]
DLC$Convnet.test.label <- round((DLC$Convnet.test.y/100), digits = 0)
#And the daily cycles before the last 21 days as the training dataset.
DLC$Convnet.train.date <- DLC$Convnet.Dataset.date[-DLC$test.idx]
DLC$Convnet.train.x <- as.matrix(DLC$Convnet.Dataset[-DLC$test.idx, 2:3])
DLC$Convnet.train.y <- DLC$Convnet.Dataset[-DLC$test.idx, 1]
DLC$Convnet.train.label <- round((DLC$Convnet.train.y/100), digits = 0)
##Reshaping to the 4D shape the conv layer expects: (samples, height, width, channels).
DLC$Convnet.test.x <- array_reshape(DLC$Convnet.test.x, c(dim(DLC$Convnet.test.x)[1], 2, 1, 1))
DLC$Convnet.train.x <- array_reshape(DLC$Convnet.train.x, c(dim(DLC$Convnet.train.x)[1], 2, 1, 1))
```
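
The chunk above stops after the first convolutional layer. Purely as a sketch, one way to finish and fit the model, reusing the label-shifting idea from the dense network (the layer sizes are illustrative assumptions, not the original design):

```{r Convnet Completion Sketch}
DLC$Convnet.label.min <- min(DLC$Convnet.train.label)
DLC$Convnet.units.c <- max(DLC$Convnet.train.label) - DLC$Convnet.label.min + 1
#Flatten the conv features and classify with a softmax head.
DLC$Convnet <- DLC$Convnet %>%
  layer_flatten() %>%
  layer_dense(units = 32, activation = "relu") %>%
  layer_dense(units = DLC$Convnet.units.c, activation = "softmax")
DLC$Convnet %>% compile(optimizer = "rmsprop",
                        loss = "categorical_crossentropy",
                        metrics = c("accuracy"))
DLC$Convnet %>% fit(DLC$Convnet.train.x,
                    to_categorical(DLC$Convnet.train.label - DLC$Convnet.label.min,
                                   num_classes = DLC$Convnet.units.c),
                    epochs = 5, batch_size = 128)
```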

114 changes: 114 additions & 0 deletions ASSA/Script01_Garch.Rmd
@@ -0,0 +1,114 @@
---
title: "BAYESIAN DCC GARCH REPORT"
output:
html_document:
df_print: paged
---

```{r Setup, include=FALSE}
#DESTINATION.====================================
#This script implements the GARCH model, separated from the morning main rmarkdown script. This should smooth
#the data analysis process by reducing the computing time of the main script. Bayesian DCC GARCH is applied to
#trade data, currencies and yields respectively.
#Libraries.======================================
library(fGarch)
library(timeSeries) #timeSeries(), time() and fapply() are used below
library(ggplot2)
library(knitr)
library(scales)
#Using environment for clearness.
S01 <- new.env()
S01$Time.start <- Sys.time()
knitr::opts_chunk$set(echo = FALSE,
warning = FALSE,
fig.width = 16,
fig.height = 6)
```

```{r Functions and Preparations}
#Functions.--------------------------------------
#Function to fit a univariate t-GARCH model and return the one-step-ahead expected shortfall.
S01$Garch_fGarch <- function(Garch_dataset, #univariate data
tprob = 0.99){ #Probability level
#t-GARCH(1, 1) with Student-t innovations
gfit <- garchFit(formula = ~garch(1, 1), data = Garch_dataset,
cond.dist = "std", trace = FALSE)
sigma <- predict(gfit, n.ahead = 1)[3] #one-step-ahead standard deviation forecast
df <- coef(gfit)["shape"]              #estimated degrees of freedom
ES <- sigma * (dt(qt(tprob, df), df)/(1 - tprob)) * ((df + (qt(tprob, df)) ^ 2)/(df - 1))
#Return the Expected Shortfall
return(ES)
}
#Function to Restructure.------------------------
#Reshape a wide dataframe into long (Date, HPFC, LEGENDS) format for plotting.
S01$ReStr.Fun <- function(dataset){
for(ir in 2:dim(dataset)[2]){
if(ir == 2){
ReStr.Dataset <- dataset[, c(1,ir)]
colnames(ReStr.Dataset) <- c("Date", "HPFC")
ReStr.Dataset$LEGENDS <- names(dataset)[ir]
return.ReStr <- ReStr.Dataset
}else{
ReStr.Dataset <- dataset[, c(1, ir)]
colnames(ReStr.Dataset) <- c("Date", "HPFC")
ReStr.Dataset$LEGENDS <- names(dataset)[ir]
return.ReStr <- rbind(return.ReStr,
ReStr.Dataset)
}
}
return(return.ReStr)
}
```
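
For reference, the expected-shortfall line inside `Garch_fGarch()` is the closed-form tail expectation of a Student-t distribution: with $q_p$ the $p$-quantile and $f_\nu$ the density of a t distribution with $\nu$ degrees of freedom (the fitted `shape`), the code computes

$$
\mathrm{ES}_p = \sigma \cdot \frac{f_\nu(q_p)}{1 - p} \cdot \frac{\nu + q_p^2}{\nu - 1},
$$

where $\sigma$ is the one-step-ahead volatility forecast.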

```{r t-GARCH to yields, include=FALSE}
#Functions.======================================
#Function to align the lagged ES forecasts with the matching slice of the original series and bind them.
S01$tGarch_Bind <- function(dataset1, dataset2){
bind.begin <- dim(dataset1)[1] - dim(dataset2)[1] + 1 + 1 #+1 to align lengths, +1 more for the lag
bind.end <- dim(dataset1)[1]
##convert to dataframe
bind.a <- as.data.frame(dataset1)[bind.begin:bind.end, 1]
bind.b <- as.data.frame(dataset2)[-1, 1]
return.bind <- cbind(bind.a, bind.b)
##convert to timeSeries again and return the result
return.bind.ts <- timeSeries(return.bind,
time(dataset1)[bind.begin:bind.end])
return(return.bind.ts)
}
#Processing.=====================================
S01$Time.start <- Sys.time()
#Reading Dataset.--------------------------------
S01$yields.Garch.dataset <- read.csv("GARCH/yields_GARCH_Dataset.csv", header = TRUE)
S01$tGarch.yields.dataset <- timeSeries(S01$yields.Garch.dataset[["CN_10yry_hpfc"]],
as.Date(S01$yields.Garch.dataset[["X"]]))
#Implementing the univariate t-GARCH MODEL.----------
#Using 256 days as the rolling window for the time span
S01$w <- 256
#Rolling-window endpoints: 'from' holds the window start dates, 'to' the matching end dates.
S01$from <- time(S01$tGarch.yields.dataset)[-c((nrow(S01$tGarch.yields.dataset) -
S01$w):nrow(S01$tGarch.yields.dataset))]
S01$to <- time(S01$tGarch.yields.dataset)[-c(1:S01$w)]
S01$tGarch.yields.ES <- fapply(S01$tGarch.yields.dataset, from = S01$from, to = S01$to, FUN = S01$Garch_fGarch)
S01$tGarch.yields.ES.L1 <- lag(S01$tGarch.yields.ES, 1)
S01$tGarch.yields.res <- S01$tGarch_Bind(S01$tGarch.yields.dataset, S01$tGarch.yields.ES.L1)
colnames(S01$tGarch.yields.res) <- c("CYCLE", "ES")
#Plotting out the results.
plot(S01$tGarch.yields.res[, 2], col = "blue", ylim = range(S01$tGarch.yields.res),
main = "CN_10yry: CYCLE and ES 99%", ylab = "Distance from the mean")
points(S01$tGarch.yields.res[, 1], type = "p", cex = 0.2, pch = 19, col = "black")
legend("topleft", legend = c("CYCLE", "ES"), col = c("black", "blue"), lty = c(NA, 1), pch = c(19, NA))
#Saving Output to CSV files.---------------------
write.csv(S01$tGarch.yields.res, "GARCH/t-GARCH_yields_Output.csv", row.names = TRUE)
S01$Time.end <- Sys.time()
S01$Time.used <- difftime(S01$Time.end, S01$Time.start, units = "mins") #force minutes so the report text below is correct
```

PART I: t-GARCH applied to yields.

Report of t-GARCH applied to yields, generated at: `r Sys.time()`.

Time used for modeling: `r round(as.numeric(S01$Time.used), 2)` minutes.