Skip to content

Commit

Permalink
v3.2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
chen-001 committed Sep 16, 2022
1 parent b5b598b commit eb6d6b4
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 84 deletions.
4 changes: 2 additions & 2 deletions pure_ocean_breeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
一个量化多因子研究的框架,包含数据、回测、因子加工等方面的功能
"""

__updated__ = "2022-09-15 23:53:25"
__version__ = "3.2.4"
__updated__ = "2022-09-16 10:32:33"
__version__ = "3.2.5"
__author__ = "chenzongwei"
__author_email__ = "[email protected]"
__url__ = "https://github.com/chen-001/pure_ocean_breeze"
Expand Down
82 changes: 33 additions & 49 deletions pure_ocean_breeze/data/read_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2022-09-13 18:05:53"
__updated__ = "2022-09-16 10:32:20"

import os
import numpy as np
Expand Down Expand Up @@ -81,86 +81,70 @@ def read_daily(
if path:
return pd.read_feather(homeplace.daily_data_file + path).set_index("date")
elif open:
opens = pd.read_feather(
homeplace.daily_data_file + "opens.feather"
).set_index("date")
opens = pd.read_feather(homeplace.daily_data_file + "opens.feather")
df = opens
df = df.set_index(list(df.columns)[0])
elif close:
closes = pd.read_feather(
homeplace.daily_data_file + "closes.feather"
).set_index("date")
closes = pd.read_feather(homeplace.daily_data_file + "closes.feather")
df = closes
df = df.set_index(list(df.columns)[0])
elif high:
highs = pd.read_feather(
homeplace.daily_data_file + "highs.feather"
).set_index("date")
highs = pd.read_feather(homeplace.daily_data_file + "highs.feather")
df = highs
df = df.set_index(list(df.columns)[0])
elif low:
lows = pd.read_feather(
homeplace.daily_data_file + "lows.feather"
).set_index("date")
lows = pd.read_feather(homeplace.daily_data_file + "lows.feather")
df = lows
df = df.set_index(list(df.columns)[0])
elif tr:
trs = pd.read_feather(homeplace.daily_data_file + "trs.feather").set_index(
"date"
)
trs = pd.read_feather(homeplace.daily_data_file + "trs.feather")
df = trs
df = df.set_index(list(df.columns)[0])
elif sharenum:
sharenums = pd.read_feather(
homeplace.daily_data_file + "sharenums.feather"
).set_index("date")
sharenums = pd.read_feather(homeplace.daily_data_file + "sharenums.feather")
df = sharenums
df = df.set_index(list(df.columns)[0])
elif volume:
volumes = pd.read_feather(
homeplace.daily_data_file + "volumes.feather"
).set_index("date")
volumes = pd.read_feather(homeplace.daily_data_file + "volumes.feather")
df = volumes
df = df.set_index(list(df.columns)[0])
elif age:
age = pd.read_feather(homeplace.daily_data_file + "ages.feather").set_index(
"date"
)
age = pd.read_feather(homeplace.daily_data_file + "ages.feather")
df = age
df = df.set_index(list(df.columns)[0])
elif flow_cap:
closes = pd.read_feather(
homeplace.daily_data_file + "closes_unadj.feather"
).set_index("date")
sharenums = pd.read_feather(
homeplace.daily_data_file + "sharenums.feather"
).set_index("date")
closes = pd.read_feather(homeplace.daily_data_file + "closes_unadj.feather")
sharenums = pd.read_feather(homeplace.daily_data_file + "sharenums.feather")
closes = closes.set_index(list(closes.columns)[0])
sharenums = sharenums.set_index(list(sharenums.columns)[0])
flow_cap = closes * sharenums
df = flow_cap
elif st:
st = pd.read_feather(homeplace.daily_data_file + "sts.feather").set_index(
"date"
)
st = pd.read_feather(homeplace.daily_data_file + "sts.feather")
df = st
df = df.set_index(list(df.columns)[0])
elif state:
state = pd.read_feather(
homeplace.daily_data_file + "states.feather"
).set_index("date")
state = pd.read_feather(homeplace.daily_data_file + "states.feather")
df = state
df = df.set_index(list(df.columns)[0])
else:
raise IOError("阁下总得读点什么吧?🤒")
else:
if open:
opens = pd.read_feather(
homeplace.daily_data_file + "opens.feather"
).set_index("date")
opens = pd.read_feather(homeplace.daily_data_file + "opens.feather")
df = opens
df = df.set_index(list(df.columns)[0])
elif close:
closes = pd.read_feather(
homeplace.daily_data_file + "closes.feather"
).set_index("date")
closes = pd.read_feather(homeplace.daily_data_file + "closes.feather")
df = closes
df = df.set_index(list(df.columns)[0])
elif high:
highs = pd.read_feather(
homeplace.daily_data_file + "highs.feather"
).set_index("date")
highs = pd.read_feather(homeplace.daily_data_file + "highs.feather")
df = highs
df = df.set_index(list(df.columns)[0])
elif low:
lows = pd.read_feather(
homeplace.daily_data_file + "lows.feather"
).set_index("date")
lows = pd.read_feather(homeplace.daily_data_file + "lows.feather")
# Bind df before indexing it: the original indexed an unbound df and then
# overwrote the result with the raw frame. Like the sibling branches,
# promote the first column to the index.
df = lows
df = df.set_index(list(df.columns)[0])
else:
raise IOError("阁下总得读点什么吧?🤒")
Expand Down
90 changes: 57 additions & 33 deletions pure_ocean_breeze/labor/process.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2022-09-15 22:05:26"
__updated__ = "2022-09-16 10:38:19"

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -1327,39 +1327,44 @@ class pure_moon(object):
def __init__(
cls,
startdate: int,
zxindustry_dummies=0,
swindustry_dummies=0,
):
cls.homeplace = HomePlace()
# 已经算好的月度st状态文件
cls.sts_monthly_file = homeplace.daily_data_file + "sts_monthly.feather"
# 已经算好的月度交易状态文件
cls.states_monthly_file = homeplace.daily_data_file + "states_monthly.feather"

if swindustry_dummies:
cls.industry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "申万行业2021版哑变量.feather")
.set_index("date")
.groupby("code")
.resample("M")
.last()
)
else:
cls.industry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "中信一级行业哑变量代码版.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
)
cls.industry_dummy = cls.industry_dummy.drop(columns=["code"]).reset_index()
cls.industry_ws = [f"w{i}" for i in range(1, cls.industry_dummy.shape[1] - 1)]
col = ["code", "date"] + cls.industry_ws
cls.industry_dummy.columns = col
cls.industry_dummy = cls.industry_dummy[
cls.industry_dummy.date >= pd.Timestamp(str(startdate))
]
cls.swindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "申万行业2021版哑变量.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
)

cls.zxindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "中信一级行业哑变量代码版.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
.fillna(0)
)

def deal_dummy(industry_dummy):
    """Flatten an industry dummy table and trim it to the start date.

    Drops the existing ``code`` column, turns the index levels back into
    columns, renames the columns to ``code``, ``date``, ``w1`` ... ``wN``
    and keeps only the rows whose ``date`` is on or after ``startdate``
    (``startdate`` is read from the enclosing scope).
    """
    flat = industry_dummy.drop(columns=["code"]).reset_index()
    # Everything after the first two columns is an industry dummy column.
    dummy_count = flat.shape[1] - 2
    flat.columns = ["code", "date"] + [
        f"w{k}" for k in range(1, dummy_count + 1)
    ]
    cutoff = pd.Timestamp(str(startdate))
    return flat[flat.date >= cutoff]

cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
cls.zxindustry_dummy = deal_dummy(cls.zxindustry_dummy)

def __call__(self):
"""调用对象则返回因子值"""
Expand Down Expand Up @@ -1513,7 +1518,11 @@ def single(x):
else:
cls.cap["cap_size"] = np.log(cls.cap["cap_size"])

def get_neutral_factors(self):
def get_neutral_factors(
self,
zxindustry_dummies=0,
swindustry_dummies=0,
):
"""对因子进行市值中性化"""
self.factors = self.factors.set_index("date")
self.factors.index = self.factors.index + pd.DateOffset(months=1)
Expand All @@ -1530,7 +1539,14 @@ def get_neutral_factors(self):
self.factors = pd.merge(
self.factors, self.cap, how="inner", on=["date", "code"]
)
self.factors = pd.merge(self.factors, self.industry_dummy, on=["date", "code"])
if swindustry_dummies:
self.factors = pd.merge(
self.factors, self.swindustry_dummy, on=["date", "code"]
)
else:
self.factors = pd.merge(
self.factors, self.zxindustry_dummy, on=["date", "code"]
)
self.factors = self.factors.set_index(["date", "code"])
self.factors = self.factors.groupby(["date"]).apply(self.neutralize_factors)
self.factors = self.factors.reset_index()
Expand Down Expand Up @@ -1847,6 +1863,8 @@ def run(
rets_sheetname=None,
on_paper=False,
sheetname=None,
zxindustry_dummies=0,
swindustry_dummies=0,
):
"""运行回测部分"""
if comments_writer and not (comments_sheetname or sheetname):
Expand All @@ -1857,11 +1875,17 @@ def run(
raise IOError("把group_rets输出到excel中时,必须指定sheetname🤒")
if neutralize:
self.get_log_cap()
self.get_neutral_factors()
self.get_neutral_factors(
swindustry_dummies=swindustry_dummies,
zxindustry_dummies=zxindustry_dummies,
)
self.deal_with_factors_after_neutralize()
elif boxcox:
self.get_log_cap(boxcox=True)
self.get_neutral_factors()
self.get_neutral_factors(
swindustry_dummies=swindustry_dummies,
zxindustry_dummies=zxindustry_dummies,
)
self.deal_with_factors_after_neutralize()
else:
self.deal_with_factors()
Expand Down Expand Up @@ -2087,8 +2111,6 @@ def __init__(
capitals = read_daily(flow_cap=1, start=start).resample("M").last()
self.shen = pure_moon(
startdate=start,
zxindustry_dummies=zxindustry_dummies,
swindustry_dummies=swindustry_dummies,
)
self.shen.set_basic_data(
age=ages,
Expand Down Expand Up @@ -2120,6 +2142,8 @@ def __init__(
rets_sheetname=rets_sheetname,
on_paper=on_paper,
sheetname=sheetname,
swindustry_dummies=swindustry_dummies,
zxindustry_dummies=zxindustry_dummies,
)

def __call__(self) -> pd.DataFrame:
Expand Down
4 changes: 4 additions & 0 deletions 更新日志/version3.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## 更新日志🗓 — v3

* v3.2.5 — 2022.09.16
> 1. 修复了读取日频数据函数read_daily由于索引名称更改导致的bug
> 1. 修复了缓存机制导致同一内核中,无法转换中信行业和申万行业的bug
> 1. 给用clickhouse的分钟数据计算因子值的类pure_fall_frequent增加了notebook进度条功能,当tqdm_inside指定为-1时,即使用tqdm.tqdm_notebook功能
* v3.2.4 — 2022.09.15
> 1. 改善了以clickhouse和questdb分钟数据计算因子的循环逻辑,将需要计算的时间拆分为多段相邻时间来计算,并补充了起始第一天的计算
> 1. 将保存最终因子值的函数database_save_final_factors增加了去除全空行的功能
Expand Down

0 comments on commit eb6d6b4

Please sign in to comment.