Skip to content

Commit

Permalink
v3.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
chen-001 committed Oct 1, 2022
1 parent a95d15d commit 25b72f5
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 58 deletions.
4 changes: 2 additions & 2 deletions pure_ocean_breeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
一个量化多因子研究的框架,包含数据、回测、因子加工等方面的功能
"""

__updated__ = "2022-09-30 19:41:26"
__version__ = "3.3.1"
__updated__ = "2022-10-01 02:34:56"
__version__ = "3.3.2"
__author__ = "chenzongwei"
__author_email__ = "[email protected]"
__url__ = "https://github.com/chen-001/pure_ocean_breeze"
Expand Down
4 changes: 2 additions & 2 deletions pure_ocean_breeze/data/read_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2022-09-30 23:50:56"
__updated__ = "2022-10-01 10:14:38"

import os
import numpy as np
Expand Down Expand Up @@ -168,7 +168,7 @@ def read_daily(
elif ret_night:
df = (
read_daily(open=1, start=start)
/ read_daily(close, start=start).shift(1)
/ read_daily(close=1, start=start).shift(1)
- 1
)
elif vol:
Expand Down
10 changes: 5 additions & 5 deletions pure_ocean_breeze/data/write_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2022-09-30 23:50:57"
__updated__ = "2022-10-01 09:55:11"

try:
import rqdatac
Expand Down Expand Up @@ -1264,8 +1264,8 @@ def save(df, old, file):


def database_update_idiosyncratic_ret():
pb = read_daily(pb=1, start=20040101)
cap = read_daily(flow_cap=1, start=20040101)
pb = read_daily(pb=1, start=20100101)
cap = read_daily(flow_cap=1, start=20100101).dropna(how='all')
fama = pure_fama([cap, pb])
fama().reset_index().to_feather("idiosyncratic_ret.feather")
logger.success("特质收益率已经更新完成")
fama().reset_index().to_feather(homeplace.daily_data_file+"idiosyncratic_ret.feather")
logger.success("特质收益率已经更新完成")
116 changes: 67 additions & 49 deletions pure_ocean_breeze/labor/process.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2022-10-01 02:01:16"
__updated__ = "2022-10-01 10:14:27"

import warnings

Expand Down Expand Up @@ -1367,52 +1367,54 @@ class pure_moon(object):
"factor_turnover_rates",
"factor_turnover_rate",
"group_rets_std",
"group_rets_stds"
"group_rets_stds",
]

@classmethod
@lru_cache(maxsize=None)
def __init__(
cls,
startdate: int,
no_read_indu: bool = 0,
):
cls.homeplace = HomePlace()
# 已经算好的月度st状态文件
cls.sts_monthly_file = homeplace.daily_data_file + "sts_monthly.feather"
# 已经算好的月度交易状态文件
cls.states_monthly_file = homeplace.daily_data_file + "states_monthly.feather"

cls.swindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "申万行业2021版哑变量.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
)
if not no_read_indu:
cls.swindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "申万行业2021版哑变量.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
)

cls.zxindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "中信一级行业哑变量代码版.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
.fillna(0)
)
cls.zxindustry_dummy = (
pd.read_feather(cls.homeplace.daily_data_file + "中信一级行业哑变量代码版.feather")
.fillna(0)
.set_index("date")
.groupby("code")
.resample("M")
.last()
.fillna(0)
)

def deal_dummy(industry_dummy):
industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
col = ["code", "date"] + industry_ws
industry_dummy.columns = col
industry_dummy = industry_dummy[
industry_dummy.date >= pd.Timestamp(str(startdate))
]
return industry_dummy
def deal_dummy(industry_dummy):
industry_dummy = industry_dummy.drop(columns=["code"]).reset_index()
industry_ws = [f"w{i}" for i in range(1, industry_dummy.shape[1] - 1)]
col = ["code", "date"] + industry_ws
industry_dummy.columns = col
industry_dummy = industry_dummy[
industry_dummy.date >= pd.Timestamp(str(startdate))
]
return industry_dummy

cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
cls.zxindustry_dummy = deal_dummy(cls.zxindustry_dummy)
cls.swindustry_dummy = deal_dummy(cls.swindustry_dummy)
cls.zxindustry_dummy = deal_dummy(cls.zxindustry_dummy)

@property
def factors_out(self):
Expand Down Expand Up @@ -1540,7 +1542,10 @@ def neutralize_factors(cls, df):
industry_codes = list(df.columns)
industry_codes = [i for i in industry_codes if i.startswith("w")]
industry_codes_str = "+".join(industry_codes)
ols_result = smf.ols("fac~cap_size+" + industry_codes_str, data=df).fit()
if len(industry_codes_str) > 0:
ols_result = smf.ols("fac~cap_size+" + industry_codes_str, data=df).fit()
else:
ols_result = smf.ols("fac~cap_size", data=df).fit()
ols_w = ols_result.params["cap_size"]
ols_b = ols_result.params["Intercept"]
ols_bs = {}
Expand Down Expand Up @@ -1569,11 +1574,9 @@ def single(x):
cls.cap["cap_size"] = np.log(cls.cap["cap_size"])

def get_neutral_factors(
self,
zxindustry_dummies=0,
swindustry_dummies=0,
self, zxindustry_dummies=0, swindustry_dummies=0, only_cap=0
):
"""对因子进行市值中性化"""
"""对因子进行行业市值中性化"""
self.factors = self.factors.set_index("date")
self.factors.index = self.factors.index + pd.DateOffset(months=1)
self.factors = self.factors.resample("M").last()
Expand All @@ -1589,14 +1592,15 @@ def get_neutral_factors(
self.factors = pd.merge(
self.factors, self.cap, how="inner", on=["date", "code"]
)
if swindustry_dummies:
self.factors = pd.merge(
self.factors, self.swindustry_dummy, on=["date", "code"]
)
else:
self.factors = pd.merge(
self.factors, self.zxindustry_dummy, on=["date", "code"]
)
if not only_cap:
if swindustry_dummies:
self.factors = pd.merge(
self.factors, self.swindustry_dummy, on=["date", "code"]
)
else:
self.factors = pd.merge(
self.factors, self.zxindustry_dummy, on=["date", "code"]
)
self.factors = self.factors.set_index(["date", "code"])
self.factors = self.factors.groupby(["date"]).apply(self.neutralize_factors)
self.factors = self.factors.reset_index()
Expand Down Expand Up @@ -1803,7 +1807,7 @@ def in_g(df):
self.group_rets_stds = self.data.groupby(["date", "group"]).apply(
lambda x: x.ret.std()
)
self.group_rets_std = self.group_rets_stds.groupby('group').mean()
self.group_rets_std = self.group_rets_stds.groupby("group").mean()
# dropna是因为如果股票行情数据比因子数据的截止日期晚,而最后一个月发生月初跌停时,会造成最后某组多出一个月的数据
self.group_rets = self.group_rets.unstack()
self.group_rets = self.group_rets[
Expand Down Expand Up @@ -1944,6 +1948,7 @@ def run(
sheetname=None,
zxindustry_dummies=0,
swindustry_dummies=0,
only_cap=0,
):
"""运行回测部分"""
if comments_writer and not (comments_sheetname or sheetname):
Expand All @@ -1964,6 +1969,7 @@ def run(
self.get_neutral_factors(
swindustry_dummies=swindustry_dummies,
zxindustry_dummies=zxindustry_dummies,
only_cap=only_cap,
)
self.deal_with_factors_after_neutralize()
else:
Expand Down Expand Up @@ -2113,6 +2119,8 @@ def __init__(
opens: pd.DataFrame = None,
closes: pd.DataFrame = None,
capitals: pd.DataFrame = None,
no_read_indu: bool = 0,
only_cap: bool = 0,
) -> None:
"""一键回测框架,测试单因子的月频调仓的分组表现
每月月底计算因子值,月初第一天开盘时买入,月末收盘最后一天收盘时卖出
Expand Down Expand Up @@ -2168,16 +2176,20 @@ def __init__(
行业中性化时,选用申万一级行业, by default 0
ages : pd.DataFrame, optional
输入股票上市天数的数据,index是时间,columns是股票代码,values是天数, by default None
sts : pd.DataFrame,
sts : pd.DataFrame, optional
输入股票每天是否st的数据,是st股即为1,否则为0,index是时间,columns是股票代码,values是0或1, by default None
states : pd.DataFrame,
states : pd.DataFrame, optional
输入股票每天交易状态的数据,正常交易为1,否则为0,index是时间,columns是股票代码,values是0或1, by default None
opens : pd.DataFrame,
opens : pd.DataFrame, optional
输入股票的复权开盘价数据,index是时间,columns是股票代码,values是价格, by default None
closes : pd.DataFrame,
closes : pd.DataFrame, optional
输入股票的复权收盘价数据,index是时间,columns是股票代码,values是价格, by default None
capitals : pd.DataFrame,
capitals : pd.DataFrame, optional
输入股票的每月月末流通市值数据,index是时间,columns是股票代码,values是流通市值, by default None
no_read_indu : bool, optional
不读入行业数据, by default 0
only_cap : bool, optional
仅做市值中性化, by default 0
"""

if isinstance(factors, pure_fallmount):
Expand All @@ -2203,8 +2215,13 @@ def __init__(
from pure_ocean_breeze.state.states import NET_VALUES_WRITER

net_values_writer = NET_VALUES_WRITER
if boxcox + neutralize == 0:
no_read_indu = 1
if only_cap + no_read_indu > 0:
only_cap = no_read_indu = 1
self.shen = pure_moon(
startdate=start,
no_read_indu=no_read_indu,
)
self.shen.set_basic_data(
age=ages,
Expand Down Expand Up @@ -2238,6 +2255,7 @@ def __init__(
sheetname=sheetname,
swindustry_dummies=swindustry_dummies,
zxindustry_dummies=zxindustry_dummies,
only_cap=only_cap,
)

def __call__(self) -> pd.DataFrame:
Expand Down
6 changes: 6 additions & 0 deletions 更新日志/version3.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## 更新日志🗓 — v3

* v3.3.2 — 2022.10.01
> 1. 修复了读取隔夜收益率的bug
> 1. 将更新特质收益率数据的起始时间点改为2010年1月1日,并修复了其中的bug
> 1. 给pure_moon和pure_moonnight增加了no_read_indu参数,使回测时不必读入行业哑变量数据,便于调试
> 1. 给pure_moon和pure_moonnight增加了only_cap参数,使回测时只做市值中性化,而不做行业中性化
> 1. 优化了pure_moon和pure_moonnight的参数逻辑,当neutralize和boxcox均为0时,自动开启no_read_indu参数;当no_read_indu和only_cap任一为1时,自动开启另一个参数
* v3.3.1 — 2022.10.01
> 1. 给一键导入库的requires中,增加了import pyfinance.ols as go
> 1. 增加了用于fama三因子与特质收益率相关的类pure_fama,可以计算各期因子收益率、个股各期特质收益率、个股各期因子暴露、超额收益率等内容
Expand Down

0 comments on commit 25b72f5

Please sign in to comment.