
Commit

v3.6.5
chen-001 committed Jan 5, 2023
1 parent 1d04491 commit 7a7f26e
Showing 8 changed files with 105 additions and 69 deletions.
4 changes: 2 additions & 2 deletions pure_ocean_breeze/__init__.py
@@ -2,8 +2,8 @@
A framework for quantitative multi-factor research, covering data, backtesting, factor processing, and related functionality
"""

__updated__ = "2022-12-26 13:05:01"
__version__ = "3.6.4"
__updated__ = "2023-01-01 14:06:46"
__version__ = "3.6.5"
__author__ = "chenzongwei"
__author_email__ = "[email protected]"
__url__ = "https://github.com/chen-001/pure_ocean_breeze"
21 changes: 12 additions & 9 deletions pure_ocean_breeze/data/database.py
@@ -1,4 +1,4 @@
__updated__ = "2022-11-11 23:04:12"
__updated__ = "2023-01-05 17:17:09"

import pandas as pd
import pymysql
@@ -594,7 +594,7 @@ def get_data_old(self, sql_order: str) -> pd.DataFrame:
"""
a = pd.read_sql(sql_order, con=self.engine)
return a

def get_data(
self, sql_order: str, only_array: bool = 0
) -> Union[pd.DataFrame, np.ndarray]:
@@ -778,12 +778,13 @@ class Questdb(DriverOfPostgre):

def __init__(
self,
user="admin",
password="quest",
host="127.0.0.1",
port="8812",
database="qdb",
tmp_csv_path="tmp_dataframe_for_questdb.csv",
user: str = "admin",
password: str = "quest",
host: str = "127.0.0.1",
port: str = "8812",
database: str = "qdb",
tmp_csv_path: str = "tmp_dataframe_for_questdb.csv",
web_port: str = "9001",
) -> None:
"""通过postgre的psycopg2驱动连接questdb数据库
@@ -801,6 +802,8 @@ def __init__(
Database name, by default "qdb"
tmp_csv_path : str, optional
Temporary location for the csv file when importing data via csv, by default "/opt/homebrew/var/questdb/copy_path/tmp_dataframe.csv"
web_port : str, optional
Port of the QuestDB web console; a stock QuestDB install defaults to 9000, while this library defaults to 9001, by default 9001
"""
super().__init__(user, password, host, port, database)
self.user = user
@@ -907,7 +910,7 @@ def write_via_csv(self, df: pd.DataFrame, table: str, index_id: str = None) -> N
cursor = conn.cursor()
try:
csv = {"data": (table, f)}
server = "http://localhost:9001/imp"
server = f"http://localhost:{self.web_port}/imp"
response = requests.post(server, files=csv)
except (Exception, pg.DatabaseError) as error:
print("Error: %s" % error)
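A minimal usage sketch of the new web_port parameter — the dataframe contents and the table name "demo_table" are hypothetical, and a local QuestDB instance is assumed:

```python
import pandas as pd
from pure_ocean_breeze.data.database import Questdb

# Point the client at a console running on QuestDB's stock default port
# 9000 instead of this library's default of 9001.
qdb = Questdb(web_port="9000")

# write_via_csv now posts to http://localhost:{web_port}/imp;
# "demo_table" is a hypothetical table name for illustration only.
df = pd.DataFrame({"date": ["20230105"], "code": ["000001.SZ"], "fac": [0.5]})
qdb.write_via_csv(df, "demo_table")
```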
14 changes: 13 additions & 1 deletion pure_ocean_breeze/data/read_data.py
@@ -1,4 +1,4 @@
__updated__ = "2022-12-22 23:23:13"
__updated__ = "2023-01-06 02:29:53"

import os
import numpy as np
@@ -24,6 +24,7 @@ def read_daily(
tr: bool = 0,
sharenum: bool = 0,
volume: bool = 0,
money: bool = 0,
age: bool = 0,
flow_cap: bool = 0,
st: bool = 0,
@@ -41,6 +42,7 @@
pe: bool = 0,
iret: bool = 0,
ivol: bool = 0,
illiquidity: bool = 0,
start: int = STATES["START"],
) -> pd.DataFrame:
"""直接读取常用的量价读取日频数据,默认为复权价格,
@@ -64,6 +66,8 @@
If 1, read the number of floating shares, by default 0
volume : bool, optional
If 1, read trading volume, by default 0
money : bool, optional
If 1, read trading value, by default 0
age : bool, optional
If 1, read the number of days since listing, by default 0
flow_cap : bool, optional
@@ -98,6 +102,8 @@
If 1, read the idiosyncratic returns from a 20-day Fama three-factor regression (market, floating market cap, price-to-book), by default 0
ivol : bool, optional
If 1, read the 20-day idiosyncratic volatility from the 20-day Fama three-factor regression (market, floating market cap, price-to-book), by default 0
illiquidity : bool, optional
If 1, read the daily Amihud illiquidity measure, by default 0
start : int, optional
Start date, in the form 20130101, by default STATES["START"]
@@ -137,6 +143,10 @@
elif volume:
volumes = pd.read_parquet(homeplace.daily_data_file + "volumes.parquet")
df = volumes
elif money:
df = pd.read_parquet(
homeplace.factor_data_file + "日频数据-每日成交额/每日成交额.parquet"
)
elif age:
age = pd.read_parquet(homeplace.daily_data_file + "ages.parquet")
df = age
@@ -188,6 +198,8 @@
elif ivol:
df = read_daily(iret=1, start=start)
df = df.rolling(20, min_periods=10).std()
elif illiquidity:
df = pd.read_parquet(homeplace.daily_data_file + "illiquidity.parquet")
else:
raise IOError("阁下总得读点什么吧?🤒")
else:
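A small sketch of the two new read_daily switches added in this commit (this assumes the corresponding parquet files already exist locally):

```python
from pure_ocean_breeze.data.read_data import read_daily

# Daily per-stock trading value, as a dates-by-stocks dataframe
money = read_daily(money=1, start=20130101)

# Daily Amihud illiquidity, precomputed by database_update_illiquidity
illi = read_daily(illiquidity=1, start=20130101)
```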
3 changes: 1 addition & 2 deletions pure_ocean_breeze/data/tools.py
@@ -2,7 +2,7 @@
Utility functions for reading data files in less common formats, plus other data tools
"""

__updated__ = "2022-12-29 16:37:19"
__updated__ = "2023-01-06 02:30:20"

import os
import pandas as pd
@@ -1068,7 +1068,6 @@ def feather_to_parquet(folder: str):
logger.warning(f"{file}不是parquet文件")



def feather_to_parquet_all():
"""将数据库中所有的feather文件都转化为parquet文件"""
homeplace = HomePlace()
9 changes: 8 additions & 1 deletion pure_ocean_breeze/data/write_data.py
@@ -1,4 +1,4 @@
__updated__ = "2022-11-19 10:15:42"
__updated__ = "2023-01-06 02:30:21"

import time

@@ -1293,3 +1293,10 @@ def database_update_idiosyncratic_ret():
fama = pure_fama([cap, pb])
fama().to_parquet(homeplace.daily_data_file + "idiosyncratic_ret.parquet")
logger.success("特质收益率已经更新完成")


def database_update_illiquidity():
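    """Update the daily Amihud illiquidity data: |return| / trading value"""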
ret = read_daily(ret=1, start=20100101)
money = read_daily(money=1, start=20100101)
illi = ret.abs() / money
illi.to_parquet(homeplace.daily_data_file + "illiquidity.parquet")
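For reference, the new function computes the daily version of Amihud's illiquidity ratio, where $r_{i,t}$ is stock $i$'s return on day $t$ and the denominator is its trading value that day:

```latex
\mathrm{ILLIQ}_{i,t} = \frac{\lvert r_{i,t} \rvert}{\mathrm{money}_{i,t}}
```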
78 changes: 38 additions & 40 deletions pure_ocean_breeze/labor/process.py
@@ -1,4 +1,4 @@
__updated__ = "2022-12-29 18:25:47"
__updated__ = "2023-01-05 17:32:06"

import warnings

@@ -889,7 +889,7 @@ def boom_fours(
list[list[pd.DataFrame]]
The result of applying boom_four to each factor
"""
return boom_four(df=dfs,backsee=backsee,daily=daily,min_periods=min_periods)
return boom_four(df=dfs, backsee=backsee, daily=daily, min_periods=min_periods)


@do_on_dfs
@@ -1480,6 +1480,7 @@ def __call__(self):
return self.factors_out

@classmethod
@lru_cache(maxsize=None)
def set_basic_data(
cls,
ages: pd.DataFrame = None,
@@ -1489,6 +1490,18 @@ def set_basic_data(
closes: pd.DataFrame = None,
capitals: pd.DataFrame = None,
):
if ages is None:
ages = read_daily(age=1, start=20100101)
if sts is None:
sts = read_daily(st=1, start=20100101)
if states is None:
states = read_daily(state=1, start=20100101)
if opens is None:
opens = read_daily(open=1, start=20100101)
if closes is None:
closes = read_daily(close=1, start=20100101)
if capitals is None:
capitals = read_daily(flow_cap=1, start=20100101).resample(cls.freq).last()
# days-since-listing file
cls.ages = ages
# ST-day flag file
@@ -1858,13 +1871,6 @@ def get_data(self, groups_num):
)
self.data = pd.concat([self.data, rets_monthly_limit_downs])

def select_data_time(self, time_start, time_end):
"""筛选特定的时间段"""
if time_start:
self.data = self.data[self.data.date >= time_start]
if time_end:
self.data = self.data[self.data.date <= time_end]

def make_start_to_one(self, l):
"""让净值序列的第一个数变成1"""
min_date = self.factors.date.min()
@@ -2061,10 +2067,14 @@ def plot_net_values(self, y2, filename, iplot=1, ilegend=1, without_breakpoint=0
if not STATES["NO_SAVE"]:
plt.savefig(filename_path)
else:
tris = pd.concat(
[self.group_net_values, self.factor_cross_stds, self.rankics],
axis=1,
).rename(columns={0: "因子截面标准差"})
tris = (
pd.concat(
[self.group_net_values, self.factor_cross_stds, self.rankics],
axis=1,
)
.rename(columns={0: "因子截面标准差"})
.dropna()
)
if without_breakpoint:
tris = tris.dropna()
figs = cf.figures(
@@ -2195,8 +2205,6 @@ def run(
plt_plot=True,
plotly_plot=False,
filename="分组净值图",
time_start=None,
time_end=None,
print_comments=True,
comments_writer=None,
net_values_writer=None,
@@ -2239,7 +2247,6 @@
self.deal_with_factors()
self.get_limit_ups_downs()
self.get_data(groups_num)
self.select_data_time(time_start, time_end)
self.get_group_rets_net_values(
groups_num=groups_num, value_weighted=value_weighted
)
@@ -2413,7 +2420,7 @@ class pure_moonnight(object):
"""封装选股框架"""

__slots__ = ["shen"]

def __init__(
self,
factors: pd.DataFrame,
@@ -2482,9 +2489,9 @@ def __init__(
filename : str, optional
Filename under which the grouped net-value plot is saved, by default "分组净值图"
time_start : int, optional
Backtest start time (deprecated; truncate the factor directly instead), by default None
回测起始时间, by default None → Backtest start time, by default None
time_end : int, optional
Backtest end time (deprecated; truncate the factor directly instead), by default None
回测终止时间, by default None → Backtest end time, by default None
print_comments : bool, optional
Whether to print the evaluation metrics, by default 1
comments_writer : pd.ExcelWriter, optional
@@ -2539,19 +2546,6 @@ def __init__(

if not isinstance(factors, pd.DataFrame):
factors = factors()
start = datetime.datetime.strftime(factors.index.min(), "%Y%m%d")
if ages is None:
ages = read_daily(age=1, start=start)
if sts is None:
sts = read_daily(st=1, start=start)
if states is None:
states = read_daily(state=1, start=start)
if opens is None:
opens = read_daily(open=1, start=start)
if closes is None:
closes = read_daily(close=1, start=start)
if capitals is None:
capitals = read_daily(flow_cap=1, start=start).resample(freq).last()
if comments_writer is None and sheetname is not None:
from pure_ocean_breeze.state.states import COMMENTS_WRITER

@@ -2564,14 +2558,20 @@ def __init__(
from pure_ocean_breeze.state.states import ON_PAPER

on_paper = ON_PAPER
from pure_ocean_breeze.state.states import MOON_START
if time_start is None:
from pure_ocean_breeze.state.states import MOON_START

if MOON_START is not None:
factors = factors[factors.index >= pd.Timestamp(str(MOON_START))]
from pure_ocean_breeze.state.states import MOON_END
if MOON_START is not None:
factors = factors[factors.index >= pd.Timestamp(str(MOON_START))]
else:
factors = factors[factors.index >= pd.Timestamp(str(time_start))]
if time_end is None:
from pure_ocean_breeze.state.states import MOON_END

if MOON_END is not None:
factors = factors[factors.index <= pd.Timestamp(str(MOON_END))]
if MOON_END is not None:
factors = factors[factors.index <= pd.Timestamp(str(MOON_END))]
else:
factors = factors[factors.index <= pd.Timestamp(str(time_end))]
if boxcox + neutralize == 0:
no_read_indu = 1
if only_cap + no_read_indu > 0:
@@ -2613,8 +2613,6 @@ def __init__(
plt_plot=plt_plot,
plotly_plot=plotly_plot,
filename=filename,
time_start=time_start,
time_end=time_end,
print_comments=print_comments,
comments_writer=comments_writer,
net_values_writer=net_values_writer,
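To show the restored time_start/time_end flow end to end, a hedged backtest sketch — "my_factor.parquet" is a hypothetical wide factor file (dates as index, stock codes as columns):

```python
import pandas as pd
from pure_ocean_breeze.labor.process import pure_moonnight

# Hypothetical factor file: a dates-by-stocks dataframe.
fac = pd.read_parquet("my_factor.parquet")

# time_start/time_end now simply truncate the factor's index before the
# backtest runs, so each run can set its own window; basic data (ages,
# sts, opens, closes, ...) is read from local files inside set_basic_data
# when not supplied explicitly.
shen = pure_moonnight(fac, time_start=20180101, time_end=20221231)
```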
37 changes: 23 additions & 14 deletions pure_ocean_breeze/state/decorators.py
@@ -2,31 +2,40 @@
Some decorators for annotating function behavior (of limited use)
"""

__updated__ = "2022-12-29 16:12:50"
__updated__ = "2023-01-01 11:36:51"
from typing import Iterable

def _list_value(x,list_num_order):
if isinstance(x,Iterable):

def _list_value(x, list_num_order):
if isinstance(x, Iterable):
return x[list_num_order]
else:
return x

def _dict_value(x,list_num_order):
dfs={}
for k,v in x.items():
if isinstance(v,Iterable):
dfs[k]=v[list_num_order]

def _dict_value(x, list_num_order):
dfs = {}
for k, v in x.items():
if isinstance(v, Iterable):
dfs[k] = v[list_num_order]
else:
dfs[k]=v
dfs[k] = v
return dfs



def do_on_dfs(func):
def wrapper(df,*args,**kwargs):
if isinstance(df,list):
dfs=[func(i,*[_list_value(i,num) for i in args],**_dict_value(kwargs,num)) for num,i in enumerate(df)]
def wrapper(df, *args, **kwargs):
if isinstance(df, list):
dfs = [
func(
i, *[_list_value(i, num) for i in args], **_dict_value(kwargs, num)
)
for num, i in enumerate(df)
]
return dfs
else:
return func(df)
return func(df, *args, **kwargs)

return wrapper


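A short sketch of the fixed decorator's behavior — shift_by and the toy dataframes are hypothetical. With a list, a keyword argument may itself be a list distributed element-wise; with a single dataframe, arguments now pass straight through (the previous code dropped them):

```python
import pandas as pd
from pure_ocean_breeze.state.decorators import do_on_dfs

@do_on_dfs
def shift_by(df: pd.DataFrame, n: int = 1) -> pd.DataFrame:
    # Shift each column down by n rows.
    return df.shift(n)

a = pd.DataFrame({"x": [1, 2, 3]})
b = pd.DataFrame({"x": [4, 5, 6]})

shift_by(a, 2)              # single target: n=2 is forwarded (the old bug dropped it)
shift_by([a, b], n=[1, 2])  # list target: a is shifted by 1, b by 2
```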
8 changes: 8 additions & 0 deletions 更新日志/version3.md
@@ -1,5 +1,13 @@
## Changelog 🗓 — v3

* v3.6.5 — 2023.1.6
> 1. Added a web_port parameter to Questdb's initializer, specifying the port of the web console
> 2. Added a money parameter to read_daily for reading each stock's daily trading value, and an illiquidity parameter for reading each stock's daily illiquidity
> 3. Added a database_update_illiquidity function for updating the daily illiquidity data
> 4. Removed the select_data_time method from pure_moon; set_basic_data now falls back to reading basic data from local files when an argument is None
> 5. Improved how results are displayed when the factor's time range extends beyond that of the basic data
> 6. Streamlined pure_moonnight's computation logic to speed up backtests, and restored the time_start and time_end parameters so each backtest can set its own window
> 7. Fixed a bug where the do_on_dfs decorator ignored extra arguments when applied to a single target
* v3.6.4 — 2022.12.29
> 1. Added the do_on_dfs decorator, which turns a function that operates on a single dataframe into one that can operate on several dataframes separately; the dataframe must be the first argument, and if a later argument should differ per dataframe, pass the values as a list in matching order.
> 2. Fixed a bug in the clip function
