From 7a7f26e81bc740c164ccc3f914cda6140402cfbb Mon Sep 17 00:00:00 2001 From: chenzongwei <63836858+chen-001@users.noreply.github.com> Date: Fri, 6 Jan 2023 02:35:51 +0800 Subject: [PATCH] v3.6.5 --- pure_ocean_breeze/__init__.py | 4 +- pure_ocean_breeze/data/database.py | 21 ++--- pure_ocean_breeze/data/read_data.py | 14 +++- pure_ocean_breeze/data/tools.py | 3 +- pure_ocean_breeze/data/write_data.py | 9 ++- pure_ocean_breeze/labor/process.py | 78 +++++++++---------- pure_ocean_breeze/state/decorators.py | 37 +++++---- .../version3.md" | 8 ++ 8 files changed, 105 insertions(+), 69 deletions(-) diff --git a/pure_ocean_breeze/__init__.py b/pure_ocean_breeze/__init__.py index 3493557..a52e599 100644 --- a/pure_ocean_breeze/__init__.py +++ b/pure_ocean_breeze/__init__.py @@ -2,8 +2,8 @@ 一个量化多因子研究的框架,包含数据、回测、因子加工等方面的功能 """ -__updated__ = "2022-12-26 13:05:01" -__version__ = "3.6.4" +__updated__ = "2023-01-01 14:06:46" +__version__ = "3.6.5" __author__ = "chenzongwei" __author_email__ = "winterwinter999@163.com" __url__ = "https://github.com/chen-001/pure_ocean_breeze" diff --git a/pure_ocean_breeze/data/database.py b/pure_ocean_breeze/data/database.py index ef6c2bc..81aacc1 100644 --- a/pure_ocean_breeze/data/database.py +++ b/pure_ocean_breeze/data/database.py @@ -1,4 +1,4 @@ -__updated__ = "2022-11-11 23:04:12" +__updated__ = "2023-01-05 17:17:09" import pandas as pd import pymysql @@ -594,7 +594,7 @@ def get_data_old(self, sql_order: str) -> pd.DataFrame: """ a = pd.read_sql(sql_order, con=self.engine) return a - + def get_data( self, sql_order: str, only_array: bool = 0 ) -> Union[pd.DataFrame, np.ndarray]: @@ -778,12 +778,13 @@ class Questdb(DriverOfPostgre): def __init__( self, - user="admin", - password="quest", - host="127.0.0.1", - port="8812", - database="qdb", - tmp_csv_path="tmp_dataframe_for_questdb.csv", + user: str = "admin", + password: str = "quest", + host: str = "127.0.0.1", + port: str = "8812", + database: str = "qdb", + tmp_csv_path: str = "tmp_dataframe_for_questdb.csv", + web_port: str = "9001", ) -> None: """通过postgre的psycopg2驱动连接questdb数据库 @@ -801,6 +802,8 @@ def __init__( 数据库, by default "qdb" tmp_csv_path : str, optional 通过csv导入数据时,csv文件的暂存位置, by default "/opt/homebrew/var/questdb/copy_path/tmp_dataframe.csv" + web_port : str, optional + questdb控制台的端口号,安装questdb软件时默认为9000,本库默认为9001, by default 9001 """ super().__init__(user, password, host, port, database) self.user = user @@ -907,7 +910,7 @@ def write_via_csv(self, df: pd.DataFrame, table: str, index_id: str = None) -> N cursor = conn.cursor() try: csv = {"data": (table, f)} - server = "http://localhost:9001/imp" + server = f"http://localhost:{self.web_port}/imp" response = requests.post(server, files=csv) except (Exception, pg.DatabaseError) as error: print("Error: %s" % error) diff --git a/pure_ocean_breeze/data/read_data.py b/pure_ocean_breeze/data/read_data.py index c3e8079..6fa3dcd 100644 --- a/pure_ocean_breeze/data/read_data.py +++ b/pure_ocean_breeze/data/read_data.py @@ -1,4 +1,4 @@ -__updated__ = "2022-12-22 23:23:13" +__updated__ = "2023-01-06 02:29:53" import os import numpy as np @@ -24,6 +24,7 @@ def read_daily( tr: bool = 0, sharenum: bool = 0, volume: bool = 0, + money: bool = 0, age: bool = 0, flow_cap: bool = 0, st: bool = 0, @@ -41,6 +42,7 @@ def read_daily( pe: bool = 0, iret: bool = 0, ivol: bool = 0, + illiquidity: bool = 0, start: int = STATES["START"], ) -> pd.DataFrame: """直接读取常用的量价读取日频数据,默认为复权价格, @@ -64,6 +66,8 @@ def read_daily( 为1则选择读取流通股数, by default 0 volume : bool, optional 为1则选择读取成交量, 
by default 0 + money : bool, optional + 为1则表示读取成交额, by default 0 age : bool, optional 为1则选择读取上市天数, by default 0 flow_cap : bool, optional @@ -98,6 +102,8 @@ def read_daily( 为1则表示读取20日回归的fama三因子(市场、流通市值、市净率)特质收益率, by default 0 ivol : bool, optional 为1则表示读取20日回归的20日fama三因子(市场、流通市值、市净率)特质波动率, by default 0 + illiquidity : bool, optional + 为1则表示读取当日amihud非流动性指标, by default 0 start : int, optional 起始日期,形如20130101, by default STATES["START"] @@ -137,6 +143,10 @@ def read_daily( elif volume: volumes = pd.read_parquet(homeplace.daily_data_file + "volumes.parquet") df = volumes + elif money: + df = pd.read_parquet( + homeplace.factor_data_file + "日频数据-每日成交额/每日成交额.parquet" + ) elif age: age = pd.read_parquet(homeplace.daily_data_file + "ages.parquet") df = age @@ -188,6 +198,8 @@ def read_daily( elif ivol: df = read_daily(iret=1, start=start) df = df.rolling(20, min_periods=10).std() + elif illiquidity: + df = pd.read_parquet(homeplace.daily_data_file + "illiquidity.parquet") else: raise IOError("阁下总得读点什么吧?🤒") else: diff --git a/pure_ocean_breeze/data/tools.py b/pure_ocean_breeze/data/tools.py index 540884c..3a84a9c 100644 --- a/pure_ocean_breeze/data/tools.py +++ b/pure_ocean_breeze/data/tools.py @@ -2,7 +2,7 @@ 针对一些不常见的文件格式,读取数据文件的一些工具函数,以及其他数据工具 """ -__updated__ = "2022-12-29 16:37:19" +__updated__ = "2023-01-06 02:30:20" import os import pandas as pd @@ -1068,7 +1068,6 @@ def feather_to_parquet(folder: str): logger.warning(f"{file}不是parquet文件") - def feather_to_parquet_all(): """将数据库中所有的feather文件都转化为parquet文件""" homeplace = HomePlace() diff --git a/pure_ocean_breeze/data/write_data.py b/pure_ocean_breeze/data/write_data.py index e9d7ee3..f9e2580 100644 --- a/pure_ocean_breeze/data/write_data.py +++ b/pure_ocean_breeze/data/write_data.py @@ -1,4 +1,4 @@ -__updated__ = "2022-11-19 10:15:42" +__updated__ = "2023-01-06 02:30:21" import time @@ -1293,3 +1293,10 @@ def database_update_idiosyncratic_ret(): fama = pure_fama([cap, pb]) fama().to_parquet(homeplace.daily_data_file + "idiosyncratic_ret.parquet") logger.success("特质收益率已经更新完成") + + +def database_update_illiquidity(): + ret = read_daily(ret=1, start=20100101) + money = read_daily(money=1, start=20100101) + illi = ret.abs() / money + illi.to_parquet(homeplace.daily_data_file + "illiquidity.parquet") diff --git a/pure_ocean_breeze/labor/process.py b/pure_ocean_breeze/labor/process.py index 674fae6..2b3a8f7 100644 --- a/pure_ocean_breeze/labor/process.py +++ b/pure_ocean_breeze/labor/process.py @@ -1,4 +1,4 @@ -__updated__ = "2022-12-29 18:25:47" +__updated__ = "2023-01-05 17:32:06" import warnings @@ -889,7 +889,7 @@ def boom_fours( list[list[pd.DataFrame]] 每个因子进行boom_four后的结果 """ - return boom_four(df=dfs,backsee=backsee,daily=daily,min_periods=min_periods) + return boom_four(df=dfs, backsee=backsee, daily=daily, min_periods=min_periods) @do_on_dfs @@ -1480,6 +1480,7 @@ def __call__(self): return self.factors_out @classmethod + @lru_cache(maxsize=None) def set_basic_data( cls, ages: pd.DataFrame = None, @@ -1489,6 +1490,18 @@ def set_basic_data( closes: pd.DataFrame = None, capitals: pd.DataFrame = None, ): + if ages is None: + ages = read_daily(age=1, start=20100101) + if sts is None: + sts = read_daily(st=1, start=20100101) + if states is None: + states = read_daily(state=1, start=20100101) + if opens is None: + opens = read_daily(open=1, start=20100101) + if closes is None: + closes = read_daily(close=1, start=20100101) + if capitals is None: + capitals = read_daily(flow_cap=1, start=20100101).resample(cls.freq).last() # 上市天数文件 cls.ages = ages 
# st日子标志文件 @@ -1858,13 +1871,6 @@ def get_data(self, groups_num): ) self.data = pd.concat([self.data, rets_monthly_limit_downs]) - def select_data_time(self, time_start, time_end): - """筛选特定的时间段""" - if time_start: - self.data = self.data[self.data.date >= time_start] - if time_end: - self.data = self.data[self.data.date <= time_end] - def make_start_to_one(self, l): """让净值序列的第一个数变成1""" min_date = self.factors.date.min() @@ -2061,10 +2067,14 @@ def plot_net_values(self, y2, filename, iplot=1, ilegend=1, without_breakpoint=0 if not STATES["NO_SAVE"]: plt.savefig(filename_path) else: - tris = pd.concat( - [self.group_net_values, self.factor_cross_stds, self.rankics], - axis=1, - ).rename(columns={0: "因子截面标准差"}) + tris = ( + pd.concat( + [self.group_net_values, self.factor_cross_stds, self.rankics], + axis=1, + ) + .rename(columns={0: "因子截面标准差"}) + .dropna() + ) if without_breakpoint: tris = tris.dropna() figs = cf.figures( @@ -2195,8 +2205,6 @@ def run( plt_plot=True, plotly_plot=False, filename="分组净值图", - time_start=None, - time_end=None, print_comments=True, comments_writer=None, net_values_writer=None, @@ -2239,7 +2247,6 @@ def run( self.deal_with_factors() self.get_limit_ups_downs() self.get_data(groups_num) - self.select_data_time(time_start, time_end) self.get_group_rets_net_values( groups_num=groups_num, value_weighted=value_weighted ) @@ -2413,7 +2420,7 @@ class pure_moonnight(object): """封装选股框架""" __slots__ = ["shen"] - + def __init__( self, factors: pd.DataFrame, @@ -2482,9 +2489,9 @@ def __init__( filename : str, optional 分组净值曲线的图保存的名称, by default "分组净值图" time_start : int, optional - 回测起始时间(此参数已废弃,请在因子上直接截断), by default None + 回测起始时间, by default None time_end : int, optional - 回测终止时间(此参数已废弃,请在因子上直接截断), by default None + 回测终止时间, by default None print_comments : bool, optional 是否打印出评价指标, by default 1 comments_writer : pd.ExcelWriter, optional @@ -2539,19 +2546,6 @@ def __init__( if not isinstance(factors, pd.DataFrame): factors = factors() - start = datetime.datetime.strftime(factors.index.min(), "%Y%m%d") - if ages is None: - ages = read_daily(age=1, start=start) - if sts is None: - sts = read_daily(st=1, start=start) - if states is None: - states = read_daily(state=1, start=start) - if opens is None: - opens = read_daily(open=1, start=start) - if closes is None: - closes = read_daily(close=1, start=start) - if capitals is None: - capitals = read_daily(flow_cap=1, start=start).resample(freq).last() if comments_writer is None and sheetname is not None: from pure_ocean_breeze.state.states import COMMENTS_WRITER @@ -2564,14 +2558,20 @@ def __init__( from pure_ocean_breeze.state.states import ON_PAPER on_paper = ON_PAPER - from pure_ocean_breeze.state.states import MOON_START + if time_start is None: + from pure_ocean_breeze.state.states import MOON_START - if MOON_START is not None: - factors = factors[factors.index >= pd.Timestamp(str(MOON_START))] - from pure_ocean_breeze.state.states import MOON_END + if MOON_START is not None: + factors = factors[factors.index >= pd.Timestamp(str(MOON_START))] + else: + factors = factors[factors.index >= pd.Timestamp(str(time_start))] + if time_end is None: + from pure_ocean_breeze.state.states import MOON_END - if MOON_END is not None: - factors = factors[factors.index <= pd.Timestamp(str(MOON_END))] + if MOON_END is not None: + factors = factors[factors.index <= pd.Timestamp(str(MOON_END))] + else: + factors = factors[factors.index <= pd.Timestamp(str(time_end))] if boxcox + neutralize == 0: no_read_indu = 1 if only_cap + no_read_indu > 0: @@ 
-2613,8 +2613,6 @@ def __init__( plt_plot=plt_plot, plotly_plot=plotly_plot, filename=filename, - time_start=time_start, - time_end=time_end, print_comments=print_comments, comments_writer=comments_writer, net_values_writer=net_values_writer, diff --git a/pure_ocean_breeze/state/decorators.py b/pure_ocean_breeze/state/decorators.py index d64940b..556518f 100644 --- a/pure_ocean_breeze/state/decorators.py +++ b/pure_ocean_breeze/state/decorators.py @@ -2,31 +2,40 @@ 用于标注函数功能的一些装饰器(用处不大) """ -__updated__ = "2022-12-29 16:12:50" +__updated__ = "2023-01-01 11:36:51" from typing import Iterable -def _list_value(x,list_num_order): - if isinstance(x,Iterable): + +def _list_value(x, list_num_order): + if isinstance(x, Iterable): return x[list_num_order] else: return x -def _dict_value(x,list_num_order): - dfs={} - for k,v in x.items(): - if isinstance(v,Iterable): - dfs[k]=v[list_num_order] + +def _dict_value(x, list_num_order): + dfs = {} + for k, v in x.items(): + if isinstance(v, Iterable): + dfs[k] = v[list_num_order] else: - dfs[k]=v + dfs[k] = v return dfs - + + def do_on_dfs(func): - def wrapper(df,*args,**kwargs): - if isinstance(df,list): - dfs=[func(i,*[_list_value(i,num) for i in args],**_dict_value(kwargs,num)) for num,i in enumerate(df)] + def wrapper(df, *args, **kwargs): + if isinstance(df, list): + dfs = [ + func( + i, *[_list_value(i, num) for i in args], **_dict_value(kwargs, num) + ) + for num, i in enumerate(df) + ] return dfs else: - return func(df) + return func(df, *args, **kwargs) + return wrapper diff --git "a/\346\233\264\346\226\260\346\227\245\345\277\227/version3.md" "b/\346\233\264\346\226\260\346\227\245\345\277\227/version3.md" index f832239..e67f6e4 100644 --- "a/\346\233\264\346\226\260\346\227\245\345\277\227/version3.md" +++ "b/\346\233\264\346\226\260\346\227\245\345\277\227/version3.md" @@ -1,5 +1,13 @@ ## 更新日志🗓 — v3 +* v3.6.5 — 2023.1.6 +> 1. 给Questdb初始化新增了web_port参数,用于表示控制台的端口号 +> 1. 给read_daily函数新增了money参数用于读取每日个股成交额、illiquidity参数用于读取每日个股非流动性 +> 1. 新增了database_update_illiquidity函数用于更新每天非流动性数据 +> 1. 删去了pure_moon中的select_data_time方法,在set_basic_data函数中新增了基础数据为None时从本地读入的方法 +> 1. 优化了因子值时间超过基础数据时间时,结果的展示方式 +> 1. 优化了pure_moonnight的运算逻辑,对回测进行提速,并恢复了time_start和time_end参数的使用,可以为每次回测单独设定回测区间 +> 1. 修复了do_on_dfs装饰器在仅作用于一个目标时,参数不生效的bug * v3.6.4 — 2022.12.29 > 1. 新增了do_on_dfs装饰器,用于将一个作用于单个dataframe的函数,改造为可以分别对多个dataframe运算,dataframe须处于第一个参数的位置,此外如果对每个dataframe,后续的某个参数各不相同,可使用列表依次输入。 > 2. 修复了clip函数的bug
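
`Questdb.__init__` now accepts a `web_port` argument, and `write_via_csv` posts its temporary csv to `http://localhost:{web_port}/imp` rather than a hard-coded port 9001. A minimal sketch, assuming a local QuestDB instance whose web console listens on port 9000 and a hypothetical table name:

```python
import pandas as pd
from pure_ocean_breeze.data.database import Questdb

# pass the port your QuestDB web console actually listens on
qdb = Questdb(web_port="9000")

df = pd.DataFrame({"date": ["20230106"], "code": ["000001.SZ"], "fac": [0.12]})
# bulk-loads the frame through the csv import endpoint http://localhost:9000/imp
qdb.write_via_csv(df, "factor_table")
```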
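
The `money` and `illiquidity` switches added to `read_daily`, together with `database_update_illiquidity` in `write_data.py`, expose a daily Amihud-style illiquidity measure (absolute daily return divided by daily trading value). A minimal usage sketch, assuming the library is installed, the local parquet data has been refreshed, and the import paths match the modules shown in this patch; the 20-day rolling mean at the end is a hypothetical factor, not part of the library:

```python
from pure_ocean_breeze.data.read_data import read_daily
from pure_ocean_breeze.data.write_data import database_update_illiquidity

# rebuild the stored measure: illiquidity = |daily return| / daily trading value
database_update_illiquidity()

# read it back as a wide DataFrame (index = trade date, columns = stock codes)
illiq = read_daily(illiquidity=1, start=20200101)
money = read_daily(money=1, start=20200101)  # daily trading value, also new in v3.6.5

# hypothetical smoothed factor for a backtest
illiq_20 = illiq.rolling(20, min_periods=10).mean()
```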
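
With `time_start` and `time_end` active again in `pure_moonnight`, one factor DataFrame can be backtested over different windows without truncating it by hand; when both are left as `None`, the global `MOON_START`/`MOON_END` states are used as before. A sketch, assuming `fac` is a wide factor DataFrame indexed by trade date with stock codes as columns:

```python
from pure_ocean_breeze.labor.process import pure_moonnight

# full sample (or whatever MOON_START / MOON_END are set to in the global state)
shen = pure_moonnight(fac)

# same factor, evaluated only on 2018-2022
shen_sub = pure_moonnight(fac, time_start=20180101, time_end=20221231)
```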
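
The `do_on_dfs` fix is easy to miss: on the single-DataFrame path the wrapper now forwards extra arguments (`func(df, *args, **kwargs)` instead of `func(df)`). A small self-contained illustration of both call styles; `add_constant` is a made-up example function, not part of the library:

```python
import pandas as pd
from pure_ocean_breeze.state.decorators import do_on_dfs

@do_on_dfs
def add_constant(df, c=0):
    """Toy function: shift every value of a single DataFrame by c."""
    return df + c

a = pd.DataFrame({"x": [1, 2]})
b = pd.DataFrame({"x": [3, 4]})

# list input: one call handles both frames, with a per-frame value of c
res_list = add_constant([a, b], c=[10, 100])  # -> [a + 10, b + 100]

# single input: keyword arguments are forwarded correctly after this fix
res_one = add_constant(a, c=5)
```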