Skip to content

Commit

Permalink
v3.8.2
Browse files Browse the repository at this point in the history
  • Loading branch information
chen-001 committed Mar 15, 2023
1 parent 3ae9a0a commit 2daef39
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 60 deletions.
4 changes: 2 additions & 2 deletions pure_ocean_breeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
一个量化多因子研究的框架,包含数据、回测、因子加工等方面的功能
"""

__updated__ = "2023-03-06 11:23:05"
__version__ = "3.8.1"
__updated__ = "2023-03-08 13:36:51"
__version__ = "3.8.2"
__author__ = "chenzongwei"
__author_email__ = "[email protected]"
__url__ = "https://github.com/chen-001/pure_ocean_breeze"
Expand Down
2 changes: 1 addition & 1 deletion pure_ocean_breeze/data/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2023-03-01 02:22:56"
__updated__ = "2023-03-13 13:10:01"

import pandas as pd
import pymysql
Expand Down
34 changes: 31 additions & 3 deletions pure_ocean_breeze/data/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
针对一些不常见的文件格式,读取数据文件的一些工具函数,以及其他数据工具
"""

__updated__ = "2023-03-06 11:22:03"
__updated__ = "2023-03-12 11:54:26"

import os
import pandas as pd
Expand Down Expand Up @@ -993,6 +993,28 @@ def get_list_std(delta_sts: list[pd.DataFrame]) -> pd.DataFrame:
return delta_sts_std


def get_list_std_weighted(delta_sts: list[pd.DataFrame], weights: list) -> pd.DataFrame:
    """Element-wise weighted standard deviation across several DataFrames.

    Parameters
    ----------
    delta_sts : list[pd.DataFrame]
        DataFrames of identical shape, index and columns.
    weights : list
        Relative weights, one per DataFrame; normalized internally so they
        need not sum to 1.

    Returns
    -------
    pd.DataFrame
        Element-wise weighted standard deviation of the inputs.
    """
    total = sum(weights)
    norm_weights = [w / total for w in weights]
    # Weighted mean. (The previous version centered on the UNweighted mean
    # `sum(delta_sts) / len(delta_sts)`, which is inconsistent with the
    # weighted variance computed below.)
    weighted_mean = sum(df * w for df, w in zip(delta_sts, norm_weights))
    weighted_var = sum(
        ((df - weighted_mean) ** 2) * w for df, w in zip(delta_sts, norm_weights)
    )
    return weighted_var**0.5


@do_on_dfs
def to_group(df: pd.DataFrame, group: int = 10) -> pd.DataFrame:
"""把一个index为时间,code为时间的df,每个截面上的值,按照排序分为group组,将值改为组号,从0开始
Expand Down Expand Up @@ -1215,7 +1237,10 @@ def get_fac_via_corr(
.fillna(method=fillna_method)
.dropna(axis=1)
)
corr = df0.corr(method=corr_method)
if corr_method == "spearman":
corr = df0.rank().corr()
else:
corr = df0.corr(method=corr_method)
if abs:
corr = corr.abs()
df0 = corr.mean().to_frame(end)
Expand Down Expand Up @@ -1254,7 +1279,10 @@ def get_fac_via_corr(
.fillna(method=fillna_method)
.dropna(axis=1)
)
corr = df0.corr(method=corr_method)
if corr_method == "spearman":
corr = df0.rank().corr()
else:
corr = df0.corr(method=corr_method)
if abs:
corr = corr.abs()
df0 = corr.mean().to_frame(end)
Expand Down
56 changes: 26 additions & 30 deletions pure_ocean_breeze/data/write_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2023-03-07 18:08:00"
__updated__ = "2023-03-10 12:14:50"

import time

Expand Down Expand Up @@ -51,6 +51,7 @@
import pickledb
import tqdm.auto
from functools import reduce
from typing import Union
import dcube as dc
from tenacity import retry, stop_after_attempt
import questdb.ingress as qdbing
Expand Down Expand Up @@ -1108,9 +1109,7 @@ def database_update_industry_rets_for_stock():
logger.success("股票对应申万一级行业每日收益率已经更新完")



class FactorReader():

class FactorReader:
def __init__(
self,
user: str = "admin",
Expand Down Expand Up @@ -1139,12 +1138,7 @@ def __init__(
self.host = host
self.port = port
self.database = database
infos=self.show_all_factors_information()
self.keys=list(infos.数据键名)
self.names=list(infos.因子名称)
self.keys_names = {k: v for k, v in zip(self.keys, self.names)}
self.names_keys = {k: v for k, v in zip(self.names, self.keys)}


def __connect(self):
conn = pg.connect(
user=self.user,
Expand All @@ -1154,27 +1148,27 @@ def __connect(self):
database=self.database,
)
return conn
def update_factor(self, table_name: str, df: pd.DataFrame):
    """Upload a factor DataFrame to the database, appending only new rows.

    Parameters
    ----------
    table_name : str
        Destination table name.
    df : pd.DataFrame
        Wide factor frame; index is the date axis, columns are security
        codes. It is stacked into long form (date, code, fac) before writing.

    Notes
    -----
    If ``table_name`` already exists, only rows strictly after the table's
    current max(date) are appended; otherwise the whole frame is written.
    """
    tables = self.__get_data("show tables").table.tolist()
    if table_name in tables:
        logger.info(f"{table_name}已经存在了,即将更新")
        # Latest date already stored; keep only strictly newer rows.
        old_end = self.__get_data(f"select max(date) from {table_name}").iloc[0, 0]
        new = df[df.index > old_end]
        new = new.stack().reset_index()
        new.columns = ["date", "code", "fac"]
    else:
        logger.info(f"{table_name}第一次上传")
        new = df.stack().reset_index()
        new.columns = ["date", "code", "fac"]
    self.__write_via_df(new, table_name)

def __write_via_df(
self,
df: pd.DataFrame,
table_name: str,
symbols= None,
tuple_col= None,
symbols=None,
tuple_col=None,
) -> None:
"""通过questdb的python库直接将dataframe写入quested数据库
Expand Down Expand Up @@ -1202,11 +1196,9 @@ def __write_via_df(
else:
with qdbing.Sender(self.host, 9009) as sender:
sender.dataframe(df, table_name=table_name)

@retry(stop=stop_after_attempt(10))
def __get_data(
self, sql_order: str
) -> pd.DataFrame:
def __get_data(self, sql_order: str) -> pd.DataFrame:
"""以sql命令的方式,从数据库中读取数据
Parameters
Expand All @@ -1225,4 +1217,8 @@ def __get_data(
df_data = cursor.fetchall()
columns = [i[0] for i in cursor.description]
df = pd.DataFrame(df_data, columns=columns)
return df
return df

def add_token(self, tokens: list[str], users: list[str]):
    """Write (token, user) pairs into the ``tokenlines`` table.

    Parameters
    ----------
    tokens : list[str]
        Token strings, paired positionally with *users*.
    users : list[str]
        User names, one per token; must be the same length as *tokens*.
    """
    tus = pd.DataFrame({"token": tokens, "user": users})
    self.__write_via_df(tus, "tokenlines")
24 changes: 19 additions & 5 deletions pure_ocean_breeze/labor/comment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__updated__ = "2023-02-03 10:41:10"
__updated__ = "2023-03-10 15:41:06"

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -178,28 +178,35 @@ def make_relative_comments(
如果没指定任何一个指数,将报错
"""

if hs300 and zz500:
if hs300 == 1 and zz500 == 1 and zz1000 == 0 and gz2000 == 0:
net_index = read_index_single("000906.SH").resample("M").last()
else:
net_indexs = []
weights = []
if hs300:
net_index = read_index_single("000300.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(300)
if zz500:
net_index = read_index_single("000905.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(500)
if zz1000:
net_index = read_index_single("000852.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(1000)
if gz2000:
net_index = read_index_single("399303.SZ").resample("M").last()
net_indexs.append(net_index)
weights.append(2000)
if (hs300 + zz500 + zz1000 + gz2000) == 0:
raise IOError("你总得指定一个股票池吧?")
net_index = pd.concat(net_indexs, axis=1)
ret_index = net_index.pct_change()
if isinstance(ret_index, pd.DataFrame):
ret_index = ret_index.mean(axis=1)
ret_index = sum(
[ret_index.iloc[:, i] * weights[i] for i in range(len(weights))]
) / sum(weights)
if day is not None:
ret_index = ret_index[ret_index.index >= pd.Timestamp(day)]
ret = ret_fac - ret_index
Expand Down Expand Up @@ -252,28 +259,35 @@ def make_relative_comments_plot(
`IOError`
如果没指定任何一个指数,将报错
"""
if hs300 and zz500:
if hs300 == 1 and zz500 == 1 and zz1000 == 0 and gz2000 == 0:
net_index = read_index_single("000906.SH").resample("M").last()
else:
net_indexs = []
weights = []
if hs300:
net_index = read_index_single("000300.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(300)
if zz500:
net_index = read_index_single("000905.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(500)
if zz1000:
net_index = read_index_single("000852.SH").resample("M").last()
net_indexs.append(net_index)
weights.append(1000)
if gz2000:
net_index = read_index_single("399303.SZ").resample("M").last()
net_indexs.append(net_index)
weights.append(2000)
if (hs300 + zz500 + zz1000 + gz2000) == 0:
raise IOError("你总得指定一个股票池吧?")
net_index = pd.concat(net_indexs, axis=1)
ret_index = net_index.pct_change()
if isinstance(ret_index, pd.DataFrame):
ret_index = ret_index.mean(axis=1)
ret_index = sum(
[ret_index.iloc[:, i] * weights[i] for i in range(len(weights))]
) / sum(weights)
if day is not None:
ret_index = ret_index[ret_index.index >= pd.Timestamp(day)]
ret = ret_fac - ret_index
Expand Down
Loading

0 comments on commit 2daef39

Please sign in to comment.