Merge branch 'dev'

# Conflicts: # .gitignore
hugo2046 · Apr 12, 2023 · 2197eb1 · 2197eb1
2 parents d18cf8b + 838f731
commit 2197eb1
Show file tree

Hide file tree

Showing 21 changed files with 2,494 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,9 @@ C-择时类/特征分布建模择时/scr/config.json
 C-择时类/特征分布建模择时/data/billboard.csv
 C-择时类/技术分析算法框架与实战二/data/
 C-择时类/行业指数顶部和底部信号/data/
-B-因子构建类/筹码因子/factor_data/
-B-因子构建类/筹码因子/qlib_data/
-B-因子构建类/筹码因子/cn_data/
+B-因子构建类/凸显理论STR因子/qlib_data
+B-因子构建类/筹码因子/cn_data
+B-因子构建类/筹码因子/qlib_data
+B-因子构建类/筹码因子/factor_data
+B-因子构建类/凸显理论STR因子/qlib_data
+B-因子构建类/凸显理论STR因子/mlruns
diff --git a/B-因子构建类/凸显理论STR因子/img/20230411_1.png b/B-因子构建类/凸显理论STR因子/img/20230411_1.png
diff --git a/B-因子构建类/凸显理论STR因子/scr/__init__.py b/B-因子构建类/凸显理论STR因子/scr/__init__.py
@@ -0,0 +1,7 @@
+from .plotting import (
+    model_performance_graph,
+    plot_factor_autocorr,
+    plot_group_score_return,
+    plot_group_turnover,
+    plot_score_ic,
+)
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/__init__.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/__init__.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/core.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/core.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/factor_analyze.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/factor_analyze.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/factor_expr.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/factor_expr.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/ops.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/ops.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/plotting.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/plotting.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/__pycache__/qlib_workflow.cpython-38.pyc b/B-因子构建类/凸显理论STR因子/scr/__pycache__/qlib_workflow.cpython-38.pyc
diff --git a/B-因子构建类/凸显理论STR因子/scr/core.py b/B-因子构建类/凸显理论STR因子/scr/core.py
@@ -0,0 +1,49 @@
+'''
+Author: hugo2046 [email protected]
+Date: 2023-04-11 15:34:21
+LastEditors: hugo2046 [email protected]
+LastEditTime: 2023-04-11 15:41:59
+Description: 计算组件
+'''
+
+import pandas as pd
+import numpy as np
+
+
+def calc_sigma(
+    df: pd.DataFrame, bench: pd.Series = None, theta: float = 0.1
+) -> pd.DataFrame:
+    """Compute sigma = |r - b| / (|r| + |b| + theta) for every cell of ``df``.

+    ``r`` is a stock return and ``b`` is the benchmark return of the same row.

+    Args:
+        df (pd.DataFrame): pct_chg values; index-datetime columns-code.
+        bench (pd.Series, optional): benchmark return series,
+            index-datetime values-pct_chg. Defaults to None, in which case
+            the row-wise mean of ``df`` (the average return of all stocks in
+            the cross-section) is used as the benchmark.
+        theta (float, optional): constant added to the denominator; with a
+            positive value the denominator cannot be zero even when both
+            returns are zero. Defaults to 0.1 (the previously hard-coded
+            value).

+    Returns:
+        pd.DataFrame: index-datetime columns-code values-sigma
+    """

+    if bench is None:
+        # Series indexed like df's rows: one benchmark value per row.
+        bench = df.mean(axis=1)

+    # axis=0 aligns the benchmark on the row index, so each row of df is
+    # compared against that row's benchmark value.
+    distance: pd.DataFrame = df.sub(bench, axis=0).abs()
+    scale: pd.DataFrame = df.abs().add(bench.abs(), axis=0) + theta

+    return distance.div(scale)
+
+
+def calc_weight(sigma: pd.DataFrame, delta: float = 0.7) -> pd.DataFrame:
+    """Turn sigma values into rank-based weights, normalised per row.

+    Within each row the sigma values are ranked in descending order (the
+    largest sigma gets rank 1), each rank ``k`` is mapped to ``delta ** k``,
+    and the result is divided by its row mean so that the weights of a row
+    average to 1.

+    Args:
+        sigma (pd.DataFrame): index-datetime columns-code values-sigma
+        delta (float, optional): base raised to the power of the rank.
+            Defaults to 0.7.

+    Returns:
+        pd.DataFrame: index-datetime columns-code values-weight
+    """

+    rank: pd.DataFrame = sigma.rank(axis=1, ascending=False)

+    # One vectorised ufunc call over the whole frame instead of a Python-level
+    # apply per row; the values are identical.
+    powered: pd.DataFrame = np.power(delta, rank)
+    row_mean: pd.Series = powered.mean(axis=1)
+    return powered.div(row_mean, axis=0)
diff --git a/B-因子构建类/凸显理论STR因子/scr/factor_analyze.py b/B-因子构建类/凸显理论STR因子/scr/factor_analyze.py
@@ -0,0 +1,68 @@
+'''
+Author: hugo2046 [email protected]
+Date: 2023-04-04 10:49:17
+LastEditors: hugo2046 [email protected]
+LastEditTime: 2023-04-06 20:18:28
+Description: 
+'''
+
+from alphalens.utils import quantize_factor
+import pandas as pd
+from typing import Dict,List
+
+
+
+def clean_factor_data(factor_data: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of the factor data with flat columns and renamed index levels.

+    The input frame is left untouched; all changes are made on a copy.

+    Args:
+        factor_data (pd.DataFrame): MultiIndex level0:datetime level1:instrument;
+            MultiColumns level0:feature level1:label (single-level columns
+            are accepted and kept as they are).

+    Returns:
+        pd.DataFrame: MultiIndex level0:date level1:assert columns->factor next_ret
+    """
+    cleaned: pd.DataFrame = factor_data.copy()

+    cols = cleaned.columns
+    if isinstance(cols, pd.MultiIndex):
+        # Drop the outer column level and keep the remaining one.
+        cleaned.columns = cols.droplevel(0)

+    cleaned.index.names = ["date", "assert"]
+    return cleaned
+
+
+def get_factor_group_returns(
+    clean_factor: pd.DataFrame, quantile: int, no_raise: bool = False
+) -> pd.DataFrame:
+    """Compute per-date group returns for every factor column.

+    Every column other than ``next_ret`` is treated as a factor. Each factor
+    is bucketed with alphalens' ``quantize_factor`` and ``next_ret`` is then
+    pivoted by date and group (``pd.pivot_table`` with its default
+    aggregation).

+    The input frame is not modified: group labels are attached to a
+    per-factor working frame, so repeated calls on the same ``clean_factor``
+    see the same factor columns and return the same result.

+    Args:
+        clean_factor (pd.DataFrame): MultiIndex level0:date level1:assert
+            columns->factor next_ret
+        quantile (int): number of groups, passed to ``quantize_factor`` as
+            ``quantiles``.
+        no_raise (bool, optional): passed through to ``quantize_factor``.
+            Defaults to False.

+    Returns:
+        pd.DataFrame: index-date; MultiColumns level0:factor_name
+            level1:group; values-aggregated next_ret of the group.
+    """
+    factor_cols: List = [col for col in clean_factor.columns if col != "next_ret"]

+    returns_dict: Dict = {}
+    for col in factor_cols:
+        group_col: str = f"{col}_group"

+        # quantize_factor reads a column named "factor"; select the single
+        # column first so only that column is renamed.
+        groups = quantize_factor(
+            clean_factor[[col]].rename(columns={col: "factor"}),
+            quantiles=quantile,
+            no_raise=no_raise,
+        )

+        # Working frame holding only what the pivot needs; the column
+        # assignment aligns the group labels on the index.
+        working: pd.DataFrame = clean_factor[["next_ret"]].copy()
+        working[group_col] = groups

+        returns_dict[col] = pd.pivot_table(
+            working.reset_index(),
+            index="date",
+            columns=group_col,
+            values="next_ret",
+        )

+    df: pd.DataFrame = pd.concat(returns_dict, axis=1)
+    df.index.names = ["date"]
+    df.columns.names = ["factor_name", "group"]
+    return df
+
+
+