1)Improve news recorder 2)add query tag quotes api

zvtvz · Jun 20, 2024 · 4e2b4ab · 4e2b4ab
1 parent d0217ce
commit 4e2b4ab
Show file tree

Hide file tree

Showing 16 changed files with 201 additions and 34 deletions.
diff --git a/api-tests/event/get_stock_news_analysis.http b/api-tests/event/get_stock_news_analysis.http
@@ -0,0 +1,5 @@
+GET http://127.0.0.1:8090/api/event/get_stock_news_analysis
+accept: application/json
+
+
+
diff --git a/api-tests/trading/query_tag_quotes.http b/api-tests/trading/query_tag_quotes.http
@@ -0,0 +1,13 @@
+POST http://127.0.0.1:8090/api/trading/query_tag_quotes
+accept: application/json
+Content-Type: application/json
+
+{
+  "main_tags": [
+    "低空经济",
+    "半导体",
+    "化工",
+    "消费电子"
+  ],
+  "stock_pool_name": "main_line"
+}
diff --git a/examples/query_snippet.py b/examples/query_snippet.py
@@ -5,7 +5,7 @@
 from zvt.contract import Exchange
 from zvt.domain import Stock, BlockStock
 from zvt.recorders.em import em_api
-from zvt.tag import StockTags
+from zvt.tag.tag_schemas import StockTags
 
 
 def query_json():

diff --git a/examples/utils.py b/examples/utils.py
@@ -6,7 +6,7 @@
 import pandas as pd
 
 from zvt.domain import StockNews, Stock, LimitUpInfo
-from zvt.utils import date_time_by_interval, today
+from zvt.utils.time_utils import date_time_by_interval, today
 
 logger = logging.getLogger(__name__)
 

diff --git a/src/zvt/config.json b/src/zvt/config.json
@@ -10,5 +10,7 @@
   "wechat_app_id": "",
   "wechat_app_secrect": "",
   "qmt_mini_data_path": "D:\\qmt\\userdata_mini",
-  "qmt_account_id": ""
+  "qmt_account_id": "",
+  "moonshot_api_key": "",
+  "qwen_api_key": ""
 }
diff --git a/src/zvt/contract/recorder.py b/src/zvt/contract/recorder.py
@@ -5,6 +5,7 @@
 from typing import List
 
 import pandas as pd
+import requests
 from sqlalchemy.orm import Session
 
 from zvt.contract import IntervalLevel
@@ -66,7 +67,7 @@ def __init__(self, force_update: bool = False, sleeping_time: int = 10) -> None:
 
         #: using to do db operations
         self.session = get_db_session(provider=self.provider, data_schema=self.data_schema)
-        self.http_session = None
+        self.http_session = requests.Session()
 
     def run(self):
         raise NotImplementedError

diff --git a/src/zvt/domain/misc/stock_news.py b/src/zvt/domain/misc/stock_news.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from sqlalchemy import Column, String
+from sqlalchemy import Column, String, JSON
 from sqlalchemy.orm import declarative_base
 
 from zvt.contract import Mixin
@@ -11,8 +11,16 @@
 class StockNews(NewsBase, Mixin):
     __tablename__ = "stock_news"
 
+    #: 新闻编号
+    news_code = Column(String)
+    #: 新闻地址
+    news_url = Column(String)
     #: 新闻标题
     news_title = Column(String)
+    #: 新闻内容
+    news_content = Column(String)
+    #: 新闻解读
+    news_analysis = Column(JSON)
 
 
 register_schema(providers=["em"], db_name="stock_news", schema_base=NewsBase, entity_type="stock")

diff --git a/src/zvt/recorders/em/em_api.py b/src/zvt/recorders/em/em_api.py
@@ -705,10 +705,14 @@ def get_block_stocks(block_id, name=""):
     return the_list
 
 
-def get_news(entity_id, ps=200, index=1, start_timestamp=None):
+def get_news(entity_id, ps=200, index=1, start_timestamp=None, session=None, latest_code=None):
     sec_id = to_em_sec_id(entity_id=entity_id)
     url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}"
-    resp = requests.get(url)
+    logger.info(f"get news from: {url}")
+    if session:
+        resp = session.get(url)
+    else:
+        resp = requests.get(url)
     # {
     #     "Art_ShowTime": "2022-02-11 14:29:25",
     #     "Art_Image": "",
@@ -728,13 +732,19 @@ def get_news(entity_id, ps=200, index=1, start_timestamp=None):
             if json_result:
                 news = [
                     {
-                        "id": f'{entity_id}_{item["Art_ShowTime"]}',
+                        "id": f'{entity_id}_{item.get("Art_ShowTime", "")}',
                         "entity_id": entity_id,
-                        "timestamp": to_pd_timestamp(item["Art_ShowTime"]),
-                        "news_title": item["Art_Title"],
+                        "timestamp": to_pd_timestamp(item.get("Art_ShowTime", "")),
+                        "news_code": item.get("Art_Code", ""),
+                        "news_url": item.get("Art_Url", ""),
+                        "news_title": item.get("Art_Title", ""),
                     }
                     for index, item in enumerate(json_result)
-                    if not start_timestamp or (to_pd_timestamp(item["Art_ShowTime"]) >= start_timestamp)
+                    if not start_timestamp
+                    or (
+                        (to_pd_timestamp(item["Art_ShowTime"]) >= start_timestamp)
+                        and (item.get("Art_Code", "") != latest_code)
+                    )
                 ]
                 if len(news) < len(json_result):
                     return news
@@ -743,6 +753,10 @@ def get_news(entity_id, ps=200, index=1, start_timestamp=None):
                     return news + next_data
                 else:
                     return news
+        else:
+            return None
+
+    logger.error(f"request em data code: {resp.status_code}, error: {resp.text}")
 
 
 # utils to transform zvt entity to em entity
@@ -871,12 +885,12 @@ def to_zvt_code(code):
     # pprint(get_free_holders(code='000338', end_date='2021-03-31'))
     # pprint(get_ii_holder(code='000338', report_date='2021-03-31',
     #                      org_type=actor_type_to_org_type(ActorType.corporation)))
-    print(
-        get_ii_summary(code="600519", report_date="2021-03-31", org_type=actor_type_to_org_type(ActorType.corporation))
-    )
+    # print(
+    #     get_ii_summary(code="600519", report_date="2021-03-31", org_type=actor_type_to_org_type(ActorType.corporation))
+    # )
     # df = get_kdata(entity_id="index_sz_399370", level="1wk")
     # df = get_tradable_list(entity_type="stockhk")
-    # df = get_news("stock_sz_300999")
+    df = get_news("stock_sz_300999", ps=1)
     # print(df)
     # print(len(df))
     # df = get_tradable_list(entity_type="block")

diff --git a/src/zvt/recorders/em/misc/em_stock_news_recorder.py b/src/zvt/recorders/em/misc/em_stock_news_recorder.py
@@ -6,7 +6,7 @@
 from zvt.domain import Stock
 from zvt.domain.misc.stock_news import StockNews
 from zvt.recorders.em import em_api
-from zvt.utils.time_utils import to_pd_timestamp, count_interval, now_pd_timestamp
+from zvt.utils.time_utils import count_interval, now_pd_timestamp, recent_year_date
 
 
 class EMStockNewsRecorder(FixedCycleDataRecorder):
@@ -19,23 +19,34 @@ class EMStockNewsRecorder(FixedCycleDataRecorder):
     provider = "em"
 
     def record(self, entity, start, end, size, timestamps):
-        if not start or (start <= to_pd_timestamp("2018-01-01")):
-            start = to_pd_timestamp("2018-01-01")
+        from_date = recent_year_date()
+        if not start or (start < from_date):
+            start = from_date
+
         if count_interval(start, now_pd_timestamp()) <= 30:
             ps = 30
         else:
             ps = 200
-        news = em_api.get_news(entity_id=entity.id, ps=ps, start_timestamp=start)
+
+        latest_news: StockNews = self.get_latest_saved_record(entity=entity)
+
+        news = em_api.get_news(
+            session=self.http_session,
+            entity_id=entity.id,
+            ps=ps,
+            start_timestamp=start,
+            latest_code=latest_news.news_code if latest_news else None,
+        )
         if news:
             df = pd.DataFrame.from_records(news)
             self.logger.info(df)
             df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
 
 
 if __name__ == "__main__":
-    df = Stock.query_data(filters=[Stock.exchange == "bj"], provider="em")
-    entity_ids = df["entity_id"].tolist()
-    r = EMStockNewsRecorder(entity_ids=entity_ids, sleeping_time=0)
+    # df = Stock.query_data(filters=[Stock.exchange == "bj"], provider="em")
+    # entity_ids = df["entity_id"].tolist()
+    r = EMStockNewsRecorder(entity_ids=["stock_sh_600345"], sleeping_time=0)
     r.run()
 # the __all__ is generated
 __all__ = ["EMStockNewsRecorder"]
diff --git a/src/zvt/recorders/em/quotes/em_kdata_recorder.py b/src/zvt/recorders/em/quotes/em_kdata_recorder.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import requests
-
 from zvt.api.kdata import get_kdata_schema
 from zvt.contract import IntervalLevel, AdjustType
 from zvt.contract.api import df_to_db
@@ -84,9 +82,6 @@ def __init__(
         )
 
     def record(self, entity, start, end, size, timestamps):
-        if not self.http_session:
-            self.http_session = requests.Session()
-
         df = get_kdata(
             session=self.http_session, entity_id=entity.id, limit=size, adjust_type=self.adjust_type, level=self.level
         )

diff --git a/src/zvt/rest/trading.py b/src/zvt/rest/trading.py
@@ -15,6 +15,8 @@
     StockQuoteStatsModel,
     QueryStockQuoteSettingModel,
     BuildQueryStockQuoteSettingModel,
+    QueryTagQuoteModel,
+    TagQuoteStatsModel,
 )
 from zvt.trading.trading_schemas import QueryStockQuoteSetting
 from zvt.utils.time_utils import current_date
@@ -44,6 +46,11 @@ def build_query_stock_quote_setting(build_query_stock_quote_setting_model: Build
     )
 
 
+@trading_router.post("/query_tag_quotes", response_model=List[TagQuoteStatsModel])
+def query_trading_plan(query_tag_quote_model: QueryTagQuoteModel):
+    return trading_service.query_tag_quotes(query_tag_quote_model)
+
+
 @trading_router.post("/query_stock_quotes", response_model=StockQuoteStatsModel)
 def query_trading_plan(query_stock_quote_model: QueryStockQuoteModel):
     return trading_service.query_stock_quotes(query_stock_quote_model)

diff --git a/src/zvt/tag/tag_service.py b/src/zvt/tag/tag_service.py
@@ -21,6 +21,7 @@
     get_sub_tags,
     build_initial_stock_pool_info,
     get_concept_main_tag_mapping,
+    build_initial_main_tag_info,
 )
 from zvt.tag.tagger import StockTagger
 from zvt.utils.time_utils import to_pd_timestamp, to_time_str, current_date
@@ -479,9 +480,9 @@ def activate_main_tag_by_sub_tags(activate_sub_tags_model: ActivateSubTagsModel)
 
 if __name__ == "__main__":
     # refresh_all_main_tag_by_sub_tag()
-    activate_main_tag_by_sub_tags(ActivateSubTagsModel(sub_tags=["民爆概念"]))
+    # activate_main_tag_by_sub_tags(ActivateSubTagsModel(sub_tags=["民爆概念"]))
     # activate_main_tag_by_industry(industry="电力行业")
-    # build_initial_main_tag_info()
+    build_initial_main_tag_info()
     # build_initial_sub_tag_info()
     # build_initial_hidden_tag_info()
     # build_initial_stock_pool_info()

diff --git a/src/zvt/tag/tag_utils.py b/src/zvt/tag/tag_utils.py
@@ -203,6 +203,42 @@ def get_stock_pool_names():
     return df["stock_pool_name"].tolist()
 
 
+def match_tag_by_type(alias, tag_type="main_tag"):
+    if tag_type == "main_tag":
+        tags = get_main_tags()
+    elif tag_type == "sub_tag":
+        tags = get_sub_tags()
+    else:
+        assert False
+
+    max_intersection_length = 0
+    max_tag = None
+
+    for tag in tags:
+        intersection_length = len(set(alias) & set(tag))
+        # at least 2 same chars
+        if intersection_length < 2:
+            continue
+
+        if intersection_length > max_intersection_length:
+            max_intersection_length = intersection_length
+            max_tag = tag
+
+    return max_tag
+
+
+def match_tag(alias):
+    tag = match_tag_by_type(alias, tag_type="main_tag")
+    if tag:
+        return "main_tag", tag
+
+    tag = match_tag_by_type(alias, tag_type="sub_tag")
+    if tag:
+        return "sub_tag", tag
+
+    return "new_tag", alias
+
+
 if __name__ == "__main__":
     # with open("missed_concept.json", "w") as json_file:
     #     json.dump(check_missed_concept(), json_file, indent=2, ensure_ascii=False)
@@ -215,7 +251,9 @@ def get_stock_pool_names():
     #         result[tag] = main_tag
     # with open("industry_main_tag_mapping.json", "w") as json_file:
     #     json.dump(result, json_file, indent=2, ensure_ascii=False)
-    print(list(get_industry_list()))
+    build_initial_main_tag_info()
+    build_initial_sub_tag_info()
+    print(list(get_main_tags()))
 
 
 # the __all__ is generated
@@ -242,4 +280,5 @@ def get_stock_pool_names():
     "build_initial_hidden_tag_info",
     "get_hidden_tags",
     "get_stock_pool_names",
+    "match_tag",
 ]
diff --git a/src/zvt/trading/trading_models.py b/src/zvt/trading/trading_models.py
@@ -29,12 +29,18 @@ def stock_pool_name_existed(cls, v: str) -> str:
         return v
 
 
+class QueryTagQuoteModel(CustomModel):
+    stock_pool_name: str
+    main_tags: List[str]
+
+
 class QueryStockQuoteModel(CustomModel):
 
     main_tag: Optional[str] = Field(default=None)
     entity_ids: Optional[List[str]] = Field(default=None)
     stock_pool_name: Optional[str] = Field(default=None)
-    limit: int = Field(default=50)
+    # the amount is not huge, just ignore now
+    # limit: int = Field(default=200)
     order_by_type: Optional[OrderByType] = Field(default=OrderByType.desc)
     order_by_field: Optional[str] = Field(default="change_pct")
 
@@ -77,6 +83,22 @@ class StockQuoteModel(MixinModel):
     hidden_tags: Union[List[str], None] = Field(default=None)
 
 
+class TagQuoteStatsModel(CustomModel):
+    main_tag: str
+    #: 涨停数
+    limit_up_count: int
+    #: 跌停数
+    limit_down_count: int
+    #: 上涨数
+    up_count: int
+    #: 下跌数
+    down_count: int
+    #: 涨幅
+    change_pct: float
+    #: 成交额
+    turnover: float
+
+
 class StockQuoteStatsModel(CustomModel):
     #: 涨停数
     limit_up_count: int
@@ -141,6 +163,7 @@ class QueryTradingPlanModel(BaseModel):
 
 # the __all__ is generated
 __all__ = [
+    "QueryTagQuoteModel",
     "QueryStockQuoteSettingModel",
     "BuildQueryStockQuoteSettingModel",
     "QueryStockQuoteModel",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,5 @@
		GET http://127.0.0.1:8090/api/event/get_stock_news_analysis
		accept: application/json