From 3221c4113500833cddbe1f5e918564e8ebb54ea2 Mon Sep 17 00:00:00 2001 From: Koyomi781 <1371315815@qq.com> Date: Thu, 26 Sep 2024 10:54:17 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=8E=A8:=20Update=20Bilibili=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/endpoints/bilibili_web.py | 273 +++++++++++++++++++++++---- crawlers/bilibili/web/config.yaml | 2 +- crawlers/bilibili/web/endpoints.py | 16 +- crawlers/bilibili/web/models.py | 39 ++++ crawlers/bilibili/web/utils.py | 185 +++++------------- crawlers/bilibili/web/web_crawler.py | 154 +++++++++++---- crawlers/bilibili/web/wrid.py | 18 -- 7 files changed, 452 insertions(+), 235 deletions(-) create mode 100644 crawlers/bilibili/web/models.py diff --git a/app/api/endpoints/bilibili_web.py b/app/api/endpoints/bilibili_web.py index e0028522b..288290b56 100644 --- a/app/api/endpoints/bilibili_web.py +++ b/app/api/endpoints/bilibili_web.py @@ -46,6 +46,48 @@ async def fetch_one_video(request: Request, raise HTTPException(status_code=status_code, detail=detail.dict()) +# 获取视频流地址 +@router.get("/fetch_video_playurl", response_model=ResponseModel, summary="获取视频流地址/Get video playurl") +async def fetch_one_video(request: Request, + bv_id: str = Query(example="BV1y7411Q7Eq", description="作品id/Video id"), + cid:str = Query(example="171776208", description="作品cid/Video cid")): + """ + # [中文] + ### 用途: + - 获取视频流地址 + ### 参数: + - bv_id: 作品id + - cid: 作品cid + ### 返回: + - 视频流地址 + + # [English] + ### Purpose: + - Get video playurl + ### Parameters: + - bv_id: Video id + - cid: Video cid + ### Return: + - Video playurl + + # [示例/Example] + bv_id = "BV1y7411Q7Eq" + cid = "171776208" + """ + try: + data = await BilibiliWebCrawler.fetch_video_playurl(bv_id, cid) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) + + # 获取用户发布视频作品数据 @router.get("/fetch_user_post_videos", response_model=ResponseModel, summary="获取用户主页作品数据/Get user homepage video data") @@ -385,6 +427,44 @@ async def fetch_collect_folders(request: Request, raise HTTPException(status_code=status_code, detail=detail.dict()) +# 获取视频实时弹幕 +@router.get("/fetch_video_danmaku", response_model=ResponseModel, summary="获取视频实时弹幕/Get Video Danmaku") +async def fetch_one_video(request: Request, + cid: str = Query(example="1639235405", description="作品cid/Video cid")): + """ + # [中文] + ### 用途: + - 获取视频实时弹幕 + ### 参数: + - cid: 作品cid + ### 返回: + - 视频实时弹幕 + + # [English] + ### Purpose: + - Get Video Danmaku + ### Parameters: + - cid: Video cid + ### Return: + - Video Danmaku + + # [示例/Example] + cid = "1639235405" + """ + try: + data = await BilibiliWebCrawler.fetch_video_danmaku(cid) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) + + # 获取指定直播间信息 @router.get("/fetch_live_room_detail", response_model=ResponseModel, summary="获取指定直播间信息/Get information of specified live room") @@ -424,43 +504,86 @@ async def fetch_collect_folders(request: Request, raise HTTPException(status_code=status_code, detail=detail.dict()) -# # 获取指定直播间视频流 -# @router.get("/fetch_live_videos", response_model=ResponseModel, -# summary="获取直播间视频流/Get live video data of specified room") -# async def fetch_collect_folders(request: Request, -# room_id: str = Query(example="22816111", description="直播间ID/Live room ID")): -# """ -# # [中文] -# ### 用途: -# - 获取指定直播间视频流 -# ### 参数: -# - room_id: 直播间ID -# ### 返回: -# - 指定直播间视频流 -# -# # [English] -# ### Purpose: -# - Get live video data of specified room -# ### Parameters: -# - room_id: Live room ID -# ### Return: -# - live video data of specified room -# -# # [示例/Example] -# room_id = "22816111" -# """ -# try: -# data = await BilibiliWebCrawler.fetch_live_videos(room_id) -# return ResponseModel(code=200, -# router=request.url.path, -# data=data) -# except Exception as e: -# status_code = 400 -# detail = ErrorResponseModel(code=status_code, -# router=request.url.path, -# params=dict(request.query_params), -# ) -# raise HTTPException(status_code=status_code, detail=detail.dict()) +# 获取指定直播间视频流 +@router.get("/fetch_live_videos", response_model=ResponseModel, + summary="获取直播间视频流/Get live video data of specified room") +async def fetch_collect_folders(request: Request, + room_id: str = Query(example="1815229528", description="直播间ID/Live room ID")): + """ + # [中文] + ### 用途: + - 获取指定直播间视频流 + ### 参数: + - room_id: 直播间ID + ### 返回: + - 指定直播间视频流 + + # [English] + ### Purpose: + - Get live video data of specified room + ### Parameters: + - room_id: Live room ID + ### Return: + - live video data of specified room + + # [示例/Example] + room_id = "1815229528" + """ + try: + data = await BilibiliWebCrawler.fetch_live_videos(room_id) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) + + +# 获取指定分区正在直播的主播 +@router.get("/fetch_live_streamers", response_model=ResponseModel, + summary="获取指定分区正在直播的主播/Get live streamers of specified live area") +async def fetch_collect_folders(request: Request, + area_id: str = Query(example="9", description="直播分区id/Live area ID"), + pn: int = Query(default=1, description="页码/Page number")): + """ + # [中文] + ### 用途: + - 获取指定分区正在直播的主播 + ### 参数: + - area_id: 直播分区id + - pn: 页码 + ### 返回: + - 指定分区正在直播的主播 + + # [English] + ### Purpose: + - Get live streamers of specified live area + ### Parameters: + - area_id: Live area ID + - pn: Page number + ### Return: + - live streamers of specified live area + + # [示例/Example] + area_id = "9" + pn = 1 + """ + try: + data = await BilibiliWebCrawler.fetch_live_streamers(area_id, pn) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) # 获取所有直播分区列表 @@ -496,3 +619,79 @@ async def fetch_collect_folders(request: Request,): params=dict(request.query_params), ) raise HTTPException(status_code=status_code, detail=detail.dict()) + + +# 通过bv号获得视频aid号 +@router.get("/bv_to_aid", response_model=ResponseModel, summary="通过bv号获得视频aid号/Generate aid by bvid") +async def fetch_one_video(request: Request, + bv_id: str = Query(example="BV1M1421t7hT", description="作品id/Video id")): + """ + # [中文] + ### 用途: + - 通过bv号获得视频aid号 + ### 参数: + - bv_id: 作品id + ### 返回: + - 视频aid号 + + # [English] + ### Purpose: + - Generate aid by bvid + ### Parameters: + - bv_id: Video id + ### Return: + - Video aid + + # [示例/Example] + bv_id = "BV1M1421t7hT" + """ + try: + data = await BilibiliWebCrawler.bv_to_aid(bv_id) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) + + +# 通过bv号获得视频分p信息 +@router.get("/fetch_video_parts", response_model=ResponseModel, summary="通过bv号获得视频分p信息/Get Video Parts By bvid") +async def fetch_one_video(request: Request, + bv_id: str = Query(example="BV1vf421i7hV", description="作品id/Video id")): + """ + # [中文] + ### 用途: + - 通过bv号获得视频分p信息 + ### 参数: + - bv_id: 作品id + ### 返回: + - 视频分p信息 + + # [English] + ### Purpose: + - Get Video Parts By bvid + ### Parameters: + - bv_id: Video id + ### Return: + - Video Parts + + # [示例/Example] + bv_id = "BV1vf421i7hV" + """ + try: + data = await BilibiliWebCrawler.fetch_video_parts(bv_id) + return ResponseModel(code=200, + router=request.url.path, + data=data) + except Exception as e: + status_code = 400 + detail = ErrorResponseModel(code=status_code, + router=request.url.path, + params=dict(request.query_params), + ) + raise HTTPException(status_code=status_code, detail=detail.dict()) diff --git a/crawlers/bilibili/web/config.yaml b/crawlers/bilibili/web/config.yaml index e33b0c44c..03a355a00 100644 --- a/crawlers/bilibili/web/config.yaml +++ b/crawlers/bilibili/web/config.yaml @@ -5,7 +5,7 @@ TokenManager: 'origin': https://www.bilibili.com 'referer': https://space.bilibili.com/ 'origin_2': https://space.bilibili.com - 'cookie': buvid3=D6E58E7B-E3A9-7CD3-7BE5-B5F255788A3020034infoc; b_nut=1723702120; _uuid=6E10D69A10-A711-9DA8-6833-1010262296C24B21337infoc; buvid_fp=6cf2ea8e143bbc49f3b7c0dcb2465fc2; buvid4=748EC8F0-82E2-1672-A286-8445DDB2A80C06110-023112304-; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjM5NjEzMjIsImlhdCI6MTcyMzcwMjA2MiwicGx0IjotMX0.IWOEMLCDKqWAX24rePU-1Qgm9Isf5CU8Tz0O-j6GHfo; bili_ticket_expires=1723961262; CURRENT_FNVAL=4048; rpdid=|(JluY|JJ|RR0J'u~kJ~|kkuY; b_lsid=E10B83DC4_191552166D6; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; browser_resolution=1488-714; sid=873ujj7i + 'cookie': buvid4=748EC8F0-82E2-1672-A286-8445DDB2A80C06110-023112304-; buvid3=73EF1E2E-B7A9-78DD-F2AE-9AB2B476E27638524infoc; b_nut=1727075638; _uuid=77AA4910F-5C8F-9647-7DA3-F583C8108BD7942063infoc; buvid_fp=75b22e5d0c3dbc642b1c80956c62c7da; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjczNDI1NTYsImlhdCI6MTcyNzA4MzI5NiwicGx0IjotMX0.G3pvk6OC4FDWBL7GNgKkkVtUMl29UtNdgok_cANoKsw; bili_ticket_expires=1727342496; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; browser_resolution=1488-712; b_lsid=5B4EDF8A_1921EAA1BDA 'user-agent': Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 proxies: diff --git a/crawlers/bilibili/web/endpoints.py b/crawlers/bilibili/web/endpoints.py index da56f41f2..5378af61c 100644 --- a/crawlers/bilibili/web/endpoints.py +++ b/crawlers/bilibili/web/endpoints.py @@ -11,8 +11,11 @@ class BilibiliAPIEndpoints: # 作品信息 (Post Detail) POST_DETAIL = f"{BILIAPI_DOMAIN}/x/web-interface/view" - # 用户播放列表 (用于爬取用户所有视频数据) - USER_POST = f"{BILIAPI_DOMAIN}/x/v2/medialist/resource/list" + # 作品视频流 + VIDEO_PLAYURL = f"{BILIAPI_DOMAIN}/x/player/wbi/playurl" + + # 用户发布视频作品数据 + USER_POST = f"{BILIAPI_DOMAIN}/x/space/wbi/arc/search" # 收藏夹列表 COLLECT_FOLDERS = f"{BILIAPI_DOMAIN}/x/v3/fav/folder/created/list-all" @@ -35,9 +38,15 @@ class BilibiliAPIEndpoints: # 视频评论 VIDEO_COMMENTS = f"{BILIAPI_DOMAIN}/x/v2/reply" + # 用户动态 + USER_DYNAMIC = f"{BILIAPI_DOMAIN}/x/polymer/web-dynamic/v1/feed/space" + # 评论的回复 COMMENT_REPLY = f"{BILIAPI_DOMAIN}/x/v2/reply/reply" + # 视频分p信息 + VIDEO_PARTS = f"{BILIAPI_DOMAIN}/x/player/pagelist" + # 直播间信息 LIVEROOM_DETAIL = f"{LIVE_DOMAIN}/room/v1/Room/get_info" @@ -47,4 +56,7 @@ class BilibiliAPIEndpoints: # 直播间视频流 LIVE_VIDEOS = f"{LIVE_DOMAIN}/room/v1/Room/playUrl" + # 正在直播的主播 + LIVE_STREAMER = f"{LIVE_DOMAIN}/xlive/web-interface/v1/second/getList" + diff --git a/crawlers/bilibili/web/models.py b/crawlers/bilibili/web/models.py new file mode 100644 index 000000000..3723e6a3e --- /dev/null +++ b/crawlers/bilibili/web/models.py @@ -0,0 +1,39 @@ +import time +from pydantic import BaseModel + + +class BaseRequestsModel(BaseModel): + wts: str = str(round(time.time())) + + +class UserPostVideos(BaseRequestsModel): + dm_img_inter: str = '{"ds":[],"wh":[3557,5674,5],"of":[154,308,154]}' + dm_img_list: list = [] + mid: str + pn: int + ps: str = "20" + + +class UserProfile(BaseRequestsModel): + mid: str + + +class UserDynamic(BaseRequestsModel): + host_mid: str + offset: str + wts: str = str(round(time.time())) + + +class ComPopular(BaseRequestsModel): + pn: int + ps: str = "20" + web_location: str = "333.934" + + +class PlayUrl(BaseRequestsModel): + qn: str + fnval: str = '4048' + bvid: str + cid: str + + diff --git a/crawlers/bilibili/web/utils.py b/crawlers/bilibili/web/utils.py index 742b6a262..20844fc6d 100644 --- a/crawlers/bilibili/web/utils.py +++ b/crawlers/bilibili/web/utils.py @@ -1,159 +1,58 @@ -import time from urllib.parse import urlencode -import random from crawlers.bilibili.web import wrid from crawlers.utils.logger import logger +from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints -# 装饰器 检查是否正确生成endpoint -def Check_gen(func): - def checker(*args, **kwargs): - try: - result = func(*args, **kwargs) - return result - except Exception as e: - raise RuntimeError("生成w_rid失败:{0}, 函数地址:{1}".format(e, func.__name__)) - return checker - -class EndpointModels: - def __init__(self): - # 实例化WridManager - self.wridmanager = WridManager() - # 当前时间戳 - self.wts = round(time.time()) - # 固定inter也能获得结果。如果失效见--WridManager().get_inter - self.inter = '{"ds":[],"wh":[3557,5674,5],"of":[154,308,154]}' - - # 获取wrid示例 通过uid 生成包含w_rid和wts的字典 - @Check_gen - async def get_wrid_wts_by_uid(self, uid: str) -> dict: - params = { - 'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ', - 'dm_img_inter': self.inter, - 'dm_img_list': [], - 'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ', - 'mid': uid, - 'platform': 'web', - 'token': '', - 'web_location': '1550101', - 'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8' - } - # 获取w_rid参数 - w_rid = await self.wridmanager.get_wrid(params=params) - reslut = { - "w_rid": w_rid, - "wts": self.wts - } - return reslut +class EndpointGenerator: + def __init__(self, params: dict): + self.params = params # 获取用户发布视频作品数据 生成enpoint - @Check_gen - async def user_post_videos_endpoint(self, uid: str, pn: int, ps: int = 30) -> str: - # 编码inter - new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") - # 构建请求参数 - params = { - "dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ", - "dm_img_inter": self.inter, - "dm_img_list": [], - "dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ", - "keyword": "", - "mid": uid, - "order": "pubdate", - "order_avoided": "true", - "platform": "web", - "pn": pn, - "ps": ps, - "tid": "0", - "web_location": "1550101", - "wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8", - } - # 获取wrid - w_rid = await self.wridmanager.get_wrid(params=params) - # 将上面结果拼接成最终结果并返回 - final_endpoint = f'https://api.bilibili.com/x/space/wbi/arc/search?mid={uid}&ps={ps}&tid=0&pn={pn}&keyword=&order=pubdate&platform=web&web_location=1550101&order_avoided=true&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}' + async def user_post_videos_endpoint(self) -> str: + # 添加w_rid + endpoint = await WridManager.wrid_model_endpoint(params=self.params) + # 拼接成最终结果并返回 + final_endpoint = BilibiliAPIEndpoints.USER_POST + '?' + endpoint + return final_endpoint + + # 获取视频流地址 生成enpoint + async def video_playurl_endpoint(self) -> str: + # 添加w_rid + endpoint = await WridManager.wrid_model_endpoint(params=self.params) + # 拼接成最终结果并返回 + final_endpoint = BilibiliAPIEndpoints.VIDEO_PLAYURL + '?' + endpoint return final_endpoint # 获取指定用户的信息 生成enpoint - @Check_gen - async def user_profile_endpoint(self, uid: str) -> str: - # 编码inter - new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") - # 构建请求参数 - params = { - 'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ', - 'dm_img_inter': self.inter, - 'dm_img_list': [], - 'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ', - 'mid': uid, - 'platform': 'web', - 'token': '', - 'web_location': '1550101', - 'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8' - } - # 获取wrid - w_rid = await self.wridmanager.get_wrid(params=params) - # 将上面结果拼接成最终字符串并返回 - final_endpoint = f'https://api.bilibili.com/x/space/wbi/acc/info?mid={uid}&token=&platform=web&web_location=1550101&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}' + async def user_profile_endpoint(self) -> str: + # 添加w_rid + endpoint = await WridManager.wrid_model_endpoint(params=self.params) + # 拼接成最终结果并返回 + final_endpoint = BilibiliAPIEndpoints.USER_DETAIL + '?' + endpoint return final_endpoint # 获取综合热门视频信息 生成enpoint - @Check_gen - async def com_popular_endpoint(self, pn: int) -> str: - # 构建请求参数 - params = { - "pn": pn, - "ps": "20", - "web_location": "333.934", - "wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8", - } - # 获取wrid - w_rid = await self.wridmanager.get_wrid(params=params) - # 将上面结果拼接成最终结果并返回 - final_endpoint = f"https://api.bilibili.com/x/web-interface/popular?ps=20&pn={pn}&web_location=333.934&w_rid={w_rid}&wts={self.wts}" + async def com_popular_endpoint(self) -> str: + # 添加w_rid + endpoint = await WridManager.wrid_model_endpoint(params=self.params) + # 拼接成最终结果并返回 + final_endpoint = BilibiliAPIEndpoints.COM_POPULAR + '?' + endpoint return final_endpoint # 获取指定用户动态 - @Check_gen - async def user_dynamic_endpoint(self, uid: str, offset: str): - # 编码inter - new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") - # 构建请求参数 - params = { - "dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ", - "dm_img_inter": self.inter, - "dm_img_list": [], - "dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&features=itemOpusStyle%2ClistOnlyfans%2CopusBigCover%2ConlyfansVote%2CdecorationCard%2CforwardListHidden%2CugcDelete", - "host_mid": uid, - "offset": offset, - "platform": "web", - "timezone_offset": "-480", - "web_location": "333.999", - "wts": self.wts, - "x-bili-device-req-json": "%7B%22platform%22%3A%22web%22%2C%22device%22%3A%22pc%22%7D", - "x-bili-web-req-json": "%7B%22spm_id%22%3A%22333.999%22%7Dea1db124af3c7062474693fa704f4ff8" - } - # 获取wrid - w_rid = await self.wridmanager.get_wrid(params=params) - # 将上面结果拼接成最终结果并返回 - final_endpoint = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset={offset}&host_mid={uid}&timezone_offset=-480&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote,decorationCard,forwardListHidden,ugcDelete&web_location=333.999&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&x-bili-device-req-json=%7B%22platform%22:%22web%22,%22device%22:%22pc%22%7D&x-bili-web-req-json=%7B%22spm_id%22:%22333.999%22%7D&w_rid={w_rid}&wts={self.wts}' + async def user_dynamic_endpoint(self): + # 添加w_rid + endpoint = await WridManager.wrid_model_endpoint(params=self.params) + # 拼接成最终结果并返回 + final_endpoint = BilibiliAPIEndpoints.USER_DYNAMIC + '?' + endpoint return final_endpoint class WridManager: - - def s(self) -> list: - x = random.randint(0, 113) - return [2 * 1488 + 2 * 311 + 3 * x, 4 * 1488 - 311 + x, x] - - def d(self) -> list: - x = random.randint(0, 513) - return [x, 2 * x, x] - - def get_inter(self) -> dict: - return {"ds": [], "wh": self.s(), "of": self.d()} - - async def get_encode_query(self, params: dict) -> str: + @classmethod + async def get_encode_query(cls, params: dict) -> str: + params['wts'] = params['wts'] + "ea1db124af3c7062474693fa704f4ff8" params = dict(sorted(params.items())) # 按照 key 重排参数 # 过滤 value 中的 "!'()*" 字符 params = { @@ -164,13 +63,18 @@ async def get_encode_query(self, params: dict) -> str: query = urlencode(params) # 序列化参数 return query - async def get_wrid(self, params: dict) -> str: - encode_query = await self.get_encode_query(params) + @classmethod + async def wrid_model_endpoint(cls, params: dict) -> str: + wts = params["wts"] + encode_query = await cls.get_encode_query(params) # 获取w_rid参数 w_rid = wrid.get_wrid(e=encode_query) - return w_rid + params["wts"] = wts + params["w_rid"] = w_rid + return "&".join(f"{k}={v}" for k, v in params.items()) -async def bv2av(bv_id:str) -> int: +# BV号转为对应av号 +async def bv2av(bv_id: str) -> int: table = "fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF" s = [11, 10, 3, 8, 4, 6, 2, 9, 5, 7] xor = 177451812 @@ -188,7 +92,6 @@ async def bv2av(bv_id:str) -> int: aid = (r - add) ^ xor return aid - # 响应分析 class ResponseAnalyzer: # 用户收藏夹信息 diff --git a/crawlers/bilibili/web/web_crawler.py b/crawlers/bilibili/web/web_crawler.py index bfb8b6236..f4fb0ed19 100644 --- a/crawlers/bilibili/web/web_crawler.py +++ b/crawlers/bilibili/web/web_crawler.py @@ -6,9 +6,10 @@ # 基础爬虫客户端和哔哩哔哩API端点 from crawlers.base_crawler import BaseCrawler from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints - # 哔哩哔哩工具类 -from crawlers.bilibili.web.utils import EndpointModels, bv2av, ResponseAnalyzer +from crawlers.bilibili.web.utils import EndpointGenerator, bv2av, ResponseAnalyzer +# 数据请求模型 +from crawlers.bilibili.web.models import UserPostVideos, UserProfile, ComPopular, UserDynamic, PlayUrl # 配置文件路径 @@ -50,6 +51,22 @@ async def fetch_one_video(self, bv_id: str) -> dict: response = await crawler.fetch_get_json(endpoint) return response + # 获取视频流地址 + async def fetch_video_playurl(self, bv_id: str, cid: str, qn: str = "64") -> dict: + # 获取请求头信息 + kwargs = await self.get_bilibili_headers() + # 创建基础爬虫对象 + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) + async with base_crawler as crawler: + # 通过模型生成基本请求参数 + params = PlayUrl(bvid=bv_id, cid=cid, qn=qn) + # 创建请求endpoint + generator = EndpointGenerator(params.dict()) + endpoint = await generator.video_playurl_endpoint() + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) + return response + # 获取用户发布视频作品数据 async def fetch_user_post_videos(self, uid: str, pn: int) -> dict: """ @@ -62,8 +79,11 @@ async def fetch_user_post_videos(self, uid: str, pn: int) -> dict: # 创建基础爬虫对象 base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: + # 通过模型生成基本请求参数 + params = UserPostVideos(mid=uid, pn=pn) # 创建请求endpoint - endpoint = await EndpointModels().user_post_videos_endpoint(uid=uid, pn=pn) + generator = EndpointGenerator(params.dict()) + endpoint = await generator.user_post_videos_endpoint() # 发送请求,获取请求响应结果 response = await crawler.fetch_get_json(endpoint) return response @@ -107,9 +127,13 @@ async def fetch_user_profile(self, uid: str) -> dict: # 创建基础爬虫对象 base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: + # 通过模型生成基本请求参数 + params = UserProfile(mid=uid) # 创建请求endpoint - endpoint = await EndpointModels().user_profile_endpoint(uid=uid) - response = await crawler.fetch_get_json(endpoint=endpoint) + generator = EndpointGenerator(params.dict()) + endpoint = await generator.user_profile_endpoint() + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) return response # 获取综合热门视频信息 @@ -119,9 +143,13 @@ async def fetch_com_popular(self, pn: int) -> dict: # 创建基础爬虫对象 base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: + # 通过模型生成基本请求参数 + params = ComPopular(pn=pn) # 创建请求endpoint - endpoint = await EndpointModels().com_popular_endpoint(pn=pn) - response = await crawler.fetch_get_json(endpoint=endpoint) + generator = EndpointGenerator(params.dict()) + endpoint = await generator.com_popular_endpoint() + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) return response # 获取指定视频的评论 @@ -165,12 +193,29 @@ async def fetch_user_dynamic(self, uid: str, offset: str) -> dict: # 创建基础爬虫对象 base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: + # 通过模型生成基本请求参数 + params = UserDynamic(host_mid=uid, offset=offset) # 创建请求endpoint - endpoint = await EndpointModels().user_dynamic_endpoint(uid=uid, offset=offset) + generator = EndpointGenerator(params.dict()) + endpoint = await generator.user_dynamic_endpoint() + print(endpoint) # 发送请求,获取请求响应结果 response = await crawler.fetch_get_json(endpoint) return response + # 获取视频实时弹幕 + async def fetch_video_danmaku(self, cid: str): + # 获取请求头信息 + kwargs = await self.get_bilibili_headers() + # 创建基础爬虫对象 + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) + async with base_crawler as crawler: + # 创建请求endpoint + endpoint = f"https://comment.bilibili.com/{cid}.xml" + # 发送请求,获取请求响应结果 + response = await crawler.fetch_response(endpoint) + return response.text + # 获取指定直播间信息 async def fetch_live_room_detail(self, room_id: str) -> dict: # 获取请求头信息 @@ -185,24 +230,50 @@ async def fetch_live_room_detail(self, room_id: str) -> dict: return response # 获取指定直播间视频流 - # async def fetch_live_videos(self, room_id: str) -> dict: - # # 获取请求头信息 - # kwargs = await self.get_bilibili_headers() - # # 创建基础爬虫对象 - # base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) - # async with base_crawler as crawler: - # # 创建请求endpoint - # endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4" - # # 发送请求,获取请求响应结果 - # response = await crawler.fetch_get_json(endpoint) - # return response + async def fetch_live_videos(self, room_id: str) -> dict: + # 获取请求头信息 + kwargs = await self.get_bilibili_headers() + # 创建基础爬虫对象 + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) + async with base_crawler as crawler: + # 创建请求endpoint + endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4" + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) + return response + + # 获取指定分区正在直播的主播 + async def fetch_live_streamers(self, area_id: str, pn: int): + # 获取请求头信息 + kwargs = await self.get_bilibili_headers() + # 创建基础爬虫对象 + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) + async with base_crawler as crawler: + # 创建请求endpoint + endpoint = f"{BilibiliAPIEndpoints.LIVE_STREAMER}?platform=web&parent_area_id={area_id}&page={pn}" + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) + return response "-------------------------------------------------------utils接口列表-------------------------------------------------------" # 通过bv号获得视频aid号 - async def get_aid(self, bv_id: str) -> int: + async def bv_to_aid(self, bv_id: str) -> int: aid = await bv2av(bv_id=bv_id) return aid + # 通过bv号获得视频分p信息 + async def fetch_video_parts(self, bv_id: str) -> str: + # 获取请求头信息 + kwargs = await self.get_bilibili_headers() + # 创建基础爬虫对象 + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) + async with base_crawler as crawler: + # 创建请求endpoint + endpoint = f"{BilibiliAPIEndpoints.VIDEO_PARTS}?bvid={bv_id}" + # 发送请求,获取请求响应结果 + response = await crawler.fetch_get_json(endpoint) + return response + # 获取所有直播分区列表 async def fetch_all_live_areas(self) -> dict: # 获取请求头信息 @@ -216,12 +287,6 @@ async def fetch_all_live_areas(self) -> dict: response = await crawler.fetch_get_json(endpoint) return response - # 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数) - # (仅示例 不同接口所需要传进去的参数不同)(待改进) - async def uid_to_wrid(self, uid: str) -> dict: - result = await EndpointModels().get_wrid_wts_by_uid(uid=uid) - return result - "-------------------------------------------------------main-------------------------------------------------------" async def main(self): @@ -231,8 +296,14 @@ async def main(self): # result = await self.fetch_one_video(bv_id=bv_id) # print(result) + # 获取视频流地址 + # bv_id = 'BV1y7411Q7Eq' + # cid = '171776208' + # result = await self.fetch_video_playurl(bv_id=bv_id, cid=cid) + # print(result) + # 获取用户发布作品数据 - # uid = '178360345' + # uid = '94510621' # pn = 1 # result = await self.fetch_user_post_videos(uid=uid, pn=pn) # print(result) @@ -273,35 +344,46 @@ async def main(self): # 获取指定用户动态 # uid = "16015678" - # offset = "953154282154098691" # 翻页索引,为空即从最新动态开始,可从获得到的动态数据里面获得 + # offset = "" # 翻页索引,为空即从最新动态开始 # result = await self.fetch_user_dynamic(uid=uid, offset=offset) # print(result) + # 获取视频实时弹幕 + # cid = "1639235405" + # result = await self.fetch_video_danmaku(cid=cid) + # print(result) + # 获取指定直播间信息 - # room_id = "22816111" + # room_id = "1815229528" # result = await self.fetch_live_room_detail(room_id=room_id) # print(result) # 获取直播间视频流 - # room_id = "22816111" - # result = await self.fetch_user_live_videos_by_room_id(room_id=room_id) + # room_id = "1815229528" + # result = await self.fetch_live_videos(room_id=room_id) # print(result) + # 获取指定分区正在直播的主播 + pn = 1 + area_id = '9' + result = await self.fetch_live_streamers(area_id=area_id, pn=pn) + print(result) + "-------------------------------------------------------utils接口列表-------------------------------------------------------" # 通过bv号获得视频aid号 # bv_id = 'BV1M1421t7hT' # aid = await self.get_aid(bv_id=bv_id) # print(aid) + # 通过bv号获得视频分p信息 + # bv_id = "BV1vf421i7hV" + # result = await self.fetch_video_parts(bv_id=bv_id) + # print(result) + # 获取所有直播分区列表 # result = await self.fetch_all_live_areas() # print(result) - # 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数) - # (仅示例 不同接口所需要传进去的参数不同)(待改进) - # uid = '178360345' - # w_rid = await self.uid_to_wrid(uid=uid) - # print(w_rid) if __name__ == '__main__': diff --git a/crawlers/bilibili/web/wrid.py b/crawlers/bilibili/web/wrid.py index 97ed1ef95..385960c67 100644 --- a/crawlers/bilibili/web/wrid.py +++ b/crawlers/bilibili/web/wrid.py @@ -184,21 +184,3 @@ def get_wrid(e): n = None i = twords_to_bytes(o(e, n)) return tbytes_to_hex(i) - -# def test(): -# e = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8" -# n = None -# x = o(e, n) -# i = twords_to_bytes(x) -# return tbytes_to_hex(i) - -# if __name__ == '__main__': -# # test() -# encode_query = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8" -# wrid1 = main(encode_query) -# print(wrid1) -# -# js1 = open('./wrid.js', 'r', encoding='utf-8').read() -# wrid2 = execjs.compile(js1).call('main', encode_query) -# print(wrid2) - From a6933088e2075e92f21df87ecd4aaf6c843b1550 Mon Sep 17 00:00:00 2001 From: Koyomi781 <1371315815@qq.com> Date: Thu, 26 Sep 2024 11:15:54 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=8E=A8:=20Update=20README.md=20add=20?= =?UTF-8?q?sponsors=20info?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 1e38a7999..18dbbb785 100644 --- a/README.md +++ b/README.md @@ -223,6 +223,7 @@ TikHub的部分源代码会开源在Github上,并且会赞助一些开源项 - [x] 获取列表unique_id - 哔哩哔哩网页版API - [x] 获取单个视频详情信息 + - [x] 获取视频流地址 - [x] 获取用户发布视频作品数据 - [x] 获取用户所有收藏夹信息 - [x] 获取指定收藏夹内视频数据 @@ -231,8 +232,12 @@ TikHub的部分源代码会开源在Github上,并且会赞助一些开源项 - [x] 获取指定视频的评论 - [x] 获取视频下指定评论的回复 - [x] 获取指定用户动态 + - [x] 获取视频实时弹幕 - [x] 获取指定直播间信息 + - [x] 获取直播间视频流 + - [x] 获取指定分区正在直播的主播 - [x] 获取所有直播分区列表 + - [x] 通过bv号获得视频分p信息 --- ## 📦调用解析库(已废弃需要更新):