From 1ca6b8c00a30b329681ce7c6fbcee01ae27990da Mon Sep 17 00:00:00 2001 From: Yuukiy <76897913+Yuukiy@users.noreply.github.com> Date: Mon, 1 Apr 2024 21:17:11 +0800 Subject: [PATCH] =?UTF-8?q?javdb:=20=E6=8A=93=E5=8F=96=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=97=B6=E6=A3=80=E6=B5=8BFC2=E5=B0=81=E9=9D=A2=E5=9B=BE?= =?UTF-8?q?=E6=98=AF=E5=90=A6=E7=9C=9F=E7=9A=84=E5=AD=98=E5=9C=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unittest/data/FC2-2735981 (javdb).json | 26 ++++++++++++ unittest/data/FC2-3189680 (javdb).json | 35 +++++----------- unittest/data/FC2-985469 (javdb).json | 57 +++++--------------------- web/base.py | 12 ++++++ web/javdb.py | 7 +++- 5 files changed, 64 insertions(+), 73 deletions(-) create mode 100644 unittest/data/FC2-2735981 (javdb).json diff --git a/unittest/data/FC2-2735981 (javdb).json b/unittest/data/FC2-2735981 (javdb).json new file mode 100644 index 000000000..acc372ad6 --- /dev/null +++ b/unittest/data/FC2-2735981 (javdb).json @@ -0,0 +1,26 @@ +{ + "dvdid": "FC2-2735981", + "cid": null, + "url": "https://javdb365.com/v/d25M0", + "plot": null, + "cover": null, + "big_cover": null, + "genre": null, + "genre_id": null, + "genre_norm": null, + "score": "7.26", + "title": "高額寄せ集め2", + "ori_title": null, + "magnet": null, + "serial": null, + "actress": null, + "actress_pics": null, + "director": null, + "duration": null, + "producer": null, + "publisher": null, + "uncensored": null, + "publish_date": "2022-03-19", + "preview_pics": null, + "preview_video": null +} \ No newline at end of file diff --git a/unittest/data/FC2-3189680 (javdb).json b/unittest/data/FC2-3189680 (javdb).json index c91eaf4f9..7325d0de1 100644 --- a/unittest/data/FC2-3189680 (javdb).json +++ b/unittest/data/FC2-3189680 (javdb).json @@ -1,41 +1,26 @@ { "dvdid": "FC2-3189680", "cid": null, - "url": "https://javdb.com/v/rmVapJ", + "url": "https://javdb365.com/v/rmVapJ", "plot": null, "cover": "https://c0.jdbstatic.com/covers/rm/rmVapJ.jpg", "big_cover": null, - "genre": [ - "私人攝影", - "制服", - "內射" - ], - "genre_id": [ - "fc2?c1=11", - "fc2?c1=19", - "fc2?c1=18" - ], + "genre": null, + "genre_id": null, "genre_norm": null, - "score": "8.04", + "score": "7.98", "title": "【体育館倉庫】某ハーフ子役モデルを高額援助。計2回のゴムなし大量中出し。※4K特典(1時間越え)", "ori_title": null, - "magnet": [ - "magnet:?xt=urn:btih:6d4fed9103648ab2e4a22697f363bd243f7feffa&dn=FC2-3189680", - "magnet:?xt=urn:btih:4dea0950176f3c5af0f14ad96528f79d7bdc48fa&dn=FC2PPV 3189680" - ], + "magnet": null, "serial": null, - "actress": [ - "永瀬ゆい" - ], + "actress": null, "actress_pics": null, "director": null, - "duration": "49", - "producer": "体育館倉庫", + "duration": null, + "producer": null, "publisher": null, "uncensored": null, "publish_date": "2023-02-20", - "preview_pics": [ - "https://c0.jdbstatic.com/samples/rm/rmVapJ_l_0.jpg" - ], - "preview_video": "https://javdb.com/v/rmVapJ" + "preview_pics": null, + "preview_video": null } \ No newline at end of file diff --git a/unittest/data/FC2-985469 (javdb).json b/unittest/data/FC2-985469 (javdb).json index 5a8308e05..743a6abbe 100644 --- a/unittest/data/FC2-985469 (javdb).json +++ b/unittest/data/FC2-985469 (javdb).json @@ -1,63 +1,26 @@ { "dvdid": "FC2-985469", "cid": null, - "url": "https://javdb.com/v/nzA44", + "url": "https://javdb365.com/v/nzA44", "plot": null, "cover": "https://c0.jdbstatic.com/covers/nz/nzA44.jpg", "big_cover": null, - "genre": [ - "私人攝影", - "素人", - "內射", - "原作", - "無碼", - "角色扮演", - "戀物癖", - "可愛" - ], - "genre_id": [ - "fc2?c1=11", - "fc2?c1=26", - "fc2?c1=18", - "fc2?c1=7", - "fc2?c1=24", - "fc2?c1=9", - "fc2?c1=13", - "fc2?c1=20" - ], + "genre": null, + "genre_id": null, "genre_norm": null, - "score": "8.82", + "score": "8.80", "title": "【個人撮影・無】JD2回生ちゃんに中出し!エロマンガ先生のパジャマコスで中出しえっちさせててもらいました♪", "ori_title": null, - "magnet": [ - "magnet:?xt=urn:btih:903ecbf73fd1a466e11e9454388c77c854f2463f&dn=FC2PPV-985469", - "magnet:?xt=urn:btih:851c21dab8d9a4883e8940240107aa94e5e0905d&dn=FC2-PPV-985469", - "magnet:?xt=urn:btih:649d36f0fd470f5950a1edc6fbb5922a1a82a39e&dn=[7sht.me]FC2PPV-985469", - "magnet:?xt=urn:btih:b2a57edade565821bd611b89dfca1f8d0e63e891&dn=fc2-985469", - "magnet:?xt=urn:btih:6be7ae12c4be3a0f7e27d1720b4e23e1134dcfd1&dn=FC2-985469", - "magnet:?xt=urn:btih:e2eaaaa085d14a010ff014991f3ef8ac48954920&dn=FC2-PPV-985469-HD", - "magnet:?xt=urn:btih:fc56fea5e868c3c28f2e3cd1731adc347c55267c&dn=FC2-PPV-983579-985469-纱雾" - ], + "magnet": null, "serial": null, - "actress": [], + "actress": null, "actress_pics": null, "director": null, - "duration": "113", - "producer": "COS☆ぱこ", + "duration": null, + "producer": null, "publisher": null, "uncensored": null, "publish_date": "2018-11-23", - "preview_pics": [ - "https://c0.jdbstatic.com/samples/nz/nzA44_l_0.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_1.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_2.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_3.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_4.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_5.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_6.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_7.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_8.jpg", - "https://c0.jdbstatic.com/samples/nz/nzA44_l_9.jpg" - ], - "preview_video": "https://javdb.com/v/nzA44" + "preview_pics": null, + "preview_video": null } \ No newline at end of file diff --git a/web/base.py b/web/base.py index 163e1bcd1..b2e0bd278 100644 --- a/web/base.py +++ b/web/base.py @@ -43,10 +43,12 @@ def __init__(self, use_scraper=False) -> None: self.scraper = None self.__get = requests.get self.__post = requests.post + self.__head = requests.head else: self.scraper = cloudscraper.create_scraper() self.__get = self._scraper_monitor(self.scraper.get) self.__post = self._scraper_monitor(self.scraper.post) + self.__head = self._scraper_monitor(self.scraper.head) def _scraper_monitor(self, func): """监控cloudscraper的工作状态,遇到不支持的Challenge时尝试退回常规的requests请求""" @@ -82,6 +84,16 @@ def post(self, url, data, delay_raise=False): r.raise_for_status() return r + def head(self, url, delay_raise=True): + r = self.__head(url, + headers=self.headers, + proxies=self.proxies, + cookies=self.cookies, + timeout=self.timeout) + if not delay_raise: + r.raise_for_status() + return r + def get_html(self, url): r = self.get(url) html = resp2html(r) diff --git a/web/javdb.py b/web/javdb.py index 33edbc68b..e725181e0 100644 --- a/web/javdb.py +++ b/web/javdb.py @@ -206,6 +206,11 @@ def parse_clean_data(movie: MovieInfo): """解析指定番号的影片数据并进行清洗""" try: parse_data(movie) + # 检查封面URL是否真的存在对应图片 + if movie.cover is not None: + r = request.head(movie.cover) + if r.status_code != 200: + movie.cover = None except SiteBlocked: raise logger.error('JavDB: 可能触发了反爬虫机制,请稍后再试') @@ -326,7 +331,7 @@ def collect_actress_alias(type=0, use_original=True): logger.root.handlers[1].level = logging.DEBUG # collect_actress_alias() - movie = MovieInfo('JUQ-471') + movie = MovieInfo('FC2-2735981') try: parse_clean_data(movie) print(movie)