Skip to content

Commit

Permalink
无条件跳过小于指定大小的视频: 无论能否匹配到番号
Browse files: browse the repository at this point in the history
  • Loading branch information
Yuukiy committed Dec 30, 2023
1 parent 604de6e commit c0ad2c8
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 47 deletions.
55 changes: 25 additions & 30 deletions core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
import ctypes
import logging
import itertools
from sys import platform
from typing import List

Expand All @@ -28,50 +29,44 @@ def scan_movies(root: str) -> List[Movie]:

# 扫描所有影片文件并获取它们的番号
dic = {} # avid: [abspath1, abspath2...]
failed_path_ls = []
small_videos = {}
for dirpath, dirnames, filenames in os.walk(root):
for name in dirnames.copy():
if name.startswith('.') or name in cfg.File.ignore_folder:
dirnames.remove(name)
match_videos, unmatch_videos = {}, {}
for file in filenames:
ext = os.path.splitext(file)[1].lower()
if ext in cfg.File.media_ext:
fullpath = os.path.join(dirpath, file)
dvdid = get_id(file)
# 忽略小于指定大小的文件
filesize = os.path.getsize(fullpath)
if filesize < cfg.File.ignore_video_file_less_than:
small_videos.setdefault(file, []).append(fullpath)
continue
dvdid = get_id(fullpath)
cid = get_cid(fullpath)
# 如果文件名能匹配到cid,那么将cid视为有效id,因为此时dvdid多半是错的
avid = cid if cid else dvdid
if avid:
match_videos[fullpath] = avid
dic.setdefault(avid, []).append(fullpath)
if avid in dic:
dic[avid].append(fullpath)
else:
dic[avid] = [fullpath]
else:
unmatch_videos[fullpath] = None
# 如果一个文件夹内有视频能匹配到番号,同时也有视频无法匹配到番号,则后者很可能是广告
match_cnt, unmatch_cnt = len(match_videos), len(unmatch_videos)
if match_cnt == 0:
# 所有视频都没有匹配到番号,则尝试从文件夹寻找番号并作为所有视频的结果
dvdid = get_id(dirpath)
if dvdid:
for fullpath in unmatch_videos.keys():
dic.setdefault(dvdid, []).append(fullpath)
else:
for fullpath in unmatch_videos.keys():
failed_path_ls.append(fullpath)
fail = Movie('无法识别番号')
fail.files = [fullpath]
failed_items.append(fail)
logger.error(f"无法提取影片番号: '{fullpath}'")
else:
if unmatch_cnt > 0:
for fullpath in unmatch_videos.keys():
filesize = os.path.getsize(fullpath)
if filesize < cfg.File.ignore_video_file_less_than:
logger.debug(f"忽略匹配不到番号的小文件: '{fullpath}'")
else:
failed_path_ls.append(fullpath)
logger.error(f"无法提取影片番号: '{fullpath}'")
for fullpath in failed_path_ls:
fail = Movie('无法识别番号')
fail.files = [fullpath]
failed_items.append(fail)
# 对于前面忽略的视频生成一个简单的提示
small_videos = {k:sorted(v) for k,v in sorted(small_videos.items())}
try_avid = [get_id(i) for i in small_videos.keys()]
has_avid = [name for name, avid in zip(small_videos.keys(), try_avid) if avid]
skipped_files = list(itertools.chain(*small_videos.values()))
if len(has_avid) > 0:
logger.info(f"跳过了 {','.join(has_avid)}{len(skipped_files)}个小于指定大小的视频文件")
else:
logger.info(f"跳过了{len(skipped_files)}个小于指定大小的视频文件")
logger.debug('跳过的视频文件如下:\n' + '\n'.join(skipped_files))
# 检查是否有多部影片对应同一个番号
non_slice_dup = {} # avid: [abspath1, abspath2...]
for avid, files in dic.copy().items():
Expand Down
21 changes: 4 additions & 17 deletions unittest/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@


tmp_folder = 'TMP_' + ''.join(random.choices(string.ascii_uppercase, k=6))
DEFAULT_SIZE = 512*2**20 # 512 MiB


@pytest.fixture
Expand All @@ -21,7 +22,7 @@ def prepare_files(files):
files (list of tuple): 文件列表,仅接受相对路径
"""
if not isinstance(files, dict):
files = {i:1024 for i in files}
files = {i:DEFAULT_SIZE for i in files}
for name, size in files.items():
path = os.path.join(tmp_folder, name)
folder = os.path.split(path)[0]
Expand Down Expand Up @@ -195,30 +196,16 @@ def test_scan_movies__mix_data(prepare_files):


# 文件夹以番号命名,文件夹内同时有带番号的影片和广告
@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 243269631}])
@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': DEFAULT_SIZE, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 243269631}])
def test_scan_movies__1_video_with_ad(prepare_files):
movies = scan_movies(tmp_folder)
assert len(movies) == 1
assert movies[0].dvdid == 'ABC-123'
assert len(movies[0].files) == 1


# 文件夹以番号命名,文件夹内同时有带番号的影片和超出阈值的广告
@pytest.mark.parametrize('files', [{'ABC-123/ABC-123.mp4': 1, 'ABC-123/广告1.mp4': 1024, 'ABC-123/广告2.mp4': 1048576, 'ABC-123/Advertisement.mp4': 2**30}])
def test_scan_movies__1_video_with_large_ad(prepare_files):
before = failed_items.copy()
movies = scan_movies(tmp_folder)
after = failed_items.copy()
failed = [i for i in after if i not in before]
assert len(movies) == 1
assert movies[0].dvdid == 'ABC-123'
assert len(movies[0].files) == 1
assert len(failed) == 1 and len(failed[0].files) == 1
assert os.path.basename(failed[0].files[0]) == 'Advertisement.mp4'


# 文件夹内同时有多部带番号的影片和广告
@pytest.mark.parametrize('files', [{'ABC-123.mp4': 1, 'DEF-456.mp4': 1, '广告1.mp4': 1024, '广告2.mp4': 1048576, 'Advertisement.mp4': 243269631}])
@pytest.mark.parametrize('files', [{'ABC-123.mp4': DEFAULT_SIZE, 'DEF-456.mp4': DEFAULT_SIZE, '广告1.mp4': 1024, '广告2.mp4': 243269631}])
def test_scan_movies__n_video_with_ad(prepare_files):
movies = scan_movies(tmp_folder)
assert len(movies) == 2
Expand Down

0 comments on commit c0ad2c8

Please sign in to comment.