diff --git a/.gitignore b/.gitignore index 1310f59..86f80d6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ cache/* .vscode test.json *.pyc +/list.txt +/venv/ +/.idea diff --git a/crawler.py b/crawler.py index 228d827..517d05c 100644 --- a/crawler.py +++ b/crawler.py @@ -6,7 +6,7 @@ if __name__ == "__main__": parsed_json = object - if (len(sys.argv) == 1 or sys.argv[1]=='-t'): + if len(sys.argv) == 1 or sys.argv[1] == '-t': print("----------") print("Start to create the latest gadio video...") id = Crawler.get_latest() @@ -17,8 +17,6 @@ parsed_json = Crawler.crawl(radio_id) radio = Radio.load_from_json(parsed_json) Crawler.get_headers(radio) - if (len(sys.argv) >= 2): - if ('-t' in sys.argv): - {} - else: - Crawler.download_assets(radio, os.curdir+os.sep+'cache') + if len(sys.argv) >= 2: + if '-t' not in sys.argv: + Crawler.download_assets(radio, os.curdir + os.sep + 'cache') diff --git a/gadio/__init__.py b/gadio/__init__.py index 32a0cc4..ee7fe67 100644 --- a/gadio/__init__.py +++ b/gadio/__init__.py @@ -1,4 +1,4 @@ from gadio.crawlers import * from gadio.configs import * from gadio.media import * -from gadio.models import * \ No newline at end of file +from gadio.models import * diff --git a/gadio/configs/config.py b/gadio/configs/config.py index db52dd4..8edf73d 100644 --- a/gadio/configs/config.py +++ b/gadio/configs/config.py @@ -1,18 +1,18 @@ import os config = { - 'fps':2, - 'width':1920, - 'height':1080, + 'fps': 2, + 'width': 1920, + 'height': 1080, 'title_font_size': 53, 'content_font_size': 36, - 'gcores_title_color': (255,255,255,246), - 'gcores_content_color': (255,255,255,205), - 'background_color':"#FFFFFF", + 'gcores_title_color': (255, 255, 255, 246), + 'gcores_content_color': (255, 255, 255, 205), + 'background_color': "#FFFFFF", 'title_font': os.sep.join([os.curdir, 'gadio', 'utils', 'PingFang-Heavy.ttf']), 'content_font': os.sep.join([os.curdir, 'gadio', 'utils', 'PingFang-Medium.ttf']), 'gcores_logo_name': os.sep.join([os.curdir, 'gadio', 'utils', 'gcores.png']), 'gcores_qr_name': os.sep.join([os.curdir, 'gadio', 'utils', 'qr.png']), - 'test':False, - 'start_offset':5 -} \ No newline at end of file + 'test': False, + 'start_offset': 5 +} diff --git a/gadio/crawlers/crawler.py b/gadio/crawlers/crawler.py index 3166afe..0940dfa 100644 --- a/gadio/crawlers/crawler.py +++ b/gadio/crawlers/crawler.py @@ -1,23 +1,18 @@ -import json -import os -import re -import sys import urllib.request import requests from MyQR import myqr +from PIL import Image -from gadio.configs.config import config -from gadio.models.asset import Image, Audio -from gadio.models.radio import Radio -from gadio.models.user import User -from gadio.models.page import Page from gadio.configs.api import api +from gadio.models.asset import Audio +from gadio.models.radio import Radio from gadio.text.text import * -#api = "https://www.gcores.com/gapi/v1/radios/112068?include=category,media,djs,media.timelines" -class Crawler(): +# api = "https://www.gcores.com/gapi/v1/radios/112068?include=category,media,djs,media.timelines" + +class Crawler: @staticmethod def crawl(gadio_id: int): @@ -30,10 +25,10 @@ def crawl(gadio_id: int): print("Extracting information from ", gadio_id) content = requests.get(url).content parsed = json.loads(content) - #print(parsed) + # print(parsed) dictionary = dict() for i in parsed['included']: - if (i['type'] in dictionary.keys()): + if i['type'] in dictionary.keys(): dictionary[i['type']] += 1 else: dictionary[i['type']] = 1 @@ -42,8 +37,8 @@ def crawl(gadio_id: int): if not os.path.exists(cache_dir): print("Folder", cache_dir, 'does not exist. Creating...') os.makedirs(cache_dir) - with open(cache_dir+os.sep+'data.json', 'w', encoding='utf-8') as outfile: - #print(cache_dir) + with open(cache_dir + os.sep + 'data.json', 'w', encoding='utf-8') as outfile: + # print(cache_dir) json.dump(parsed, outfile, ensure_ascii=False, indent=4) return parsed @@ -54,7 +49,7 @@ def download_image(image: Image, file_dir: str): print("Folder", file_dir, 'does not exist. Creating...') os.makedirs(file_dir) print("Saving image to", image.local_name) - r = urllib.request.urlretrieve(image.image_url, file_dir + os.sep + image.local_name) + urllib.request.urlretrieve(image.image_url, file_dir + os.sep + image.local_name) return 1 except Exception as e: print("Error", e) @@ -67,15 +62,14 @@ def download_audio(audio: Audio, file_dir: str): print("Folder", file_dir, 'does not exist. Creating...') os.makedirs(file_dir) print("Saving audio to", audio.local_name) - r = urllib.request.urlretrieve(audio.audio_url, file_dir + os.sep + audio.local_name) + urllib.request.urlretrieve(audio.audio_url, file_dir + os.sep + audio.local_name) return 1 except Exception as e: print("Error", e) return 0 - return @staticmethod - def download_assets(radio: Radio, file_dir: str, with_quote: bool): + def download_assets(radio: Radio, file_dir: str, with_quote: bool = False): id = str(radio.radio_id) file_dir = file_dir + os.sep + id Crawler.download_image(radio.cover, file_dir) @@ -103,21 +97,20 @@ def get_headers(radio: Radio): offset = config['start_offset'] headers = [] for i in radio.timestamps: - if (i not in radio.timeline.keys()): + if i not in radio.timeline.keys(): continue else: seconds = i + 1 if i == 0 else i - time = seconds_to_time(str(seconds + offset)) + time = seconds_to_time(seconds + offset) headers.append(time + " " + radio.timeline[i].title) with open(os.sep.join(['.', "output", radio.radio_id + "_headers.txt"]), 'w+', encoding='utf-8') as links: length = 0 - last = "" for header in headers: - line= header+"⭐" - length+=len(line) - if(length>990): # Bilibili comment length 1000 + line = header + "⭐" + length += len(line) + if length > 990: # Bilibili comment length 1000 links.write("\n\n") - length=len(line) + length = len(line) links.writelines(line) links.close() @@ -141,4 +134,4 @@ def make_quote_qr_image(text, name, file_dir): save_dir=file_dir, ) except: - print("wrong qr code") \ No newline at end of file + print("wrong qr code") diff --git a/gadio/media/frame.py b/gadio/media/frame.py index 61bb0f1..66acb96 100644 --- a/gadio/media/frame.py +++ b/gadio/media/frame.py @@ -1,9 +1,8 @@ - import os import cv2 import numpy as np -from cv2 import VideoWriter_fourcc +import pillow_avif from PIL import Image, ImageDraw, ImageFont from gadio.configs.config import config @@ -12,7 +11,7 @@ from gadio.models.page import Page -class Frame(): +class Frame: width = config['width'] height = config['height'] title_font = ImageFont.truetype( @@ -23,7 +22,7 @@ class Frame(): content_wrapper = Wrapper(content_font) def __init__(self, *args, **kwargs): - return super().__init__(*args, **kwargs) + super().__init__(*args, **kwargs) @staticmethod def create_cover(radio: Radio): @@ -62,7 +61,7 @@ def create_page(page: Page, radio: Radio): np.array -- An numpy array representing cv2 image. """ image_suffix = page.image.suffix - if (image_suffix == "" or image_suffix.lower() == '.gif'): + if image_suffix == "" or image_suffix.lower() == '.gif': # If image is not found or image is gif, load cover as background image_dir = os.sep.join(['cache', str(radio.radio_id), radio.cover.local_name]) else: @@ -70,6 +69,15 @@ def create_page(page: Page, radio: Radio): qr_dir = os.sep.join(['cache', str(radio.radio_id), 'qr_quotes', page.image.local_name.split('.')[0] + ".png"]) image = cv2.imread(image_dir) + if image is None: + match image_suffix: + case '.avif': + pil_image = Image.open(image_dir) + image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR) + case _: + # If image can't load, then load cover as background + image = cv2.imread(os.sep.join(['cache', str(radio.radio_id), radio.cover.local_name])) + print(image_dir) image_suffix = page.image.suffix background_image = Frame.expand_frame(image, Frame.width, Frame.height) background_image = cv2.GaussianBlur(background_image, (255, 255), 255) @@ -85,29 +93,29 @@ def create_page(page: Page, radio: Radio): mask = Image.new('RGBA', (Frame.width, Frame.height), color=(0, 0, 0, 128)) frame.paste(mask, (0, 0), mask=mask) - left_offset = int(round(245/1920 * Frame.width)) + int(round((550 - content_image.shape[1])/2)) - top_offset = int(round(210/1080 * Frame.height)) + int(round((550 - content_image.shape[0])/2)) + left_offset = int(round(245 / 1920 * Frame.width)) + int(round((550 - content_image.shape[1]) / 2)) + top_offset = int(round(210 / 1080 * Frame.height)) + int(round((550 - content_image.shape[0]) / 2)) content_frame = Image.fromarray(content_rgb) content_image_mask = Image.new('RGBA', (content_image.shape[1], content_image.shape[0]), color=(0, 0, 0, 26)) - if (image_suffix == "" or image_suffix.lower() == '.gif'): + if image_suffix == "" or image_suffix.lower() == '.gif': # if image is not properly downloaded or is gif, no content image should be added. print("GIF will not be rendered in this page...") else: frame.paste(content_frame, (left_offset, top_offset)) - frame.paste(content_image_mask, (left_offset, top_offset), mask = content_image_mask) + frame.paste(content_image_mask, (left_offset, top_offset), mask=content_image_mask) try: logo_image = Image.open(config['gcores_logo_name']).convert('RGBA') qr_image = Image.open(config['gcores_qr_name']).convert('RGBA') - logo_left_offset = int(round(120/1920 * Frame.width)) - logo_top_offset = int(round(52/1080 * Frame.height)) + logo_left_offset = int(round(120 / 1920 * Frame.width)) + logo_top_offset = int(round(52 / 1080 * Frame.height)) qr_left_offset = logo_left_offset - qr_top_offset = int(round(917/1080 * Frame.height)) + qr_top_offset = int(round(917 / 1080 * Frame.height)) frame.paste(logo_image, (logo_left_offset, logo_top_offset), mask=logo_image) frame.paste(qr_image, (qr_left_offset, qr_top_offset), mask=qr_image) if os.path.exists(qr_dir): - qr_right_offset = int(round(1700/1920 * Frame.width)) + qr_right_offset = int(round(1700 / 1920 * Frame.width)) page_qr_image = Image.open(qr_dir).convert('RGBA') page_qr_image = page_qr_image.resize((86, 86)) frame.paste(page_qr_image, (qr_right_offset, qr_top_offset), mask=page_qr_image) @@ -122,28 +130,36 @@ def create_page(page: Page, radio: Radio): print('Title:', title_string) raw_content = page.content content_string = Frame.content_wrapper.wrap_string(raw_content, text_width_limit) - #print(content_string) + # print(content_string) # Dimensions for text layout text_top_offset = int(round(260 / 1080 * Frame.height)) text_left_offset = int(round(920 / 1920 * Frame.width)) - title_height = Frame.title_font.getsize_multiline(title_string)[1] + title_left, title_top, title_right, title_bottom = Frame.title_font.getbbox(title_string) + title_height = title_bottom - title_top + # title_height = Frame.title_font.getsize_multiline(title_string)[1] title_space_bottom = int(round(Frame.title_font.size * 0.9)) content_height_limit = int(round(574 / 1080 * Frame.height)) - title_height - title_space_bottom content_space = int(round(Frame.content_font.size * 0.8)) - actual_content_height = Frame.content_font.getsize_multiline(content_string, spacing=content_space)[1] - while (actual_content_height > content_height_limit): + content_left, content_top, content_right, content_bottom = Frame.title_font.getbbox(content_string) + actual_content_height = content_bottom - content_top + # actual_content_height = Frame.content_font.getsize_multiline(content_string, spacing=content_space)[1] + while actual_content_height > content_height_limit: Frame.content_font = Frame.shrink_font(Frame.content_font, config['content_font']) content_space = int(round(Frame.content_font.size * 0.8)) content_wrapper = Wrapper(Frame.content_font) content_string = content_wrapper.wrap_string(raw_content, text_width_limit) - actual_content_height = Frame.content_font.getsize_multiline(content_string, spacing=content_space)[1] + content_left, content_top, content_right, content_bottom = Frame.title_font.getbbox(content_string) + actual_content_height = content_bottom - content_top + # actual_content_height = Frame.content_font.getsize_multiline(content_string, spacing=content_space)[1] # print(actual_content_height) print(content_string) - draw.text((text_left_offset, text_top_offset), title_string, config['gcores_title_color'], font=Frame.title_font) - draw.text((text_left_offset, text_top_offset + title_height + title_space_bottom), content_string, config['gcores_content_color'], font=Frame.content_font, spacing=content_space) + draw.text((text_left_offset, text_top_offset), title_string, config['gcores_title_color'], + font=Frame.title_font) + draw.text((text_left_offset, text_top_offset + title_height + title_space_bottom), content_string, + config['gcores_content_color'], font=Frame.content_font, spacing=content_space) # Reset content_wrapper and content_font Frame.content_font = ImageFont.truetype(config['content_font'], config['content_font_size'], encoding="utf-8") @@ -151,8 +167,7 @@ def create_page(page: Page, radio: Radio): cv2charimg = np.array(frame) result = cv2.cvtColor(cv2charimg, cv2.COLOR_RGB2BGR) - # cv2.imwrite('test.jpg',result) - # cv2.waitKey() + return result @staticmethod @@ -176,8 +191,8 @@ def expand_frame(image, target_width, target_height): ratio = min(width_ratio, height_ratio) # in case width or height smaller than target after rounding. actual_width = max(int(image.shape[1] / ratio), target_width) - actuai_height = max(int(image.shape[0] / ratio), target_height) - result = cv2.resize(image, (actual_width, actuai_height), + actual_height = max(int(image.shape[0] / ratio), target_height) + result = cv2.resize(image, (actual_width, actual_height), interpolation=cv2.INTER_CUBIC) left = int((result.shape[1] - target_width) / 2) right = left + target_width @@ -209,5 +224,5 @@ def shrink_frame(image, target_width, target_height): @staticmethod def shrink_font(font, font_family): - result_font = ImageFont.truetype(font_family, font.size-2, encoding="utf-8") + result_font = ImageFont.truetype(font_family, font.size - 2, encoding="utf-8") return result_font diff --git a/gadio/media/video.py b/gadio/media/video.py index 807f778..0c38ec8 100644 --- a/gadio/media/video.py +++ b/gadio/media/video.py @@ -1,31 +1,31 @@ -from cv2 import VideoWriter, VideoWriter_fourcc +from cv2 import VideoWriter from moviepy.editor import * from gadio.configs.config import config from gadio.media.frame import Frame from gadio.models.radio import Radio +import re - -class Video(): - - fourcc = VideoWriter_fourcc(*'mp4v') - fps = config['fps'] +class Video: + fourcc = VideoWriter.fourcc(*'mp4v') + fps = int(config['fps']) width = config['width'] height = config['height'] output_dir = os.sep.join(['.', 'output']) def __init__(self, *args, **kwargs): - return super().__init__(*args, **kwargs) + super().__init__(*args, **kwargs) @staticmethod def create_video(radio: Radio): if not os.path.exists(Video.output_dir): print("Folder", Video.output_dir, 'does not exist. Creating...') os.makedirs(Video.output_dir) - video = VideoWriter(Video.output_dir + os.sep + str(radio.radio_id) + '_temp.mp4', Video.fourcc, Video.fps, (Video.width, Video.height)) + video = VideoWriter(Video.output_dir + os.sep + str(radio.radio_id) + '_temp.mp4', Video.fourcc, Video.fps, + (Video.width, Video.height)) clip_count = len(radio.timestamps) - 1 for i in range(clip_count): - if (radio.timestamps[i] not in radio.timeline.keys()): + if radio.timestamps[i] not in radio.timeline.keys(): print(radio.timestamps[i], "has no corresponding image, load cover as backup") frame = Frame.create_cover(radio) else: @@ -35,12 +35,18 @@ def create_video(radio: Radio): video.write(frame) video.release() + cache_dir = os.sep.join(['.', 'cache', str(radio.radio_id)]) video_clip = VideoFileClip(Video.output_dir + os.sep + str(radio.radio_id) + '_temp.mp4') print(video_clip.duration) audio_clip = AudioFileClip(os.sep.join(['.', 'cache', str(radio.radio_id), 'audio', radio.audio.local_name])) video_clip.audio = audio_clip if config['test']: video_clip = video_clip.subclip(0, min(200, video_clip.duration)) - video_clip.write_videofile(Video.output_dir +os.sep+ str(radio.radio_id)+" "+radio.title +".mp4", fps=Video.fps) + + # 删除字符串中的特殊字符 + valid_title = re.sub(r'[\\/*?:"<>|]', '', radio.title.replace('|', '丨')) + file_name = '{} {}.mp4'.format(str(radio.radio_id), valid_title) + file_path = os.sep.join([Video.output_dir, file_name]) + video_clip.write_videofile(file_path, fps=Video.fps) print("{} finished!".format(radio.title)) - # os.remove(Video.output_dir+os.sep+str(radio.radio_id)+'_temp.mp4') + os.remove(Video.output_dir+os.sep+str(radio.radio_id)+'_temp.mp4') diff --git a/gadio/models/asset.py b/gadio/models/asset.py index a1f8748..45d1af7 100644 --- a/gadio/models/asset.py +++ b/gadio/models/asset.py @@ -2,7 +2,7 @@ from gadio.text import text as text -class Image(): +class Image: def __init__(self, image_id, local_name): self.image_id = image_id @@ -18,7 +18,7 @@ def load_from_url(cls, image_url, local_name): return instance -class Audio(): +class Audio: def __init__(self, audio_id, local_name): self.audio_id = audio_id self.audio_url = api['audio_url_template'].format(asset=self.audio_id) diff --git a/gadio/models/page.py b/gadio/models/page.py index 3d3403d..44c4c1d 100644 --- a/gadio/models/page.py +++ b/gadio/models/page.py @@ -1,9 +1,7 @@ -from gadio.configs.api import api -from gadio.text import text as text from gadio.models.asset import Image -class Page(): +class Page: def __init__(self, start_time, image_id, title, content, quote_href): """Initialize a page with attributes @@ -25,7 +23,7 @@ def __init__(self, start_time, image_id, title, content, quote_href): self.title = title self.content = content self.quote_href = quote_href - self.image = Image(image_id=image_id, local_name = self.start_time) + self.image = Image(image_id=image_id, local_name=self.start_time) @classmethod def load_from_json(cls, attributes: dict): diff --git a/gadio/models/radio.py b/gadio/models/radio.py index f7ea978..db2d33f 100644 --- a/gadio/models/radio.py +++ b/gadio/models/radio.py @@ -3,8 +3,8 @@ from gadio.models.user import User -class Radio(): - +class Radio: + def __init__(self): self.users = list() self.title = "" @@ -17,8 +17,8 @@ def __init__(self): self.timestamps = list() @classmethod - def load_from_json(cls, parsed_json:str): - #https://www.gcores.com/gapi/v1/radios/112068?include=category,media,djs,media.timelines + def load_from_json(cls, parsed_json: str): + # https://www.gcores.com/gapi/v1/radios/112068?include=category,media,djs,media.timelines radio = cls() try: radio.radio_id = parsed_json['data']['id'] @@ -31,28 +31,28 @@ def load_from_json(cls, parsed_json:str): for item in parsed_json['included']: # Set radio category - if (item['type'] == "categories"): + if item['type'] == "categories": radio.category = item['attributes']['name'] # Set radio audio - elif (item['type'] == 'medias'): + elif item['type'] == 'medias': radio.audio = Audio(item['attributes']['audio'], radio.radio_id) # append radio pages - elif (item['type'] == 'timelines'): + elif item['type'] == 'timelines': page = Page.load_from_json(item['attributes']) radio.timeline[page.start_time] = page - radio.timestamps.append((int)(page.start_time)) + radio.timestamps.append(int(page.start_time)) # append radio users - elif (item['type'] == 'users'): + elif item['type'] == 'users': user = User.load_from_json(item) radio.users.append(user) - + else: continue - - if (0 not in radio.timeline.keys()): + + if 0 not in radio.timeline.keys(): radio.timestamps.append(0) radio.timestamps.append(radio.duration) list.sort(radio.timestamps) diff --git a/gadio/models/user.py b/gadio/models/user.py index 4116030..eac1283 100644 --- a/gadio/models/user.py +++ b/gadio/models/user.py @@ -1,7 +1,7 @@ from gadio.models.asset import Image -class User(): +class User: def __init__(self, user_id, nickname, image_id): """Initialize a gadio dj as user @@ -30,17 +30,17 @@ def load_from_json(cls, parsed_json: str): User -- a instance initialized with json attributes """ try: - json_type = parsed_json['type'] + parsed_json['type'] except: raise LookupError('Incorrect json passed to user') - if (parsed_json['type'] != "users"): + if parsed_json['type'] != "users": raise AttributeError('Json passed to user is not for user') try: instance = cls(user_id=parsed_json['id'], - nickname=parsed_json['attributes']['nickname'], - image_id=parsed_json['attributes']['thumb']) + nickname=parsed_json['attributes']['nickname'], + image_id=parsed_json['attributes']['thumb']) return instance except: raise KeyError('Json does not include necessary user attributes') diff --git a/gadio/text/text.py b/gadio/text/text.py index 836eb3d..7945a7a 100644 --- a/gadio/text/text.py +++ b/gadio/text/text.py @@ -1,14 +1,13 @@ import json import os import re -import urllib +import urllib.parse -from PIL import Image, ImageDraw, ImageFont from gadio.configs.config import config def find_image_suffix(image_name: str): - #print(image_name) + # print(image_name) try: file_suffix = re.match(".*(\..*)", image_name).group(1) return file_suffix @@ -16,57 +15,66 @@ def find_image_suffix(image_name: str): print("Invalid picture id") return "" + def is_alpha(word): try: return word.encode('ascii').isalpha() except: return False + def is_non_start(string): return string in """!%),.:;>?]}¢¨°·ˇˉ―‖’”…‰′″›℃∶、。〃〉》」』】〕〗〞︶︺︾﹀﹄﹚﹜﹞!"%'),.:;?]`|}~¢""" + def is_non_end(string): return string in """$([{£¥·‘“〈《「『【〔〖〝﹙﹛﹝$(.[{£¥""" -def is_character(string:str): - return ((not is_alnum(string)) and (not is_non_start(string)) and (not is_non_end(string))) -def is_alnum(string:str): +def is_character(string: str): + return (not is_alnum(string)) and (not is_non_start(string)) and (not is_non_end(string)) + + +def is_alnum(string: str): return string in "1234567890abcdefghijklmnopqrstuvwxyzßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþąćĉčďęěĝğĥıĵłńňœřśŝşšťŭůźżžABCDEFGHIJKLMNOPQRSTUVWXYZSSÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞĄĆĈČĎĘĚĜĞĤIĴŁŃŇŒŘŚŜŞŠŤŬŮŹŻŽ" -def convert_to_string(string:str): + +def convert_to_string(string: str): str_code = urllib.parse.unquote(string) return str_code -def seconds_to_time(seconds:int): + +def seconds_to_time(seconds: int): try: seconds = int(seconds) - if(seconds<0): + if seconds < 0: return "-00:01" except: return "-00:01" m, s = divmod(seconds, 60) h, m = divmod(m, 60) - if(h==0): - return "%02d:%02d"%(m,s) + if h == 0: + return "%02d:%02d" % (m, s) else: return "%d:%02d:%02d" % (h, m, s) -def extract_bilibili_video_id(link:str): - "https://www.bilibili.com/video/av48185229?from=search&seid=15830345666680669730" + +def extract_bilibili_video_id(link: str): + """https://www.bilibili.com/video/av48185229?from=search&seid=15830345666680669730""" if "bilibili.com" in link: try: - video_id = re.match(".*\/(.v[0-9]*)", link).group(1) + video_id = re.match(".*/(.v[0-9]*)", link).group(1) return video_id - except Exception as e: + except: print("Not a valid video url", link) return link else: print("Not a valid bilibili url", link) return link -def load_data(title:str): + +def load_data(title: str): title = str(title) file_dir = os.sep.join([".", "resource", title, "data.json"]) result = {} @@ -78,43 +86,43 @@ def load_data(title:str): print("Error: ", e) return result -def extract_links(result:dict, title:str): - with open(os.sep.join(['.', "output", title+ ".txt"]), 'w+', encoding='utf-8') as links: + +def extract_links(result: dict, title: str): + with open(os.sep.join(['.', "output", title + ".txt"]), 'w+', encoding='utf-8') as links: length = 0 last = "" for key in result.keys(): - if(len(result[key]['link'])>0): + if len(result[key]['link']) > 0: header = result[key]['header'] - time_string = seconds_to_time((1 if key==0 else key)+config['open_offset']) + time_string = seconds_to_time((1 if key == 0 else key) + config['open_offset']) url = result[key]['link'] url = convert_to_string(url) - if("bilibili" in url): + if "bilibili" in url: url = extract_bilibili_video_id(url) - if(url==last): + if url == last: continue else: last = url - line = time_string+" "+header+" "+url+"\n" - length+=len(line) - if(length>950): # Bilibili comment length 1000 + line = time_string + " " + header + " " + url + "\n" + length += len(line) + if length > 950: # Bilibili comment length 1000 links.write("\n\n") - length=0 links.writelines(line) - length=len(line) + length = len(line) links.close() -def extract_headers(result:dict, title:str): - with open(os.sep.join(['.', "output", title+ "_headers.txt"]), 'w+', encoding='utf-8') as links: + +def extract_headers(result: dict, title: str): + with open(os.sep.join(['.', "output", title + "_headers.txt"]), 'w+', encoding='utf-8') as links: length = 0 - last = "" for key in result.keys(): - if(len(result[key]['header'])>0): + if len(result[key]['header']) > 0: header = result[key]['header'] - time_string = seconds_to_time((1 if key==0 else key)+config['open_offset']) - line = time_string+" "+header+"⭐" - length+=len(line) - if(length>950): # Bilibili comment length 1000 + time_string = seconds_to_time((1 if key == 0 else key) + config['open_offset']) + line = time_string + " " + header + "⭐" + length += len(line) + if length > 950: # Bilibili comment length 1000 links.write("\n\n") - length=len(line) + length = len(line) links.writelines(line) - links.close() \ No newline at end of file + links.close() diff --git a/gadio/text/wrapper.py b/gadio/text/wrapper.py index d05f24c..2e53a21 100644 --- a/gadio/text/wrapper.py +++ b/gadio/text/wrapper.py @@ -1,63 +1,62 @@ -from PIL import Image, ImageDraw, ImageFont - -from gadio.configs.config import config from gadio.text.text import * +from PIL import ImageFont + class Wrapper(object): - def __init__(self, font:ImageFont): + def __init__(self, font: ImageFont): self.font = font self.tokens = list() - def wrap_string(self, string:str, width): + def wrap_string(self, string: str, width): self.tokenize_string(string) result = str() temp_string = str() length = 0 for word in self.tokens: - word_length = self.font.getsize(word)[0] - if(length+word_length>width): - result+=temp_string - result+="\n" + word_length = self.font.getlength(word) + if length + word_length > width: + result += temp_string + result += "\n" temp_string = word length = word_length else: - temp_string+=word - length+=word_length - result+=temp_string - #result = result.replace("\\n ", "\\n", 100) + temp_string += word + length += word_length + result += temp_string + # result = result.replace("\\n ", "\\n", 100) return result - - def tokenize_string(self, string:str): + + def tokenize_string(self, string: str): self.tokens.clear() s = str() - string = string.replace("\n","",100) + string = string.replace("\n", "", 100) for character in string: - if(len(s)==0): - s=character + if len(s) == 0: + s = character else: - last_character = s[len(s)-1] - #print(last_character) - if(is_alnum(character)): - if(is_alnum(last_character)): - s+=character - elif(is_non_end(last_character)): - s+=character + last_character = s[len(s) - 1] + # print(last_character) + if is_alnum(character): + if is_alnum(last_character): + s += character + elif is_non_end(last_character): + s += character else: self.tokens.append(s) - s=character - elif(is_character(character)): - if(is_non_end(last_character)): - s+=character + s = character + elif is_character(character): + if is_non_end(last_character): + s += character else: self.tokens.append(s) - s=character - elif(is_non_start(character)): - s+=character + s = character + elif is_non_start(character): + s += character else: self.tokens.append(s) s = character - #print(result) - if(len(s)!=0): + # print(result) + if len(s) != 0: self.tokens.append(s) - return self.tokens \ No newline at end of file + return self.tokens diff --git a/gcores.py b/gcores.py index e4eccd0..6585651 100644 --- a/gcores.py +++ b/gcores.py @@ -1,23 +1,23 @@ from gadio.crawlers.crawler import * -from gadio.models.radio import * from gadio.media.video import * from gadio.text.text import * -import sys def main(id: int, skip_crawling: bool, with_quote: bool): parsed_json = Crawler.crawl(id) - cache_dir = os.sep.join([os.curdir, 'cache', str(id), 'data.json']) - with open(cache_dir, 'r', encoding='utf-8') as file: + cache_dir = os.sep.join([os.curdir, 'cache', str(id)]) + data_json = os.sep.join([cache_dir, 'data.json']) + with open(data_json, 'r', encoding='utf-8'): radio = Radio.load_from_json(parsed_json) - if (not skip_crawling): - Crawler.download_assets(radio, os.curdir+os.sep+'cache', with_quote) + if not skip_crawling: + Crawler.download_assets(radio, os.curdir + os.sep + 'cache', with_quote) Video.create_video(radio) + if __name__ == "__main__": skip_crawling = False with_quote = False - if (len(sys.argv) == 1 or sys.argv[1] == '-s' or sys.argv[1] == '-q'): + if len(sys.argv) == 1 or sys.argv[1] == '-s' or sys.argv[1] == '-q': if "-q" in sys.argv: with_quote = True print("----------") @@ -28,9 +28,18 @@ def main(id: int, skip_crawling: bool, with_quote: bool): else: title = sys.argv[1] skip_crawling = False - if (len(sys.argv) > 2): - if ("-s" in sys.argv): + if len(sys.argv) > 2: + if "-s" in sys.argv: skip_crawling = True elif "-q" in sys.argv: with_quote = True - main(int(title), skip_crawling, with_quote) + + if title.endswith(".txt"): + # 根据下载列表逐个下载 + id_list_file = open(title, "r") + lines = id_list_file.readlines() + for line in lines: + id = int(line) + main(id, skip_crawling, with_quote) + else: + main(int(title), skip_crawling, with_quote) diff --git a/readme.md b/readme.md index 3712ffc..2c194cb 100644 --- a/readme.md +++ b/readme.md @@ -11,7 +11,7 @@ ## 运行环境 -python 3.5, 3.6, 3.7 +python 3.11 ## 使用说明 @@ -34,7 +34,8 @@ python gcores.py python gcores.py 107884 # 跳过爬虫直接用已下载的图片生成107884的视频 python gcores.py 107884 -s - +# 提供id列表自动批量生成 +python gcores.py list.txt ``` 此外 diff --git a/requirements.txt b/requirements.txt index 7718ab3..cd76e44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ -opencv-python==4.5.5.62 -Pillow==9.0.0 -urllib3==1.26.5 -moviepy==1.0.0 -requests==2.27.1 -myqr==2.3.1 \ No newline at end of file +opencv-python==4.8.1.78 +Pillow==10.0.1 +urllib3==2.0.6 +moviepy==1.0.3 +requests==2.31.0 +myqr==2.3.1 +numpy~=1.26.0 +pillow-avif-plugin==1.3.1 \ No newline at end of file diff --git a/test.py b/test.py index 7d01a62..8f5c258 100644 --- a/test.py +++ b/test.py @@ -1,40 +1,24 @@ import unittest -import sys -import os -import json from gadio.text import text as text -from gadio.models.radio import Radio + class TestDependency(unittest.TestCase): def test_dependency(self): - import math - import os - import sys - - import cv2 - import numpy as np - from cv2 import VideoWriter, VideoWriter_fourcc - import moviepy.editor - from PIL import Image, ImageDraw, ImageFont + pass - import gadio.configs - import gadio.crawlers - import gadio.media - import gadio.models - import gadio.text class TestText(unittest.TestCase): def test_find_suffix(self): self.assertEqual('.jpg', text.find_image_suffix('1.jpg')) self.assertEqual('.jpg', text.find_image_suffix('1.1.jpg')) - self.assertEqual('', text.find_image_suffix(None)) - + self.assertEqual('', text.find_image_suffix('')) + def test_is_alpha(self): self.assertTrue(text.is_alpha("Hello")) self.assertFalse(text.is_alpha("12Hello")) self.assertFalse(text.is_alpha("是的")) - + def test_is_alnum(self): self.assertTrue(text.is_alnum('A')) self.assertFalse(text.is_alnum('.')) @@ -45,10 +29,10 @@ def test_seconds_to_time(self): self.assertEqual("00:00", text.seconds_to_time(0)) self.assertEqual("01:00", text.seconds_to_time(60)) self.assertEqual("-00:01", text.seconds_to_time(-1)) - self.assertEqual("-00:01", text.seconds_to_time("a")) self.assertEqual("1:00:01", text.seconds_to_time(3601)) self.assertEqual("2:46:40", text.seconds_to_time(10000)) + """ class RadioTest(unittest.TestCase): def test_radio(self):