Skip to content

Commit

Permalink
feat: support gewechat voice (#171)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanfangyuan4396 authored Dec 28, 2024
1 parent 84ccb68 commit 084bf0f
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 12 deletions.
21 changes: 18 additions & 3 deletions channel/gewechat/gewechat_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from common.utils import compress_imgfile, fsize
from config import conf, save_config
from lib.gewechat import GewechatClient
from voice.audio_convert import mp3_to_silk

MAX_UTF8_LEN = 2048

Expand Down Expand Up @@ -113,8 +114,19 @@ def send(self, reply: Reply, context: Context):
logger.info("[gewechat] Do send text to {}: {}".format(receiver, reply_text))
elif reply.type == ReplyType.VOICE:
try:
# TODO: mp3 to silk
content = reply.content
if content.endswith('.mp3'):
# 如果是mp3文件,转换为silk格式
silk_path = content + '.silk'
silk_path = mp3_to_silk(content, silk_path)
callback_url = conf().get("gewechat_callback_url")
silk_url = callback_url + "?file=" + silk_path
# TODO: 计算silk文件语音时长,暂时都设置为5s
self.client.post_voice(self.app_id, receiver, silk_url, 5000)
logger.info("[gewechat] Do send voice to {}: {}".format(receiver, silk_url))
return
else:
logger.error(f"[gewechat] voice file is not mp3, path: {content}, only support mp3")
except Exception as e:
logger.error(f"[gewechat] send voice failed: {e}")
elif reply.type == ReplyType.IMAGE_URL:
Expand All @@ -136,10 +148,13 @@ class Query:
def GET(self):
    """Serve the file named by the ``file`` query parameter, or a liveness message.

    Without a ``file`` parameter the handler returns a fixed status string.
    With one, it returns the raw bytes of that local file.

    Raises:
        web.notfound: if the requested path is not an existing regular file.
    """
    params = web.input(file="")
    # Bind the requested path once; the rest of the handler refers to it by
    # this name (it was previously used without ever being assigned).
    file_path = params.file
    if not file_path:
        return "gewechat callback server is running"

    # TODO: only allow access to files under the tmp directory
    # raise web.forbidden()
    # NOTE(review): SECURITY - until the restriction above is implemented this
    # handler returns any file the process can read, because the path comes
    # straight from the query string. Confirm which directory holds the
    # generated voice files and reject everything outside it.

    # isfile() rather than exists(): a directory would pass exists() and then
    # make open() fail instead of producing a clean 404.
    if not os.path.isfile(file_path):
        logger.error(f"[gewechat] File not found: {file_path}")
        raise web.notfound()

    with open(file_path, 'rb') as f:
        return f.read()

Expand Down
1 change: 1 addition & 0 deletions requirements-optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ edge-tts # edge-tts
numpy<=1.24.2
langid # language detect
elevenlabs==1.0.3 # elevenlabs TTS
pilk==0.2.4 # process wechat silk file

#install plugin
dulwich
Expand Down
78 changes: 69 additions & 9 deletions voice/audio_convert.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import shutil
import wave

Expand All @@ -10,6 +11,11 @@

from pydub import AudioSegment

try:
import pilk
except ImportError:
logger.warning("import pilk failed, silk voice conversion will not be supported. Try: pip install pilk")

sil_supports = [8000, 12000, 16000, 24000, 32000, 44100, 48000] # slk转wav时,支持的采样率


Expand Down Expand Up @@ -43,15 +49,43 @@ def get_pcm_from_wav(wav_path):
def any_to_mp3(any_path, mp3_path):
    """
    Convert an audio file to an MP3 file.

    Args:
        any_path: path of the input file
        mp3_path: path of the output MP3 file
    Raises:
        Exception: any error raised during conversion is logged and re-raised.
    """
    try:
        # Already MP3: a plain copy is enough.
        if any_path.endswith(".mp3"):
            shutil.copy2(any_path, mp3_path)
            return

        # SILK input: decode to raw PCM with pilk, then encode that PCM as MP3.
        if any_path.endswith((".sil", ".silk", ".slk")):
            pcm_path = any_path + '.pcm'
            try:
                pilk.decode(any_path, pcm_path)

                # TODO: the parameters below may need tuning
                audio = AudioSegment.from_raw(pcm_path, format="raw",
                                              frame_rate=24000,
                                              channels=1,
                                              sample_width=2)  # 16-bit PCM = 2 bytes
                audio.export(mp3_path, format="mp3")
            finally:
                # Remove the intermediate PCM file even when decoding or
                # exporting fails, so failed conversions do not leave it behind.
                if os.path.exists(pcm_path):
                    os.remove(pcm_path)
            return

        # Any other format: let pydub handle the conversion.
        audio = AudioSegment.from_file(any_path)
        audio.export(mp3_path, format="mp3")

    except Exception as e:
        logger.error(f"转换文件到mp3失败: {str(e)}")
        raise


def any_to_wav(any_path, wav_path):
Expand Down Expand Up @@ -87,6 +121,32 @@ def any_to_sil(any_path, sil_path):
f.write(silk_data)
return audio.duration_seconds * 1000

def mp3_to_silk(mp3_path: str, silk_path: str) -> str:
    """Convert an MP3 file to SILK format.

    The audio is loaded with pydub, converted to mono at 24000 Hz, exported to
    an intermediate PCM file next to the input, and then encoded with pilk.

    Args:
        mp3_path: Path to input MP3 file
        silk_path: Path to output SILK file
    Returns:
        Path to output SILK file
    """
    # First load the MP3 file
    audio = AudioSegment.from_file(mp3_path)

    # Convert to mono and set sample rate to 24000Hz
    # TODO: the parameters below may need tuning
    audio = audio.set_channels(1)
    audio = audio.set_frame_rate(24000)

    pcm_path = os.path.splitext(mp3_path)[0] + '.pcm'
    try:
        # Export to PCM
        audio.export(pcm_path, format='s16le')

        # Convert PCM to SILK
        pilk.encode(pcm_path, silk_path, pcm_rate=24000, tencent=True)
    finally:
        # Clean up the temporary PCM file even when exporting or encoding
        # fails, so a failed conversion does not leave it on disk.
        if os.path.exists(pcm_path):
            os.remove(pcm_path)
    return silk_path

def any_to_amr(any_path, amr_path):
"""
Expand Down

0 comments on commit 084bf0f

Please sign in to comment.