GitHub:2noise/ChatTTS: A generative speech model for daily dialogue. 它是这样介绍的:一款适用于日常对话的生成式语音模型。
conda create -n chattts python=3.11 conda activate chattts pip install -r requirements.txt
我去,好顺利啊!这里直接启动带 Web 页面,对 Web 有天生的好感……
python examples/web/webui.py
这一启动可不得了啊,这一拉流日志都看不过来啊!自己偷偷下载了不少东西,请看:
人家下载就下载呗,人家最后还不是跑起来了,你看:
随后测试了一下,很顺利,看一下效果截图吧:
这才是咱最想要的,有了她,才能和其他的应用结合在一起。当然,项目还是很贴心的,提供了 API 启动方式:
安装成功之后就可以启动了
改动了好几处地方,这里就直接全部弄过来了,如下:
import os
import sys

now_dir = os.getcwd()
sys.path.append(now_dir)

from pydantic import BaseModel
import torch
from tools.logger import get_logger
from tools.audio import pcm_arr_to_mp3_view
import ChatTTS
from typing import Optional
import io
import zipfile
import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from contextlib import asynccontextmanager

# On macOS, allow PyTorch to fall back to CPU for ops missing on MPS.
if sys.platform == "darwin":
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

logger = get_logger("Command")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan hook: load the ChatTTS models once at startup.

    Exits the process if the models cannot be loaded, so the server
    never serves requests with an uninitialized model.
    """
    global chat
    chat = ChatTTS.Chat(get_logger("ChatTTS"))
    logger.info("Initializing ChatTTS...")
    if not chat.load():
        logger.error("Models load failed.")
        sys.exit(1)
    logger.info("Models loaded successfully.")
    yield


app = FastAPI(lifespan=lifespan)


class ChatTTSParams(BaseModel):
    """Request body for /generate_voice.

    Mirrors the keyword arguments accepted by ``ChatTTS.Chat.infer``.
    """

    text: list[str]
    stream: bool = False
    lang: Optional[str] = None
    skip_refine_text: bool = False
    refine_text_only: bool = False
    use_decoder: bool = True
    do_text_normalization: bool = True
    do_homophone_replacement: bool = False
    params_refine_text: ChatTTS.Chat.RefineTextParams
    params_infer_code: ChatTTS.Chat.InferCodeParams


@app.post("/generate_voice")
async def generate_voice(params: ChatTTSParams):
    """Synthesize speech for the given texts and return a zip of MP3 files."""
    logger.info("Text input: %s", str(params.text))

    # Audio seed: with a manual seed, sample a reproducible random speaker.
    seed = params.params_infer_code.manual_seed
    if seed is not None:
        torch.manual_seed(seed)
        params.params_infer_code.spk_emb = chat.sample_random_speaker()

    # Optional text-refinement pass before synthesis.
    if params.params_refine_text:
        text = chat.infer(
            text=params.text, skip_refine_text=False, refine_text_only=True
        )
        logger.info(f"Refined text: {text}")
    else:
        # No text refining requested; synthesize the raw input.
        text = params.text

    logger.info("Use speaker:")
    logger.info(params.params_infer_code.spk_emb)

    logger.info("Start voice inference.")
    wavs = chat.infer(
        text=text,
        stream=params.stream,
        lang=params.lang,
        skip_refine_text=params.skip_refine_text,
        use_decoder=params.use_decoder,
        do_text_normalization=params.do_text_normalization,
        do_homophone_replacement=params.do_homophone_replacement,
        params_infer_code=params.params_infer_code,
        params_refine_text=params.params_refine_text,
    )
    logger.info("Inference completed.")

    # Bundle every generated clip into one in-memory zip archive.
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(
        zip_buffer, "a", compression=zipfile.ZIP_DEFLATED, allowZip64=False
    ) as archive:
        for index, pcm in enumerate(wavs):
            archive.writestr(f"{index}.mp3", pcm_arr_to_mp3_view(pcm))
    logger.info("Audio generation successful.")

    zip_buffer.seek(0)
    response = StreamingResponse(zip_buffer, media_type="application/zip")
    response.headers["Content-Disposition"] = "attachment; filename=audio_files.zip"
    return response


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=13140)
之后就可以成功启动了,如下图:
# requirements.txt
numpy==1.26.4
numba==0.60.0
torch==2.5.1
torchaudio==2.5.1
tqdm==4.67.1
vector_quantize_pytorch
transformers==4.47.1
vocos==0.1.0
IPython==8.31.0
gradio==5.9.1
pybase16384==0.3.7
pynini==2.1.5; sys_platform == 'linux'
WeTextProcessing; sys_platform == 'linux'
nemo_text_processing; sys_platform == 'linux'
av==14.0.1
pydub==0.25.1

# examples/api/requirements.txt
fastapi==0.115.6
requests==2.32.3
uvicorn==0.34.0
心想:现在总行了吧!你想多了,还是有问题啊,如下:
INFO: 127.0.0.1:55969 - "POST /generate_voice HTTP/1.1" 500 Internal Server Error ERROR: Exception in ASGI application Traceback (most recent call last): File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 403, in run_asgi result = await app( # type: ignore[func-returns-value] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__ return await self.app(scope, receive, send) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\fastapi\applications.py", line 1054, in __call__ await super().__call__(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\applications.py", line 113, in __call__ await self.middleware_stack(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\middleware\errors.py", line 187, in __call__ raise exc File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\middleware\errors.py", line 165, in __call__ await self.app(scope, receive, _send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\middleware\exceptions.py", line 62, in __call__ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app raise exc File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app await app(scope, receive, sender) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\routing.py", line 715, in __call__ await self.middleware_stack(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\routing.py", line 735, in app await route.handle(scope, receive, send) File 
"C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\routing.py", line 288, in handle await self.app(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\routing.py", line 76, in app await wrap_app_handling_exceptions(app, request)(scope, receive, send) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app raise exc File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app await app(scope, receive, sender) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\starlette\routing.py", line 73, in app response = await f(request) ^^^^^^^^^^^^^^^^ File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\fastapi\routing.py", line 301, in app raw_response = await run_endpoint_function( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\fastapi\routing.py", line 212, in run_endpoint_function return await dependant.call(**values) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "F:\mllm\ChatTTS-0.2.1\examples\api\main.py", line 92, in generate_voice wavs = chat.infer( ^^^^^^^^^^^ File "F:\mllm\ChatTTS-0.2.1\ChatTTS\core.py", line 221, in infer return next(res_gen) ^^^^^^^^^^^^^ File "F:\mllm\ChatTTS-0.2.1\ChatTTS\core.py", line 385, in _infer for result in self._infer_code( ^^^^^^^^^^^^^^^^^ File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File "F:\mllm\ChatTTS-0.2.1\ChatTTS\core.py", line 485, in _infer_code self.speaker.decode_prompt(params.spk_smp) File "C:\Users\xiaodu\.conda\envs\chattts\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File "F:\mllm\ChatTTS-0.2.1\ChatTTS\model\speaker.py", line 112, in decode_prompt lzma.decompress( File 
"C:\Users\xiaodu\.conda\envs\chattts\Lib\lzma.py", line 343, in decompress res = decomp.decompress(data) ^^^^^^^^^^^^^^^^^^^^^^^ _lzma.LZMAError: Corrupt input data
经排查,这个错误的原因是:
我去,其他的都是浮云,这里才是重点,都有 Python 包了,还要啥 API 啊,集成更方便了👍👍👍。官网的“开发教程”章节给出了使用方法: