diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..dcc4185
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,51 @@
+# Use nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04 as the base image
+FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu22.04
+
+# Install Python build dependencies, download and compile Python, and set Python 3.10 as the default Python version
+RUN apt-get update && apt-get install -y \
+    wget \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libncurses5-dev \
+    libncursesw5-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libgdbm-dev \
+    libdb5.3-dev \
+    libbz2-dev \
+    libexpat1-dev \
+    liblzma-dev \
+    libffi-dev \
+    libgdbm-compat-dev \
+    && wget https://www.python.org/ftp/python/3.10.12/Python-3.10.12.tar.xz -O /tmp/Python-3.10.12.tar.xz \
+    && tar xvf /tmp/Python-3.10.12.tar.xz -C /tmp \
+    && cd /tmp/Python-3.10.12 \
+    && ./configure --enable-optimizations --with-ensurepip=install \
+    && make -j $(nproc) \
+    && make altinstall \
+    && update-alternatives --install /usr/bin/python python /usr/local/bin/python3.10 1 \
+    && update-alternatives --install /usr/bin/pip pip /usr/local/bin/pip3.10 1 \
+    && rm -rf /tmp/Python-3.10.12.tar.xz /tmp/Python-3.10.12 \
+    && apt-get install -y git curl \
+    && curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
+    && apt-get install -y git-lfs \
+    && pip install --upgrade pip
+
+# Set the working directory
+WORKDIR /root/Llama2-Chinese
+
+# Clone the Llama2-Chinese repository from GitHub
+RUN git clone https://github.com/FlagAlpha/Llama2-Chinese.git /root/Llama2-Chinese
+
+# Install the Python dependencies from requirements.txt with pip
+RUN pip install -r requirements.txt
+
+# Clone the Hugging Face model repository (git lfs install ensures the LFS weight files are fetched)
+RUN git lfs install && git clone https://huggingface.co/FlagAlpha/Llama2-Chinese-7b-Chat
+
+# Expose port 7860
+EXPOSE 7860
+
+# Set the startup command
+ENTRYPOINT ["python", "examples/chat_gradio.py", "--model_name_or_path", "/root/Llama2-Chinese/Llama2-Chinese-7b-Chat/"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..f2ebd77
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,16 @@
+version: '3.7'
+services:
+  app:
+    image: longerhuya/llama2-chinese-7b:gradio # Replace with your actual image name
+    volumes:
+      - /usr/local/nvidia:/usr/local/nvidia # Give the container access to the host's NVIDIA driver
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all # Make all NVIDIA GPUs visible to the container
+    ports:
+      - 7860:7860 # Map the port between the container and the host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu] # Use Docker's device requests to give the container GPU access
diff --git a/examples/chat_gradio.py b/examples/chat_gradio.py
index ea33774..36e9de4 100644
--- a/examples/chat_gradio.py
+++ b/examples/chat_gradio.py
@@ -91,4 +91,4 @@ def bot(history,temperature,top_p,slider_context_times):
     streamer = TextIteratorStreamer(tokenizer,skip_prompt=True)
     if torch.__version__ >= "2" and sys.platform != "win32":
         model = torch.compile(model)
-    demo.queue().launch(share=False,debug = True)
+    demo.queue().launch(share=False, debug=True, server_name="0.0.0.0")
diff --git a/requirements.txt b/requirements.txt
index 8b3993c..ed589d9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 bitsandbytes==0.39.0
 accelerate==0.21.0
-deepspeed==0.10.0
+#deepspeed==0.10.0
 git+https://github.com/PanQiWei/AutoGPTQ.git
 scipy
 sentencepiece
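
A minimal usage sketch for this change set, assuming a host with Docker and the NVIDIA Container Toolkit installed; the image tag below matches the one referenced in docker-compose.yml, so substitute your own if you rebuild under a different name:

    # Build the image from docker/Dockerfile; the Dockerfile clones the
    # code and the 7B chat weights itself, so the build context can be empty
    docker build -t longerhuya/llama2-chinese-7b:gradio docker/

    # Or start the service through the compose file (GPU access comes from
    # the deploy.resources.reservations.devices section)
    docker compose -f docker/docker-compose.yml up -d

    # Because chat_gradio.py now launches with server_name="0.0.0.0",
    # the Gradio UI on port 7860 is reachable from outside the container
    curl http://localhost:7860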