# Download the model.
# GIT_LFS_SKIP_SMUDGE=1 makes the clone skip downloading LFS payloads (only
# pointer files are checked out); the LFS objects are then fetched in a second
# step from inside the cloned repository.
# NOTE(review): the launch command in these notes reads the model from
# /data/4pd/models/QwQ-32B on the host, so this is presumably meant to be run
# from /data/4pd/models - confirm.
GIT_LFS_SKIP_SMUDGE=1 git clone https://www.modelscope.cn/Qwen/QwQ-32B.git &&
  git -C QwQ-32B lfs pull
# Start the model.
# Launches a detached container named llm-QWQ from the MindIE service image.
#   --device ...      : exposes the host device nodes davinci_manager,
#                       devmm_svm and hisi_hdc to the container.
#   -v ...            : bind-mounts the host dcmi directory, npu-smi binary,
#                       Ascend driver libraries and version/install info,
#                       /home/aicc, and the host model directory
#                       /data/4pd/models/ (visible as /models in the container).
#   ASCEND_RT_VISIBLE_DEVICES=4,5 : two device ids, matching
#                       --tensor-parallel-size=2 below.
#   -p 9996:9996      : publishes the same port that is passed as --port.
# Everything after the image name is the command run in the container:
# /usr/local/Ascend/entrypoint.sh with the model path, parallelism, port,
# maximum model length and served model name.
docker run -itd --name llm-QWQ \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/common \
-v /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/driver \
-v /etc/ascend_install.info:/etc/ascend_install.info \
-v /etc/vnpu.cfg:/etc/vnpu.cfg \
-v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
-v /home/aicc:/home/aicc \
-v /data/4pd/models/:/models \
--privileged=true \
-e ASCEND_RT_VISIBLE_DEVICES=4,5 \
-p 9996:9996 \
harbor.4pd.io/dooke/mindie-1.0rc3-service:pipe-11-commit-29fd5fd3 \
/usr/local/Ascend/entrypoint.sh \
--model=/models/QwQ-32B \
--tensor-parallel-size=2 \
--port=9996 \
--max-model-len=32768 \
--served-model-name=QwQ-32B