FEAT: Xavier: Share KV cache between VLLM replicas #5324
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: Python CI

# Triggers: pushes to branches matching the '*' pattern, and pull requests
# when they are opened, reopened, or synchronized.
on:
  push:
    branches: ["*"]
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
# Runs are grouped by workflow name and git ref; starting a new run in a
# group cancels the one that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
jobs:
  # Static checks: flake8, black, isort, mypy and codespell for the Python
  # package, then ESLint and Prettier for the web UI under xinference/web/ui.
  lint:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.10"]
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: recursive
      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
          path: "xinference"
          args: "--config setup.cfg"
      - name: black
        uses: psf/black@stable
        with:
          src: "xinference"
          options: "--check"
          version: "23.12.0"
      - uses: isort/isort-action@master
        with:
          sortPaths: "xinference"
          configuration: "--check-only --diff --sp setup.cfg"
      - name: mypy
        run: pip install mypy && mypy --install-types --non-interactive xinference
      - name: codespell
        run: pip install codespell && codespell --ignore-words-list thirdparty xinference
      - name: Set up Node.js
        uses: actions/setup-node@v1
        with:
          node-version: 16
      # ESLint and Prettier must be in `package.json`
      - name: Install Node.js dependencies
        run: cd xinference/web/ui && npm ci
      - name: ESLint Check
        run: cd xinference/web/ui && npx eslint .
      - name: Prettier Check
        run: cd xinference/web/ui && ./node_modules/.bin/prettier --check .

  # Test matrix. Runs only after `lint` succeeds. Besides the regular
  # os x python-version grid for the `xinference` module, two extra
  # combinations are added: `gpu` on the self-hosted runner and `metal` on
  # macos-latest.
  build_test_job:
    runs-on: ${{ matrix.os }}
    needs: lint
    env:
      CONDA_ENV: test
      # Interpreter and conda binary used by the `gpu` branch of the test step.
      SELF_HOST_PYTHON: /root/miniconda3/envs/inference_test/bin/python
      SELF_HOST_CONDA: /root/miniconda3/condabin/conda
    defaults:
      run:
        # Custom shell template: a login shell and nothing else. No `-e` is
        # passed, so a script's result is the exit status of its last command;
        # commands that must gate the job are chained with `&&`.
        shell: bash -l {0}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest", "macos-13", "windows-latest"]
        python-version: ["3.9", "3.10", "3.11", "3.12"]
        module: ["xinference"]
        # Versions are quoted so they stay strings: an unquoted 3.10 is parsed
        # as the float 3.1 and would never match the "3.10" matrix entry.
        exclude:
          - os: macos-13
            python-version: "3.10"
          - os: macos-13
            python-version: "3.11"
          - os: windows-latest
            python-version: "3.10"
          - os: windows-latest
            python-version: "3.11"
        include:
          - os: self-hosted
            module: gpu
            python-version: "3.9"
          - os: macos-latest
            module: metal
            python-version: "3.10"
    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          submodules: recursive
      # Skipped for the gpu module, which uses SELF_HOST_PYTHON instead.
      - name: Set up conda ${{ matrix.python-version }}
        uses: conda-incubator/setup-miniconda@v3
        if: ${{ matrix.module != 'gpu' }}
        with:
          python-version: ${{ matrix.python-version }}
          activate-environment: ${{ env.CONDA_ENV }}
      # Important for python == 3.12
      - name: Update pip and setuptools
        if: ${{ matrix.python-version == '3.12' }}
        run: |
          python -m pip install -U pip setuptools
      - name: Install numpy
        if: startsWith(matrix.os, 'macos') && matrix.python-version == '3.12'
        run: |
          python -m pip install "numpy<2"
      - name: Install dependencies
        env:
          MODULE: ${{ matrix.module }}
          OS: ${{ matrix.os }}
        if: ${{ matrix.module != 'gpu' }}
        run: |
          if [ "$OS" == "ubuntu-latest" ]; then
            # Remove preinstalled toolchains from the runner image.
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /opt/ghc
            sudo rm -rf "/usr/local/share/boost"
            sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          fi
          pip install -e ".[dev]"
          if [ "$MODULE" == "metal" ]; then
            conda install -c conda-forge "ffmpeg<7"
            pip install mlx-lm
            pip install mlx-vlm
            pip install mlx-whisper
            pip install f5-tts-mlx
            pip install qwen-vl-utils
            pip install tomli
          else
            pip install "llama-cpp-python==0.2.77" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
            pip install transformers
            pip install attrdict
            pip install "timm>=0.9.16"
            pip install torch torchvision
            pip install accelerate
            pip install sentencepiece
            pip install transformers_stream_generator
            pip install bitsandbytes
            pip install "sentence-transformers>=2.3.1"
            pip install modelscope
            pip install diffusers
            pip install protobuf
            pip install FlagEmbedding
            pip install "tenacity>=8.2.0,<8.4.0"
            pip install "jinja2==3.1.2"
            pip install tensorizer
            pip install jj-pytorchvideo
            pip install qwen-vl-utils
            pip install datamodel_code_generator
            pip install jsonschema
          fi
        working-directory: .
      - name: Test with pytest
        env:
          MODULE: ${{ matrix.module }}
        run: |
          if [ "$MODULE" == "gpu" ]; then
            # Requirement specifiers containing <, >, ;, ~ or [] are quoted so
            # the shell passes them to pip verbatim instead of treating them
            # as redirections, command separators or glob patterns.
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "openai>1"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U modelscope
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U sse_starlette
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U xoscar
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "python-jose[cryptography]"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "passlib[bcrypt]"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "aioprometheus[starlette]"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "pynvml"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "transformers"
            ${{ env.SELF_HOST_CONDA }} install -c conda-forge pynini=2.1.5
            ${{ env.SELF_HOST_CONDA }} install -c conda-forge "ffmpeg<7"
            ${{ env.SELF_HOST_PYTHON }} -m pip install "funasr<1.1.17"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "nemo_text_processing<1.1.0"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "omegaconf~=2.3.0"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "WeTextProcessing<1.0.4"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U librosa
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U xxhash
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "ChatTTS>=0.2.1"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U HyperPyYAML
            ${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y matcha-tts
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "onnxruntime-gpu==1.16.0; sys_platform == 'linux'"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U openai-whisper
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "torch==2.3.1" "torchaudio==2.3.1"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "loguru"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "natsort"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "loralib"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "ormsgpack"
            ${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y opencc
            ${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y "faster_whisper"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U accelerate
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U verovio
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U cachetools
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U silero-vad
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U pydantic
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U diffusers
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U onnx
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U onnxconverter_common
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U torchdiffeq
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U "x_transformers>=1.31.14"
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U pypinyin
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U tomli
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U vocos
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U jieba
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U soundfile
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U sentence-transformers
            ${{ env.SELF_HOST_PYTHON }} -m pip install -U FlagEmbedding
            # GPU test files, chained with && so the first failure fails the step.
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              --disable-warnings \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/core/tests/test_continuous_batching.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_stable_diffusion.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_got_ocr2.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_whisper.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_funasr.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_chattts.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_cosyvoice.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_f5tts.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_fish_speech.py && \
            ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/embedding/tests/test_integrated_embedding.py
          elif [ "$MODULE" == "metal" ]; then
            pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/llm/mlx/tests/test_mlx.py && \
            pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_whisper_mlx.py && \
            pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_f5tts_mlx.py
          else
            # Chained with && like the other branches: without it a failure in
            # test_client.py would be hidden by the second pytest's exit status.
            pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/client/tests/test_client.py && \
            pytest --timeout=1500 \
              -W ignore::PendingDeprecationWarning \
              --cov-config=setup.cfg --cov-report=xml --cov=xinference \
              --ignore xinference/core/tests/test_continuous_batching.py \
              --ignore xinference/client/tests/test_client.py \
              --ignore xinference/model/image/tests/test_stable_diffusion.py \
              --ignore xinference/model/image/tests/test_got_ocr2.py \
              --ignore xinference/model/audio/tests \
              --ignore xinference/model/embedding/tests/test_integrated_embedding.py \
              xinference
          fi
        working-directory: .