Merge branch 'xorbitsai:main' into main
cyhasuka authored Nov 20, 2024
2 parents 362d60c + 0cdfb43 commit d7c6e6b
Showing 119 changed files with 8,493 additions and 697 deletions.
20 changes: 0 additions & 20 deletions .github/workflows/docker-cd.yaml
@@ -73,26 +73,6 @@ jobs:
echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
fi
- name: Log in to Aliyun Docker Hub
uses: docker/login-action@v1
with:
registry: registry.cn-hangzhou.aliyuncs.com
username: ${{ secrets.DOCKERHUB_ALIYUN_USERNAME }}
password: ${{ secrets.DOCKERHUB_ALIYUN_PASSWORD }}

- name: Push docker image to Aliyun
shell: bash
if: ${{ github.repository == 'xorbitsai/inference' }}
env:
DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/xprobe_xinference
run: |
if [[ -n "$XINFERENCE_GIT_TAG" ]]; then
docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}" "$DOCKER_ORG/xinference:latest"
docker push "$DOCKER_ORG/xinference:latest"
docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
docker push "$DOCKER_ORG/xinference:latest-cpu"
fi
- name: Clean docker image cache
shell: bash
if: ${{ github.repository == 'xorbitsai/inference' }}
30 changes: 22 additions & 8 deletions .github/workflows/python.yaml
@@ -74,13 +74,13 @@ jobs:
fail-fast: false
matrix:
os: [ "ubuntu-latest", "macos-12", "windows-latest" ]
- python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+ python-version: [ "3.9", "3.10", "3.11", "3.12" ]
module: [ "xinference" ]
exclude:
- - { os: macos-12, python-version: 3.9 }
  - { os: macos-12, python-version: 3.10 }
- - { os: windows-latest, python-version: 3.9 }
+ - { os: macos-12, python-version: 3.11 }
  - { os: windows-latest, python-version: 3.10 }
+ - { os: windows-latest, python-version: 3.11 }
include:
- { os: self-hosted, module: gpu, python-version: 3.9}
- { os: macos-latest, module: metal, python-version: "3.10" }
@@ -99,6 +99,12 @@ jobs:
python-version: ${{ matrix.python-version }}
activate-environment: ${{ env.CONDA_ENV }}

+ # Important for python == 3.12
+ - name: Update pip and setuptools
+ if: ${{ matrix.python-version == '3.12' }}
+ run: |
+ python -m pip install -U pip setuptools
- name: Install dependencies
env:
MODULE: ${{ matrix.module }}
@@ -118,8 +124,7 @@
pip install transformers
pip install attrdict
pip install "timm>=0.9.16"
- pip install torch
- pip install torchvision
+ pip install torch torchvision
pip install accelerate
pip install sentencepiece
pip install transformers_stream_generator
@@ -133,7 +138,6 @@
pip install -e ".[dev]"
pip install "jinja2==3.1.2"
pip install tensorizer
- pip install eva-decord
pip install jj-pytorchvideo
pip install qwen-vl-utils
pip install datamodel_code_generator
@@ -162,7 +166,7 @@ jobs:
${{ env.SELF_HOST_PYTHON }} -m pip install -U WeTextProcessing<1.0.4
${{ env.SELF_HOST_PYTHON }} -m pip install -U librosa
${{ env.SELF_HOST_PYTHON }} -m pip install -U xxhash
- ${{ env.SELF_HOST_PYTHON }} -m pip install -U "ChatTTS>=0.2"
+ ${{ env.SELF_HOST_PYTHON }} -m pip install -U "ChatTTS>=0.2.1"
${{ env.SELF_HOST_PYTHON }} -m pip install -U HyperPyYAML
${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y matcha-tts
${{ env.SELF_HOST_PYTHON }} -m pip install -U onnxruntime-gpu==1.16.0; sys_platform == 'linux'
@@ -175,9 +179,19 @@
${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y opencc
${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y "faster_whisper"
${{ env.SELF_HOST_PYTHON }} -m pip install -U accelerate
+ ${{ env.SELF_HOST_PYTHON }} -m pip install -U verovio
+ ${{ env.SELF_HOST_PYTHON }} -m pip install -U cachetools
+ ${{ env.SELF_HOST_PYTHON }} -m pip install -U silero-vad
+ ${{ env.SELF_HOST_PYTHON }} -m pip install -U pydantic
+ ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
+ --disable-warnings \
+ --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/core/tests/test_continuous_batching.py && \
${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
-W ignore::PendingDeprecationWarning \
--cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_stable_diffusion.py && \
+ ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
+ -W ignore::PendingDeprecationWarning \
+ --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_got_ocr2.py && \
${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
-W ignore::PendingDeprecationWarning \
--cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/audio/tests/test_whisper.py && \
@@ -203,6 +217,6 @@ jobs:
--cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/client/tests/test_client.py
pytest --timeout=1500 \
-W ignore::PendingDeprecationWarning \
- --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/audio/tests xinference
+ --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/core/tests/test_continuous_batching.py --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/image/tests/test_got_ocr2.py --ignore xinference/model/audio/tests xinference
fi
working-directory: .
18 changes: 18 additions & 0 deletions README.md
@@ -180,6 +180,24 @@ Once Xinference is running, there are multiple ways you can try it: via the web
| [Slack](https://join.slack.com/t/xorbitsio/shared_invite/zt-1o3z9ucdh-RbfhbPVpx7prOVdM1CAuxg) | Collaborating with other Xorbits users. |
| [Twitter](https://twitter.com/xorbitsio) | Staying up-to-date on new features. |

## Citation

If this work is helpful, please kindly cite as:

```bibtex
@inproceedings{lu2024xinference,
title = "Xinference: Making Large Model Serving Easy",
author = "Lu, Weizheng and Xiong, Lingfeng and Zhang, Feng and Qin, Xuye and Chen, Yueguo",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.30",
pages = "291--300",
}
```

## Contributors

<a href="https://github.com/xorbitsai/inference/graphs/contributors">
18 changes: 18 additions & 0 deletions README_ja_JP.md
@@ -104,6 +104,24 @@ Xinferenceが実行されると、Web UI、cURL、コマンドライン、また
| [Slack](https://join.slack.com/t/xorbitsio/shared_invite/zt-1o3z9ucdh-RbfhbPVpx7prOVdM1CAuxg) | 他のXorbitsユーザーとの協力。 |
| [Twitter](https://twitter.com/xorbitsio) | 新機能に関する最新情報の入手。 |

## 引用

この仕事が役立つ場合は、以下のように引用してください:

```bibtex
@inproceedings{lu2024xinference,
title = "Xinference: Making Large Model Serving Easy",
author = "Lu, Weizheng and Xiong, Lingfeng and Zhang, Feng and Qin, Xuye and Chen, Yueguo",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.30",
pages = "291--300",
}
```

## 寄稿者

<a href="https://github.com/xorbitsai/inference/graphs/contributors">
18 changes: 18 additions & 0 deletions README_zh_CN.md
@@ -164,6 +164,24 @@ $ xinference-local
| [微信社群](https://xorbits.cn/assets/images/wechat_work_qr.png) | 与其他 Xorbits 用户交流。 |
| [知乎](https://zhihu.com/org/xorbits) | 了解团队最新的进展。 |

## 引用

如果您觉得此项目有帮助,请以如下格式引用我们:

```bibtex
@inproceedings{lu2024xinference,
title = "Xinference: Making Large Model Serving Easy",
author = "Lu, Weizheng and Xiong, Lingfeng and Zhang, Feng and Qin, Xuye and Chen, Yueguo",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.30",
pages = "291--300",
}
```

## 贡献者

<a href="https://github.com/xorbitsai/inference/graphs/contributors">
2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -121,7 +121,7 @@
"type": "fontawesome",
}])
  html_theme_options["external_links"] = [
- {"name": "产品官网", "url": "https://xorbits.cn/inference"},
+ {"name": "产品官网", "url": "https://xorbits.cn"},
]

html_favicon = "_static/favicon.svg"
7 changes: 7 additions & 0 deletions doc/source/getting_started/installation_npu.rst
@@ -6,6 +6,13 @@ Installation Guide for Ascend NPU
=================================
Xinference can run on Ascend NPU, follow below instructions to install.

.. warning::

The open-source version relies on Transformers for inference,
which can be slow on chips like 310p3. We provide an enterprise version that supports the MindIE engine,
offering better performance and compatibility for Ascend NPU.
Refer to `Xinference Enterprise <https://github.com/xorbitsai/enterprise-docs/blob/main/README_zh_CN.md>`_


Installing PyTorch and Ascend extension for PyTorch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
doc/source/locale/zh_CN/LC_MESSAGES/getting_started/installation_npu.po
@@ -8,7 +8,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: Xinference \n"
  "Report-Msgid-Bugs-To: \n"
- "POT-Creation-Date: 2024-07-30 17:00+0800\n"
+ "POT-Creation-Date: 2024-10-25 15:13+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -28,52 +28,65 @@ msgid "Xinference can run on Ascend NPU, follow below instructions to install."
msgstr "Xinference 能在昇腾 NPU 上运行,使用如下命令安装。"

#: ../../source/getting_started/installation_npu.rst:11
+ msgid ""
+ "The open-source version relies on Transformers for inference, which can "
+ "be slow on chips like 310p3. We provide an enterprise version that "
+ "supports the MindIE engine, offering better performance and compatibility"
+ " for Ascend NPU. Refer to `Xinference Enterprise "
+ "<https://github.com/xorbitsai/enterprise-"
+ "docs/blob/main/README_zh_CN.md>`_"
+ msgstr ""
+ "开源版本依赖 Transformers 进行推理,在 310p3 等芯片上会存在运行慢的问题。"
+ "我们提供了支持 MindIE 引擎,性能更为强大,兼容性更好的企业版本来支持 "
+ "Ascend NPU。详细参考 `Xinference 企业版 <https://github.com/xorbitsai/"
+ "enterprise-docs/blob/main/README_zh_CN.md>`_"
+
+ #: ../../source/getting_started/installation_npu.rst:18
msgid "Installing PyTorch and Ascend extension for PyTorch"
msgstr "安装 PyTorch 和昇腾扩展"

- #: ../../source/getting_started/installation_npu.rst:12
+ #: ../../source/getting_started/installation_npu.rst:19
msgid "Install PyTorch CPU version and corresponding Ascend extension."
msgstr "安装 PyTorch CPU 版本和相应的昇腾扩展。"

- #: ../../source/getting_started/installation_npu.rst:14
+ #: ../../source/getting_started/installation_npu.rst:21
msgid "Take PyTorch v2.1.0 as example."
msgstr "以 PyTorch v2.1.0 为例。"

- #: ../../source/getting_started/installation_npu.rst:20
+ #: ../../source/getting_started/installation_npu.rst:27
msgid ""
"Then install `Ascend extension for PyTorch "
"<https://github.com/Ascend/pytorch>`_."
- msgstr ""
- "接着安装 `昇腾 PyTorch 扩展 "
- "<https://gitee.com/ascend/pytorch>`_."
+ msgstr "接着安装 `昇腾 PyTorch 扩展 <https://gitee.com/ascend/pytorch>`_."

- #: ../../source/getting_started/installation_npu.rst:28
+ #: ../../source/getting_started/installation_npu.rst:35
msgid "Running below command to see if it correctly prints the Ascend NPU count."
msgstr "运行如下命令查看,如果正常运行,会打印昇腾 NPU 的个数。"

- #: ../../source/getting_started/installation_npu.rst:35
+ #: ../../source/getting_started/installation_npu.rst:42
msgid "Installing Xinference"
msgstr "安装 Xinference"

- #: ../../source/getting_started/installation_npu.rst:41
+ #: ../../source/getting_started/installation_npu.rst:48
msgid ""
"Now you can use xinference according to :ref:`doc <using_xinference>`. "
"``Transformers`` backend is the only available engine supported for "
"Ascend NPU for open source version."
  msgstr ""
- "现在你可以参考 :ref:`文档 <using_xinference>` 来使用 Xinference。"
- "``Transformers`` 是开源唯一支持的昇腾 NPU 的引擎。"
+ "现在你可以参考 :ref:`文档 <using_xinference>` 来使用 Xinference。``"
+ "Transformers`` 是开源唯一支持的昇腾 NPU 的引擎。"

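The entry above states that ``Transformers`` is the only engine the open-source build supports on Ascend NPU. A minimal sketch of pinning that engine through Xinference's Python client follows; the endpoint URL and model name are illustrative assumptions, not part of this commit:

```python
# Sketch only: pin the Transformers engine when launching on Ascend NPU.
# The endpoint and model name are assumptions for illustration.
from xinference.client import Client

client = Client("http://localhost:9997")  # assumed local Xinference endpoint

client.launch_model(
    model_name="qwen2-instruct",   # illustrative model choice
    model_type="LLM",
    model_engine="transformers",   # per the docs above, the only NPU engine
)
```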
- #: ../../source/getting_started/installation_npu.rst:45
+ #: ../../source/getting_started/installation_npu.rst:52
msgid "Enterprise Support"
msgstr "企业支持"

- #: ../../source/getting_started/installation_npu.rst:46
+ #: ../../source/getting_started/installation_npu.rst:53
msgid ""
"If you encounter any performance or other issues for Ascend NPU, please "
"reach out to us via `link <https://xorbits.io/community>`_."
  msgstr ""
- "如果你在昇腾 NPU 遇到任何性能和其他问题,欢迎垂询 Xinference 企业版,"
- "在 `这里 <https://xorbits.cn/community>`_ 可以找到我们,亦可以 "
- "`填写表单 <https://w8v6grm432.feishu.cn/share/base/form/shrcn9u1EBXQxmGMqILEjguuGoh>`_ 申请企业版试用。"
+ "如果你在昇腾 NPU 遇到任何性能和其他问题,欢迎垂询 Xinference 企业版,在 `"
+ "这里 <https://xorbits.cn/community>`_ 可以找到我们,亦可以 `填写表单 <"
+ "https://w8v6grm432.feishu.cn/share/base/form/shrcn9u1EBXQxmGMqILEjguuGoh>"
+ "`_ 申请企业版试用。"

40 changes: 28 additions & 12 deletions doc/source/locale/zh_CN/LC_MESSAGES/models/model_abilities/image.po
@@ -8,7 +8,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: Xinference \n"
  "Report-Msgid-Bugs-To: \n"
- "POT-Creation-Date: 2024-08-09 19:13+0800\n"
+ "POT-Creation-Date: 2024-10-30 07:49+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
@@ -17,7 +17,7 @@ msgstr ""
  "MIME-Version: 1.0\n"
  "Content-Type: text/plain; charset=utf-8\n"
  "Content-Transfer-Encoding: 8bit\n"
- "Generated-By: Babel 2.14.0\n"
+ "Generated-By: Babel 2.16.0\n"

#: ../../source/models/model_abilities/image.rst:5
msgid "Images"
@@ -143,34 +143,39 @@ msgid ""
" move a model component onto the GPU when it needs to be executed, while "
"keeping the remaining components on the CPU."
  msgstr ""
- "``--cpu_offload True``:指定 ``True`` 会在推理过程中将模型的组件卸载到 CPU 上以节省内存,"
- "这会导致推理延迟略有增加。模型卸载仅会在需要执行时将模型组件移动到 GPU 上,同时保持其余组件在 CPU 上"
+ "``--cpu_offload True``:指定 ``True`` 会在推理过程中将模型的组件卸载到 "
+ "CPU 上以节省内存,这会导致推理延迟略有增加。模型卸载仅会在需要执行时将"
+ "模型组件移动到 GPU 上,同时保持其余组件在 CPU 上"

#: ../../source/models/model_abilities/image.rst:117
  msgid ""
  "``--quantize_text_encoder <text encoder layer>``: We leveraged the "
  "``bitsandbytes`` library to load and quantize the T5-XXL text encoder to "
- "8-bit precision. This allows you to keep using all text encoders "
- "while only slightly impacting performance."
- msgstr "``--quantize_text_encoder <text encoder layer>``:我们利用 ``bitsandbytes`` 库"
- "加载并量化 T5-XXL 文本编码器至8位精度。这使得你能够在仅轻微影响性能的情况下继续使用全部文本编码器。"
+ "8-bit precision. This allows you to keep using all text encoders while "
+ "only slightly impacting performance."
+ msgstr ""
+ "``--quantize_text_encoder <text encoder layer>``:我们利用 ``bitsandbytes"
+ "`` 库加载并量化 T5-XXL 文本编码器至8位精度。这使得你能够在仅轻微影响性能"
+ "的情况下继续使用全部文本编码器。"

#: ../../source/models/model_abilities/image.rst:120
msgid ""
"``--text_encoder_3 None``, for sd3-medium, removing the memory-intensive "
"4.7B parameter T5-XXL text encoder during inference can significantly "
"decrease the memory requirements with only a slight loss in performance."
  msgstr ""
- "``--text_encoder_3 None``,对于 sd3-medium,"
- "移除在推理过程中内存密集型的47亿参数T5-XXL文本编码器可以显著降低内存需求,而仅造成性能上的轻微损失。"
+ "``--text_encoder_3 None``,对于 sd3-medium,移除在推理过程中内存密集型的"
+ "47亿参数T5-XXL文本编码器可以显著降低内存需求,而仅造成性能上的轻微损失。"

#: ../../source/models/model_abilities/image.rst:124
msgid ""
"If you are trying to run large image models liek sd3-medium or FLUX.1 "
"series on GPU card that has less memory than 24GB, you may encounter OOM "
"when launching or inference. Try below solutions."
- msgstr "如果你试图在显存小于24GB的GPU上运行像sd3-medium或FLUX.1系列这样的大型图像模型,"
- "你在启动或推理过程中可能会遇到显存溢出(OOM)的问题。尝试以下解决方案。"
+ msgstr ""
+ "如果你试图在显存小于24GB的GPU上运行像sd3-medium或FLUX.1系列这样的大型图像"
+ "模型,你在启动或推理过程中可能会遇到显存溢出(OOM)的问题。尝试以下"
+ "解决方案。"

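The three options documented above (``--cpu_offload``, ``--quantize_text_encoder``, ``--text_encoder_3 None``) are launch-time arguments. A hedged sketch of combining the memory-saving ones through Xinference's Python client follows; the endpoint and keyword spellings are assumptions for illustration:

```python
# Sketch only: launch sd3-medium with the memory-saving options described
# in the strings above. Endpoint and keyword arguments are assumptions.
from xinference.client import Client

client = Client("http://localhost:9997")  # assumed endpoint

model_uid = client.launch_model(
    model_name="sd3-medium",
    model_type="image",
    cpu_offload=True,     # offload idle components to CPU to save VRAM
    text_encoder_3=None,  # drop the 4.7B-parameter T5-XXL text encoder
)
```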
#: ../../source/models/model_abilities/image.rst:128
msgid "For FLUX.1 series, try to apply quantization."
@@ -200,4 +205,15 @@ msgstr ""
msgid "Learn from a Stable Diffusion ControlNet example"
msgstr "学习一个 Stable Diffusion 控制网络的示例"

+ #: ../../source/models/model_abilities/image.rst:160
+ msgid "OCR"
+ msgstr ""
+
+ #: ../../source/models/model_abilities/image.rst:162
+ msgid "The OCR API accepts image bytes and returns the OCR text."
+ msgstr "OCR API 接受图像字节并返回 OCR 文本。"
+
+ #: ../../source/models/model_abilities/image.rst:164
+ msgid "We can try OCR API out either via cURL, or Xinference's python client:"
+ msgstr "可以通过 cURL 或 Xinference 的 Python 客户端来尝试 OCR API。"

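These strings correspond to the new OCR section in image.rst, which demonstrates the API via cURL or Xinference's Python client. A rough sketch of what such a call could look like; the model UID and the ``ocr`` method are assumptions inferred from this diff, not a verified API:

```python
# Sketch only: the OCR API described above accepts image bytes and returns
# the recognized text. The model UID and method name are assumptions.
from xinference.client import Client

client = Client("http://localhost:9997")   # assumed endpoint
model = client.get_model("got-ocr2")       # assumed UID of a launched OCR model

with open("sample.png", "rb") as f:        # any local image file
    image_bytes = f.read()

text = model.ocr(image_bytes)              # assumed method returning OCR text
print(text)
```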
4 changes: 2 additions & 2 deletions doc/source/models/builtin/embedding/gte-qwen2.rst
@@ -11,11 +11,11 @@ gte-Qwen2
Specifications
^^^^^^^^^^^^^^

- - **Dimensions:** 3584
+ - **Dimensions:** 4096
- **Max Tokens:** 32000
- **Model ID:** Alibaba-NLP/gte-Qwen2-7B-instruct
- **Model Hubs**: `Hugging Face <https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct>`__, `ModelScope <https://modelscope.cn/models/iic/gte_Qwen2-7B-instruct>`__

Execute the following command to launch the model::

- xinference launch --model-name gte-Qwen2 --model-type embedding
+ xinference launch --model-name gte-Qwen2 --model-type embedding
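A brief usage sketch for the launch command above, using Xinference's Python client; the endpoint and sample input are placeholders:

```python
# Sketch only: exercise the gte-Qwen2 embedding model launched above.
from xinference.client import Client

client = Client("http://localhost:9997")  # assumed endpoint
model_uid = client.launch_model(model_name="gte-Qwen2", model_type="embedding")
model = client.get_model(model_uid)

result = model.create_embedding("What is Xinference?")
# Per the corrected spec above, each vector should have 4096 dimensions.
print(len(result["data"][0]["embedding"]))
```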