Skip to content

Commit

Permalink
GenAIComps microservices refactor (opea-project#1072)
Browse files Browse the repository at this point in the history
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Signed-off-by: letonghan <letong.han@intel.com>
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: WenjiaoYue <redacted>
  • Loading branch information
chensuyue authored and smguggen committed Jan 23, 2025
1 parent 8987090 commit e223081
Show file tree
Hide file tree
Showing 276 changed files with 3,836 additions and 5,053 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# this file should be run in the root of the repo
services:
nginx:
build:
dockerfile: comps/nginx/Dockerfile
dockerfile: comps/3rd_parties/nginx/src/Dockerfile
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

# this file should be run in the root of the repo
services:
dataprep:
build:
dockerfile: comps/dataprep/src/Dockerfile
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
dataprep-redis:
build:
dockerfile: comps/dataprep/redis/langchain/Dockerfile
Expand Down
28 changes: 6 additions & 22 deletions .github/workflows/docker/compose/embeddings-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,25 @@
services:
embedding-tei:
build:
dockerfile: comps/embeddings/tei/langchain/Dockerfile
dockerfile: comps/embeddings/src/Dockerfile
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
embedding-multimodal-clip:
build:
dockerfile: comps/embeddings/multimodal_clip/Dockerfile
dockerfile: comps/embeddings/src/integrations/dependency/clip/Dockerfile
image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
embedding-multimodal-bridgetower:
build:
dockerfile: comps/embeddings/multimodal/bridgetower/Dockerfile
dockerfile: comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}
embedding-multimodal:
build:
dockerfile: comps/embeddings/multimodal/multimodal_langchain/Dockerfile
dockerfile: comps/embeddings/src/Dockerfile
image: ${REGISTRY:-opea}/embedding-multimodal:${TAG:-latest}
embedding-langchain-mosec-endpoint:
build:
dockerfile: comps/embeddings/mosec/langchain/dependency/Dockerfile
image: ${REGISTRY:-opea}/embedding-langchain-mosec-endpoint:${TAG:-latest}
embedding-langchain-mosec:
build:
dockerfile: comps/embeddings/mosec/langchain/Dockerfile
image: ${REGISTRY:-opea}/embedding-langchain-mosec:${TAG:-latest}
embedding-tei-llama-index:
build:
dockerfile: comps/embeddings/tei/llama_index/Dockerfile
image: ${REGISTRY:-opea}/embedding-tei-llama-index:${TAG:-latest}
embedding-multimodal-bridgetower-gaudi:
build:
dockerfile: comps/embeddings/multimodal/bridgetower/Dockerfile.intel_hpu
dockerfile: comps/embeddings/src/integrations/dependency/bridgetower/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower-gaudi:${TAG:-latest}
embedding-predictionguard:
build:
dockerfile: comps/embeddings/predictionguard/Dockerfile
dockerfile: comps/embeddings/src/Dockerfile
image: ${REGISTRY:-opea}/embedding-predictionguard:${TAG:-latest}
embedding-reranking-local:
build:
dockerfile: comps/embeddings/tei/langchain/Dockerfile.dynamic_batching
image: ${REGISTRY:-opea}/embedding-reranking-local:${TAG:-latest}
8 changes: 2 additions & 6 deletions .github/workflows/docker/compose/llms-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
services:
llm-tgi:
build:
dockerfile: comps/llms/text-generation/tgi/Dockerfile
dockerfile: comps/llms/src/text-generation/Dockerfile
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
llm-ollama:
build:
Expand All @@ -21,7 +21,7 @@ services:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
llm-vllm:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
dockerfile: comps/llms/src/text-generation/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-native:
build:
Expand Down Expand Up @@ -50,10 +50,6 @@ services:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest}
llm-vllm-llamaindex:
build:
dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile
image: ${REGISTRY:-opea}/llm-vllm-llamaindex:${TAG:-latest}
llm-textgen-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker/compose/ragas-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
services:
ragas:
build:
dockerfile: comps/ragas/tgi/langchain/Dockerfile
dockerfile: comps/ragas/src/tgi/langchain/Dockerfile
image: ${REGISTRY:-opea}/ragas:${TAG:-latest}
14 changes: 3 additions & 11 deletions .github/workflows/docker/compose/reranks-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,13 @@
services:
reranking-tei:
build:
dockerfile: comps/reranks/tei/Dockerfile
dockerfile: comps/reranks/src/Dockerfile
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
reranking-videoqna:
build:
dockerfile: comps/reranks/videoqna/Dockerfile
dockerfile: comps/reranks/src/Dockerfile # TODO. need to update
image: ${REGISTRY:-opea}/reranking-videoqna:${TAG:-latest}
reranking-fastrag:
build:
dockerfile: comps/reranks/fastrag/Dockerfile
dockerfile: comps/reranks/src/Dockerfile # TODO. need to update
image: ${REGISTRY:-opea}/reranking-fastrag:${TAG:-latest}
reranking-langchain-mosec-endpoint:
build:
dockerfile: comps/reranks/mosec/langchain/dependency/Dockerfile
image: ${REGISTRY:-opea}/reranking-langchain-mosec-endpoint:${TAG:-latest}
reranking-langchain-mosec:
build:
dockerfile: comps/reranks/mosec/langchain/Dockerfile
image: ${REGISTRY:-opea}/reranking-langchain-mosec:${TAG:-latest}
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/retrievers-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

# this file should be run in the root of the repo
services:
retriever:
build:
dockerfile: comps/retrievers/src/Dockerfile
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
retriever-redis:
build:
dockerfile: comps/retrievers/redis/langchain/Dockerfile
Expand Down
7 changes: 2 additions & 5 deletions .github/workflows/pr-examples-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,8 @@ on:
paths:
- .github/workflows/pr-examples-test.yml
- comps/cores/**
- comps/embeddings/langchain/**
- comps/retrievers/langchain/redis/**
- comps/reranks/tei/**
- comps/llms/text-generation/tgi/**
- comps/dataprep/redis/langchain/**
- comps/dataprep/src/redis/langchain/**
- comps/retrievers/src/redis/langchain/**
- requirements.txt
- "!**.md"

Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/pr-microservice-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ jobs:
hardware: ${{ matrix.hardware }}
run: |
cd tests
echo "log_name=${service}_${hardware}" >> $GITHUB_ENV
if [ ${hardware} = "intel_cpu" ]; then on_hw=""; else on_hw="_on_${hardware}"; fi
timeout 60m bash $(find . -type f -name test_${service}${on_hw}.sh)
echo "log_name=${service}" >> $GITHUB_ENV
timeout 60m bash $(find . -type f -name test_${service}.sh)
- name: Clean up container
if: cancelled() || failure()
Expand Down
109 changes: 79 additions & 30 deletions .github/workflows/scripts/get_test_matrix.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,79 +5,128 @@
# service: service path name, like 'agent_langchain', 'asr_whisper'
# hardware: 'intel_cpu', 'intel_hpu', ...

# Shell options, working directory and the accumulator used by every function below.
# NOTE(review): this text is a rendered diff; the two `set` lines appear to be the
# removed and the added version of the same statement - confirm against the real script.
set -xe
set -e
# WORKSPACE is not set in this script; presumably exported by the calling workflow - verify.
cd $WORKSPACE
# Self-assignment: the value is taken from a variable of the same name that must already exist.
changed_files_full=$changed_files_full
# Opening part of a JSON object; entries are appended to it and main closes it with "]}".
run_matrix="{\"include\":["

# add test services when comps code change
function find_test_1() {
# Walk the directory tree under $1 and register test scripts for changed services.
#   $1 - directory to scan (main passes "comps")
#   $2 - 1-based '/'-separated field index handed to `cut` to pick the path
#        component directly below $1
#   $3 - "true" to treat every sub-directory as changed
# Reads the global `changed_files` and hands the test scripts it finds to fill_in_matrix.
# A directory without a Dockerfile* entry is descended into recursively with $2 + 1.
# NOTE(review): this text is a rendered diff; near-duplicate neighbouring lines
# (for example the pre_service / pre_service_path pairs) appear to be the removed
# and the added version of the same statement - confirm against the real script.
local pre_service=$1
local pre_service_path=$1
local n=$2
local all_service=$3

# Python files found at field $n, other than __init__.py and version.py, count as
# common code for this level.
common_file_change=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service} | cut -d'/' -f$n | grep -E '*.py' | grep -vE '__init__.py|version.py' | sort -u) || true
common_file_change=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service_path} | cut -d'/' -f$n | grep -E '*.py' | grep -vE '__init__.py|version.py' | sort -u) || true
if [ "$common_file_change" ] || [ "$all_service" = "true" ]; then
# if common files changed, run all services
services=$(ls ${pre_service} | cut -d'/' -f$n | grep -vE '*.md|*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true
services=$(ls ${pre_service_path} | cut -d'/' -f$n | grep -vE '*.md|*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true
all_service="true"
else
# if specific service files changed, only run the specific service
services=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service} | cut -d'/' -f$n | grep -vE '*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true
services=$(printf '%s\n' "${changed_files[@]}"| grep ${pre_service_path} | cut -d'/' -f$n | grep -vE '*.py|*.sh|*.yaml|*.yml|*.pdf' | sort -u) || true
fi

for service in ${services}; do
service=$pre_service/$service
if [[ $(ls ${service} | grep -E "Dockerfile*") ]]; then
service_name=$(echo $service | tr '/' '_' | cut -c7-) # comps/dataprep/redis/langchain -> dataprep_redis_langchain
default_service_script_path=$(find ./tests -type f -name test_${service_name}.sh) || true
if [ "$default_service_script_path" ]; then
run_matrix="${run_matrix}{\"service\":\"${service_name}\",\"hardware\":\"intel_cpu\"},"
service_path=$pre_service_path/$service
# A directory listing that contains a Dockerfile* entry marks a buildable service.
if [[ $(ls ${service_path} | grep -E "Dockerfile*") ]]; then
if [[ $(ls ${service_path} | grep "integrations") ]]; then
# new org with `src` and `integrations` folder
run_all_interation="false"
# Drop "/src", turn '/' into '_' and strip the first six characters (the "comps_" prefix).
service_name=$(echo $service_path | sed 's:/src::' | tr '/' '_' | cut -c7-) # comps/retrievers/src/redis/langchain -> retrievers_redis_langchain
# Changed files of this service whose path does not contain "integrations".
common_file_change_insight=$(printf '%s\n' "${changed_files[@]}"| grep ${service_path} | grep -vE 'integrations' | sort -u) || true
if [ "$common_file_change_insight" ]; then
# if common file changed, run all integrations
run_all_interation="true"
fi
if [ "$run_all_interation" = "false" ]; then
# Field n+2 with everything from the first '.' removed; presumably the integration
# module name that follows the `integrations` directory - verify.
changed_integrations=$(printf '%s\n' "${changed_files[@]}"| grep ${service_path} | grep -E 'integrations' | cut -d'/' -f$((n+2)) | cut -d'.' -f1 | sort -u) || true
for integration in ${changed_integrations}; do
# Accurate matching test scripts
# find_test=$(find ./tests -type f \( -name test_${service_name}_${integration}.sh -o -name test_${service_name}_${integration}_on_*.sh \)) || true
# Fuzzy matching test scripts, for example, llms/src/text-generation/integrations/opea.py match several tests.
find_test=$(find ./tests -type f -name test_${service_name}_${integration}*.sh) || true
if [ "$find_test" ]; then
fill_in_matrix "$find_test"
else
# No script matches this integration: fall back to every test of the service.
run_all_interation="true"
break
fi
done
fi
if [ "$run_all_interation" = "true" ]; then
find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true
if [ "$find_test" ]; then
fill_in_matrix "$find_test"
fi
fi
else
# old org without 'src' folder
service_name=$(echo $service_path | tr '/' '_' | cut -c7-) # comps/retrievers/redis/langchain -> retrievers_redis_langchain
find_test=$(find ./tests -type f -name test_${service_name}*.sh) || true
if [ "$find_test" ]; then
fill_in_matrix "$find_test"
fi
fi
other_service_script_path=$(find ./tests -type f -name test_${service_name}_on_*.sh) || true
for script in ${other_service_script_path}; do
_service=$(echo $script | cut -d'/' -f4 | cut -d'.' -f1 | cut -c6-)
hardware=${_service#*_on_}
run_matrix="${run_matrix}{\"service\":\"${service_name}\",\"hardware\":\"${hardware}\"},"
done
else
# No Dockerfile* at this level: descend one directory deeper.
find_test_1 $service $((n+1)) $all_service
find_test_1 $service_path $((n+1)) $all_service
fi
done
}

function fill_in_matrix() {
    # Register every test script of a whitespace-separated list in the run matrix.
    #   $1 - list of test script paths as printed by `find` (one or more paths
    #        separated by whitespace)
    # For each path the service id is the file name without its directory, without
    # everything from the first '.' on, and without the leading "test_"; that id is
    # passed to _fill_in_matrix.
    # The file name is taken from the last path component rather than from a fixed
    # '/'-field, so scripts nested at any depth below ./tests yield the right id.
    local script_list=$1
    local script_path script_file
    # Unquoted on purpose: the list is split on whitespace into individual paths.
    for script_path in ${script_list}; do
        script_file=${script_path##*/}  # strip the directory part
        script_file=${script_file%%.*}  # strip the extension
        _fill_in_matrix "${script_file#test_}"
    done
}

function _fill_in_matrix() {
    # Append one {"service":...,"hardware":...}, entry to the global run_matrix
    # unless an identical entry is already present.
    #   $1 - service id, i.e. a test script name without "test_" and ".sh"
    # The hardware is whatever follows the first "_on_" in the id, or "intel_cpu"
    # when the id has no "_on_" part. The service field keeps the full id.
    # Prints the service/hardware pair, plus a marker line when an entry is added.
    local _service=$1
    local hardware="intel_cpu"
    if [[ "${_service}" == *_on_* ]]; then
        hardware=${_service#*_on_}
    fi
    echo "service=${_service}, hardware=${hardware}"
    local entry="{\"service\":\"${_service}\",\"hardware\":\"${hardware}\"},"
    # Literal substring comparison: the entry is never interpreted as a regular
    # expression and run_matrix is never subject to word splitting or globbing.
    if [[ "${run_matrix}" != *"${entry}"* ]]; then
        run_matrix="${run_matrix}${entry}"
        echo "------------------ add one service ------------------"
    fi
    # The former one-second sleep per entry was dropped; nothing here waits on
    # another process.
}

# add test case when test scripts code change
function find_test_2() {
# Register the test scripts that appear in the global `changed_files` list.
# Takes no arguments; the service id is derived from the third '/'-separated
# field of each path, with everything from the first '.' on and the first five
# characters removed.
# NOTE(review): this text is a rendered diff; the inline service/hardware
# computation and the `_fill_in_matrix` call appear to be the removed and the
# added version of the same logic, and the if/fi do not balance as shown -
# confirm against the real script.
test_files=$(printf '%s\n' "${changed_files[@]}" | grep -E "*.sh") || true
for test_file in ${test_files}; do
_service=$(echo $test_file | cut -d'/' -f3 | cut -d'.' -f1 | cut -c6-)
if [ $(echo ${_service} | grep -c "_on_") == 0 ]; then
service=${_service}
hardware="intel_cpu"
else
service=${_service%_on_*}
hardware=${_service#*_on_}
fi
if [[ $(echo ${run_matrix} | grep -c "{\"service\":\"${service}\",\"hardware\":\"${hardware}\"},") == 0 ]]; then
run_matrix="${run_matrix}{\"service\":\"${service}\",\"hardware\":\"${hardware}\"},"
# Only paths that exist as regular files are registered.
if [ -f $test_file ]; then
_service=$(echo $test_file | cut -d'/' -f3 | cut -d'.' -f1 | cut -c6-)
_fill_in_matrix $_service
fi
done
}

function main() {
# Entry point: build run_matrix from the changed files and publish it.
# Pass 1 filters changed_files_full down to paths under comps/ and runs find_test_1;
# pass 2 filters it down to paths under tests/ and runs find_test_2.
# The finished string is appended to the file named by $GITHUB_OUTPUT.
# NOTE(review): this text is a rendered diff; the two consecutive comps/
# assignments appear to be the removed and the added version of the same line.
# NOTE(review): the exclusion patterns start with '*' (for example '*.md'), whose
# meaning at the start of an extended regular expression is not defined by POSIX -
# confirm the intent.

changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'comps/' | grep -vE '*.md|comps/cores') || true
changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'comps/' | grep -vE '*.md|comps/cores|comps/3rd_parties|deployment|*.yaml') || true
echo "===========start find_test_1============"
echo "changed_files=${changed_files}"
find_test_1 "comps" 2 false
sleep 1s
echo "run_matrix=${run_matrix}"
echo "===========finish find_test_1============"

changed_files=$(printf '%s\n' "${changed_files_full[@]}" | grep 'tests/' | grep -vE '*.md|*.txt|tests/cores') || true
echo "===========start find_test_2============"
echo "changed_files=${changed_files}"
find_test_2
sleep 1s
echo "run_matrix=${run_matrix}"
echo "===========finish find_test_2============"

# Close the JSON object opened at the top of the script.
# NOTE(review): every appended entry ends with ',', so a non-empty matrix ends in
# "},]}" - confirm the consumer accepts the trailing comma.
run_matrix=$run_matrix"]}"
echo "run_matrix=${run_matrix}"
echo "run_matrix=${run_matrix}" >> $GITHUB_OUTPUT
}

Expand Down
Loading

0 comments on commit e223081

Please sign in to comment.