Skip to content

Commit

Permalink
Source connector performance harness (airbytehq#23786)
Browse files Browse the repository at this point in the history
* remove arguments

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* Source Performance harness (airbytehq#23922)

* test

* initial commit

* Working dev

* src-platform-dst

* Integrate secrets

* Update .dockerignore

* Update build.gradle

* Update base.sh

* Build harness module and use it to measure

* Build harness module and use it to measure

* Build harness module and use it to measure

* Automated Change

* Integrate secrets

* Integrate secrets

* Integrate secrets

* Integrate secrets

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* test

* Automated Change

* test

* Automated Change

* better error handling

* temp

* Automated Change

* use connector image name

* use connector image name

* use connector image name

* use connector image name

* use connector image name

* use connector image name

* use connector image name

* use connector image name

* add test name argument

* add test name argument

* add test name argument

* add test name argument

* add test name argument

* add test name argument

* Automated Change

* add bottleneck_stream1 dataset

* Automated Change

* fix dependency

* fix dependency

* test downgrade of commons to 41.1

* Automated Change

* test downgrade of commons to 41.0

* test

* test

* test

* test

* test

* exit condition

* exit condition

* exit condition

* test

* test

* test

* test

* test

* test

* sanity

* readme

* change command name

* update gradle settings

* sanity

* architecture aware docker build

* sanity

* put run harness step in a script

* sanity

* sanity

---------

Co-authored-by: rodireich <rodireich@users.noreply.github.com>
  • Loading branch information
rodireich and rodireich authored Mar 30, 2023
1 parent 836e4f4 commit 0a38d4e
Show file tree
Hide file tree
Showing 18 changed files with 1,185 additions and 8 deletions.
172 changes: 164 additions & 8 deletions .github/workflows/connector-performance-command.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,171 @@ on:
uuid:
description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
required: false
connector-acceptance-test-version:
description: "Set a specific connector acceptance test version to use. Enter 'dev' to test, build and use a local version of Connector Acceptance Test."
required: false
default: "latest"
local_cdk:
description: "Run Connector Acceptance Tests against the CDK version on the current branch."
# Dataset selector forwarded to the harness (exported as DATASET in the "Run harness" step).
# Keep this list in sync with the values advertised in the PR comment posted by the performance-test job.
dataset:
  description: "Name of dataset to use for performance measurement. Currently supports 1m, 10m, 20m, bottleneck_stream1."
  required: false
  default: "1m"
jobs:
Stub:
# No-op job: its single step only runs `true`, but the step name embeds the dispatched `uuid` input.
# Per the `uuid` input description, this exists because the GitHub dispatches endpoint does not return a workflow run id.
uuid:
name: "Custom UUID of workflow run"
timeout-minutes: 10
runs-on: ubuntu-latest
steps:
- name: UUID ${{ github.event.inputs.uuid }}
run: true
# Starts the self-hosted EC2 runner used by the performance-test job and exposes its
# label and instance id as job outputs (consumed by performance-test and stop-test-runner).
start-test-runner:
name: Start Build EC2 Runner
needs: uuid
timeout-minutes: 10
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
# Check out the requested repo/ref so the local action and helper script below are available.
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo }}
ref: ${{ github.event.inputs.gitref }}
# NOTE(review): presumably this script exports the selected token as env.PAT (used below) — confirm in tools/bin.
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
# Local composite action; its `label` and `ec2-instance-id` outputs feed the job outputs above.
- name: Start AWS Runner
id: start-ec2-runner
uses: ./.github/actions/start-aws-runner
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
github-token: ${{ env.PAT }}
# Main job: runs on the EC2 runner started by start-test-runner, validates the requested
# connector, builds the harness and connector images, and prepares a KIND cluster.
performance-test:
timeout-minutes: 240
needs: start-test-runner
runs-on: ${{ needs.start-test-runner.outputs.label }}
steps:
# Only runs when a comment id was supplied; adds the run link and the supported dataset values to that comment.
- name: Link comment to workflow run
if: github.event.inputs.comment-id
uses: peter-evans/create-or-update-comment@v1
with:
comment-id: ${{ github.event.inputs.comment-id }}
body: |
#### Note: The following `dataset=` values are supported: `1m`<sub>(default)</sub>, `10m`, `20m`, `bottleneck_stream1`
> :runner: ${{github.event.inputs.connector}} https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
# Input validation, part 1: match the connector input against an allow-list pattern
# (optional `connectors/` or `bases/` prefix followed by letters, digits, `-` or `_`).
- name: Search for valid connector name format
id: regex
uses: AsasInnab/regex-action@v1
with:
regex_pattern: "^((connectors|bases)/)?[a-zA-Z0-9-_]+$"
regex_flags: "i" # required to be set for this plugin
search_string: ${{ github.event.inputs.connector }}
# Input validation, part 2: fail unless the regex match equals the entire input.
- name: Validate input workflow format
if: steps.regex.outputs.first_match != github.event.inputs.connector
run: echo "The connector provided has an invalid format!" && exit 1
# The harness build steps below are hard-wired to source-postgres, so reject anything else early.
- name: Filter supported connectors
if: "${{ github.event.inputs.connector != 'connectors/source-postgres' }}"
run: echo "Only connectors/source-postgres currently supported by harness" && exit 1
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo }}
ref: ${{ github.event.inputs.gitref }}
- name: Install Java
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "17"
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: "3.9"
- name: Install CI scripts
# all CI python packages have the prefix "ci_"
run: |
pip install --quiet -e ./tools/ci_*
# Uses the ci_credentials tool installed above, authenticated through GCP_GSM_CREDENTIALS.
- name: Write source-harness credentials
run: |
ci_credentials connectors-performance/source-harness write-to-storage
env:
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
# Builds the harness module (checks skipped via `-x check`) and notes progress in the step summary.
- name: build harness
shell: bash
run: |
echo "Building... source-harness" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
./gradlew :airbyte-integrations:connectors-performance:source-harness:build -x check
# The summary echoes the connector input, but the Gradle target is fixed to source-postgres
# (the "Filter supported connectors" step guarantees they agree).
- name: build connector
shell: bash
run: |
echo "Building... ${{github.event.inputs.connector}}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY # this is a blank line
echo "Running ./gradlew :airbyte-integrations:connectors:source-postgres:build -x check"
./gradlew :airbyte-integrations:connectors:source-postgres:build -x check
# Creates the local KIND cluster; the "Run harness" step loads images into it under the name `chart-testing`.
- name: KIND Kubernetes Cluster Setup
uses: helm/kind-action@v1.4.0
with:
config: ./tools/bin/source-harness-kind-cluster-config.yaml
# Deploys the harness pod into the KIND cluster, streams its logs, and publishes the
# final log line (minus its JSON envelope) as the multi-line step output RUN_RESULT.
- name: Run harness
  id: run-harness
  shell: bash
  env:
    # Inputs are passed through env rather than interpolated into the script text.
    CONN: ${{ github.event.inputs.connector }}
    DS: ${{ github.event.inputs.dataset }}
    # JSON envelope stripped from the final log line. The prefix hard-codes the
    # logger location (runTest):165; if it stops matching, the raw line is published unchanged.
    prefix: '{"type":"LOG","log":{"level":"INFO","message":"INFO i.a.i.p.PerformanceTest(runTest):165'
    suffix: '"}}'
  run: |
    kubectl apply -f ./tools/bin/admin-service-account.yaml
    kind load docker-image airbyte/source-postgres:dev --name chart-testing
    kind load docker-image airbyte/source-harness:dev --name chart-testing
    # connectors/source-postgres -> airbyte/source-postgres:dev
    export CONNECTOR_IMAGE_NAME="${CONN/connectors/airbyte}:dev"
    export DATASET="$DS"
    envsubst < ./tools/bin/source-harness-process.yaml | kubectl create -f -
    POD=$(kubectl get pod -l app=source-harness -o jsonpath="{.items[0].metadata.name}")
    kubectl wait --for=condition=Ready --timeout=20s "pod/$POD"
    kubectl logs --follow "$POD"
    # Random heredoc-style delimiter for the multi-line GITHUB_OUTPUT value.
    EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
    echo "RUN_RESULT<<$EOF" >> "$GITHUB_OUTPUT"
    # `IFS= read -r` keeps backslashes and whitespace intact; printf with a quoted
    # argument avoids word splitting and glob expansion of the log text.
    kubectl logs --tail=1 "$POD" | while IFS= read -r line; do
      line=${line#"$prefix"}
      line=${line%"$suffix"}
      printf '%s\n' "$line" >> "$GITHUB_OUTPUT"
    done
    echo "$EOF" >> "$GITHUB_OUTPUT"
# Appends the harness result (RUN_RESULT output of the run-harness step) to the triggering comment.
# Guarded like the first comment step: without a comment id there is nothing to update,
# and the action would otherwise be invoked with an empty `comment-id`.
- name: Publish performance result to comment
  if: github.event.inputs.comment-id
  uses: peter-evans/create-or-update-comment@v2
  with:
    reactions: '+1'
    comment-id: ${{ github.event.inputs.comment-id }}
    body: |
      ## Performance test Result:
      ```
      ${{ steps.run-harness.outputs.RUN_RESULT }}
      ```
# need to add credentials here
# In case of self-hosted EC2 errors, remove this block.
# Tears down the EC2 runner created by start-test-runner. Runs even when earlier jobs fail (see `if` below).
stop-test-runner:
name: Stop Build EC2 Runner
timeout-minutes: 10
needs:
- start-test-runner # required to get output from the start-runner job
- performance-test # required to wait when the main job is done
- uuid
runs-on: ubuntu-latest
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
# NOTE(review): looks like a leftover from an earlier stub version of this workflow — confirm whether it is still needed.
- run: echo "Connector Performance harness stub"
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
# Unlike the other jobs, this checkout does not pass the `repo`/`gitref` inputs.
- name: Checkout Airbyte
uses: actions/checkout@v3
# NOTE(review): presumably this script exports the selected token as env.PAT (used below) — confirm in tools/bin.
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
# Stops the instance identified by the outputs of start-test-runner.
- name: Stop EC2 runner
uses: supertopher/ec2-github-runner@base64v1.0.10
with:
mode: stop
github-token: ${{ env.PAT }}
label: ${{ needs.start-test-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-test-runner.outputs.ec2-instance-id }}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ public String fieldSelectionWorkspaces() {
return getEnvOrDefault(FIELD_SELECTION_WORKSPACES, "", (arg) -> arg);
}

/**
 * This implementation always returns an empty string; no environment variable is read here.
 *
 * @return the empty string
 */
@Override
public String strictComparisonNormalizationWorkspaces() {
return "";
}

/**
 * This implementation always returns an empty string; no environment variable is read here.
 *
 * @return the empty string
 */
@Override
public String strictComparisonNormalizationTag() {
return "";
}

// TODO: refactor in order to use the same method than the ones in EnvConfigs.java
public <T> T getEnvOrDefault(final String key, final T defaultValue, final Function<String, T> parser) {
final String value = System.getenv(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,20 @@ public interface FeatureFlags {
*/
String fieldSelectionWorkspaces();

/**
* Get the workspaces allow-listed for strict incremental comparison in normalization. This takes
* precedence over the normalization version in destination_definitions.yaml.
*
* @return a comma-separated list of workspace ids where strict incremental comparison should be
* enabled in normalization.
*/
String strictComparisonNormalizationWorkspaces();

/**
* Get the Docker image tag representing the normalization version with strict-comparison.
*
* @return The Docker image tag representing the normalization version with strict-comparison
*/
String strictComparisonNormalizationTag();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Exclude everything from the Docker build context by default...
*
# ...then re-include only the paths the Dockerfile needs:
# the Dockerfile itself, the build output directory, and the base.sh script.
!Dockerfile
!build
!base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Stage 1: unpack the application distribution tarball found under build/distributions.
FROM airbyte/integration-base-java:dev AS build
WORKDIR /airbyte

ENV APPLICATION=source-harness

COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -rf ${APPLICATION}.tar

# Stage 2: runtime image.
FROM airbyte/integration-base-java:dev
# Target architecture used to pick the matching kubectl binary. BuildKit sets this
# automatically; with the legacy builder it must be supplied via --build-arg.
ARG TARGETARCH
WORKDIR /airbyte

ENV APPLICATION=source-harness
# Kubectl and socat are needed in order to create a pod similar to cloud orchestrator
# It brings up in cluster the source pod to measure, and communicates over socat
# `curl -f` makes the build fail on an HTTP error (e.g. an empty or unsupported TARGETARCH)
# instead of saving the error page and installing it as kubectl.
RUN curl -fsSLO https://s3.us-west-2.amazonaws.com/amazon-eks/1.25.6/2023-01-30/bin/linux/${TARGETARCH}/kubectl \
    && chmod +x ./kubectl \
    && mv ./kubectl /bin/ \
    && yum install -y socat \
    && yum clean all

COPY --from=build /airbyte /airbyte
COPY base.sh .

LABEL io.airbyte.version=0.1
LABEL io.airbyte.name=airbyte/source-harness
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# source-harness

Performance harness for source connectors

This component is used by the `/connector-performance` GitHub Actions command to measure the throughput of
source connectors on a number of datasets.
20 changes: 20 additions & 0 deletions airbyte-integrations/connectors-performance/source-harness/base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

set -e

# Print all arguments on stderr, leaving stdout untouched.
echo2() {
  echo "$@" 1>&2
}

# Report the given message on stderr (via echo2), then terminate with exit status 1.
error() {
  echo2 "$@"
  exit 1
}

# todo: make it easy to select source or destination and validate based on selection by adding an integration type env variable.
# Entry point: starts a background HTTP responder, then runs the harness application
# with this script's stdin and command-line arguments.
function main() {
# Background socat listener on TCP port 9000; each connection is forked and answered
# by printf with an "HTTP/1.0 200 OK" response. nohup detaches it from this shell.
# NOTE(review): presumably used as a health/readiness endpoint for the pod — confirm against the pod spec.
nohup bash -c "socat tcp-listen:9000,reuseaddr,fork \"exec:printf \'HTTP/1.0 200 OK\r\n\r\n\'\" &";
# Forward stdin to the launcher under /airbyte/bin named by $APPLICATION, passing all arguments through.
cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
}

main "$@"
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Build script for the source-harness module: a runnable application that is also packaged as a Docker image.
plugins {
id 'application'
id 'airbyte-docker'
}

// Extra Maven repository, in addition to whatever the root build configures.
// NOTE(review): presumably needed to resolve the published io.airbyte artifacts pinned below — confirm.
repositories {
maven {
url 'https://airbyte.mycloudrepo.io/public/repositories/airbyte-public-jars/'
}
}

application {
mainClass = 'io.airbyte.integrations.performance.Main'
// Exit the JVM on OutOfMemoryError and allow the heap to use up to 75% of available RAM.
applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
}
dependencies {
implementation project(':airbyte-db:db-lib')
implementation project(':airbyte-integrations:bases:base-java')
implementation libs.airbyte.protocol
implementation 'io.fabric8:kubernetes-client:5.12.2'
implementation 'org.apache.commons:commons-lang3:3.11'
// Depends on the outputs of base-java's airbyteDocker task.
// NOTE(review): presumably so the base image is built before this module — confirm.
implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)
// Published Airbyte platform artifacts, pinned to 0.42.0; keep the two versions in step.
implementation 'io.airbyte:airbyte-commons-worker:0.42.0'
implementation 'io.airbyte.airbyte-config:config-models:0.42.0'
}
Loading

0 comments on commit 0a38d4e

Please sign in to comment.