Skip to content

Commit

Permalink
Papi network config updates and tests BT-372 (broadinstitute#6476)
Browse files Browse the repository at this point in the history
  • Loading branch information
kshakir authored Aug 27, 2021
1 parent 448b20d commit 6fbb7c1
Show file tree
Hide file tree
Showing 18 changed files with 463 additions and 197 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Cromwell Change Log

## 68 Release Notes

### Virtual Private Cloud

Previous Cromwell versions allowed PAPIV2 jobs to run on a specific subnetwork inside a private network by adding the
information to Google Cloud project labels.

Cromwell now allows PAPIV2 jobs to run on a specific subnetwork inside a private network by adding the network and
subnetwork name directly inside the `virtual-private-cloud` backend configuration. More info
[here](https://cromwell.readthedocs.io/en/stable/backends/Google/).

## 67 Release Notes

### Configuration updates for improved scaling
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: check_network_in_vpc
testFormat: workflowsuccess
backends: [Papiv2-Virtual-Private-Cloud]
backends: [Papiv2-Virtual-Private-Cloud-Labels, Papiv2-Virtual-Private-Cloud-Literals]

files {
workflow: virtual_private_cloud/check_network_in_vpc.wdl
Expand All @@ -11,7 +11,10 @@ metadata {
workflowName: check_network_in_vpc
status: Succeeded

"outputs.check_network_in_vpc.network_used": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.subnetwork_used": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.zone_used": "us-east1-c"
"outputs.check_network_in_vpc.network_used_labels": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.subnetwork_used_labels": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.zone_used_labels": "us-east1-c"
"outputs.check_network_in_vpc.network_used_literals": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.subnetwork_used_literals": "cromwell-ci-vpc-network"
"outputs.check_network_in_vpc.zone_used_literals": "us-east1-c"
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
version 1.0

task get_network {
command {
set -euo pipefail

apt-get install --assume-yes jq > /dev/null
INSTANCE=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/name" -H "Metadata-Flavor: Google")
ZONE=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/zone" -H "Metadata-Flavor: Google" | sed -E 's!.*/(.*)!\1!')
TOKEN=$(gcloud auth application-default print-access-token)
INSTANCE_METADATA=$(curl "https://www.googleapis.com/compute/v1/projects/broad-dsde-cromwell-dev/zones/$ZONE/instances/$INSTANCE" -H "Authorization: Bearer $TOKEN" -H 'Accept: application/json')
NETWORK_OBJECT=$(echo $INSTANCE_METADATA | jq --raw-output --exit-status '.networkInterfaces[0]')
echo $NETWORK_OBJECT | jq --exit-status '.network' | sed -E 's!.*/(.*)!\1!' | sed 's/"//g' > network
echo $NETWORK_OBJECT | jq --exit-status '.subnetwork' | sed -E 's!.*/(.*)!\1!' | sed 's/"//g' > subnetwork
echo $ZONE > zone
task get_network_labels_backend {
meta { volatile: true }
input {
Array[String] commandScript
String dockerImage
}

command <<<~{sep="\n" commandScript}>>>
runtime {
docker: "gcr.io/google.com/cloudsdktool/cloud-sdk:slim"
backend: "Papiv2-Virtual-Private-Cloud"
docker: dockerImage
backend: "Papiv2-Virtual-Private-Cloud-Labels"
}
output {
String networkName = read_string("network")
String subnetworkName = read_string("subnetwork")
String zone = read_string("zone")
}
}

task get_network_literals_backend {
meta { volatile: true }
input {
Array[String] commandScript
String dockerImage
}
command <<<~{sep="\n" commandScript}>>>
runtime {
docker: dockerImage
backend: "Papiv2-Virtual-Private-Cloud-Literals"
}
output {
String networkName = read_string("network")
String subnetworkName = read_string("subnetwork")
Expand All @@ -28,12 +37,64 @@ task get_network {
}

workflow check_network_in_vpc {
call get_network
# Create a reusable script for multiple backends using workarounds for Cromwell 66:
# - can't pass `backend` as a variable runtime attribute; the call will run on the default backend
# - use an `Array[String]` to simulate a WDL multiline string
# - an escaped backslash `\\` at the end of a WDL string returns a syntax error (maybe from womtool describe?)
String backslash = "\u005c"
Array[String] commandScript = [
"set -euo pipefail",
"",
"apt-get install --assume-yes jq > /dev/null",
"PROJECT=$(",
" curl " + backslash,
" -s \"http://metadata.google.internal/computeMetadata/v1/project/project-id\" " + backslash,
" -H \"Metadata-Flavor: Google\" |",
" sed -E 's!.*/(.*)!\\1!'",
")",
"ZONE=$(",
" curl " + backslash,
" -s \"http://metadata.google.internal/computeMetadata/v1/instance/zone\" " + backslash,
" -H \"Metadata-Flavor: Google\" |",
" sed -E 's!.*/(.*)!\\1!'",
")",
"INSTANCE=$(",
" curl " + backslash,
" -s \"http://metadata.google.internal/computeMetadata/v1/instance/name\" " + backslash,
" -H \"Metadata-Flavor: Google\"",
")",
"TOKEN=$(gcloud auth application-default print-access-token)",
"INSTANCE_METADATA=$(",
" curl \"https://www.googleapis.com/compute/v1/projects/$PROJECT/zones/$ZONE/instances/$INSTANCE\" " + backslash,
" -H \"Authorization: Bearer $TOKEN\" " + backslash,
" -H 'Accept: application/json'",
")",
"NETWORK_OBJECT=$(echo $INSTANCE_METADATA | jq --raw-output --exit-status '.networkInterfaces[0]')",
"echo $NETWORK_OBJECT | jq --exit-status '.network' | sed -E 's!.*/(.*)!\\1!' | sed 's/\"//g' > network",
"echo $NETWORK_OBJECT | jq --exit-status '.subnetwork' | sed -E 's!.*/(.*)!\\1!' | sed 's/\"//g' > subnetwork",
"echo $ZONE > zone"
]
String dockerImage = "gcr.io/google.com/cloudsdktool/cloud-sdk:slim"

call get_network_labels_backend {
input:
commandScript = commandScript,
dockerImage = dockerImage
}

call get_network_literals_backend {
input:
commandScript = commandScript,
dockerImage = dockerImage
}

output {
String network_used = get_network.networkName
String subnetwork_used = get_network.subnetworkName
String zone_used = get_network.zone
String network_used_labels = get_network_labels_backend.networkName
String subnetwork_used_labels = get_network_labels_backend.subnetworkName
String zone_used_labels = get_network_labels_backend.zone
String network_used_literals = get_network_literals_backend.networkName
String subnetwork_used_literals = get_network_literals_backend.subnetworkName
String zone_used_literals = get_network_literals_backend.zone
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ files {
metadata {
workflowName: workbench_health_monitor_check
status: Succeeded
"outputs.workbench_health_monitor_check.out": """{"DockerHub":{"ok":true},"Engine Database":{"ok":true},"GCS":{"ok":true},"Papi":{"ok":true},"Papi-Caching-No-Copy":{"ok":true},"Papiv2":{"ok":true},"Papiv2-Reference-Disk-Localization":{"ok":true},"Papiv2-Virtual-Private-Cloud":{"ok":true},"Papiv2NoDockerHubConfig":{"ok":true},"Papiv2RequesterPays":{"ok":true},"Papiv2USADockerhub":{"ok":true}}"""
"outputs.workbench_health_monitor_check.out": """{"DockerHub":{"ok":true},"Engine Database":{"ok":true},"GCS":{"ok":true},"Papi":{"ok":true},"Papi-Caching-No-Copy":{"ok":true},"Papiv2":{"ok":true},"Papiv2-Reference-Disk-Localization":{"ok":true},"Papiv2-Virtual-Private-Cloud-Labels":{"ok":true},"Papiv2-Virtual-Private-Cloud-Literals":{"ok":true},"Papiv2NoDockerHubConfig":{"ok":true},"Papiv2RequesterPays":{"ok":true},"Papiv2USADockerhub":{"ok":true}}"""
}
44 changes: 42 additions & 2 deletions docs/backends/Google.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,9 @@ This filesystem has two required configuration options:

To run your jobs in a private network add the `virtual-private-cloud` stanza in the `config` stanza of the PAPI v2 backend:

```
#### Virtual Private Network via Labels

```hocon
backend {
...
providers {
Expand Down Expand Up @@ -347,8 +349,46 @@ For example, if your `virtual-private-cloud` config looks like the one above, an

Cromwell will get labels from the project's metadata and look for a label whose key is `my-private-network`.
Then it will use the value of the label, which is `vpc-network` here, as the name of the private network and run the jobs on this network.
If the network key is not present in the project's metadata Cromwell will fall back to running jobs on the default network.
If the network key is not present in the project's metadata, Cromwell will fall back to trying to run jobs using the
literal network settings (`network-name` and `subnetwork-name`), and then fall back to running on the default network.

#### Virtual Private Network via Literals

```hocon
backend {
...
providers {
...
PapiV2 {
actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory"
config {
...
virtual-private-cloud {
network-name = "vpc-network"
subnetwork-name = "vpc-subnetwork"
}
...
}
}
}
}
```

The `network-name` and `subnetwork-name` should reference the name of your private network and the name of a subnetwork
within that network, respectively. The `subnetwork-name` setting is optional.

For example, if your `virtual-private-cloud` config looks like the one above, then Cromwell will use the value of
`network-name`, which is `vpc-network` here, as the name of the private network and run the jobs on this network.
If the network name is not present in the config, Cromwell will fall back to trying to run jobs on the default network.

If the `network-name` or `subnetwork-name` values contain the string `${projectId}`, then Cromwell will replace that
string with the name of the project running the Pipelines API.

If the `network-name` does not contain a `/` then it will be prefixed with `projects/${projectId}/global/networks/`.

Cromwell will then pass the network and subnetwork values to the Pipelines API. See the documentation for the
[Cloud Life Sciences API](https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.pipelines/run#Network)
for more information on the various formats accepted for `network` and `subnetwork`.

### Custom Google Cloud SDK container
Cromwell can't use Google's container registry if a VPC Perimeter is used in the project.
Expand Down
31 changes: 29 additions & 2 deletions src/ci/resources/papi_v2_shared_application.inc.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ services {
"Papiv2NoDockerHubConfig",
"Papiv2RequesterPays",
"Papi-Caching-No-Copy",
"Papiv2-Virtual-Private-Cloud",
"Papiv2-Virtual-Private-Cloud-Labels",
"Papiv2-Virtual-Private-Cloud-Literals",
"Papiv2-Reference-Disk-Localization",
]
}
Expand Down Expand Up @@ -101,7 +102,7 @@ backend {
include "dockerhub_provider_config_v1.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud {
Papiv2-Virtual-Private-Cloud-Labels {
actor-factory = "REPLACEME!"
config {
# When importing: Remember to also include an appropriate provider_config.inc.conf here.
Expand All @@ -118,6 +119,32 @@ backend {
include "dockerhub_provider_config_v1.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud-Literals {
actor-factory = "REPLACEME!"
config {
# When importing: Remember to also include an appropriate provider_config.inc.conf here.

genomics.compute-service-account = "centaur@broad-dsde-cromwell-dev.iam.gserviceaccount.com"
filesystems.http {}
virtual-private-cloud {
# integration testing:
# - fully qualified name
# - hardcoded project id
# - does not end with `/`
network-name = "projects/broad-dsde-cromwell-dev/global/networks/cromwell-ci-vpc-network"
# integration testing:
# - fully qualified name
# - cromwell replaces the `${projectId}`
# - papi replaces the `*`
# Btw, yes, each of the subnets in this network has the same name as the network itself
# https://console.cloud.google.com/networking/networks/details/cromwell-ci-vpc-network?project=broad-dsde-cromwell-dev&pageTab=SUBNETS
subnetwork-name = "projects/${projectId}/regions/*/subnetworks/cromwell-ci-vpc-network"
}

# Have the engine authenticate to docker.io. See BT-141 for more info.
include "dockerhub_provider_config_v1.inc.conf"
}
}
papi-v2-usa {
actor-factory = "REPLACEME!"
config {
Expand Down
8 changes: 7 additions & 1 deletion src/ci/resources/papi_v2alpha1_application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ backend {
include "papi_v1_v2alpha1_provider_config.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud {
Papiv2-Virtual-Private-Cloud-Labels {
actor-factory = "cromwell.backend.google.pipelines.v2alpha1.PipelinesApiLifecycleActorFactory"
config {
include "papi_v1_v2alpha1_provider_config.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud-Literals {
actor-factory = "cromwell.backend.google.pipelines.v2alpha1.PipelinesApiLifecycleActorFactory"
config {
include "papi_v1_v2alpha1_provider_config.inc.conf"
Expand Down
8 changes: 7 additions & 1 deletion src/ci/resources/papi_v2beta_application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ backend {
include "papi_v2beta_provider_config.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud {
Papiv2-Virtual-Private-Cloud-Labels {
actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory"
config {
include "papi_v2beta_provider_config.inc.conf"
}
}
Papiv2-Virtual-Private-Cloud-Literals {
actor-factory = "cromwell.backend.google.pipelines.v2beta.PipelinesApiLifecycleActorFactory"
config {
include "papi_v2beta_provider_config.inc.conf"
Expand Down
Loading

0 comments on commit 6fbb7c1

Please sign in to comment.