Skip to content

Commit

Permalink
Google Cloud SDK updates surrounding gsutil and python3 (#6479)
Browse files Browse the repository at this point in the history
- Updated pinned gcloud versions to the latest (354.0.0)
- Switched localizer/delocalizer default image from slim to the even smaller alpine
- Using python3 instead of python to decode base64 multiline strings
- Explicitly using UTF-8 to encode and decode base64 multiline strings
- Did not update GATK images for calls to gsutil as latest (4.2.2.0) still uses old gcloud w/ python2
- Left all images pointing at existing repositories, and did not switch to pulling from gcr for now
- Moved more of the duplicated code from v2alpha1 and v2beta into common
  • Loading branch information
kshakir authored Aug 30, 2021
1 parent 6fbb7c1 commit f14d91a
Show file tree
Hide file tree
Showing 25 changed files with 118 additions and 276 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ task delete_file_in_gcs {
gsutil rm ${file_path}
}
runtime {
docker: "google/cloud-sdk@sha256:fb904276e8a902ccd9564989d9222bdfbe37ffcd7f9989ca7e24b4019a9b4b6b"
# google/cloud-sdk:354.0.0-slim
docker: "google/cloud-sdk@sha256:b5bd0d4b9e56a8b82cea893e7c45f9dfb01fa7cb4e1ce0d426a4468d64654710"
}
output {
Boolean done = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ task delete_file_in_gcs {
gsutil rm ${file_path}
}
runtime {
docker: "google/cloud-sdk@sha256:fb904276e8a902ccd9564989d9222bdfbe37ffcd7f9989ca7e24b4019a9b4b6b"
# google/cloud-sdk:354.0.0-slim
docker: "google/cloud-sdk@sha256:b5bd0d4b9e56a8b82cea893e7c45f9dfb01fa7cb4e1ce0d426a4468d64654710"
backend: "Papi-Caching-No-Copy"
}
output {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ task get_stdout {
output {
File out = stdout()
}
runtime { docker: "google/cloud-sdk:251.0.0-slim" }
runtime { docker: "google/cloud-sdk:slim" }
}


Expand All @@ -28,5 +28,5 @@ task check_detritus {
output {
String out = read_string(stdout())
}
runtime { docker: "google/cloud-sdk:251.0.0-slim" }
runtime { docker: "google/cloud-sdk:slim" }
}
2 changes: 1 addition & 1 deletion docs/backends/Google.md
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ Own repository can be used by adding `cloud-sdk-image-url` reference to used con
```
google {
...
cloud-sdk-image-url = "eu.gcr.io/your-project-id/cloudsdktool/cloud-sdk:275.0.0-slim"
cloud-sdk-image-url = "eu.gcr.io/your-project-id/cloudsdktool/cloud-sdk:354.0.0-alpine"
cloud-sdk-image-size-gb = 1
}
```
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
package cromwell.backend.google.pipelines.v2alpha1.api
import java.util.UUID
package cromwell.backend.google.pipelines.common.action

import java.util.UUID
import akka.http.scaladsl.model.ContentType
import common.util.StringUtil._
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.v2alpha1.api.ActionBuilder._
import cromwell.backend.google.pipelines.common.action.ActionUtils._
import cromwell.core.path.Path
import cromwell.filesystems.gcs.GcsPath
import cromwell.filesystems.gcs.RequesterPaysErrors._
import mouse.all._
import org.apache.commons.codec.binary.Base64
import org.apache.commons.text.StringEscapeUtils

import java.nio.charset.StandardCharsets
import scala.concurrent.duration._

/**
Expand All @@ -36,9 +37,13 @@ object ActionCommands {

/**
 * Builds the shell command that creates a directory inside the container, along with any
 * missing parent directories (`mkdir -p`).
 *
 * @param containerPath directory to create; rendered via its `escape` method
 *                      (presumably shell-quoting -- confirm against `Path.escape`)
 * @return the `mkdir -p` command line
 */
def makeContainerDirectory(containerPath: Path): String = s"mkdir -p ${containerPath.escape}"

def delocalizeDirectory(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])(implicit gcsTransferConfiguration: GcsTransferConfiguration) = retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag ${contentType |> makeContentTypeFlag} -m rsync -r ${containerPath.escape} ${cloudPath.escape}"
def delocalizeDirectory(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && " +
s"gsutil $flag ${contentType |> makeContentTypeFlag} -m rsync -r ${containerPath.escape} ${cloudPath.escape}"
}
}
}

Expand All @@ -54,33 +59,41 @@ object ActionCommands {
* By instead using the parent directory (and ensuring it ends with a slash), gsutil will treat that as a directory and put the file under it.
* So the final gsutil command will look something like gsutil cp /local/file.txt gs://bucket/subdir/
*/
def delocalizeFile(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])(implicit gcsTransferConfiguration: GcsTransferConfiguration) = retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag ${contentType |> makeContentTypeFlag} cp ${containerPath.escape} ${cloudPath.parent.escape.ensureSlashed}"
def delocalizeFile(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && " +
s"gsutil $flag ${contentType |> makeContentTypeFlag} cp ${containerPath.escape} ${cloudPath.parent.escape.ensureSlashed}"
}
}
}

/**
* delocalizeFile necessarily copies the file to the same name. Use this if you want to specify a name different from the original.
* Make sure that there's no object named "yourfinalname_something" (see above) in the same cloud directory.
*/
def delocalizeFileTo(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])(implicit gcsTransferConfiguration: GcsTransferConfiguration) = retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag ${contentType |> makeContentTypeFlag} cp ${containerPath.escape} ${cloudPath.escape}"
def delocalizeFileTo(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && " +
s"gsutil $flag ${contentType |> makeContentTypeFlag} cp ${containerPath.escape} ${cloudPath.escape}"
}
}
}

/**
 * Wraps a shell command so that it only runs when `containerPath` exists (`[ -e ... ]`).
 *
 * @param containerPath path whose existence is tested; rendered via its `escape` method
 * @param f             by-name shell command placed in the `then` branch; it is evaluated
 *                      once, when the resulting string is built
 * @return a single-line `if [ -e <path> ]; then <command>; fi` shell snippet
 */
def ifExist(containerPath: Path)(f: => String): String = s"if [ -e ${containerPath.escape} ]; then $f; fi"

def every(duration: FiniteDuration)(f: => String) =
def every(duration: FiniteDuration)(f: => String): String =
s"""while true; do
| (
| $f
| ) > /dev/null 2>&1
| sleep ${duration.toSeconds}
|done""".stripMargin

def retry(f: => String)(implicit gcsTransferConfiguration: GcsTransferConfiguration, wait: FiniteDuration) = {
def retry(f: => String)(implicit gcsTransferConfiguration: GcsTransferConfiguration, wait: FiniteDuration): String = {
s"""for i in $$(seq ${gcsTransferConfiguration.transferAttempts}); do
| (
| $f
Expand All @@ -97,27 +110,35 @@ object ActionCommands {
|exit "$$RC"""".stripMargin
}

def delocalizeFileOrDirectory(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])(implicit gcsTransferConfiguration: GcsTransferConfiguration) = {
def delocalizeFileOrDirectory(containerPath: Path, cloudPath: Path, contentType: Option[ContentType])
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
s"""if [ -d ${containerPath.escape} ]; then
| ${delocalizeDirectory(containerPath, cloudPath, contentType)}
|else
| ${delocalizeFile(containerPath, cloudPath, contentType)}
|fi""".stripMargin
}

def localizeDirectory(cloudPath: Path, containerPath: Path)(implicit gcsTransferConfiguration: GcsTransferConfiguration) = retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"${containerPath |> makeContainerDirectory} && rm -f $$HOME/.config/gcloud/gce && gsutil $flag -m rsync -r ${cloudPath.escape} ${containerPath.escape}"
def localizeDirectory(cloudPath: Path, containerPath: Path)
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"${containerPath |> makeContainerDirectory} && " +
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag -m rsync -r ${cloudPath.escape} ${containerPath.escape}"
}
}
}

def localizeFile(cloudPath: Path, containerPath: Path)(implicit gcsTransferConfiguration: GcsTransferConfiguration) = retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag cp ${cloudPath.escape} ${containerPath.escape}"
def localizeFile(cloudPath: Path, containerPath: Path)
(implicit gcsTransferConfiguration: GcsTransferConfiguration): String = {
retry {
recoverRequesterPaysError(cloudPath) { flag =>
s"rm -f $$HOME/.config/gcloud/gce && gsutil $flag cp ${cloudPath.escape} ${containerPath.escape}"
}
}
}

def recoverRequesterPaysError(path: Path)(f: String => String) = {
def recoverRequesterPaysError(path: Path)(f: String => String): String = {
val commandWithoutProject = f("")
val commandWithProject = f(s"-u ${path.projectId}")

Expand Down Expand Up @@ -149,10 +170,11 @@ object ActionCommands {
def multiLineCommandTransformer(shell: String)(commandString: String): String = {
val randomUuid = UUID.randomUUID().toString
val withBashShebang = s"#!/bin/bash\n\n$commandString"
val base64EncodedScript = Base64.encodeBase64String(withBashShebang.getBytes)
val base64EncodedScript = Base64.encodeBase64String(withBashShebang.getBytes(StandardCharsets.UTF_8))
val scriptPath = s"/tmp/$randomUuid.sh"

s"""python -c 'import base64; print(base64.b64decode("$base64EncodedScript"));' > $scriptPath && """ +
s"""python3 -c 'import base64; print(base64.b64decode("$base64EncodedScript").decode("utf-8"));' """ +
s"""> $scriptPath && """ +
s"chmod u+x $scriptPath && " +
s"$shell $scriptPath"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package cromwell.backend.google.pipelines.common.action

import com.typesafe.config.ConfigFactory
import org.apache.commons.text.StringEscapeUtils
import net.ceedubs.ficus.Ficus._

object ActionUtils {
/** Image to use for ssh access. */
Expand All @@ -17,6 +19,26 @@ object ActionUtils {
*/
val cromwellImagesSizeRoundedUpInGB = 1

private val config = ConfigFactory.load().getConfig("google")

/**
* An image with the Google Cloud SDK installed.
* http://gcr.io/google.com/cloudsdktool/cloud-sdk
*
* FYI additional older versions are available on DockerHub at:
* https://hub.docker.com/r/google/cloud-sdk
*
* When updating this value, also consider updating the CromwellImagesSizeRoundedUpInGB below.
*/
val CloudSdkImage: String =
config.getOrElse("cloud-sdk-image-url", "gcr.io/google.com/cloudsdktool/cloud-sdk:354.0.0-alpine")

/*
* At the moment, cloud-sdk (584MB for 354.0.0-alpine) and stedolan/jq (182MB) decompressed ~= 0.8 GB
*/
val CromwellImagesSizeRoundedUpInGB: Int =
config.getOrElse("cloud-sdk-image-size-gb", 1)

/** Quotes a string such that it's compatible as a string argument in the shell. */
def shellEscaped(any: Any): String = {
val str = String.valueOf(any)
Expand Down Expand Up @@ -52,6 +74,9 @@ object ActionUtils {

val terminateAllBackgroundActionsCommand: String = s"kill -TERM -1 && sleep $backgroundActionTerminationGraceTime || true"

/**
 * Builds a `printf` shell command that prints the current UTC time (formatted by
 * `date -u '+%Y/%m/%d %H:%M:%S'`) followed by the given message on one line.
 *
 * @param message text to print; passed through `shellEscaped` before being embedded
 * @return the `printf` command line
 */
def timestampedMessage(message: String): String = {
  val escapedMessage = shellEscaped(message)
  s"""printf '%s %s\\n' "$$(date -u '+%Y/%m/%d %H:%M:%S')" $escapedMessage"""
}

/** Start background actions first, leave the rest as is */
def sortActions[Action](containerSetup: List[Action],
localization: List[Action],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import com.google.api.services.genomics.v2alpha1.model._
import com.google.api.services.genomics.v2alpha1.{Genomics, GenomicsScopes}
import com.google.api.services.oauth2.Oauth2Scopes
import com.google.api.services.storage.StorageScopes
import com.typesafe.config.ConfigFactory
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common.action.ActionUtils
import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineParameters
Expand Down Expand Up @@ -200,24 +199,3 @@ case class GenomicsFactory(applicationName: String, authMode: GoogleAuthMode, en

override def usesEncryptedDocker: Boolean = true
}

//noinspection ScalaRedundantConversion
object GenomicsFactory {
private val config = ConfigFactory.load().getConfig("google")

/**
* An image with the Google Cloud SDK installed.
* http://gcr.io/google.com/cloudsdktool/cloud-sdk
*
* FYI additional older versions are available on DockerHub at:
* https://hub.docker.com/r/google/cloud-sdk
*
* When updating this value, also consider updating the CromwellImagesSizeRoundedUpInGB below.
*/
val CloudSdkImage: String = if (config.hasPath("cloud-sdk-image-url")) { config.getString("cloud-sdk-image-url").toString } else "gcr.io/google.com/cloudsdktool/cloud-sdk:276.0.0-slim"

/*
* At the moment, cloud-sdk (924MB for 276.0.0-slim) and stedolan/jq (182MB) decompressed ~= 1.1 GB
*/
val CromwellImagesSizeRoundedUpInGB: Int = if (config.hasPath("cloud-sdk-image-size-gb")) { config.getInt("cloud-sdk-image-size-gb") } else 1
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ package cromwell.backend.google.pipelines.v2alpha1
import cloud.nio.impl.drs.{DrsCloudNioFileSystemProvider, DrsConfig}
import com.google.api.services.genomics.v2alpha1.model.{Action, Mount}
import com.typesafe.config.ConfigFactory
import cromwell.backend.google.pipelines.common.action.ActionCommands._
import cromwell.backend.google.pipelines.common.action.ActionLabels._
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common._
import cromwell.backend.google.pipelines.v2alpha1.api.ActionBuilder._
import cromwell.backend.google.pipelines.v2alpha1.api.ActionCommands._
import cromwell.backend.google.pipelines.v2alpha1.api.{ActionBuilder, ActionFlag}
import cromwell.filesystems.drs.DrsPath
import cromwell.filesystems.gcs.GcsPath
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ package cromwell.backend.google.pipelines.v2alpha1.api

import com.google.api.services.genomics.v2alpha1.model.{Action, Mount, Secret}
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common.action.ActionCommands
import cromwell.backend.google.pipelines.common.action.ActionLabels._
import cromwell.backend.google.pipelines.common.action.ActionUtils._
import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineDockerKeyAndToken
import cromwell.backend.google.pipelines.common.{PipelinesApiInput, PipelinesApiOutput, PipelinesParameter}
import cromwell.backend.google.pipelines.v2alpha1.GenomicsFactory
import cromwell.backend.google.pipelines.v2alpha1.api.ActionFlag.ActionFlag
import cromwell.core.path.Path
import cromwell.docker.DockerImageIdentifier
Expand Down Expand Up @@ -65,7 +65,7 @@ object ActionBuilder {
}
}

def cloudSdkAction: Action = new Action().setImageUri(GenomicsFactory.CloudSdkImage)
def cloudSdkAction: Action = new Action().setImageUri(CloudSdkImage)

def withImage(image: String): Action = new Action()
.setImageUri(image)
Expand Down Expand Up @@ -219,9 +219,6 @@ object ActionBuilder {
)
}

def timestampedMessage(message: String): String =
s"""printf '%s %s\\n' "$$(date -u '+%Y/%m/%d %H:%M:%S')" ${shellEscaped(message)}"""

/**
* Creates an Action that logs the time as UTC plus prints the message. The original actionLabels will also be
* applied to the logged action, except that Key.Tag -> some-value will be replaced with Key.Logging -> some-value.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package cromwell.backend.google.pipelines.v2alpha1.api

import com.google.api.services.genomics.v2alpha1.model.{Action, Mount}
import cromwell.backend.google.pipelines.common.action.ActionCommands
import cromwell.backend.google.pipelines.common.action.ActionUtils
import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineParameters
import cromwell.backend.google.pipelines.v2alpha1.GenomicsFactory

trait CheckpointingAction {
def checkpointingSetupActions(createPipelineParameters: CreatePipelineParameters,
mounts: List[Mount]
): List[Action] =
createPipelineParameters.runtimeAttributes.checkpointFilename map { checkpointFilename =>
val checkpointingImage = GenomicsFactory.CloudSdkImage
val checkpointingImage = ActionUtils.CloudSdkImage
val checkpointingCommand = createPipelineParameters.checkpointingConfiguration.checkpointingCommand(checkpointFilename, ActionCommands.multiLineBinBashCommand)
val checkpointingEnvironment = Map.empty[String, String]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ import common.util.StringUtil._
import cromwell.backend.google.pipelines.common.PipelinesApiAsyncBackendJobExecutionActor
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common.PipelinesApiJobPaths.GcsDelocalizationScriptName
import cromwell.backend.google.pipelines.common.action.ActionCommands._
import cromwell.backend.google.pipelines.common.action.ActionLabels._
import cromwell.backend.google.pipelines.common.action.ActionUtils._
import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineParameters
import cromwell.backend.google.pipelines.v2alpha1.PipelinesConversions._
import cromwell.backend.google.pipelines.v2alpha1.ToParameter.ops._
import cromwell.backend.google.pipelines.v2alpha1.api.ActionBuilder._
import cromwell.backend.google.pipelines.v2alpha1.api.ActionCommands._
import cromwell.backend.google.pipelines.v2alpha1.{GenomicsFactory, RuntimeOutputMapping}
import cromwell.backend.google.pipelines.v2alpha1.RuntimeOutputMapping
import cromwell.core.path.{DefaultPathBuilder, Path}
import wom.runtime.WomOutputRuntimeExtractor

Expand Down Expand Up @@ -60,7 +60,7 @@ trait Delocalization {
)

ActionBuilder
.withImage(womOutputRuntimeExtractor.dockerImage.getOrElse(GenomicsFactory.CloudSdkImage))
.withImage(womOutputRuntimeExtractor.dockerImage.getOrElse(CloudSdkImage))
.setCommands(commands.asJava)
.withMounts(mounts)
.setEntrypoint("/bin/bash")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package cromwell.backend.google.pipelines.v2alpha1.api

import com.google.api.services.genomics.v2alpha1.model.{Action, Mount}
import cromwell.backend.google.pipelines.common.action.ActionCommands.localizeFile
import cromwell.backend.google.pipelines.common.action.ActionLabels._
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common.PipelinesApiJobPaths._
import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineParameters
import cromwell.backend.google.pipelines.v2alpha1.PipelinesConversions._
import cromwell.backend.google.pipelines.v2alpha1.ToParameter.ops._
import cromwell.backend.google.pipelines.v2alpha1.api.ActionBuilder.cloudSdkShellAction
import cromwell.backend.google.pipelines.v2alpha1.api.ActionCommands.localizeFile


trait Localization {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.typesafe.config.{Config, ConfigFactory}
import common.assertion.CromwellTimeoutSpec
import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration
import cromwell.backend.google.pipelines.common.PipelinesApiFileInput
import cromwell.backend.google.pipelines.common.action.ActionUtils
import cromwell.backend.google.pipelines.common.io.{DiskType, PipelinesApiWorkingDisk}
import cromwell.core.path.DefaultPathBuilder
import cromwell.filesystems.drs.DrsPathBuilder
Expand Down Expand Up @@ -65,7 +66,7 @@ class PipelinesConversionsSpec extends AnyFlatSpec with CromwellTimeoutSpec with
logging.get("mounts") should be(a[java.util.List[_]])
logging.get("mounts").asInstanceOf[java.util.List[_]] should be (empty)

logging.get("imageUri") should be(GenomicsFactory.CloudSdkImage)
logging.get("imageUri") should be(ActionUtils.CloudSdkImage)

val loggingLabels = logging.get("labels").asInstanceOf[java.util.Map[_, _]]
loggingLabels.keySet.asScala should contain theSameElementsAs List("logging", "inputName")
Expand Down
Loading

0 comments on commit f14d91a

Please sign in to comment.