Skip to content

Commit

Permalink
Switch to a streaming zip encoding (digital-asset#2595)
Browse files Browse the repository at this point in the history
This switches the creation of the archive in `daml build` from
`zip-archive` to `zip`. This has a few advantages:

1. It gets rid of lazy IO for reading all the interface and source
files. This avoids the high usage of file handles in `daml build`.

2. It seems to be a slight improvement in max memory usage and runtime
and a giant improvement in allocations (but I think the latter
probably comes primarily from the fact that the allocations are moved to
the bzip C library). The improvement in max memory usage is less than
I expected, so there is probably still something off somewhere.

For now, I only switched over `createArchive`. Archive reading is
still done using `zip-archive`. We might want to switch that over in a
separate PR.
  • Loading branch information
cocreature authored Aug 19, 2019
1 parent c71237b commit 3e2ccc0
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 70 deletions.
24 changes: 24 additions & 0 deletions 3rdparty/c/bzip2.BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package(default_visibility = ["//visibility:public"])

# C library target for bzip2, built directly from the C sources listed below.
# The WORKSPACE maps the "bz2" extra-library name to this target
# ("bz2": "@bzip2//:bz2").
cc_library(
name = "bz2",
# Source files compiled into the library.
srcs = [
"blocksort.c",
"bzlib.c",
"compress.c",
"crctable.c",
"decompress.c",
"huffman.c",
"randtable.c",
],
# Headers made available to targets that depend on this library.
hdrs = [
"bzlib.h",
"bzlib_private.h",
],
# Put this package's directory on the include path of dependents, so that
# `#include <bzlib.h>` resolves.
includes = [
".",
],
# NOTE(review): repeats the package-level default_visibility declared at the
# top of this file.
visibility = [
"//visibility:public",
],
)
18 changes: 18 additions & 0 deletions 3rdparty/haskell/BUILD.bindings-DSL
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package(default_visibility = ["//visibility:public"])

load("@rules_haskell//haskell:defs.bzl", "haskell_library")
load("@ai_formation_hazel//:hazel.bzl", "hazel_library")

# Haskell part of the bindings-DSL package: compiles Bindings/Utilities.hs
# against `base` and depends on the header-only cc_library defined in this
# file. Presumably that dependency is what lets packages depending on
# bindings-DSL find bindings.dsl.h -- confirm against the WORKSPACE comment
# for this custom build file.
haskell_library(
name = "lib",
visibility = ["//visibility:public"],
srcs = [":Bindings/Utilities.hs"],
deps = [hazel_library("base"), ":bindings-DSL-cbits"],
)


# Header-only C library (no srcs) that exposes the bindings-DSL macro headers
# bindings.dsl.h and bindings.cmacros.h to targets that depend on it.
cc_library(
name = "bindings-DSL-cbits",
visibility = ["//visibility:public"],
hdrs = [":bindings.dsl.h", ":bindings.cmacros.h"],
)
13 changes: 13 additions & 0 deletions 3rdparty/haskell/bzlib-conduit.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/src/Data/Conduit/BZlib/Internal.hsc b/src/Data/Conduit/BZlib/Internal.hsc
index f980d35..18d6e3a 100644
--- a/src/Data/Conduit/BZlib/Internal.hsc
+++ b/src/Data/Conduit/BZlib/Internal.hsc
@@ -1,7 +1,7 @@
{-# LANGUAGE ForeignFunctionInterface #-}

#include <bzlib.h>
-#include <bindings.dsl.h>
+#include "bindings.dsl.h"

module Data.Conduit.BZlib.Internal where

17 changes: 17 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ hazel_repositories(
},
),
exclude_packages = [
"bindings-DSL",
"clock",
# Excluded since we build it via the http_archive line above.
"ghc-lib-parser",
Expand All @@ -515,6 +516,7 @@ hazel_repositories(
{
"z": "@com_github_madler_zlib//:z",
"ffi": "" if is_windows else "@libffi_nix//:ffi",
"bz2": "@bzip2//:bz2",
},
),
ghc_workspaces = {
Expand Down Expand Up @@ -586,6 +588,12 @@ hazel_repositories(
"91dd121ac565009f2fc215c50f3365ed66705071a698a545e869041b5d7ff4da",
patch_args = ["-p1"],
patches = ["@com_github_digital_asset_daml//bazel_tools:haskell-c2hs.patch"],
) + hazel_hackage(
"bzlib-conduit",
"0.3.0.2",
"eb2c732b3d4ab5f7b367c51eef845e597ade19da52c03ee11954d35b6cfc4128",
patch_args = ["-p1"],
patches = ["@com_github_digital_asset_daml//3rdparty/haskell:bzlib-conduit.patch"],
),
pkgs = packages,
),
Expand All @@ -611,6 +619,15 @@ hazel_custom_package_hackage(
version = "0.6.2",
)

hazel_custom_package_hackage(
package_name = "bindings-DSL",
# Without a custom build file, packages depending on bindings-DSL
# fail to find bindings.dsl.h.
build_file = "//3rdparty/haskell:BUILD.bindings-DSL",
sha256 = "63de32380c68d1cc5e9c7b3622d67832c786da21163ba0c8a4835e6dd169194f",
version = "1.0.25",
)

hazel_custom_package_hackage(
package_name = "streaming-commons",
build_file = "//3rdparty/haskell:BUILD.streaming-commons",
Expand Down
2 changes: 1 addition & 1 deletion compiler/daml-lf-reader/src/DA/Daml/LF/Reader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ module DA.Daml.LF.Reader
, getManifestField
) where

import Codec.Archive.Zip
import "zip-archive" Codec.Archive.Zip
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BSC
Expand Down
1 change: 1 addition & 0 deletions compiler/damlc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ da_haskell_library(
"vector",
"xml",
"yaml",
"zip",
"zip-archive",
"unordered-containers",
"uniplate",
Expand Down
3 changes: 2 additions & 1 deletion compiler/damlc/daml-compiler/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ da_haskell_library(
hazel_deps = [
"base",
"bytestring",
"conduit",
"containers",
"directory",
"extra",
Expand All @@ -27,7 +28,7 @@ da_haskell_library(
"text",
"time",
"transformers",
"zip-archive",
"zip",
],
src_strip_prefix = "src",
visibility = ["//visibility:public"],
Expand Down
51 changes: 21 additions & 30 deletions compiler/damlc/daml-compiler/src/DA/Daml/Compiler/Dar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module DA.Daml.Compiler.Dar
, pkgNameVersion
) where

import qualified Codec.Archive.Zip as Zip
import qualified "zip" Codec.Archive.Zip as Zip
import Control.Monad.Extra
import Control.Monad.IO.Class
import Control.Monad.Trans.Maybe
Expand All @@ -18,6 +18,7 @@ import DA.Daml.Options.Types
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Conduit.Combinators (sourceFile, sourceLazy)
import Data.List.Extra
import qualified Data.Map.Strict as Map
import Data.Maybe
Expand Down Expand Up @@ -78,16 +79,15 @@ buildDar ::
-> PackageConfigFields
-> NormalizedFilePath
-> FromDalf
-> IO (Maybe BSL.ByteString)
-> IO (Maybe (Zip.ZipArchive ()))
buildDar service pkgConf@PackageConfigFields {..} ifDir dalfInput = do
liftIO $
IdeLogger.logDebug (ideLogger service) $
"Creating dar: " <> T.pack pMain
if unFromDalf dalfInput
then liftIO $
Just <$> do
bytes <- BSL.readFile pMain
createArchive pkgConf "" bytes [] (toNormalizedFilePath ".") [] [] []
then do
bytes <- BSL.readFile pMain
pure $ Just $ createArchive pkgConf "" bytes [] (toNormalizedFilePath ".") [] [] []
else runAction service $
runMaybeT $ do
WhnfPackage pkg <- useE GeneratePackage file
Expand Down Expand Up @@ -118,7 +118,7 @@ buildDar service pkgConf@PackageConfigFields {..} ifDir dalfInput = do
fileDependencies <- MaybeT $ getDependencies file
let dataFiles =
[mkConfFile pkgConf pkgModuleNames (T.unpack pkgId)]
liftIO $
pure $
createArchive
pkgConf
(T.unpack pkgId)
Expand Down Expand Up @@ -176,29 +176,21 @@ createArchive ::
-> [NormalizedFilePath] -- ^ Module dependencies
-> [(String, BS.ByteString)] -- ^ Data files
-> [NormalizedFilePath] -- ^ Interface files
-> IO BSL.ByteString
-> Zip.ZipArchive ()
createArchive PackageConfigFields {..} pkgId dalf dalfDependencies srcRoot fileDependencies dataFiles ifaces
= do
-- Reads all module source files, and pairs paths (with changed prefix)
-- with contents as BS. The path must be within the module root path, and
-- is modified to have prefix <name-hash> instead of the original root path.
srcFiles <-
forM fileDependencies $ \mPath -> do
contents <- BSL.readFile $ fromNormalizedFilePath mPath
return
( pkgName </>
fromNormalizedFilePath (makeRelative' srcRoot mPath)
, contents)
ifaceFaceFiles <-
forM ifaces $ \mPath -> do
contents <- BSL.readFile $ fromNormalizedFilePath mPath
let ifaceRoot =
toNormalizedFilePath
(ifaceDir </> fromNormalizedFilePath srcRoot)
return
( pkgName </>
fromNormalizedFilePath (makeRelative' ifaceRoot mPath)
, contents)
forM_ fileDependencies $ \mPath -> do
entry <- Zip.mkEntrySelector $ pkgName </> fromNormalizedFilePath (makeRelative' srcRoot mPath)
Zip.sinkEntry Zip.Deflate (sourceFile $ fromNormalizedFilePath mPath) entry
forM_ ifaces $ \mPath -> do
let ifaceRoot =
toNormalizedFilePath
(ifaceDir </> fromNormalizedFilePath srcRoot)
entry <- Zip.mkEntrySelector $ pkgName </> fromNormalizedFilePath (makeRelative' ifaceRoot mPath)
Zip.sinkEntry Zip.Deflate (sourceFile $ fromNormalizedFilePath mPath) entry
let dalfName = pkgName </> pkgNameVersion pName pVersion <.> "dalf"
let dependencies =
[ (pkgName </> T.unpack depName <> ".dalf", BSL.fromStrict bs)
Expand All @@ -213,11 +205,10 @@ createArchive PackageConfigFields {..} pkgId dalf dalfDependencies srcRoot fileD
( "META-INF/MANIFEST.MF"
, manifestHeader dalfName (dalfName : map fst dependencies)) :
(dalfName, dalf) :
srcFiles ++ ifaceFaceFiles ++ dependencies ++ dataFiles'
mkEntry (filePath, content) = Zip.toEntry filePath 0 content
zipArchive =
foldr (Zip.addEntryToArchive . mkEntry) Zip.emptyArchive allFiles
pure $ Zip.fromArchive zipArchive
dependencies ++ dataFiles'
forM_ allFiles $ \(file, content) -> do
entry <- Zip.mkEntrySelector file
Zip.sinkEntry Zip.Deflate (sourceLazy content) entry
where
pkgName = fullPkgName pName pVersion pkgId
manifestHeader :: FilePath -> [String] -> BSL.ByteString
Expand Down
Loading

0 comments on commit 3e2ccc0

Please sign in to comment.