Skip to content

Commit

Permalink
allow the string data packer to deal with arbitrary byte sequences
Browse files Browse the repository at this point in the history
by adding an escape sequence for base64 encoded runs
  • Loading branch information
luite committed Jan 16, 2018
1 parent f851f1d commit adeff7e
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 395 deletions.
3 changes: 1 addition & 2 deletions ghcjs.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ Library
Gen2.Printer,
Gen2.Linker,
Gen2.Shim,
Gen2.PrimIface,
Gen2.Compactor,
Gen2.Object,
Gen2.Archive,
Expand Down Expand Up @@ -141,7 +140,7 @@ Library
optparse-applicative >= 0.14 && < 0.15,
stringsearch >= 0.3 && < 0.4,
base16-bytestring >= 0.1 && < 0.2,
cryptohash,
base64-bytestring >= 1.0 && < 1.1,
-- for JMacro
regex-posix >= 0.90 && < 0.100,
safe >= 0.3 && < 0.4,
Expand Down
134 changes: 45 additions & 89 deletions lib/boot/shims/src/string.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ function h$str(s) {
// decoding to bytes, the following is produced:
// - \cz\0 -> C0 80
// - \cz\cz -> 1A
//
// additionally, for dealing with raw binary data we have an escape sequence
// to pack base64 encoded runs:
//
// - \cz\xNN -> followed by NN-0x1f (31 decimal) bytes of base64 encoded
// data. supported range: 0x20 .. 0x9f (1-128 bytes data)
//

function h$pstr(s) {
var enc = null;
Expand Down Expand Up @@ -255,96 +262,21 @@ function h$encodeModifiedUtf8(str) {
function h$encodePackedUtf8(str) {
return h$encodeUtf8Internal(str, false, true);
}
/*
function h$encodeUtf8Internal(str, modified) {
var i, low;
var n = 0;
for(i=0;i<str.length;i++) {
// non-BMP encoded as surrogate pair in JavaScript string, get actual codepoint
var c = str.charCodeAt(i);
if (0xD800 <= c && c <= 0xDBFF) {
low = str.charCodeAt(i+1);
c = ((c - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
i++;
}
if(c === 0 && modified) {
n+=2;
} else if(c <= 0x7F) {
n++;
} else if(c <= 0x7FF) {
n+=2;
} else if(c <= 0xFFFF) {
n+=3;
} else if(c <= 0x1FFFFF) {
n+=4;
} else if(c <= 0x3FFFFFF) {
n+=5;
} else {
n+=6;
}
}
var v = h$newByteArray(n+1);
var u8 = v.u8;
n = 0;
for(i=0;i<str.length;i++) {
var c = str.charCodeAt(i);
// non-BMP encoded as surrogate pair in JavaScript string, get actual codepoint
if (0xD800 <= c && c <= 0xDBFF) {
low = str.charCodeAt(i+1);
c = ((c - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
i++;
}
// h$log("### encoding char " + c + " to UTF-8: " + String.fromCodePoint(c));
if(c === 0 && modified) {
u8[n] = 192;
u8[n+1] = 128;
n+=2;
} else if(c <= 0x7F) {
u8[n] = c;
n++;
} else if(c <= 0x7FF) {
u8[n] = (c >> 6) | 0xC0;
u8[n+1] = (c & 0x3F) | 0x80;
n+=2;
} else if(c <= 0xFFFF) {
u8[n] = (c >> 12) | 0xE0;
u8[n+1] = ((c >> 6) & 0x3F) | 0x80;
u8[n+2] = (c & 0x3F) | 0x80;
n+=3;
} else if(c <= 0x1FFFFF) {
u8[n] = (c >> 18) | 0xF0;
u8[n+1] = ((c >> 12) & 0x3F) | 0x80;
u8[n+2] = ((c >> 6) & 0x3F) | 0x80;
u8[n+3] = (c & 0x3F) | 0x80;
n+=4;
} else if(c <= 0x3FFFFFF) {
u8[n] = (c >> 24) | 0xF8;
u8[n+1] = ((c >> 18) & 0x3F) | 0x80;
u8[n+2] = ((c >> 12) & 0x3F) | 0x80;
u8[n+3] = ((c >> 6) & 0x3F) | 0x80;
u8[n+4] = (c & 0x3F) | 0x80;
n+=5;
} else {
u8[n] = (c >>> 30) | 0xFC;
u8[n+1] = ((c >> 24) & 0x3F) | 0x80;
u8[n+2] = ((c >> 18) & 0x3F) | 0x80;
u8[n+3] = ((c >> 12) & 0x3F) | 0x80;
u8[n+4] = ((c >> 6) & 0x3F) | 0x80;
u8[n+5] = (c & 0x3F) | 0x80;
n+=6;
}
}
u8[v.len-1] = 0; // terminator
// h$log("### encodeUtf8: " + str);
// h$log(v);
return v;
}
*/

// modified: encode \0 -> 192 128
// packed: encode \cz\cz -> 26
// \cz\0 -> 192 128
function h$encodeUtf8Internal(str, modified, packed) {
var i, low;
var i, j, c, low, b64bytes, b64chars;
function base64val(cc) {
if(cc >= 65 && cc < 90) return cc - 65; // A-Z
if(cc >= 97 && cc < 122) return cc - 71; // a-z
if(cc >= 48 && cc < 57) return cc + 4; // 0-9
if(cc === 43) return 62; // +
if(cc === 47) return 63; // /
if(cc === 61) return 0; // = (treat padding as zero)
throw new Error("invalid base64 value: " + cc);
}
var n = 0;
var czescape = false;
for(i=0;i<str.length;i++) {
Expand Down Expand Up @@ -376,6 +308,12 @@ function h$encodeUtf8Internal(str, modified, packed) {
n-=1;
} else if(c === 0) { // \cz\0 -> 192 128
// no adjustments needed
} else if(c >= 0x20 && c <= 0x9f) {
b64bytes = c - 0x1f; // number of bytes in base64 encoded run
b64chars = ((b64bytes + 2) / 3) << 2;
n += b64bytes;
i += b64chars;
if(b64bytes % 3 === 0) i++; // skip trailing =
} else {
throw new Error("invalid cz escaped character: " + c);
}
Expand All @@ -389,7 +327,7 @@ function h$encodeUtf8Internal(str, modified, packed) {
var u8 = v.u8;
n = 0;
for(i=0;i<str.length;i++) {
var c = str.charCodeAt(i);
c = str.charCodeAt(i);
// non-BMP encoded as surrogate pair in JavaScript string, get actual codepoint
if (0xD800 <= c && c <= 0xDBFF) {
low = str.charCodeAt(i+1);
Expand All @@ -405,8 +343,26 @@ function h$encodeUtf8Internal(str, modified, packed) {
n+=2;
czescape = false;
} else if(czescape) {
u8[n] = c;
n++;
if(c >= 0x20 && c <= 0x9f) {
b64bytes = c - 0x1f;
while(b64bytes > 0) {
var c1 = base64val(str.charCodeAt(i+1)),
c2 = base64val(str.charCodeAt(i+2)),
c3 = base64val(str.charCodeAt(i+3)),
c4 = base64val(str.charCodeAt(i+4));
i+=4;
u8[n] = (c1<<2)|(c2>>4);
n++;
if(b64bytes >= 2) u8[n] = ((c2&0xf)<<4)|(c3 >> 2);
n++;
if(b64bytes >= 3) u8[n] = ((c3&0x3)<<6)|c4;
n++;
b64bytes -= 3;
}
} else {
u8[n] = c;
n++;
}
czescape = false;
} else if(c <= 0x7F) {
u8[n] = c;
Expand Down
1 change: 0 additions & 1 deletion src-bin/Haddock.hs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ withGhcjs' libDir flags ghcActs = runGhc (Just libDir) $ do
-- that may need to be re-linked: Haddock doesn't do any
-- dynamic or static linking at all!
_ <- setSessionDynFlags dynflags''
Ghcjs.fixNameCache
ghcActs dynflags''
where
parseGhcFlags :: MonadIO m => DynFlags -> m DynFlags
Expand Down
41 changes: 18 additions & 23 deletions src/Compiler/GhcjsHooks.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs,
{-# LANGUAGE CPP,
GADTs,
ScopedTypeVariables,
ImpredicativeTypes,
OverloadedStrings,
Expand All @@ -8,7 +8,6 @@
module Compiler.GhcjsHooks where

import CorePrep (corePrepPgm)
-- import Gen2.GHC.CorePrep (corePrepPgm) -- customized to not float new toplevel binds
import CoreToStg (coreToStg)
import DriverPipeline
import DriverPhases
Expand Down Expand Up @@ -40,14 +39,11 @@ import qualified Compiler.Plugins as Plugins
import qualified Gen2.DynamicLinking as Gen2
import qualified Gen2.Foreign as Gen2

import qualified Gen2.PrimIface as Gen2
import qualified Gen2.TH as Gen2TH

import System.IO.Error

#if __GLASGOW_HASKELL__ >= 711
import qualified GHC.LanguageExtensions as Ext
#endif

installGhcjsHooks :: GhcjsEnv
-> GhcjsSettings
Expand All @@ -57,27 +53,29 @@ installGhcjsHooks env settings js_objs dflags =
Gen2.installForeignHooks True $ dflags { hooks = addHooks (hooks dflags) }
where
addHooks h = h
{ linkHook = Just (Gen2.ghcjsLink env settings js_objs True)
, getValueSafelyHook = Just (Plugins.getValueSafely dflags env)
, runMetaHook = Just (Gen2TH.ghcjsRunMeta env settings)
{ linkHook = Just (Gen2.ghcjsLink env settings js_objs True)
, getValueSafelyHook = Just (Plugins.getValueSafely dflags env)
, runMetaHook = Just (Gen2TH.ghcjsRunMeta env settings)
}

installNativeHooks :: GhcjsEnv -> GhcjsSettings -> DynFlags -> DynFlags
installNativeHooks env settings dflags =
Gen2.installForeignHooks False $ dflags { hooks = addHooks (hooks dflags) }
where
addHooks h = h { linkHook = Just (Gen2.ghcjsLink env settings [] False)
#if !(__GLASGOW_HASKELL__ >= 709)
, getValueSafelyHook = Just Gen2.ghcjsGetValueSafely
, hscCompileCoreExprHook = Just Gen2.ghcjsCompileCoreExpr
#endif
addHooks h = h { linkHook = Just (Gen2.ghcjsLink env settings [] False)
}

--------------------------------------------------
-- One shot replacement (the oneShot in DriverPipeline
-- always uses the unhooked linker)

ghcjsOneShot :: GhcjsEnv -> GhcjsSettings -> Bool -> HscEnv -> Phase -> [(String, Maybe Phase)] -> IO ()
ghcjsOneShot :: GhcjsEnv
-> GhcjsSettings
-> Bool
-> HscEnv
-> Phase
-> [(String, Maybe Phase)]
-> IO ()
ghcjsOneShot env settings native hsc_env stop_phase srcs = do
o_files <- mapM (compileFile hsc_env stop_phase) srcs
Gen2.ghcjsDoLink env settings native (hsc_dflags hsc_env) stop_phase o_files
Expand All @@ -88,15 +86,10 @@ ghcjsOneShot env settings native hsc_env stop_phase srcs = do
installDriverHooks :: GhcjsSettings -> GhcjsEnv -> DynFlags -> DynFlags
installDriverHooks settings env df = df { hooks = hooks' }
where hooks' = (hooks df) { runPhaseHook = Just (runGhcjsPhase settings env)
, ghcPrimIfaceHook = Just Gen2.ghcjsPrimIface
}

haveCpp :: DynFlags -> Bool
#if __GLASGOW_HASKELL__ >= 711
haveCpp dflags = xopt Ext.Cpp dflags
#else
haveCpp dflags = xopt Opt_Cpp dflags
#endif

runGhcjsPhase :: GhcjsSettings
-> GhcjsEnv
Expand Down Expand Up @@ -161,7 +154,6 @@ runGhcjsPhase settings env (HscOut src_flavour mod_name result) _ dflags = do
-- stamp file for the benefit of Make
liftIO $ touchObjectFile dflags o_file
return (RealPhase next_phase, o_file)
#if __GLASGOW_HASKELL__ >= 709
HscUpdateSig ->
do -- We need to create a REAL but empty .o file
-- because we are going to attempt to put it in a library
Expand All @@ -171,7 +163,6 @@ runGhcjsPhase settings env (HscOut src_flavour mod_name result) _ dflags = do
-- fixme do we need to create a js_o file here?
-- liftIO $ compileEmptyStub dflags hsc_env' basename location
return (RealPhase next_phase, o_file)
#endif
HscRecomp cgguts mod_summary
-> do output_fn <- phaseOutputFilename next_phase

Expand Down Expand Up @@ -243,7 +234,11 @@ ghcjsCompileModule settings jsEnv env core mod = do
cms = compiledModules jsEnv
dflags = hsc_dflags env
compile = do
core_binds <- corePrepPgm env mod' (ms_location mod) (cg_binds core) (cg_tycons core)
core_binds <- corePrepPgm env
mod'
(ms_location mod)
(cg_binds core)
(cg_tycons core)
let stg = coreToStg dflags mod' core_binds
(stg', cCCs) <- stg2stg dflags mod' stg
return $ variantRender gen2Variant settings dflags mod' stg' cCCs
Loading

0 comments on commit adeff7e

Please sign in to comment.