IntersectMBO · effectfully · Jan 23, 2022 · Jan 16, 2022 · Jan 23, 2022 · kwxm
diff --git a/plutus-core/plutus-core/src/PlutusCore/Constant/Typed.hs b/plutus-core/plutus-core/src/PlutusCore/Constant/Typed.hs
@@ -149,6 +149,8 @@ the value. Instead we can keep the argument intact and apply the Haskell functio
 the PLC AST representing some value.
 
 E.g. Having a built-in function with the following signature:
+(TODO: we can't have that, figure out a way to make this example actually work while being as
+clear as it currently is)
 
     reverse : all a. [a] -> [a]
 
@@ -970,6 +972,16 @@ instance uni ~ UniOf term => KnownTypeIn uni term (Opaque term rep) where
 underTypeError :: void
 underTypeError = error "Panic: a 'TypeError' was bypassed"
 
+type NoStandalonePolymorphicDataErrMsg =
+    'Text "Plutus type variables can't directly appear inside built-in types" ':$$:
+    'Text "Are you trying to define a polymorphic built-in function over a polymorphic type?" ':$$:
+    'Text "In that case you need to wrap all polymorphic built-in types having type variables" ':<>:
+    'Text " in them with either ‘SomeConstant’ or ‘Opaque’ depending on whether its the type" ':<>:
+    'Text " of an argument or the type of the result, respectively"
+
+instance TypeError NoStandalonePolymorphicDataErrMsg => uni `Contains` TyVarRep where
+    knownUni = underTypeError
+
 type NoConstraintsErrMsg =
     'Text "Built-in functions are not allowed to have constraints" ':$$:
     'Text "To fix this error instantiate all constrained type variables"

diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs
@@ -1,5 +1,3 @@
-{-# OPTIONS_GHC -fno-warn-redundant-constraints #-}
-
 {-# LANGUAGE DataKinds             #-}
 {-# LANGUAGE DeriveAnyClass        #-}
 {-# LANGUAGE FlexibleInstances     #-}
@@ -132,6 +130,323 @@ nonZeroArg :: (Integer -> Integer -> Integer) -> Integer -> Integer -> Evaluatio
 nonZeroArg _ _ 0 = EvaluationFailure
 nonZeroArg f x y = EvaluationSuccess $ f x y
 
+{- Note [How to add a built-in function: simple cases]
+This Notes explains how to add a built-in function and how to read definitions of existing built-in
+functions. It does not attempt to explain why things the way they are, that is explained in comments
+in relevant files (will have a proper overview doc on that, but for now you can check out this
+comment: https://github.com/input-output-hk/plutus/issues/4306#issuecomment-1003308938).
+
+In order to add a new built-in function one needs to add a constructor to 'DefaultFun' and handle
+it within the @ToBuiltinMeaning uni DefaultFun@ instance like that:
+
+    toBuiltinMeaning <Name> =
+        makeBuiltinMeaning
+            <denotation>
+            <costingFunction>
+
+'makeBuiltinMeaning' creates a Plutus builtin out of its denotation (i.e. Haskell implementation)
+and a costing function for it. Once a builtin is added, its Plutus type is kind-checked and printed
+to a golden file automatically (consult @git status@).
+
+Below we will enumerate what kind of denotations are accepted by 'makeBuiltinMeaning' without
+touching any costing stuff.
+
+1. The simplest example of an accepted denotation is a monomorphic function that takes values of
+built-in types and returns a value of a built-in type as well. For example
+
+    encodeUtf8 :: Text -> BS.ByteString
+
+You can feed 'encodeUtf8' directly to 'makeBuiltinMeaning' without specifying any types:
+
+    toBuiltinMeaning EncodeUtf8 =
+        makeBuiltinMeaning
+            encodeUtf8
+            <costingFunction>
+
+This will add the builtin, the only two things that remain are implementing costing for this
+builtin (out of the scope of this Note) and handling it within the @Flat DefaultFun@ instance
+(see Note [Stable encoding of PLC]).
+
+2. If the type of the denotation has any constrained type variables in it, all of them need to be
+be instantiated. For example feeding @(+)@ directly to 'makeBuiltinMeaning' will give you an error
+message asking to instantiate constrained type variables, which you can do via an explicit type
+annotation or type application or using any other way of specifying types.
+
+Here's how it looks with a type application instantiating the type variable of @(+)@:
+
+    toBuiltinMeaning AddInteger =
+        makeBuiltinMeaning
+            ((+) @Integer)
+            <costingFunction>
+
+Or we can specify the whole type of the denotation by type-applying 'makeBuiltinMeaning':
+
+    toBuiltinMeaning AddInteger =
+        makeBuiltinMeaning
+            @(Integer -> Integer -> Integer)
+            (+)
+            <costingFunction>
+
+Or we can simply annotate @(+)@ with its monomorphized type:
+    toBuiltinMeaning AddInteger =
+        makeBuiltinMeaning
+            ((+) :: Integer -> Integer -> Integer)
+            <costingFunction>
+
+All of these are equivalent.
+
+3. Unconstrained type variables are fine, you don't need to instantiate them (but you may want to if
+you want some builtin to be less general than what Haskell infers for its denotation). For example
+
+    toBuiltinMeaning IfThenElse =
+        makeBuiltinMeaning
+            (\b x y -> if b then x else y)
+            <costingFunction>
+
+works alright. The inferred Haskell type of the denotation is
+
+    forall a. Bool -> a -> a -> a
+
+whose counterpart in Plutus is
+
+    all a. bool -> a -> a -> a
+
+and unsurprisingly it's the exact Plutus type of the added builtin.
+
+It may seem like getting the latter from the former is entirely trivial, however
+'makeBuiltinMeaning' jumps through quite a few hoops to achieve that and below we'll consider those
+of them that are important to know to be able to use 'makeBuiltinMeaning' in cases that are more
+complicated than a simple monomorphic or polymorphic function. But for now let's talk about a few
+more simple cases.
+
+4. Certain types are not built-in, but can be represented via built-in ones. For example, we don't
+have 'Int' as a built-in, but we have 'Integer' and we can represent the former in terms of the
+latter. The conversions between the two types are handled by 'makeBuiltinMeaning', so that the user
+doesn't need to write them themselves and can just write
+
+    toBuiltinMeaning LengthOfByteString =
+        makeBuiltinMeaning
+            BS.length
+            <costingFunction>
+
+directly (where @BS.length :: BS.ByteString -> Int@).
+
+Note however that while it's always safe to convert an 'Int' to an 'Integer', doing the opposite is
+not safe in general, because an 'Integer' may not fit into the range of 'Int'. For this reason
+
+    YOU MUST NEVER USE 'fromIntegral' AND SIMILAR FUNCTIONS THAT CAN SILENTLY UNDER- OR OVERFLOW
+    WHEN DEFINING A BUILT-IN FUNCTION
+
+For example defining a builtin that takes an 'Integer' and converts it to an 'Int' using
+'fromIntegral' is not allowed under any circumstances and can be a huge vulnerability.
+
+It's completely fine to define a builtin that takes an 'Int' directly, though. How so? That's due
+to the fact that the builtin application machinery checks that an 'Integer' is in the bounds of
+'Int' before doing the conversion. If the bounds check succeeds, then the 'Integer' gets converted
+to the corresponding 'Int', and if it doesn't, then the builtin application fails.
+
+For the list of types that can be converted to/from built-in ones look into the file with the
+default universe. If you need to add a new such type, just copy-paste what's done for an existing
+one and adjust.
+
+Speaking of builtin application failing:
+
+5. A built-in function can fail. Whenever a builtin fails, evaluation of the whole program fails.
+There's a number of ways a builtin can fail:
+
+- as we've just seen a type conversion can fail due to an unsuccessful bounds check
+- if the builtin expects, say, a 'Text' argument, but gets fed an 'Integer' argument
+- if the builtin expects any constant, but gets fed a non-constant
+- if its denotation runs in the 'EvaluationResult' and an 'EvaluationFailure' gets returned
+
+Most of these are not a concern to the user defining a built-in function (conversions are handled
+within the builtin application machinery, type mismatches are on the type checker and the person
+writing the program etc), however explicitly returning 'EvaluationFailure' from a builtin is
+something that happens commonly.
+
+One simple example is a monomorphic function matching on a certain constructor and failing in all
+other cases:
+
+    toBuiltinMeaning UnIData =
+        makeBuiltinMeaning
+            (\case
+                I i -> EvaluationSuccess i
+                _   -> EvaluationFailure)
+            <costingFunction>
+
+The inferred type of the denotation is
+
+    Data -> EvaluationResult Integer
+
+and the Plutus type of the builtin is
+
+    data -> integer
+
+because the error effect is implicit in Plutus.
+
+Returning @EvaluationResult a@ for a type variable @a@ is also fine, i.e. it doesn't matter whether
+the denotation is monomorphic or polymorphic w.r.t. failing.
+
+But note that
+
+    'EvaluationResult' MUST BE EXPLICIYLY USED FOR ANY FAILING BUILTIN AND THROWING AN EXCEPTION
+    VIA 'error' OR 'throw' OR ELSE IS NOT ALLOWED AND CAN BE A HUGE VULNERABILITY. MAKE SURE THAT
+    NONE OF THE FUNCTIONS THAT YOU USE TO DEFINE A BUILTIN THROW EXCEPTIONS
+
+An argument of a builtin can't have 'EvaluationResult' in its type -- only the result.
+
+6. A builtin can emit log messages. For that it needs to run in the 'Emitter' monad. The ergonomics
+are the same as with 'EvaluationResult': 'Emitter' can't appear in the type of an argument and
+polymorphism is fine. For example:
+
+    toBuiltinMeaning Trace =
+        makeBuiltinMeaning
+            (\text a -> a <$ emitM text)
+            <costingFunction>
+
+The inferred type of the denotation is
+
+    forall a. Text -> a -> Emitter a
+
+and the Plutus type of the builtin is
+
+    all a. text -> a -> a
+
+because just like with the error effect, whether a function logs anything or not is not reflected
+in its type.
+
+'makeBuiltinMeaning' allows one to nest 'EvaluationResult' inside of 'Emitter' and vice versa,
+but as always nesting monads inside of each other without using monad transformers doesn't have good
+ergonomics, since computations of such a type can't be chained with a simple @(>>=)@.
+
+This concludes the list of simple cases. Before we jump to the hard ones, we need to talk about how
+polymorphism gets elaborated, so read Note [Elaboration of polymorphism] next.
+-}
+
+{- Note [Elaboration of polymorphism]
+In Note [How to add a built-in function: simple cases] we defined the following builtin:
+
+    toBuiltinMeaning IfThenElse =
+        makeBuiltinMeaning
+            (\b x y -> if b then x else y)
+            <costingFunction>
+
+whose inferred Haskell type is
+
+    forall a. Bool -> a -> a -> a
+
+The way 'makeBuiltinMeaning' handles such a type is by traversing it and instantiating every type
+variable. What a type variable gets instantiated to depends on where it appears. When the entire
+type of an argument is a single type variable, it gets instantiated to @Opaque term VarN@ where
+@VarN@ is pseudocode for "a Haskell type representing a Plutus type variable with 'Unique' N"
+(for the purpose of this explanation it doesn't matter what @VarN@ actually is).
+See Note [Implementation of polymorphic built-in functions] for what 'Opaque' is.
+
+So the type from the above elaborates to
+
+    Bool -> Opaque term Var0 -> Opaque term Var0 -> Opaque term Var0
+
+We could've specified this type explicitly (leaving out the @Var0@ thing for the elaboration
+machinery to figure out):
+
+    toBuiltinMeaning IfThenElse =
+        makeBuiltinMeaning
+            @(Bool -> Opaque term _ -> Opaque term _ -> Opaque term _)
+            (\b x y -> if b then x else y)
+            <costingFunction>
+
+and this would be equivalent to the original definition. We didn't do that, because why bother if
+the correct thing gets inferred anyway.
+
+Another thing we could do is define an auxiliary function with a type signature and explicit
+'Opaque' while still having explicit polymorphism:
+
+    ifThenElse :: Bool -> Opaque term a -> Opaque term a -> Opaque term a
+    ifThenElse b x y = if b then x else y
+
+    toBuiltinMeaning IfThenElse =
+        makeBuiltinMeaning
+            ifThenElse
+            <costingFunction>
+
+This achieves the same, but note how @a@ is now an argument to 'Opaque' rather than the entire type
+of an argument. In order for this definition to elaborate to the same type as before @a@ needs to be
+instantiated to just @Var0@, as opposed to @Opaque term Var0@, because the 'Opaque' part is
+already there, so this is what the elaboration machinery does.
+
+So regardless of which method of defining 'IfThenElse' we choose, the type of its denotation gets
+elaborated to the same
+
+    Bool -> Opaque term Var0 -> Opaque term Var0 -> Opaque term Var0
+
+which then gets digested, so that we can compute what Plutus type it corresponds to. The procedure
+is simple: collect all distinct type variables, @all@-bind them and replace the usages with the
+bound variables. This turns the type above into
+
+    all a. bool -> a -> a -> a
+
+which is the Plutus type of the 'IfThenElse' builtin.
+
+It's of course allowed to have multiple type variables, e.g. in the following snippet:
+
+    toBuiltinMeaning Const =
+        makeBuiltinMeaning
+            Prelude.const
+            <costingFunction>
+
+the Haskell type of 'const' gets inferred as
+
+    forall a b. a -> b -> a
+
+and the elaboration machinery turns that into
+
+    Opaque term Var0 -> Opaque term Var1 -> Opaque term Var0
+
+The elaboration machinery respects the explicitly specified parts of the type and does not attempt
+to argue with them. For example if the user insisted that the instantiated type of 'const' had
+@Var0@ and @Var1@ swapped:
+
+    Opaque term Var1 -> Opaque term Var0 -> Opaque term Var1
+
+the elaboration machinery wouldn't make a fuss about that.
+
+As a final simple example, consider
+
+    toBuiltinMeaning Trace =
+        makeBuiltinMeaning
+            (\text a -> a <$ emitM text)
+            <costingFunction>
+
+from [How to add a built-in function: simple cases]. The inferred type of the denotation is
+
+    forall a. Text -> a -> Emitter a
+
+which elaborates to
+
+    Text -> Opaque term Var0 -> Emitter (Opaque term Var0)
+
+Elaboration machinery is able to look under 'Emitter' and 'EvaluationResult' even if there's a type
+variable inside that does not appear anywhere else in the type signature, for example the inferred
+type of the denotation in
+
+    toBuiltinMeaning ErrorPrime =
+        makeBuiltinMeaning
+            EvaluationFailure
+            <costingFunction>
+
+is
+
+    forall a. EvaluationResult a
+
+which gets elaborated to
+
+    EvaluationResult (Opaque term Var0)
+
+from which the final Plutus type of the builtin is computed:
+
+    all a. a
+-}
+
 instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where
     type CostingPart uni DefaultFun = BuiltinCostModel
     -- Integers
@@ -248,7 +563,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where
             (runCostingFunOneArgument . paramDecodeUtf8)
     -- Bool
     toBuiltinMeaning IfThenElse =
-       makeBuiltinMeaning
+        makeBuiltinMeaning
             (\b x y -> if b then x else y)
             (runCostingFunThreeArguments . paramIfThenElse)
     -- Unit
@@ -301,9 +616,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where
             (runCostingFunTwoArguments . paramMkCons)
         where
           consPlc
-              :: SomeConstant uni a
-              -> SomeConstant uni [a]
-              -> EvaluationResult (Opaque term [a])
+              :: SomeConstant uni a -> SomeConstant uni [a] -> EvaluationResult (Opaque term [a])
           consPlc
             (SomeConstant (Some (ValueOf uniA x)))
             (SomeConstant (Some (ValueOf uniListA xs))) = do