Skip to content

Commit

Permalink
* added protobuf module (purely in C, based on protobuf-c) and Onnx p…
Browse files Browse the repository at this point in the history
…arsing module based on it + simple demo script

* extended compiler to simplify creation of complex modules that include big chunks of C/C++ code
* fixed a few bugs in the tutorial
  • Loading branch information
vpisarev committed Dec 24, 2021
1 parent 6507aff commit 9cbff35
Show file tree
Hide file tree
Showing 14 changed files with 10,616 additions and 13 deletions.
2 changes: 2 additions & 0 deletions compiler/Ast.fx
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ var builtin_exceptions = empty_idmap
var all_compile_errs: exn list = []
var all_compile_err_ctx: string list = []
var all_func_ctx: (id_t, typ_t, loc_t) list = []
var all_c_inc_dirs: string Hashset.t = Hashset.empty(256, "")

fun string(loc: loc_t)
{
Expand Down Expand Up @@ -1640,6 +1641,7 @@ fun init_all(): void
freeze_ids = false
all_names.clear()
all_strhash.clear()
all_c_inc_dirs.clear()
for i <- builtin_ids.0.rev() { ignore(get_id(i)) }
ignore(fname_always_import())
all_modules_hash.clear()
Expand Down
8 changes: 5 additions & 3 deletions compiler/Compiler.fx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// Ficus compiler, the driving part
// (calls all other parts of the compiler in the proper order)

import Filename, File, Sys, Hashmap, LexerUtils as Lxu
import Filename, File, Sys, Hashmap, Hashset, LexerUtils as Lxu
import Ast, Ast_pp, Lexer, Parser, Options
import Ast_typecheck
import K_form, K_pp, K_normalize, K_annotate, K_mangle
Expand Down Expand Up @@ -395,7 +395,8 @@ fun run_cc(cmods: C_form.cmodule_t list, ficus_root: string) {
} else {
" /DNDEBUG /MT " + (if opt_level == 1 {"/O1"} else {"/O2"})
}
val cflags = f"/utf-8 /nologo{opt_flags}{omp_flag} /I{runtime_include_path}"
val incdirs = " ".join([for d <- Ast.all_c_inc_dirs.list() {"/I"+d}])
val cflags = f"/utf-8 /nologo{opt_flags}{omp_flag} {incdirs} /I{runtime_include_path}"
("win", "cl", "cl", ".obj", "/c /Fo", "/Fe", "", cflags, "/nologo /F10485760 kernel32.lib advapi32.lib")
} else {
// unix or hopefully something more or less compatible with it
Expand Down Expand Up @@ -440,7 +441,8 @@ fun run_cc(cmods: C_form.cmodule_t list, ficus_root: string) {
f" -DNDEBUG{stk_overflow}"
}

val cflags = f"-O{opt_level_str}{ggdb_opt} {cflags} {common_cflags} -I{runtime_include_path}"
val incdirs = " ".join([for d <- Ast.all_c_inc_dirs.list() {"-I"+d}])
val cflags = f"-O{opt_level_str}{ggdb_opt} {cflags} {common_cflags} {incdirs} -I{runtime_include_path}"
val clibs = (if libpath!="" {f"-L{runtime_lib_path}/{libpath} "} else {""}) + f"-lm {clibs}"
(os, c_comp, cpp_comp, ".o", "-c -o ", "-o ", "-l", cflags, clibs)
}
Expand Down
23 changes: 19 additions & 4 deletions compiler/Parser.fx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

// Ficus recursive descent parser

import File, Filename, Hashmap, Sys
import File, Filename, Hashmap, Hashset, Sys
from Ast import *
import LexerUtils as Lxu
from Lexer import *
Expand All @@ -26,12 +26,27 @@ var parser_ctx = parser_ctx_t { m_idx=-1, filename="", deps=[], inc_dirs=[], def

fun add_to_imported_modules(mname: id_t, loc: loc_t): int
{
val mfname = pp(mname).replace(".", Filename.dir_sep()) + ".fx"
val mfname = pp(mname)
var ncomps = mfname.split('.', allow_empty=false).length()
val mfname = mfname.replace(".", Filename.dir_sep())
val mfname =
try Filename.locate(mfname, parser_ctx.inc_dirs)
try Filename.locate(mfname + ".fx", parser_ctx.inc_dirs)
catch {
| NotFoundError => throw ParseError(loc, f"module {mname} is not found")
| NotFoundError =>
try {
ncomps += 1
Filename.locate(Filename.concat(mfname, "init.fx"), parser_ctx.inc_dirs)
}
catch {
| NotFoundError => throw ParseError(loc, f"module {mname} is not found")
}
}
var dirname = mfname
for i <- 0:ncomps {
dirname = Filename.dirname(dirname)
if all_c_inc_dirs.mem(dirname) {break}
all_c_inc_dirs.add(dirname)
}
val m_idx = find_module(mname, mfname)
if !parser_ctx.deps.mem(m_idx) {
parser_ctx.deps = m_idx :: parser_ctx.deps
Expand Down
12 changes: 6 additions & 6 deletions doc/ficustut.md
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ Ficus includes the following built-in, automatically defined and user-defined ty
for i <- 0:100000 { sum -= big_list.nth(rng(0, N)) }
```
In principle, i-th element of vector can be ‘modified’ more or less efficiently with `vec[:i] + [new_value] + vec[i+1:]`, but if you modify elements quite often, an array may be a better (10x-100x better) option.
In principle, i-th element of vector can be ‘modified’ more or less efficiently with `vec[:i] + [< new_value >] + vec[i+1:]`, but if you modify elements quite often, an array may be a better (10x-100x better) option.
* **variant**, also known as sum type: `Tag1: 't1 | Tag2: 't2 | ...`. Variants are used to represent various data structures from simple enumerations to very complex hierarchical data structures. We cover them in the dedicated section.
Expand Down Expand Up @@ -1055,7 +1055,7 @@ If you want to insert a non-void expression (e.g. call a function, where you are
// Since it's value declaration,
// it cannot be a last expression
// in the code block
ignore(waitkey() // Same effect; ignore the return value
ignore(waitkey()) // Same effect; ignore the return value
// It can be used as the last expression
// in a code block
```
Expand Down Expand Up @@ -1257,7 +1257,7 @@ In the nested loop you can combine iteration over ranges and collections.
There can also be simultaneous iteration over several collections, e.g. when we have two or more collections and want to process corresponding pairs/n-tuples of elements. The construction looks like:
```
for val1 <- expr1, val2 <— <expr2> ... { exprs ... }
for val1 <- expr1, val2 <- expr2 ... { exprs ... }
```
For example, here is how to compute Hamming distance between 2 vectors:
Expand Down Expand Up @@ -1698,7 +1698,7 @@ fun (arg1 [: T1], arg2 [: T2], ..., argn[: Tn])
The differences from the regular function are:
1. the function name is omitted. You can, however, declare a value/variable with a lambda function as a value and then call this lambda function by name.
1. the function name is omitted. You can, however, declare a value/variable, initialize it with the lambda function and then call this lambda function by name.
2. `=` form is unavailable, use `{}`
3. it's not required to specify types of arguments, because lambda function usually has very small scope and its parameters types can often be inferenced from the way it's used. In particular, the standard `sort` function, used in the example above, is defined as:
Expand All @@ -1718,7 +1718,7 @@ fun make_coin()
{
val rng = RNG(uint64(Sys.tick_count()))
// 'warm up' rng a bit
val _ = fold s = 0UL for i <- 0:1000 {s ^ rng.next()}
val _ = fold s = 0UL for i <- 0:10 {s ^ rng.next()}
fun () { if bool(rng) {"heads"} else {"tails"} }
}
val coin1 = make_coin()
Expand Down Expand Up @@ -2027,7 +2027,7 @@ But when you unpack the record, compiler already knows the type of unpacked valu
### Modifying/updating record
While tuples are usually small and rarely need to be modified by parts, it's generally not true for records, and it may be too much of a code, especially if you need to change just a single field. There is convenient record update operator `.{...}` to solve this problem: `record_instance . {filed_i1=new_val_i1, ..., field_iK=new_val_iK }`:
While tuples are usually small and rarely need to be modified by parts, it's generally not true for records, and it may be too much of a code, especially if you need to change just a single field. There is convenient record update operator `.{...}` to solve this problem: `record_instance . {field_i1=new_val_i1, ..., field_iK=new_val_iK }`:
```
type Rect = {x: int; y: int; width: int; height: int}
Expand Down
Binary file modified doc/ficustut_a4.pdf
Binary file not shown.
13 changes: 13 additions & 0 deletions examples/onnx.fx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
This file is a part of ficus language project.
See ficus/LICENSE for the licensing terms
*/

// dumps .onnx model

// Demo driver: expects exactly one command-line argument — the path to an
// .onnx file — parses it into the hierarchical Onnx.Ast representation
// and pretty-prints the whole model to stdout.
import Onnx.Ast, Onnx.Parse, Sys
// Sys.arguments() here must be exactly a single-element list;
// anything else (no args, extra args) prints a usage hint and aborts.
val model_name = match Sys.arguments() {
| f :: [] => f
| _ => println("specify .onnx model name"); throw Fail("")
}
println(Onnx.Parse.parse(model_name))
9 changes: 9 additions & 0 deletions lib/Array.fx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fun total(a: 't []) = size(a)

fun __negate__(a: 't [+]) = [| for x <- a {-x} |]

// Applies 'f' to each element of 'arr', producing a new array of the same
// shape with element type 'b ('[+]' accepts an array of any dimensionality).
fun map(arr: 'a [+], f: 'a -> 'b) = [| for x <- arr {f(x)} |]

operator .+ (a: 'ta [+], b: 'tb) =
[| for x <- a {x .+ b} |]
operator .- (a: 'ta [+], b: 'tb) =
Expand Down Expand Up @@ -118,6 +120,13 @@ operator .>= (a: 't [+], b: 't [+]): bool [+] =

// Sums all elements of 'a' as double: the accumulator starts from
// 0 cast to 't and then to double, so integer overflow is avoided.
fun sum(a: 't [+]) =
fold s = ((0 :> 't) :> double) for aj <- a {s + aj}

// Sums all elements of 'a' on top of the seed 'v0'; the accumulator
// (and hence the result) has the type of 'v0', not of the elements.
fun sum(a: 't [+], v0: 's) =
fold s = v0 for aj <- a {s + aj}

// Multiplies all elements of 'a' starting from the seed 'v0';
// an empty array yields 'v0' unchanged.
fun product(a: 't [+], v0: 's) =
fold p = v0 for aj <- a {p * aj}

// Arithmetic mean as double; max(total(a), 1) guards against
// division by zero on an empty array (then mean == 0.0).
fun mean(a: 't [+]) = sum(a)/(max(total(a), 1) :> double)

fun normInf(a: 't [+]) =
Expand Down
215 changes: 215 additions & 0 deletions lib/Onnx/Ast.fx
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
This file is a part of ficus language project.
See ficus/LICENSE for the licensing terms
*/

// hierarchical representation of Onnx format
import Hashmap

// Element types of Onnx tensors. The variants mirror the Onnx
// TensorProto.DataType enumeration — presumably in the same order as the
// protobuf tag values; TODO confirm against onnx.proto.
type datatype_t =
| DTYP_UNDEFINED | DTYP_FLOAT | DTYP_UINT8 | DTYP_INT8 | DTYP_UINT16 | DTYP_INT16
| DTYP_INT32 | DTYP_INT64 | DTYP_STRING | DTYP_BOOL | DTYP_FLOAT16 | DTYP_DOUBLE
| DTYP_UINT32 | DTYP_UINT64 | DTYP_COMPLEX64 | DTYP_COMPLEX128 | DTYP_BFLOAT16

// The actual tensor payload; only float32/int8/int64 payloads are representable.
type tdata_t = T_FLOAT: float [] | T_INT8: int8 [] | T_INT64: int64 []
// Tensors are referenced by name in node inputs/outputs.
type tensor_id_t = string

// A named constant tensor: its shape and its typed payload.
type tensor_t =
{
name: tensor_id_t
shape: int []
data: tdata_t
}

// An operator-set import: version within a domain (empty domain = the default one).
type opset_t =
{
version: int64
domain: string
}

// One tensor dimension: either a symbolic name or a concrete size.
type dim_t = DimParam: string | DimValue: int64
// Type information of a value: currently only tensors (element type + dims).
type typeinfo_t = TYPINFO_TENSOR: (datatype_t, dim_t [])

// Description of a graph input/output/intermediate value.
type valueinfo_t =
{
name: tensor_id_t
denotation: string
typ: typeinfo_t
}

// The payload of a node attribute: scalar or array of ints/floats/strings,
// or an embedded tensor.
type attrval_t =
| AttrInt: int64 | AttrFloat: float
| AttrString: string | AttrTensor: tensor_t
| AttrFloats: float [] | AttrInts: int [] | AttrStrings: string []

// A named node attribute.
type attr_t =
{
name: string
v: attrval_t
}

// One operation of the computation graph: operator name ('op'),
// named input/output tensors and the operator attributes.
type node_t =
{
name: string
op: string
inputs: tensor_id_t []
outputs: tensor_id_t []
attrs: attr_t []
}

// The computation graph: declared inputs/outputs, intermediate value
// descriptions, constant initializer tensors and the node list.
type graph_t =
{
name: string
inputs: valueinfo_t []
outputs: valueinfo_t []
values: valueinfo_t []
initializers: tensor_t []
nodes: node_t []
}

// The whole Onnx model: header metadata plus the main graph.
type model_t =
{
ir_version: int64
producer: string
domain: string
doc_string: string
import_opsets: opset_t []
metadata: (string, string) []
graph: graph_t
}

// Total number of elements in the tensor: the product of its dimensions
// (seed 1, so an empty shape — a scalar — yields 1).
fun total(t: tensor_t) = product(t.shape, 1)

// Returns the canonical lower-case name of an Onnx element type,
// e.g. DTYP_FLOAT -> "float", DTYP_BFLOAT16 -> "bfloat16".
fun string(d: datatype_t)
{
    match d {
    | DTYP_UNDEFINED => "undefined"
    | DTYP_FLOAT => "float"
    | DTYP_UINT8 => "uint8"
    | DTYP_INT8 => "int8"
    | DTYP_UINT16 => "uint16"
    | DTYP_INT16 => "int16"
    | DTYP_INT32 => "int32"
    | DTYP_INT64 => "int64"
    | DTYP_STRING => "string"
    | DTYP_BOOL => "bool"
    | DTYP_FLOAT16 => "float16"
    | DTYP_DOUBLE => "double"
    | DTYP_UINT32 => "uint32"
    | DTYP_UINT64 => "uint64"
    | DTYP_COMPLEX64 => "complex64"
    | DTYP_COMPLEX128 => "complex128"
    | DTYP_BFLOAT16 => "bfloat16"
    }
}

// Formats an opset import as "<domain> v<version>".
fun string(ops: opset_t)
{
    f"{ops.domain} v{ops.version}"
}
// Formats a tensor dimension: a symbolic dimension prints as its name,
// a concrete dimension as its numeric value.
fun string(dim: dim_t)
{
    match dim {
    | DimParam(n) => n
    | DimValue(v) => string(v)
    }
}

// Name of the element type actually stored in the tensor payload
// (used as a prefix when printing tensor data).
fun tensor_data_prefix(t: tdata_t)
{
    match t {
    | T_FLOAT _ => "float32"
    | T_INT8 _ => "int8"
    | T_INT64 _ => "int64"
    }
}

// Prints the raw tensor payload by dispatching on its element type;
// delegates to the built-in array print in each case.
fun print_tensor_data(t: tdata_t)
{
    match t {
    | T_FLOAT(data) => print(data)
    | T_INT8(data) => print(data)
    | T_INT64(data) => print(data)
    }
}

// Prints a one-line summary of a tensor: name, shape as "d1xd2x...",
// element type, and the data itself — elided as "[...]" when the tensor
// has more than 10 elements.
fun print(t: tensor_t)
{
val shape = "x".join(t.shape.map(string))
// '{{' is an escaped literal '{' inside the f-string
print(f"tensor {{name='{t.name}', shape={shape}, data={tensor_data_prefix(t.data)} ")
if total(t) > 10 {print("[...]")} else {print_tensor_data(t.data)}
print("}")
}

// Prints a node attribute as "name: value", dispatching on the payload kind.
// String values go through repr() so they come out quoted/escaped.
fun print(a: attr_t)
{
print(f"{a.name}: ")
match a.v {
| AttrInt(i) => print(i)
| AttrFloat(f) => print(f)
| AttrString(s) => print(repr(s))
| AttrTensor(t) => print(t)
| AttrInts(ints) => print(ints)
| AttrFloats(floats) => print(floats)
| AttrStrings(strings) => print(strings)
}
}

// Prints one graph node as a brace-delimited record (name, op, inputs,
// outputs, attributes) using fixed 4-space indentation steps.
// Note: '{{'/'}}' in the f-strings are escapes for literal braces.
fun print(n: node_t)
{
val indent0 = " "*4
val indent1 = indent0 + indent0
val indent2 = indent1 + indent0
val indent3 = indent2 + indent0
println(f"{{")
println(f"{indent2}name: '{n.name}'")
println(f"{indent2}op: {n.op}")
println(f"{indent2}inputs: {n.inputs}")
println(f"{indent2}outputs: {n.outputs}")
print(f"{indent2}attributes: {{")
// empty attribute set prints as "attributes: {}" on one line,
// otherwise each attribute goes on its own (deeper-indented) line
if n.attrs.empty() {print(f"}}\n{indent1}}}")}
else {
println()
for a <- n.attrs {
print(indent3)
println(a)
}
print(f"{indent2}}}\n{indent1}}}")
}
}

// Prints a value description as "name (denotation): elemtype, d1 x d2 x ..."
// The "(denotation)" part is omitted when the denotation string is empty.
fun print(vi: valueinfo_t)
{
print(f"{vi.name}")
if vi.denotation != "" {
print(f" ({vi.denotation})")
}
print(": ")
match vi.typ {
| TYPINFO_TENSOR (dt, diminfo) =>
// shadow 'diminfo' with its " x "-joined string form
val diminfo = " x ".join(diminfo.map(string))
print(f"{dt}, {diminfo}")
}
}

// Prints the whole graph as a brace-delimited record with five sections:
// inputs, outputs, values, initializers and nodes, one element per line.
// '{{'/'}}' in the f-strings are escapes for literal braces.
fun print(graph: graph_t)
{
val indent0 = " "*4, indent1 = indent0 + indent0
println(f"graph {{")
println(f"{indent0}name: {graph.name},")
println(f"{indent0}inputs: {{")
for x <- graph.inputs { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}outputs: {{")
for x <- graph.outputs { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}values: {{")
for x <- graph.values { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}initializers: {{")
for x <- graph.initializers { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}nodes: {{")
for x <- graph.nodes { print(indent1); print(x); println(",") }
// fix: the trailing literal '}' must be escaped as '}}' inside an f-string —
// the original ended with a lone '}', unlike every other f-string in this file
// (cf. print(n: node_t), which ends its f-strings with '}}').
print(f"{indent0}}}\n}}")
}

// Prints the model header (IR version, producer, domain, doc string),
// then each imported opset and metadata property on its own line,
// then the main graph, ending with an "// end of model" marker line.
fun print(model: model_t)
{
println(f"ir_version: {model.ir_version}")
println(f"producer: {model.producer}")
println(f"domain: {model.domain}")
println(f"doc_string: '{model.doc_string}'")
for opset <- model.import_opsets {
println(f"import opset: {opset}")
}
for (k, v) <- model.metadata {
println(f"property '{k}': '{v}'")
}
println(model.graph)
println("// end of model")
}
Loading

0 comments on commit 9cbff35

Please sign in to comment.