Skip to content

Commit

Permalink
* added protobuf module (purely in C, based on protobuf-c) and Onnx p…
Browse files Browse the repository at this point in the history
…arsing module based on it + simple demo script

* extended compiler to simplify creation of complex modules that include big chunks of C/C++ code
* fixed a few bugs in the tutorial
  • Loading branch information
vpisarev committed Dec 24, 2021
1 parent 6507aff commit 9cbff35
Show file tree
Hide file tree
Showing 14 changed files with 10,616 additions and 13 deletions.
2 changes: 2 additions & 0 deletions compiler/Ast.fx
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ var builtin_exceptions = empty_idmap
var all_compile_errs: exn list = []
var all_compile_err_ctx: string list = []
var all_func_ctx: (id_t, typ_t, loc_t) list = []
var all_c_inc_dirs: string Hashset.t = Hashset.empty(256, "")

fun string(loc: loc_t)
{
Expand Down Expand Up @@ -1640,6 +1641,7 @@ fun init_all(): void
freeze_ids = false
all_names.clear()
all_strhash.clear()
all_c_inc_dirs.clear()
for i <- builtin_ids.0.rev() { ignore(get_id(i)) }
ignore(fname_always_import())
all_modules_hash.clear()
Expand Down
8 changes: 5 additions & 3 deletions compiler/Compiler.fx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// Ficus compiler, the driving part
// (calls all other parts of the compiler in the proper order)

import Filename, File, Sys, Hashmap, LexerUtils as Lxu
import Filename, File, Sys, Hashmap, Hashset, LexerUtils as Lxu
import Ast, Ast_pp, Lexer, Parser, Options
import Ast_typecheck
import K_form, K_pp, K_normalize, K_annotate, K_mangle
Expand Down Expand Up @@ -395,7 +395,8 @@ fun run_cc(cmods: C_form.cmodule_t list, ficus_root: string) {
} else {
" /DNDEBUG /MT " + (if opt_level == 1 {"/O1"} else {"/O2"})
}
val cflags = f"/utf-8 /nologo{opt_flags}{omp_flag} /I{runtime_include_path}"
val incdirs = " ".join([for d <- Ast.all_c_inc_dirs.list() {"/I"+d}])
val cflags = f"/utf-8 /nologo{opt_flags}{omp_flag} {incdirs} /I{runtime_include_path}"
("win", "cl", "cl", ".obj", "/c /Fo", "/Fe", "", cflags, "/nologo /F10485760 kernel32.lib advapi32.lib")
} else {
// unix or hopefully something more or less compatible with it
Expand Down Expand Up @@ -440,7 +441,8 @@ fun run_cc(cmods: C_form.cmodule_t list, ficus_root: string) {
f" -DNDEBUG{stk_overflow}"
}

val cflags = f"-O{opt_level_str}{ggdb_opt} {cflags} {common_cflags} -I{runtime_include_path}"
val incdirs = " ".join([for d <- Ast.all_c_inc_dirs.list() {"-I"+d}])
val cflags = f"-O{opt_level_str}{ggdb_opt} {cflags} {common_cflags} {incdirs} -I{runtime_include_path}"
val clibs = (if libpath!="" {f"-L{runtime_lib_path}/{libpath} "} else {""}) + f"-lm {clibs}"
(os, c_comp, cpp_comp, ".o", "-c -o ", "-o ", "-l", cflags, clibs)
}
Expand Down
23 changes: 19 additions & 4 deletions compiler/Parser.fx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

// Ficus recursive descent parser

import File, Filename, Hashmap, Sys
import File, Filename, Hashmap, Hashset, Sys
from Ast import *
import LexerUtils as Lxu
from Lexer import *
Expand All @@ -26,12 +26,27 @@ var parser_ctx = parser_ctx_t { m_idx=-1, filename="", deps=[], inc_dirs=[], def

fun add_to_imported_modules(mname: id_t, loc: loc_t): int
{
val mfname = pp(mname).replace(".", Filename.dir_sep()) + ".fx"
val mfname = pp(mname)
var ncomps = mfname.split('.', allow_empty=false).length()
val mfname = mfname.replace(".", Filename.dir_sep())
val mfname =
try Filename.locate(mfname, parser_ctx.inc_dirs)
try Filename.locate(mfname + ".fx", parser_ctx.inc_dirs)
catch {
| NotFoundError => throw ParseError(loc, f"module {mname} is not found")
| NotFoundError =>
try {
ncomps += 1
Filename.locate(Filename.concat(mfname, "init.fx"), parser_ctx.inc_dirs)
}
catch {
| NotFoundError => throw ParseError(loc, f"module {mname} is not found")
}
}
var dirname = mfname
for i <- 0:ncomps {
dirname = Filename.dirname(dirname)
if all_c_inc_dirs.mem(dirname) {break}
all_c_inc_dirs.add(dirname)
}
val m_idx = find_module(mname, mfname)
if !parser_ctx.deps.mem(m_idx) {
parser_ctx.deps = m_idx :: parser_ctx.deps
Expand Down
12 changes: 6 additions & 6 deletions doc/ficustut.md
Original file line number Diff line number Diff line change
Expand Up @@ -933,7 +933,7 @@ Ficus includes the following built-in, automatically defined and user-defined ty
for i <- 0:100000 { sum -= big_list.nth(rng(0, N)) }
```
In principle, i-th element of vector can be ‘modified’ more or less efficiently with `vec[:i] + [new_value] + vec[i+1:]`, but if you modify elements quite often, an array may be a better (10x-100x better) option.
In principle, i-th element of vector can be ‘modified’ more or less efficiently with `vec[:i] + [< new_value >] + vec[i+1:]`, but if you modify elements quite often, an array may be a better (10x-100x better) option.
* **variant**, also known as sum type: `Tag1: 't1 | Tag2: 't2 | ...`. Variants are used to represent various data structures from simple enumerations to very complex hierarchical data structures. We cover them in the dedicated section.
Expand Down Expand Up @@ -1055,7 +1055,7 @@ If you want to insert a non-void expression (e.g. call a function, where you are
// Since it's value declaration,
// it cannot be a last expression
// in the code block
ignore(waitkey() // Same effect; ignore the return value
ignore(waitkey()) // Same effect; ignore the return value
// It can be used as the last expression
// in a code block
```
Expand Down Expand Up @@ -1257,7 +1257,7 @@ In the nested loop you can combine iteration over ranges and collections.
There can also be simultaneous iteration over several collections, e.g. when we have two or more collections and want to process corresponding pairs/n-tuples of elements. The construction looks like:
```
for val1 <- expr1, val2 <— <expr2> ... { exprs ... }
for val1 <- expr1, val2 <- expr2 ... { exprs ... }
```
For example, here is how to compute Hamming distance between 2 vectors:
Expand Down Expand Up @@ -1698,7 +1698,7 @@ fun (arg1 [: T1], arg2 [: T2], ..., argn[: Tn])
The differences from the regular function are:
1. the function name is omitted. You can, however, declare a value/variable with a lambda function as a value and then call this lambda function by name.
1. the function name is omitted. You can, however, declare a value/variable, initialize it with the lambda function and then call this lambda function by name.
2. `=` form is unavailable, use `{}`
3. it's not required to specify types of arguments, because lambda function usually has very small scope and its parameters types can often be inferenced from the way it's used. In particular, the standard `sort` function, used in the example above, is defined as:
Expand All @@ -1718,7 +1718,7 @@ fun make_coin()
{
val rng = RNG(uint64(Sys.tick_count()))
// 'warm up' rng a bit
val _ = fold s = 0UL for i <- 0:1000 {s ^ rng.next()}
val _ = fold s = 0UL for i <- 0:10 {s ^ rng.next()}
fun () { if bool(rng) {"heads"} else {"tails"} }
}
val coin1 = make_coin()
Expand Down Expand Up @@ -2027,7 +2027,7 @@ But when you unpack the record, compiler already knows the type of unpacked valu
### Modifying/updating record
While tuples are usually small and rarely need to be modified by parts, it's generally not true for records, and it may be too much of a code, especially if you need to change just a single field. There is convenient record update operator `.{...}` to solve this problem: `record_instance . {filed_i1=new_val_i1, ..., field_iK=new_val_iK }`:
While tuples are usually small and rarely need to be modified by parts, it's generally not true for records, and it may be too much of a code, especially if you need to change just a single field. There is convenient record update operator `.{...}` to solve this problem: `record_instance . {field_i1=new_val_i1, ..., field_iK=new_val_iK }`:
```
type Rect = {x: int; y: int; width: int; height: int}
Expand Down
Binary file modified doc/ficustut_a4.pdf
Binary file not shown.
13 changes: 13 additions & 0 deletions examples/onnx.fx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
This file is a part of ficus language project.
See ficus/LICENSE for the licensing terms
*/

// dumps .onnx model

// Demo driver: expects exactly one command-line argument — the path to an
// .onnx file — parses it into the hierarchical Onnx.Ast representation
// and pretty-prints the whole model to stdout.
import Onnx.Ast, Onnx.Parse, Sys
// Sys.arguments() here must be exactly a single-element list;
// anything else (no args, extra args) prints a usage hint and aborts.
val model_name = match Sys.arguments() {
| f :: [] => f
| _ => println("specify .onnx model name"); throw Fail("")
}
println(Onnx.Parse.parse(model_name))
9 changes: 9 additions & 0 deletions lib/Array.fx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ fun total(a: 't []) = size(a)

fun __negate__(a: 't [+]) = [| for x <- a {-x} |]

// Applies 'f' to each element of 'arr', producing a new array of the same
// shape with element type 'b ('[+]' accepts an array of any dimensionality).
fun map(arr: 'a [+], f: 'a -> 'b) = [| for x <- arr {f(x)} |]

operator .+ (a: 'ta [+], b: 'tb) =
[| for x <- a {x .+ b} |]
operator .- (a: 'ta [+], b: 'tb) =
Expand Down Expand Up @@ -118,6 +120,13 @@ operator .>= (a: 't [+], b: 't [+]): bool [+] =

// Sums all elements of 'a' as double: the accumulator starts from
// 0 cast to 't and then to double, so integer overflow is avoided.
fun sum(a: 't [+]) =
fold s = ((0 :> 't) :> double) for aj <- a {s + aj}

// Sums all elements of 'a' on top of the seed 'v0'; the accumulator
// (and hence the result) has the type of 'v0', not of the elements.
fun sum(a: 't [+], v0: 's) =
fold s = v0 for aj <- a {s + aj}

// Multiplies all elements of 'a' starting from the seed 'v0';
// an empty array yields 'v0' unchanged.
fun product(a: 't [+], v0: 's) =
fold p = v0 for aj <- a {p * aj}

// Arithmetic mean as double; max(total(a), 1) guards against
// division by zero on an empty array (then mean == 0.0).
fun mean(a: 't [+]) = sum(a)/(max(total(a), 1) :> double)

fun normInf(a: 't [+]) =
Expand Down
215 changes: 215 additions & 0 deletions lib/Onnx/Ast.fx
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
/*
This file is a part of ficus language project.
See ficus/LICENSE for the licensing terms
*/

// hierarchical representation of Onnx format
import Hashmap

// Element types of Onnx tensors. The variants mirror the Onnx
// TensorProto.DataType enumeration — presumably in the same order as the
// protobuf tag values; TODO confirm against onnx.proto.
type datatype_t =
| DTYP_UNDEFINED | DTYP_FLOAT | DTYP_UINT8 | DTYP_INT8 | DTYP_UINT16 | DTYP_INT16
| DTYP_INT32 | DTYP_INT64 | DTYP_STRING | DTYP_BOOL | DTYP_FLOAT16 | DTYP_DOUBLE
| DTYP_UINT32 | DTYP_UINT64 | DTYP_COMPLEX64 | DTYP_COMPLEX128 | DTYP_BFLOAT16

// The actual tensor payload; only float32/int8/int64 payloads are representable.
type tdata_t = T_FLOAT: float [] | T_INT8: int8 [] | T_INT64: int64 []
// Tensors are referenced by name in node inputs/outputs.
type tensor_id_t = string

// A named constant tensor: its shape and its typed payload.
type tensor_t =
{
name: tensor_id_t
shape: int []
data: tdata_t
}

// An operator-set import: version within a domain (empty domain = the default one).
type opset_t =
{
version: int64
domain: string
}

// One tensor dimension: either a symbolic name or a concrete size.
type dim_t = DimParam: string | DimValue: int64
// Type information of a value: currently only tensors (element type + dims).
type typeinfo_t = TYPINFO_TENSOR: (datatype_t, dim_t [])

// Description of a graph input/output/intermediate value.
type valueinfo_t =
{
name: tensor_id_t
denotation: string
typ: typeinfo_t
}

// The payload of a node attribute: scalar or array of ints/floats/strings,
// or an embedded tensor.
type attrval_t =
| AttrInt: int64 | AttrFloat: float
| AttrString: string | AttrTensor: tensor_t
| AttrFloats: float [] | AttrInts: int [] | AttrStrings: string []

// A named node attribute.
type attr_t =
{
name: string
v: attrval_t
}

// One operation of the computation graph: operator name ('op'),
// named input/output tensors and the operator attributes.
type node_t =
{
name: string
op: string
inputs: tensor_id_t []
outputs: tensor_id_t []
attrs: attr_t []
}

// The computation graph: declared inputs/outputs, intermediate value
// descriptions, constant initializer tensors and the node list.
type graph_t =
{
name: string
inputs: valueinfo_t []
outputs: valueinfo_t []
values: valueinfo_t []
initializers: tensor_t []
nodes: node_t []
}

// The whole Onnx model: header metadata plus the main graph.
type model_t =
{
ir_version: int64
producer: string
domain: string
doc_string: string
import_opsets: opset_t []
metadata: (string, string) []
graph: graph_t
}

// Total number of elements in the tensor: the product of its dimensions
// (seed 1, so an empty shape — a scalar — yields 1).
fun total(t: tensor_t) = product(t.shape, 1)

// Returns the canonical lower-case name of an Onnx element type,
// e.g. DTYP_FLOAT -> "float", DTYP_BFLOAT16 -> "bfloat16".
fun string(d: datatype_t)
{
    match d {
    | DTYP_UNDEFINED => "undefined"
    | DTYP_FLOAT => "float"
    | DTYP_UINT8 => "uint8"
    | DTYP_INT8 => "int8"
    | DTYP_UINT16 => "uint16"
    | DTYP_INT16 => "int16"
    | DTYP_INT32 => "int32"
    | DTYP_INT64 => "int64"
    | DTYP_STRING => "string"
    | DTYP_BOOL => "bool"
    | DTYP_FLOAT16 => "float16"
    | DTYP_DOUBLE => "double"
    | DTYP_UINT32 => "uint32"
    | DTYP_UINT64 => "uint64"
    | DTYP_COMPLEX64 => "complex64"
    | DTYP_COMPLEX128 => "complex128"
    | DTYP_BFLOAT16 => "bfloat16"
    }
}

// Formats an opset import as "<domain> v<version>".
fun string(ops: opset_t)
{
    f"{ops.domain} v{ops.version}"
}
// Formats a tensor dimension: a symbolic dimension prints as its name,
// a concrete dimension as its numeric value.
fun string(dim: dim_t)
{
    match dim {
    | DimParam(n) => n
    | DimValue(v) => string(v)
    }
}

// Name of the element type actually stored in the tensor payload
// (used as a prefix when printing tensor data).
fun tensor_data_prefix(t: tdata_t)
{
    match t {
    | T_FLOAT _ => "float32"
    | T_INT8 _ => "int8"
    | T_INT64 _ => "int64"
    }
}

// Prints the raw tensor payload by dispatching on its element type;
// delegates to the built-in array print in each case.
fun print_tensor_data(t: tdata_t)
{
    match t {
    | T_FLOAT(data) => print(data)
    | T_INT8(data) => print(data)
    | T_INT64(data) => print(data)
    }
}

// Prints a one-line summary of a tensor: name, shape as "d1xd2x...",
// element type, and the data itself — elided as "[...]" when the tensor
// has more than 10 elements.
fun print(t: tensor_t)
{
val shape = "x".join(t.shape.map(string))
// '{{' is an escaped literal '{' inside the f-string
print(f"tensor {{name='{t.name}', shape={shape}, data={tensor_data_prefix(t.data)} ")
if total(t) > 10 {print("[...]")} else {print_tensor_data(t.data)}
print("}")
}

// Prints a node attribute as "name: value", dispatching on the payload kind.
// String values go through repr() so they come out quoted/escaped.
fun print(a: attr_t)
{
print(f"{a.name}: ")
match a.v {
| AttrInt(i) => print(i)
| AttrFloat(f) => print(f)
| AttrString(s) => print(repr(s))
| AttrTensor(t) => print(t)
| AttrInts(ints) => print(ints)
| AttrFloats(floats) => print(floats)
| AttrStrings(strings) => print(strings)
}
}

// Prints one graph node as a brace-delimited record (name, op, inputs,
// outputs, attributes) using fixed 4-space indentation steps.
// Note: '{{'/'}}' in the f-strings are escapes for literal braces.
fun print(n: node_t)
{
val indent0 = " "*4
val indent1 = indent0 + indent0
val indent2 = indent1 + indent0
val indent3 = indent2 + indent0
println(f"{{")
println(f"{indent2}name: '{n.name}'")
println(f"{indent2}op: {n.op}")
println(f"{indent2}inputs: {n.inputs}")
println(f"{indent2}outputs: {n.outputs}")
print(f"{indent2}attributes: {{")
// empty attribute set prints as "attributes: {}" on one line,
// otherwise each attribute goes on its own (deeper-indented) line
if n.attrs.empty() {print(f"}}\n{indent1}}}")}
else {
println()
for a <- n.attrs {
print(indent3)
println(a)
}
print(f"{indent2}}}\n{indent1}}}")
}
}

// Prints a value description as "name (denotation): elemtype, d1 x d2 x ..."
// The "(denotation)" part is omitted when the denotation string is empty.
fun print(vi: valueinfo_t)
{
print(f"{vi.name}")
if vi.denotation != "" {
print(f" ({vi.denotation})")
}
print(": ")
match vi.typ {
| TYPINFO_TENSOR (dt, diminfo) =>
// shadow 'diminfo' with its " x "-joined string form
val diminfo = " x ".join(diminfo.map(string))
print(f"{dt}, {diminfo}")
}
}

// Prints the whole graph as a brace-delimited record with five sections:
// inputs, outputs, values, initializers and nodes, one element per line.
// '{{'/'}}' in the f-strings are escapes for literal braces.
fun print(graph: graph_t)
{
val indent0 = " "*4, indent1 = indent0 + indent0
println(f"graph {{")
println(f"{indent0}name: {graph.name},")
println(f"{indent0}inputs: {{")
for x <- graph.inputs { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}outputs: {{")
for x <- graph.outputs { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}values: {{")
for x <- graph.values { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}initializers: {{")
for x <- graph.initializers { print(indent1); print(x); println(",") }
println(f"{indent0}}},\n{indent0}nodes: {{")
for x <- graph.nodes { print(indent1); print(x); println(",") }
// fix: the trailing literal '}' must be escaped as '}}' inside an f-string —
// the original ended with a lone '}', unlike every other f-string in this file
// (cf. print(n: node_t), which ends its f-strings with '}}').
print(f"{indent0}}}\n}}")
}

// Prints the model header (IR version, producer, domain, doc string),
// then each imported opset and metadata property on its own line,
// then the main graph, ending with an "// end of model" marker line.
fun print(model: model_t)
{
println(f"ir_version: {model.ir_version}")
println(f"producer: {model.producer}")
println(f"domain: {model.domain}")
println(f"doc_string: '{model.doc_string}'")
for opset <- model.import_opsets {
println(f"import opset: {opset}")
}
for (k, v) <- model.metadata {
println(f"property '{k}': '{v}'")
}
println(model.graph)
println("// end of model")
}
Loading

0 comments on commit 9cbff35

Please sign in to comment.