Commit

* improved performance of convolution on ARM. Now Resnet-50 runs in 26.6ms on Apple M1 (the previous result was ~42ms).

* added a preliminary implementation of Winograd-based convolution; for now it is disabled because, instead of accelerating execution, it slows it down.
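
For context: the quoted timings amount to roughly a 1.6x end-to-end speedup, and the Winograd remark refers to the standard minimal-filtering scheme for small convolutions, which trades multiplications for extra transforms (general background, not specific to this commit's implementation):

$$
\text{speedup} = \frac{42\ \text{ms}}{26.6\ \text{ms}} \approx 1.58,
\qquad
\frac{\text{direct multiplies per }2\times 2\text{ tile}}{\text{Winograd }F(2\times 2,\,3\times 3)\text{ multiplies}}
= \frac{2\cdot 2\cdot 3\cdot 3}{4\cdot 4} = \frac{36}{16} = 2.25
$$

The input/output transforms and the extra memory traffic can eat up the theoretical 2.25x reduction in multiplies, which is consistent with the second bullet: the preliminary Winograd path is disabled because it currently runs slower than the direct convolution.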
vpisarev committed May 24, 2022
1 parent da008cd commit 616e3ad
Showing 8 changed files with 1,182 additions and 43 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -12,7 +12,7 @@ The code is distributed under Apache 2 license, see the [LICENSE](LICENSE)

The compiler is written in Ficus itself and needs C/C++ compiler and make utility.

- ### **Unix (Linux, macOS, BSD, ...)**
+ ### **Unix (Linux, macOS, BSD, WSL, ...)**

```
cd <ficus_root>
@@ -22,7 +22,7 @@ bin/ficus -run -O3 examples/fst.fx # run some examples, e.g. fst.fx,
# optionally specify optimization level
```

- ### **Windows**
+ ### **Windows (native)**

Install Visual Studio, for example Visual Studio 2019 Community Edition, open "Developer PowerShell for VS2019" from the Windows menu and type:

11 changes: 7 additions & 4 deletions examples/classify_img.fx
@@ -1,7 +1,7 @@
import Json, Sys, LexerUtils as Lxu
import OpenCV as cv
//import Image.Decoder
- import NN.Ast, NN.Inference, NN.FromOnnx, NN.FuseBasic, NN.BufferAllocator
+ import NN.Ast, NN.Inference, NN.FromOnnx, NN.FuseBasic, NN.BufferAllocator, NN.OpConv

var mname = "", lname = ""
var images: string list = []
@@ -99,7 +99,7 @@ for imgname@i <- images {
val inp = cv.blobFromImage(img, size=(224, 224),
scaleFactor=0.017,
mean=(103., 116., 123.),
- swapRB=true, crop=false)
+ swapRB=false, crop=false)
println(inp.size())
val out = net.forward(inp)
//println(f"out[1]={out[1][:]}")
@@ -110,15 +110,18 @@ for imgname@i <- images {
sort(tprobs, (>))
val inp_ = NN.Ast.make_tensor(inp)
var outputs: nn_output_t [] = []
+ NN.OpConv.reset_min_total_time_1x1()
+ val niters = 30
val (gmean, mintime) = Sys.timeit(
fun () {
outputs =
try NN.Inference.run(model, [("", inp_)], outputs=temp_outputs) catch {
| NN.Ast.NNError msg => println(f"exception NNError('{msg}') occured"); []
| Fail msg => println(f"failure: '{msg}'"); []
}
- }, iterations=15, batch=1)
- println(f"execution time: gmean={gmean*1000.}, mintime={mintime*1000.}")
+ }, iterations=niters, batch=1)
+ val total_time = NN.OpConv.get_total_time_1x1()*1000/Sys.tick_frequency()
+ println(f"execution time: gmean={gmean*1000.}, mintime={mintime*1000.}, 1x1 total={total_time} ms")
/*for t_out@i <- temp_outputs {
println(f"temp output #{i}: name='{t_out.0}', shape={t_out.1.shape}")
}
5 changes: 3 additions & 2 deletions lib/NN/Ast.fx
@@ -701,7 +701,8 @@ fun graph2str(net: nnet_t, graph: nngraph_t, indent: string)
val prog_indent = new_indent + " "
val inpstrs = [for a <- inpargs {net.args[a].name}]
val outstrs = [for a <- outargs {net.args[a].name}]
- val prog = [for op <- prog {op2str(net, op, prog_indent)}]
+ val prog = [for op@i <- prog {
+     f"{indent}// op #{i}\n{prog_indent}" + op2str(net, op, prog_indent)}]
join_embrace(f"graph {{\n{new_indent}inputs={inpstrs},\n\
{new_indent}outputs={outstrs},\n{new_indent}prog={{\n{prog_indent}",
f"\n{new_indent}}}\n{indent}}}",
@@ -807,7 +808,7 @@ fun nnop_t.get_inputs_outputs(): (int [], int []) = match self
| NN_Unsqueeze {t_inp, t_axes, t_out} => ([t_inp, t_axes], [t_out])
}

- fun op2str(net: nnet_t, op: nnop_t, indent: string)
+ fun op2str(net: nnet_t, op: nnop_t, indent: string): string
{
val sub_indent = indent + " "
//println(f"dumping op={op.name()}")
2 changes: 2 additions & 0 deletions lib/NN/BufferAllocator.fx
@@ -95,6 +95,8 @@ fun assign_buffers(net: Ast.nnet_t)
| Some(argidx) => (true, argidx)
| _ => (false, -1)
}
+ | Ast.NN_Conv {t_passby} when t_passby > 0 && usecounts[t_passby] == 1 =>
+     (true, t_passby)
| _ => (false, -1)
}
//println(f"name={op.name()}, inplace={inplace_op}, inps={[::for i<-inps {net.args[i].name}]}, outs={[::for i<-outs {net.args[i].name}]}")
2 changes: 2 additions & 0 deletions lib/NN/Inference.fx
@@ -14,6 +14,7 @@ fun run(net: Ast.nnet_t, inputs: (string, Ast.nntensor_t) []/*,
(string, Ast.nntensor_t) []
{
var empty_names = true
+ OpConv.reset_total_time_1x1()

// assign input tensors
for (inpname, t)@i <- inputs {
@@ -40,6 +41,7 @@ fun run(net: Ast.nnet_t, inputs: (string, Ast.nntensor_t) []/*,

//println("running main graph")
run_graph(net, net.graph, outputs)
+ OpConv.update_total_time_1x1()

// collect outputs
[for argidx <- net.graph.outargs {
(Diffs for the remaining changed files did not load on this page.)
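
The NN.OpConv timing helpers called in classify_img.fx and Inference.fx above (reset_total_time_1x1, reset_min_total_time_1x1, update_total_time_1x1, get_total_time_1x1) are defined in the part of the diff that did not load, so their implementation is not shown. Below is a minimal sketch of one plausible reading of those calls; every name and detail beyond the four functions above is an assumption (in particular, Sys.tick_count() is assumed to return the counter that Sys.tick_frequency() scales), not the actual OpConv code:

```
// Hypothetical sketch, not the actual lib/NN/OpConv.fx implementation.
var total_time_1x1 = 0        // ticks spent in 1x1 convolutions during one run()
var min_total_time_1x1 = 0    // smallest per-run total observed so far

fun reset_total_time_1x1() { total_time_1x1 = 0 }           // called at the start of Inference.run()
fun reset_min_total_time_1x1() { min_total_time_1x1 = 0 }   // called once before the benchmark loop
fun update_total_time_1x1() {                                // called at the end of Inference.run()
    if min_total_time_1x1 == 0 || total_time_1x1 < min_total_time_1x1 {
        min_total_time_1x1 = total_time_1x1
    }
}
fun get_total_time_1x1() = min_total_time_1x1                // read after the benchmark loop

// Inside the 1x1 convolution kernel, the accumulation would then look like:
//   val t0 = Sys.tick_count()          // assumed API, see note above
//   ... the actual convolution ...
//   total_time_1x1 += Sys.tick_count() - t0
```

Under this reading, the "1x1 total" printed by classify_img.fx is the accumulated 1x1-convolution time of the fastest of the 30 benchmark iterations, converted to milliseconds via Sys.tick_frequency().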
