Skip to content

Commit

Permalink
added option to accumulate and dump the summary profile
Browse files Browse the repository at this point in the history
  • Loading branch information
vpisarev committed Jul 24, 2022
1 parent dcfd893 commit dfb2c22
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 9 deletions.
8 changes: 7 additions & 1 deletion examples/vision_classify.fx
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,20 @@ for imgname@i <- images {
val img = Image.Decoder.imread_rgb(imgname)
val inp = NN.Preprocess.image_to_tensor(img, preprocess_params, ntasks)
var outputs: nn_output_t [] = []
var best_profile = model.perf_profile_time.copy()
val (gmean, mintime) = Sys.timeit(
fun () {
outputs =
try NN.Inference.run(model, [("", inp)], outputs=[]) catch {
| NN.Ast.NNError msg => println(f"exception NNError('{msg}') occured"); []
| Fail msg => println(f"failure: '{msg}'"); []
}
}, iterations=niter, batch=1)
}, iterations=niter, batch=1, updated_min=Some(
fun () {if profile {best_profile=model.perf_profile_time.copy()}}))
if (profile) {
model.perf_profile_time[:] = best_profile
NN.Inference.print_total_profile(model)
}
println(f"execution time: gmean={gmean*1000.:.2f}ms, mintime={mintime*1000.:.2f}ms")
for out@i <- outputs {
println(f"output #{i}: name='{out.0}', shape={out.1.shape}: top-{k}:")
Expand Down
23 changes: 20 additions & 3 deletions lib/NN/Ast.fx
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ type nnelwise_t =
| NN_Abs | NN_Acos | NN_Acosh | NN_Asin | NN_Asinh | NN_Atan | NN_Atanh
| NN_Ceil | NN_Cos | NN_Cosh | NN_Erf | NN_Exp | NN_Floor | NN_IsInf | NN_IsNaN
| NN_Log | NN_Mish | NN_Neg | NN_Not | NN_Relu | NN_Round | NN_Sigmoid | NN_Sign
| NN_Sin | NN_Sinh | NN_Softplus | NN_Softsign | NN_Sqrt | NN_Tan | NN_Tanh
| NN_Sin | NN_Sinh | NN_Softplus | NN_Softsign | NN_Sqrt | NN_Tan | NN_Tanh | NN_Elemwise_ZZ

type nnreduce_t =
| NN_ReduceL1 | NN_ReduceL2
Expand Down Expand Up @@ -268,6 +268,10 @@ class nnop_t =
name: string; t_inp: int; t_axes: int; t_out: int }
| NN_Nop // shall always be the last operation in the list

val nn_operations = NN_Nop.__tag__
val nn_elemwise_operations = NN_Elemwise_ZZ.__tag__ - 1
val nn_total_operations = nn_operations + nn_elemwise_operations

type nnnames_t = (string, int) Hashmap.t

type nnonnx_t =
Expand Down Expand Up @@ -307,7 +311,10 @@ class nnmodel_t
use_fp16: bool ref
trace: bool ref
profile: bool ref
detailed_profile: bool ref
scratch_buf: nnbuf_t ref
perf_profile_time: int64 []
perf_profile_count: int []
}

type op_callback_t = (nnmodel_t, nnop_t) -> void
Expand All @@ -327,7 +334,10 @@ fun empty_net() = nnmodel_t {
use_fp16 = ref false,
trace = ref false,
profile = ref false,
scratch_buf = ref ([] : nnbuf_t)
detailed_profile = ref false,
scratch_buf = ref ([] : nnbuf_t),
perf_profile_time = array(nn_total_operations, 0L),
perf_profile_count = array(nn_total_operations, 0)
}

fun empty_graph() = NN_Graph {
Expand Down Expand Up @@ -410,6 +420,8 @@ fun string(ew: nnelwise_t)
| NN_Min => "Min"
| NN_Max => "Max"
| NN_Mean => "Mean"

| NN_Elemwise_ZZ => "???"
}

fun string(r: nnreduce_t)
Expand Down Expand Up @@ -805,7 +817,6 @@ fun graph2str(model: nnmodel_t, graph: nngraph_t, indent: string)

fun nnop_t.name(): (string, string) = match self
{
| NN_Nop => ("", "Nop")
| NN_AvgPool {name} => (name, "AvgPool")
| NN_BatchNorm {name} => (name, "BatchNorm")
| NN_Cast {name} => (name, "Cast")
Expand Down Expand Up @@ -844,6 +855,12 @@ fun nnop_t.name(): (string, string) = match self
| NN_TopK {name} => (name, "TopK")
| NN_Transpose {name} => (name, "Transpose")
| NN_Unsqueeze {name} => (name, "Unsqueeze")
| NN_Nop => ("", "Nop")
}

fun nnop_t.perf_profile_index(): int = match self {
| NN_Elemwise {el_op} => nn_operations + el_op.__tag__ - 1
| _ => self.__tag__ - 1
}

fun targs2pairs(prefix: string, args: int []) =
Expand Down
61 changes: 57 additions & 4 deletions lib/NN/Inference.fx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ fun run(model: Ast.nnmodel_t, inputs: (string, Ast.nntensor_t) []/*,
val ninputs = inputs.size()

assert(`ninputs == model.graph.inpargs.size()`)
if *model.profile {
for i <- 0:Ast.nn_operations {
model.perf_profile_time[i] = 0L
model.perf_profile_count[i] = 0
}
}

// assign input tensors
for (inpname, inp)@i <- inputs {
Expand Down Expand Up @@ -68,9 +74,10 @@ fun dump_arg(model: Ast.nnmodel_t, prefix: string, idx: int, argidx: int, dumpda
fun run_graph(model: Ast.nnmodel_t, graph: Ast.nngraph_t, outputs: (string, Ast.nntensor_t) [])
{
val nops = graph.prog.size()
val any_profile = *model.profile || *model.detailed_profile
var t = 0L
for op@opidx <- graph.prog {
if *model.profile {
if any_profile {
t = Sys.tick_count()
}
val (opname, opkind) = op.name()
Expand Down Expand Up @@ -209,10 +216,15 @@ fun run_graph(model: Ast.nnmodel_t, graph: Ast.nngraph_t, outputs: (string, Ast.
}
| _ => RunOp.run_op(model, op)
}
if *model.profile {
if any_profile {
t = Sys.tick_count() - t
val t_ms = t*1000./Sys.tick_frequency()
println(f"TIME {op.name()}: {round(t_ms, 2)}ms")
val op_idx = op.__tag__ - 1
model.perf_profile_time[op_idx] += t
model.perf_profile_count[op_idx] += 1
if *model.detailed_profile {
val t_ms = t*1000./Sys.tick_frequency()
println(f"TIME {op.name()}: {t_ms:.2f}ms")
}
}
/*for oi@outidx <- oinfo {
val {idx=argidx} = oi
Expand Down Expand Up @@ -282,6 +294,47 @@ fun top_k(t: Ast.nntensor_t, k: int)
result
}

fun collect_used_op_names(model: Ast.nnmodel_t): string []
{
val names = array(Ast.nn_total_operations, "")
fun scan_graph(graph: Ast.nngraph_t) {
for op <- graph.prog {
val idx = op.perf_profile_index()
if names[idx] == "" {
names[idx] = op.name().1
}
match op {
| Ast.NN_Loop {body} =>
scan_graph(body)
| Ast.NN_If {then_branch, else_branch} =>
scan_graph(then_branch)
scan_graph(else_branch)
| _ => {}
}
}
}
scan_graph(model.graph)
names
}

fun print_total_profile(model: Ast.nnmodel_t)
{
val opnames = collect_used_op_names(model)
val N = opnames.size()
val idx = mkrange(N)
fun cmp_time(i: int, j: int) {
val diff = model.perf_profile_time[i] - model.perf_profile_time[j]
if diff > 0 {true} else {diff == 0 && i < j}
}
sort(idx, cmp_time)
val ms_scale = 1000./Sys.tick_frequency()
for i <- 0:N {
val t = model.perf_profile_time[idx[i]]
if t == 0 {break}
println(f"{i+1}. {opnames[idx[i]]}: {t*ms_scale:.2f}")
}
}

fun read_labels(fname: string) =
try {
match Json.parse_file(fname) {
Expand Down
10 changes: 9 additions & 1 deletion lib/Sys.fx
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ fun arguments() = argv.tl()
@pure @nothrow fun tick_count(): int64 = @ccode { return fx_tick_count() }
@pure @nothrow fun tick_frequency(): double = @ccode { return fx_tick_frequency() }

fun timeit(f: void -> void, ~iterations: int=1, ~batch: int=1): (double, double)
fun timeit(f: void -> void, ~updated_min: (void->void)?,
~iterations: int=1, ~batch: int=1, ): (double, double)
{
val nreal_iterations = max(iterations - 1, 1)
val fold gmean = 0., mintime = 0. for i <- 0:iterations {
Expand All @@ -94,13 +95,20 @@ fun timeit(f: void -> void, ~iterations: int=1, ~batch: int=1): (double, double)
val t = t/tick_frequency()
val log_t = if nreal_iterations > 1 { log(max(t, 1e-16)) } else {t}
val new_mintime = if i == 0 {t} else {min(mintime, t)}
match (i == 0 || new_mintime < mintime, updated_min) {
| (true, Some(f)) => f()
| _ => {}
}
if iterations > 1 && i == 0 {(gmean, new_mintime)}
else { (gmean + log_t, new_mintime) }
}
val gmean = if nreal_iterations > 1 { exp(gmean/nreal_iterations)/batch } else {gmean/batch}
(gmean, mintime/batch)
}

fun timeit(f: void -> void, ~iterations: int=1, ~batch: int=1, ): (double, double) =
timeit(f, updated_min=None, iterations=iterations, batch=batch)

fun remove(name: string): void
@ccode {
fx_cstr_t name_;
Expand Down

0 comments on commit dfb2c22

Please sign in to comment.