tinyyolo v3 works! (though the results are somewhat worse than mobilenet-ssd for some reason)
vpisarev · Jul 12, 2022 · b893619 · b893619
1 parent 4389d50
commit b893619
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 16 deletions.
diff --git a/examples/vision_detect.fx b/examples/vision_detect.fx
@@ -62,6 +62,8 @@ fun parse_args(args: string list)
         detector_kind = DetectorSSD; parse_args(rest)
     | "-yolo" :: rest =>
         detector_kind = DetectorYolo; parse_args(rest)
+    | "-tinyyolo" :: rest => // own flag; a second "-yolo" arm would never be reached
+        detector_kind = DetectorTinyYolo; parse_args(rest)
     | "-noshow" :: rest =>
         show_boxes = false; parse_args(rest)
     | "-prmodel" :: rest =>
@@ -214,7 +216,7 @@ for imgname@i <- images {
                                             shape=[1, 3, input_size, input_size]}}
             NN.OpPermute.run_transpose(inp_.shape.shape, inp_.data, [0, 3, 1, 2],
                                        planar_t.shape.shape, planar_data)
-            [("", planar_t), ("", NN.Ast.mktensor([float(h), float(w)]))]
+            [("", planar_t), ("", NN.Ast.mktensor([float(h), float(w)].reshape(1, 2)))]
         | _ => [("", inp_)]
         }
     val (gmean, mintime) = Sys.timeit(
@@ -281,10 +283,16 @@ for imgname@i <- images {
             anchors=NN.OpDetect.yolov4_default_anchors,
             strides=NN.OpDetect.yolov4_default_strides,
             xyscale=NN.OpDetect.yolov4_default_scale)
-        | _ =>
+        | DetectorSSD =>
             NN.OpDetect.ssd_postprocess(outputs, orig_image_size=(h, w), input_size=input_size)
+        | DetectorTinyYolo =>
+            NN.OpDetect.tinyyolo_postprocess(outputs, orig_image_size=(h, w), input_size=input_size)
+        | _ =>
+            println("unrecognized detector type; specify it explicitly via command line options: '-ssd', '-yolo' or '-tinyyolo'")
+            throw Fail("...")
         }
     println(f"{imgname}: {boxes.size()} object(s) detected")
+    //val resized_img = cv.cvtColor(resized_img, cv.COLOR_BGR2RGB)
     draw_boxes(img, boxes, class_names=coco_class_names)
     if show_boxes {
         cv.imshow("detection", img)

diff --git a/lib/NN/BufferAllocator.fx b/lib/NN/BufferAllocator.fx
@@ -265,9 +265,9 @@ fun assign_buffers(model: Ast.nnmodel_t)
                         }
                     }
                     if !model.isconst(v_out) {
-                        if !model.isconst(v_inp) {
+                        /*if !model.isconst(v_inp) {
                             share_buffer(v_inp, v_out)
-                        }
+                        }*/
                         if !model.isconst(outarg) && usecounts[outarg] == 1 {
                             share_buffer(v_out, outarg)
                         }

diff --git a/lib/NN/Inference.fx b/lib/NN/Inference.fx
@@ -61,7 +61,7 @@ fun dump_arg(model: Ast.nnmodel_t, prefix: string, idx: int, argidx: int, dumpda
     val name = model.args[argidx].name
     val etyp = t.elemtype()
     val sh = join_embrace("{", "}", ",", [for sz <- t.shape.shape {string(sz)}])
-    println(f"{prefix} {idx} Name: {name}\n Type: {etyp}\n Shape: {sh}")
+    println(f"{prefix} {idx} Name: {name}\n Buf: {model.bufidxs[argidx]}\n Type: {etyp}\n Shape: {sh}")
     if dumpdata {
         println(string(t))
     }
@@ -131,6 +131,7 @@ fun run_graph(model: Ast.nnmodel_t, graph: Ast.nngraph_t, outputs: (string, Ast.
                     | _ => throw Ast.NNError("Loop's trip_count (if any) is expected to be I32/I64 scalar")
                     }
                 } else {None}
+            val trip_count0 = trip_count
             var loop_condition =
                 if t_cond_in > 0 {
                     val t_data = model.tensors[t_cond_in].data
@@ -156,7 +157,7 @@ fun run_graph(model: Ast.nnmodel_t, graph: Ast.nngraph_t, outputs: (string, Ast.
             var iter = 0
             while loop_condition && trip_count.value_or(1L) > 0L {
                 if *model.trace {
-                    println(f"================ LOOP ITERATION #{iter}/{trip_count.value_or(-1L)} ================")
+                    println(f"================ LOOP ITERATION #{iter}/{trip_count0.value_or(-1L)} ================")
                 }
                 run_graph(model, body, outputs)
                 val outarg_0 = outargs[0]
@@ -180,14 +181,26 @@ fun run_graph(model: Ast.nnmodel_t, graph: Ast.nngraph_t, outputs: (string, Ast.
                         if !isaccum || iter == 0 {
                             val t = model.tensors[outarg]
                             val t_shape = t.shape
-                            val new_shape = Ast.nnshape_t {layout=t_shape.layout, shape=[1, \t_shape.shape]}
+                            val shape1 = if isaccum {[1, \t_shape.shape]} else {t_shape.shape}
+                            val new_shape = Ast.nnshape_t {layout=t_shape.layout, shape=shape1}
                             model.fit(v_out, new_shape, t.elemtype())
                             model.copy_tensor_data(outarg, v_out)
                         } else {
+                            //println(f"accumulating '{model.args[outarg].name}'={model.tensors[outarg]} to '{model.args[v_out].name}'")
                             model.concat_inplace(outarg, v_out)
                         }
                     }
                 }
+                for i <- 0:n_state_vars {
+                    val outarg = outargs[i+1]
+                    val v_inp = inpargs[i+2]
+                    if v_inp >= 0 && model.bufidxs[v_inp] >= 0 {
+                        val t = model.tensors[outarg]
+                        val t_shape = t.shape
+                        model.fit(v_inp, t_shape, t.elemtype())
+                        model.copy_tensor_data(outarg, v_inp)
+                    }
+                }
                 iter += 1
             }
             if *model.trace {

diff --git a/lib/NN/OpDetect.fx b/lib/NN/OpDetect.fx
@@ -230,3 +230,34 @@ fun ssd_postprocess(ssd_outputs: Ast.nntensor_t [],
         (y1, x1, y2, x2, score, cls)
     }]
 }
+
+// Turns the three output tensors of a Tiny YOLO v3 network into a flat array of detections.
+//
+// ty_outputs - exactly three tensors, in this order:
+//     [0] boxes:    (batch, nboxes, 4); each box is read as y1, x1, y2, x2
+//     [1] scores:   (batch, nclasses, nboxes)
+//     [2] selected: (_, nselected, 3); each triplet is read as (batch_id, class_id, box_idx),
+//                   and only the first slice along dimension 0 is read
+//   Only batch == 1 is supported for now.
+// orig_image_size, input_size - not used by this function; box coordinates are returned
+//   exactly as they are stored in the boxes tensor.
+//
+// Returns one (y1, x1, y2, x2, score, class_id) tuple per selected entry, all as floats.
+fun tinyyolo_postprocess(ty_outputs: Ast.nntensor_t [], ~orig_image_size: (int*2), ~input_size: int)
+{
+    assert(`ty_outputs.size() == 3`)
+    val boxes = ty_outputs[0], scores = ty_outputs[1], selected = ty_outputs[2]
+    val box_shape = boxes.shape.shape, scores_shape = scores.shape.shape
+    val selected_shape = selected.shape.shape
+    assert(`box_shape.size() == 3`)
+    assert(`scores_shape.size() == 3`)
+    assert(`selected_shape.size() == 3`)
+    assert(`box_shape[0] == scores_shape[0]`)
+    assert(`box_shape[0] == 1`) // batch_size=1 is only supported for now
+    assert(`box_shape[1] == scores_shape[2]`)
+    // The loop below reads 4 coordinates per box and 3 indices per selected entry;
+    // check that up front so that a malformed output fails with a clear assertion
+    // instead of an out-of-range access in the middle of the loop.
+    assert(`box_shape[2] == 4`)
+    assert(`selected_shape[2] == 3`)
+    // All three tensors are converted to float arrays; the indices stored in
+    // 'selected' are converted back to int inside the loop.
+    val box_data = float(boxes.data).reshape(box_shape[0], box_shape[1], box_shape[2])
+    val scores_data = float(scores.data).reshape(scores_shape[0], scores_shape[1], scores_shape[2])
+    val selected_data = float(selected.data).reshape(selected_shape[0], selected_shape[1], selected_shape[2])
+    val nselected = selected_shape[1]
+    [for i <- 0:nselected {
+        val batch_id = int(selected_data[0, i, 0])
+        val class_id = int(selected_data[0, i, 1])
+        val box_idx = int(selected_data[0, i, 2])
+        val y1 = box_data[batch_id, box_idx, 0]
+        val x1 = box_data[batch_id, box_idx, 1]
+        val y2 = box_data[batch_id, box_idx, 2]
+        val x2 = box_data[batch_id, box_idx, 3]
+        // scores are laid out as (batch, class, box), hence the index order
+        val score = scores_data[batch_id, class_id, box_idx]
+        val detection = (y1, x1, y2, x2, score, float(class_id))
+        //println(f"detection #{i}: detection={detection}")
+        detection
+    }]
+}
diff --git a/lib/NN/OpPermute.fx b/lib/NN/OpPermute.fx
@@ -337,8 +337,8 @@ match op {
         inp_shape[delta + i] = ((int_*)inp_shape_->data)[i];
         out_shape[delta + i] = ((int_*)out_shape_->data)[i];
     }
-    for (i = SLICE_MAX_DIMS-2; i >= 0; i--)
-        inp_step[i] = inp_step[i+1]*inp_shape[i+1];
+    for (i = SLICE_MAX_DIMS-1; i >= 0; i--)
+        inp_step[i] = i == SLICE_MAX_DIMS-1 ? 1 : inp_step[i+1]*inp_shape[i+1];
     for (i = 0; i < naxes; i++) {
         int_ j = axes_->data ? ((int_*)axes_->data)[i] : i;
         int_ start = ((int_*)starts_->data)[i];
@@ -370,16 +370,13 @@ match op {
         starts[j] = start;
         ends[j] = end;
         steps[j] = step;
+        //printf("slice: i=%d. j=%d. sz_j=%d, starts_j=%d, ends_j=%d, steps_j=%d\n",
+        //       (int)i, (int)(j-delta), (int)sz_j, (int)starts[j], (int)ends[j], (int)steps[j]);
     }
 
     for (i = 0; i < SLICE_MAX_DIMS; i++) {
-        if (ends[i] > inp_shape[i]) ends[i] = inp_shape[i];
-        if (steps[i] > 0)
-            inptr0 += starts[i]*inp_step[i]*esz;
-        else {
-            inptr0 += (ends[i]-1)*inp_step[i]*esz;
-            inp_step[i] *= -1;
-        }
+        inptr0 += starts[i]*inp_step[i]*esz;
+        inp_step[i] *= steps[i];
     }
 
     int_ sz0 = out_shape[4], sz1 = out_shape[3];