Merge pull request #13 from patricksnape/py_corr_tracker

Add a Python interface for the Correlation Tracker
davisking · May 22, 2015 · 03cfc68 · 03cfc68
2 parents d3a3288 + dd922c6
commit 03cfc68
Show file tree

Hide file tree

Showing 8 changed files with 401 additions and 69 deletions.
diff --git a/python_examples/correlation_tracker.py b/python_examples/correlation_tracker.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+# This example shows how to use the correlation_tracker from the dlib Python
+# library.  This object lets you track the position of an object as it moves
+# from frame to frame in a video sequence.  To use it, you give the
+# correlation_tracker the bounding box of the object you want to track in the
+# current video frame.  Then it will identify the location of the object in
+# subsequent frames.
+#
+# In this particular example, we are going to run the tracker on the
+# video sequence that comes with dlib, which can be found in the
+# examples/video_frames folder.  This video shows a juice box sitting on a table
+# and someone is waving the camera around.  The task is to track the position of
+# the juice box as the camera moves around.
+#
+# COMPILING THE DLIB PYTHON INTERFACE
+#   Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
+#   you are using another python version or operating system then you need to
+#   compile the dlib python interface before you can use this file.  To do this,
+#   run compile_dlib_python_module.bat.  This should work on any operating
+#   system so long as you have CMake and boost-python installed.
+#   On Ubuntu, this can be done easily by running the command:
+#       sudo apt-get install libboost-python-dev cmake
+#
+#   Also note that this example requires scikit-image which can be installed
+#   via the command:
+#       pip install -U scikit-image
+#   Or downloaded from http://scikit-image.org/download.html. 
+
+import os
+import glob
+
+import dlib
+from skimage import io
+
+# Path to the video frames
+video_folder = os.path.join("..", "examples", "video_frames")
+
+# Create the correlation tracker - the object needs to be initialized
+# before it can be used
+tracker = dlib.correlation_tracker()
+
+win = dlib.image_window()
+# We will track the frames as we load them off of disk
+for k, f in enumerate(sorted(glob.glob(os.path.join(video_folder, "*.jpg")))):
+    print("Processing Frame {}".format(k))
+    img = io.imread(f)
+
+    # We need to initialize the tracker on the first frame
+    if k == 0:
+        # Start a track on the juice box. If you look at the first frame you
+        # will see that the juice box is contained within the bounding
+        # box (74, 67, 112, 153).
+        tracker.start_track(img, dlib.rectangle(74, 67, 112, 153))
+    else:
+        # Else we just attempt to track from the previous frame
+        tracker.update(img)
+
+    win.clear_overlay()
+    win.set_image(img)
+    win.add_overlay(tracker.get_position())
+    dlib.hit_enter_to_continue()
diff --git a/tools/python/CMakeLists.txt b/tools/python/CMakeLists.txt
@@ -18,8 +18,10 @@ set(python_srcs
    src/sequence_segmenter.cpp
    src/svm_struct.cpp
    src/image.cpp
+   src/rectangles.cpp
    src/object_detection.cpp
    src/shape_predictor.cpp
+   src/correlation_tracker.cpp
 )
 
 # Only add the GUI module if requested

diff --git a/tools/python/src/correlation_tracker.cpp b/tools/python/src/correlation_tracker.cpp
@@ -0,0 +1,166 @@
+// Copyright (C) 2014  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+#include <dlib/python.h>
+#include <dlib/geometry.h>
+#include <boost/python/args.hpp>
+#include <dlib/image_processing.h>
+
+using namespace dlib;
+using namespace std;
+using namespace boost::python;
+
+// ----------------------------------------------------------------------------------------
+
+// Python-facing wrapper around correlation_tracker::start_track() taking a
+// drectangle.  The image is checked with is_gray_python_image() first, then
+// is_rgb_python_image(); anything else raises dlib::error.
+void start_track (
+    correlation_tracker& tracker,
+    object img,
+    const drectangle& bounding_box
+)
+{
+    if (is_gray_python_image(img))
+    {
+        tracker.start_track(numpy_gray_image(img), bounding_box);
+        return;
+    }
+
+    if (is_rgb_python_image(img))
+    {
+        tracker.start_track(numpy_rgb_image(img), bounding_box);
+        return;
+    }
+
+    // Neither check matched, so the image cannot be handed to the tracker.
+    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
+}
+
+// Overload of start_track() for integer rectangles: the rectangle is
+// converted to a drectangle and forwarded to the drectangle version.
+void start_track_rec (
+    correlation_tracker& tracker,
+    object img,
+    const rectangle& bounding_box
+)
+{
+    const drectangle as_drect(bounding_box);
+    start_track(tracker, img, as_drect);
+}
+
+// Python-facing wrapper around the single-argument
+// correlation_tracker::update().  Returns whatever the tracker returns;
+// raises dlib::error when the image passes neither the gray nor the RGB check.
+double update (
+    correlation_tracker& tracker,
+    object img
+)
+{
+    if (is_gray_python_image(img))
+        return tracker.update(numpy_gray_image(img));
+
+    if (is_rgb_python_image(img))
+        return tracker.update(numpy_rgb_image(img));
+
+    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
+}
+
+// Python-facing wrapper around correlation_tracker::update() that also takes
+// a drectangle guess.  Returns whatever the tracker returns; raises
+// dlib::error when the image passes neither the gray nor the RGB check.
+double update_guess (
+    correlation_tracker& tracker,
+    object img,
+    const drectangle& bounding_box
+)
+{
+    if (is_gray_python_image(img))
+        return tracker.update(numpy_gray_image(img), bounding_box);
+
+    if (is_rgb_python_image(img))
+        return tracker.update(numpy_rgb_image(img), bounding_box);
+
+    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
+}
+
+// Overload of update_guess() for integer rectangles: the rectangle is
+// converted to a drectangle and forwarded to the drectangle version.
+double update_guess_rec (
+    correlation_tracker& tracker,
+    object img,
+    const rectangle& bounding_box
+)
+{
+    const drectangle as_drect(bounding_box);
+    return update_guess(tracker, img, as_drect);
+}
+
+// Free-function form of correlation_tracker::get_position(), used when
+// registering the Python method.
+drectangle get_position (
+    const correlation_tracker& tracker
+)
+{
+    return tracker.get_position();
+}
+
+// ----------------------------------------------------------------------------------------
+
+// Registers the Python class "correlation_tracker" together with its
+// start_track / update / get_position methods.  start_track and update are
+// each registered more than once so that both drectangle and rectangle
+// arguments are accepted.  The docstrings refer to the image argument by its
+// Python keyword name, "image", so that help() output matches the signature.
+void bind_correlation_tracker()
+{
+    using boost::python::arg;
+    {
+    typedef correlation_tracker type;
+    class_<type>("correlation_tracker", "This is a tool for tracking moving objects in a video stream.  You give it \n\
+            the bounding box of an object in the first frame and it attempts to track the \n\
+            object in the box from frame to frame.  \n\
+            This tool is an implementation of the method described in the following paper: \n\
+                Danelljan, Martin, et al. 'Accurate scale estimation for robust visual \n\
+                tracking.' Proceedings of the British Machine Vision Conference BMVC. 2014.")
+        // start_track: drectangle overload
+        .def("start_track", &::start_track, (arg("image"), arg("bounding_box")), "\
+            requires \n\
+                - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
+                - bounding_box.is_empty() == false \n\
+            ensures \n\
+                - This object will start tracking the thing inside the bounding box in the \n\
+                  given image.  That is, if you call update() with subsequent video frames \n\
+                  then it will try to keep track of the position of the object inside bounding_box. \n\
+                - #get_position() == bounding_box")
+        // start_track: rectangle overload
+        .def("start_track", &::start_track_rec, (arg("image"), arg("bounding_box")), "\
+            requires \n\
+                - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
+                - bounding_box.is_empty() == false \n\
+            ensures \n\
+                - This object will start tracking the thing inside the bounding box in the \n\
+                  given image.  That is, if you call update() with subsequent video frames \n\
+                  then it will try to keep track of the position of the object inside bounding_box. \n\
+                - #get_position() == bounding_box")
+        // update: no guess supplied
+        .def("update", &::update, arg("image"), "\
+            requires \n\
+                - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
+                - get_position().is_empty() == false \n\
+                  (i.e. you must have started tracking by calling start_track()) \n\
+            ensures \n\
+                - performs: return update(image, get_position())")
+        // update: drectangle guess
+        .def("update", &::update_guess, (arg("image"), arg("guess")), "\
+            requires \n\
+                - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
+                - get_position().is_empty() == false \n\
+                  (i.e. you must have started tracking by calling start_track()) \n\
+            ensures \n\
+                - When searching for the object in image, we search in the area around the \n\
+                  provided guess. \n\
+                - #get_position() == the new predicted location of the object in image.  This \n\
+                  location will be a copy of guess that has been translated and scaled \n\
+                  appropriately based on the content of image so that it, hopefully, bounds \n\
+                  the object in image. \n\
+                - Returns the peak to side-lobe ratio.  This is a number that measures how \n\
+                  confident the tracker is that the object is inside #get_position(). \n\
+                  Larger values indicate higher confidence.")
+        // update: rectangle guess
+        .def("update", &::update_guess_rec, (arg("image"), arg("guess")), "\
+            requires \n\
+                - image is a numpy ndarray containing either an 8bit grayscale or RGB image. \n\
+                - get_position().is_empty() == false \n\
+                  (i.e. you must have started tracking by calling start_track()) \n\
+            ensures \n\
+                - When searching for the object in image, we search in the area around the \n\
+                  provided guess. \n\
+                - #get_position() == the new predicted location of the object in image.  This \n\
+                  location will be a copy of guess that has been translated and scaled \n\
+                  appropriately based on the content of image so that it, hopefully, bounds \n\
+                  the object in image. \n\
+                - Returns the peak to side-lobe ratio.  This is a number that measures how \n\
+                  confident the tracker is that the object is inside #get_position(). \n\
+                  Larger values indicate higher confidence.")
+        .def("get_position", &::get_position, "returns the predicted position of the object under track.");
+    }
+}
diff --git a/tools/python/src/dlib.cpp b/tools/python/src/dlib.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.
 
 #include <boost/python.hpp>
@@ -14,8 +14,10 @@ void bind_cca();
 void bind_sequence_segmenter();
 void bind_svm_struct();
 void bind_image_classes();
+void bind_rectangles();
 void bind_object_detection();
 void bind_shape_predictors();
+void bind_correlation_tracker();
 
 #ifndef DLIB_NO_GUI_SUPPORT
 void bind_gui();
@@ -38,8 +40,10 @@ BOOST_PYTHON_MODULE(dlib)
     bind_sequence_segmenter();
     bind_svm_struct();
     bind_image_classes();
+    bind_rectangles();
     bind_object_detection();
     bind_shape_predictors();
+    bind_correlation_tracker();
 #ifndef DLIB_NO_GUI_SUPPORT
     bind_gui();
 #endif

diff --git a/tools/python/src/gui.cpp b/tools/python/src/gui.cpp
@@ -54,6 +54,16 @@ void add_overlay_rect (
     win.add_overlay(rect, color);
 }
 
+// Overlay helper for floating point rectangles (drectangle), e.g. the value
+// returned by correlation_tracker's get_position().  The rectangle is
+// converted to an integer rectangle and drawn in the given color.
+void add_overlay_drect (
+    image_window& win,
+    const drectangle& drect,
+    const rgb_pixel& color
+)
+{
+    // Use drectangle's conversion to rectangle, which rounds each edge to the
+    // nearest integer.  Passing the double coordinates straight to the
+    // rectangle constructor would truncate them toward zero instead and shift
+    // the drawn box by up to a pixel.
+    const rectangle rect = drect;
+    win.add_overlay(rect, color);
+}
+
 void add_overlay_parts (
     image_window& win,
     const full_object_detection& detection,
@@ -106,6 +116,8 @@ void bind_gui()
             "Add a list of rectangles to the image_window. They will be displayed as red boxes by default, but the color can be passed.")
         .def("add_overlay", add_overlay_rect, (arg("rectangle"), arg("color")=rgb_pixel(255, 0, 0)),
             "Add a rectangle to the image_window.  It will be displayed as a red box by default, but the color can be passed.")
+        .def("add_overlay", add_overlay_drect, (arg("rectangle"), arg("color")=rgb_pixel(255, 0, 0)),
+            "Add a rectangle to the image_window.  It will be displayed as a red box by default, but the color can be passed.")
         .def("add_overlay", add_overlay_parts, (arg("detection"), arg("color")=rgb_pixel(0, 0, 255)),
             "Add full_object_detection parts to the image window. They will be displayed as blue lines by default, but the color can be passed.")
         .def("wait_until_closed", &type::wait_until_closed,