Downscale image if necessary

Very large input images eat up all the GPU memory and slow down inference. Also, DOPE seems to work best when objects appear at a size (in pixels) similar to what was seen in the training data. For these reasons, downscaling large input images improves memory consumption, inference speed *and* recognition results.
kini5gowda · Jun 13, 2019 · ee57dcb · ee57dcb
1 parent 6c5040d
commit ee57dcb
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 4 deletions.
diff --git a/config/config_pose.yaml b/config/config_pose.yaml
@@ -2,6 +2,7 @@ topic_camera: "/dope/webcam_rgb_raw"
 topic_camera_info: "/dope/camera_info"
 topic_publishing: "dope"
 input_is_rectified: True   # Whether the input image is rectified (strongly suggested!)
+downscale_height: 500      # if the input image is taller than this (in pixels), scale it down to this height
 
 # Comment any of these lines to prevent detection / pose estimation of that object
 weights: {

diff --git a/nodes/dope b/nodes/dope
@@ -11,6 +11,7 @@ listening to an image topic and publishing poses.
 
 from __future__ import print_function
 
+import cv2
 import message_filters
 import numpy as np
 import rospkg
@@ -100,6 +101,7 @@ class DopeNode(object):
         self.cv_bridge = CvBridge()
 
         self.input_is_rectified = params['input_is_rectified']
+        self.downscale_height = params['downscale_height']
 
         self.config_detect = lambda: None
         self.config_detect.mask_edges = 1
@@ -170,6 +172,9 @@ class DopeNode(object):
     def image_callback(self, image_msg, camera_info):
         """Image callback"""
 
+        img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8")
+        # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # for debugging
+
         # Update camera matrix and distortion coefficients
         if self.input_is_rectified:
             P = np.matrix(camera_info.P, dtype='float64')
@@ -182,13 +187,17 @@ class DopeNode(object):
             dist_coeffs = np.matrix(camera_info.D, dtype='float64')
             dist_coeffs.resize((len(camera_info.D), 1))
 
+        # Downscale image if necessary
+        height, width, _ = img.shape
+        scaling_factor = float(self.downscale_height) / height
+        if scaling_factor < 1.0:
+            camera_matrix[:2] *= scaling_factor
+            img = cv2.resize(img, (int(scaling_factor * width), int(scaling_factor * height)))
+
         for m in self.models:
             self.pnp_solvers[m].set_camera_intrinsic_matrix(camera_matrix)
             self.pnp_solvers[m].set_dist_coeffs(dist_coeffs)
 
-        img = self.cv_bridge.imgmsg_to_cv2(image_msg, "rgb8")
-        # cv2.imwrite('img.png', cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # for debugging
-
         # Copy and draw image
         img_copy = img.copy()
         im = Image.fromarray(img_copy)

diff --git a/readme.md b/readme.md
@@ -75,7 +75,9 @@ This is the official DOPE ROS package for detection and 6-DoF pose estimation of
     * `weights`: dictionary of object names and their weights path names, **comment out any line to disable detection/estimation of that object**
     * `dimension`: dictionary of dimensions for the objects  (key values must match the `weights` names)
     * `draw_colors`: dictionary of object colors  (key values must match the `weights` names)
-    * `thresh_points`: Thresholding the confidence for object detection; increase this value if you see too many false positives, reduce it if  objects are not detected. 
+    * `thresh_points`: Thresholding the confidence for object detection; increase this value if you see too many false positives, reduce it if  objects are not detected.
+    * `downscale_height`: If the input image is taller than this value (in pixels), it is scaled down to this height, with the width scaled by the same factor. Very large input images eat up all the GPU memory and slow down inference. Also, DOPE works best when objects appear at a pixel size similar to that in the training data (which is downscaled to 400 px). For these reasons, downscaling large input images to something reasonable (e.g., 400-500 px) improves memory consumption, inference speed *and* recognition results.
+
     
 4. **Start DOPE node**
     ```