doc/trustregion.lyx

#LyX 2.0 created this file. For more info see http://www.lyx.org/
\lyxformat 413
\begin_document
\begin_header
\textclass article
\begin_preamble
\usepackage{amssymb}
\end_preamble
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman default
\font_sans default
\font_typewriter default
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100

\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_amsmath 1
\use_esint 1
\use_mhchem 1
\use_mathdots 1
\cite_engine basic
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\use_refstyle 1
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\SE}[1]{\mathbb{SE}\left(#1\right)}
{\mathbb{SE}\left(#1\right)}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\se}[1]{\mathfrak{se}\left(#1\right)}
{\mathfrak{se}\left(#1\right)}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\SO}[1]{\mathbb{SO}\left(#1\right)}
{\mathbb{SO}\left(#1\right)}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\so}[1]{\mathfrak{so}\left(#1\right)}
{\mathfrak{so}\left(#1\right)}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\R}[1]{\mathbb{R}^{#1}}
{\mathbb{R}^{#1}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\prob}[2]{#1\hspace{0.1em}|\hspace{0.1em}#2}
{#1\hspace{0.1em}|\hspace{0.1em}#2}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\norm}[1]{\left\Vert #1\right\Vert }
{\left\Vert #1\right\Vert }
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\t}{\mathsf{T}}
{\mathsf{T}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
newcommand{
\backslash
smallequals}{
\backslash
mbox{
\backslash
raisebox{0.2ex}{
\backslash
fontsize{8}{10}
\backslash
selectfont $=$}}}
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\smeq}{\smallequals}
{{\scriptstyle =}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\th}[1]{#1^{\mathrm{th}}}
{#1^{\mathrm{th}}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\defeq}{\stackrel{\Delta}{=}}
{\stackrel{\Delta}{=}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\im}{\mathcal{I}}
{\mathcal{I}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\lin}[1]{\overset{{\scriptscriptstyle \circ}}{#1}}
{\overset{{\scriptscriptstyle \circ}}{#1}}
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FormulaMacro
\newcommand{\lins}[3]{\overset{{\scriptscriptstyle \circ}}{#1}\vphantom{#1}_{#3}^{#2}}
{\overset{{\scriptscriptstyle \circ}}{#1}\vphantom{#1}_{#3}^{#2}}
\end_inset


\end_layout

\begin_layout Section
Overview of Trust-region Methods
\end_layout

\begin_layout Standard
For nice figures, see
\begin_inset space ~
\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "Hauser06lecture"

\end_inset

 (in our /net/hp223/borg/Literature folder).
\end_layout

\begin_layout Standard
We just deal here with a small subset of trust-region methods, specifically
 approximating the cost function as quadratic using Newton's method, and
 using the Dogleg method and later to include Steihaug's method.
\end_layout

\begin_layout Standard
The overall goal of a nonlinear optimization method is to iteratively find
 a local minimum of a nonlinear function
\begin_inset Formula 
\[
\hat{x}=\arg\min_{x}f\left(x\right)
\]

\end_inset

where 
\begin_inset Formula $f\left(x\right)\to\mathbb{R}$
\end_inset

 is a scalar function.
 In GTSAM, the variables 
\begin_inset Formula $x$
\end_inset

 could be manifold or Lie group elements, so in this document we only work
 with 
\emph on
increments
\emph default
 
\begin_inset Formula $\delta x\in\R n$
\end_inset

 in the tangent space.
 In this document we specifically deal with 
\emph on
trust-region
\emph default
 methods, which at every iteration attempt to find a good increment 
\begin_inset Formula $\norm{\delta x}\leq\Delta$
\end_inset

 within the 
\begin_inset Quotes eld
\end_inset

trust radius
\begin_inset Quotes erd
\end_inset

 
\begin_inset Formula $\Delta$
\end_inset

.
\end_layout

\begin_layout Standard
Further, most nonlinear optimization methods, including trust region methods,
 deal with an approximate problem at every iteration.
 Although there are other choices (such as quasi-Newton), the Newton's method
 approximation is, given an estimate 
\begin_inset Formula $x^{\left(k\right)}$
\end_inset

 of the variables 
\begin_inset Formula $x$
\end_inset

, 
\begin_inset Formula 
\begin{equation}
f\left(x^{\left(k\right)}\oplus\delta x\right)\approx M^{\left(k\right)}\left(\delta x\right)=f^{\left(k\right)}+g^{\left(k\right)\t}\delta x+\frac{1}{2}\delta x^{\t}G^{\left(k\right)}\delta x\text{,}\label{eq:M-approx}
\end{equation}

\end_inset

where 
\begin_inset Formula $f^{\left(k\right)}=f\left(x^{\left(k\right)}\right)$
\end_inset

 is the function at 
\begin_inset Formula $x^{\left(k\right)}$
\end_inset

, 
\begin_inset Formula $g^{\left(x\right)}=\left.\frac{\partial f}{\partial x}\right|_{x^{\left(k\right)}}$
\end_inset

 is its gradient, and 
\begin_inset Formula $G^{\left(k\right)}=\left.\frac{\partial^{2}f}{\partial x^{2}}\right|_{x^{\left(k\right)}}$
\end_inset

 is its Hessian (or an approximation of the Hessian).
\end_layout

\begin_layout Standard
Trust-region methods adaptively adjust the trust radius 
\begin_inset Formula $\Delta$
\end_inset

 so that within it, 
\begin_inset Formula $M$
\end_inset

 is a good approximation of 
\begin_inset Formula $f$
\end_inset

, and then never step beyond the trust radius in each iteration.
 When the true minimum is within the trust region, they converge quadratically
 like Newton's method.
 At each iteration 
\begin_inset Formula $k$
\end_inset

, they solve the 
\emph on
trust-region subproblem
\emph default
 to find a proposed update 
\begin_inset Formula $\delta x$
\end_inset

 inside the trust radius 
\begin_inset Formula $\Delta$
\end_inset

, which decreases the approximate function 
\begin_inset Formula $M^{\left(k\right)}$
\end_inset

 as much as possible.
 The proposed update is only accepted if the true function 
\begin_inset Formula $f$
\end_inset

 decreases as well.
 If the decrease of 
\begin_inset Formula $M$
\end_inset

 matches the decrease of 
\begin_inset Formula $f$
\end_inset

 well, the size of the trust region is increased, while if the match is
 not close the trust region size is decreased.
\end_layout

\begin_layout Standard
Minimizing Eq.
\begin_inset space ~
\end_inset


\begin_inset CommandInset ref
LatexCommand ref
reference "eq:M-approx"

\end_inset

 is itself a nonlinear optimization problem, so there are various methods
 for approximating it, including Dogleg and Steihaug's method.
\end_layout

\begin_layout Section
Adapting the Trust Region Size
\end_layout

\begin_layout Standard
As mentioned in the previous section, we increase the trust region size
 if the decrease in the model function 
\begin_inset Formula $M$
\end_inset

 matches the decrease in the true cost function 
\begin_inset Formula $S$
\end_inset

 very closely, and decrease it if they do not match closely.
 The closeness of this match is measured with the 
\emph on
gain ratio
\emph default
,
\begin_inset Formula 
\[
\rho=\frac{f\left(x\right)-f\left(x\oplus\delta x_{d}\right)}{M\left(0\right)-M\left(\delta x_{d}\right)}\text{,}
\]

\end_inset

where 
\begin_inset Formula $\delta x_{d}$
\end_inset

 is the proposed dogleg step to be introduced next.
 The decrease in the model function is always non-negative, and as the decrease
 in 
\begin_inset Formula $f$
\end_inset

 approaches it, 
\begin_inset Formula $\rho$
\end_inset

 approaches 
\begin_inset Formula $1$
\end_inset

.
 If the true cost function increases, 
\begin_inset Formula $\rho$
\end_inset

 will be negative, and if the true cost function decreases even more than
 predicted by 
\begin_inset Formula $M$
\end_inset

, then 
\begin_inset Formula $\rho$
\end_inset

 will be greater than 
\begin_inset Formula $1$
\end_inset

.
 A typical update rule [
\color blue
see where this came from in paper
\color inherit
] is
\begin_inset Formula 
\[
\Delta\leftarrow\begin{cases}
\max\left(\Delta,3\norm{\delta x_{d}}\right)\text{,} & \rho>0.75\\
\Delta & 0.75>\rho>0.25\\
\Delta/2 & 0.25>\rho
\end{cases}
\]

\end_inset


\end_layout

\begin_layout Section
Dogleg
\end_layout

\begin_layout Standard
Dogleg minimizes an approximation of Eq.
\begin_inset space ~
\end_inset


\begin_inset CommandInset ref
LatexCommand ref
reference "eq:M-approx"

\end_inset

 by considering three possibilities using two points - the minimizer 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

 of 
\begin_inset Formula $M^{\left(k\right)}$
\end_inset

 along the negative gradient direction 
\begin_inset Formula $-g^{\left(k\right)}$
\end_inset

, and the overall Newton's method minimizer 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

 of 
\begin_inset Formula $M^{\left(k\right)}$
\end_inset

.
 When the Hessian 
\begin_inset Formula $G^{\left(k\right)}$
\end_inset

 is positive, the magnitude of 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

 is always less than that of 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

, meaning that the Newton's method step is 
\begin_inset Quotes eld
\end_inset

more adventurous
\begin_inset Quotes erd
\end_inset

.
 How much we step towards the Newton's method point depends on the trust
 region size:
\end_layout

\begin_layout Enumerate
If the trust region is smaller than 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

, we step in the negative gradient direction but only by the trust radius.
\end_layout

\begin_layout Enumerate
If the trust region boundary is between 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

 and 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

, we step to the linearly-interpolated point between these two points that
 intersects the trust region boundary.
\end_layout

\begin_layout Enumerate
If the trust region boundary is larger than 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

, we step to 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

.
\end_layout

\begin_layout Standard
To find the intersection of the line between 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

 and 
\begin_inset Formula $\delta x_{n}^{\left(k\right)}$
\end_inset

 with the trust region boundary, we solve a quadratic roots problem,
\begin_inset Formula 
\begin{align*}
\Delta & =\norm{\left(1-\tau\right)\delta x_{u}+\tau\delta x_{n}}\\
\Delta^{2} & =\left(1-\tau\right)^{2}\delta x_{u}^{\t}\delta x_{u}+2\tau\left(1-\tau\right)\delta x_{u}^{\t}\delta x_{n}+\tau^{2}\delta x_{n}^{\t}\delta x_{n}\\
0 & =uu-2\tau uu+\tau^{2}uu+2\tau un-2\tau^{2}un+\tau^{2}nn-\Delta^{2}\\
0 & =\left(uu-2un+nn\right)\tau^{2}+\left(2un-2uu\right)\tau-\Delta^{2}+uu\\
\tau & =\frac{-\left(2un-2uu\right)\pm\sqrt{\left(2un-2uu\right)^{2}-4\left(uu-2un+nn\right)\left(uu-\Delta^{2}\right)}}{2\left(uu-un+nn\right)}
\end{align*}

\end_inset

From this we take whichever possibility for 
\begin_inset Formula $\tau$
\end_inset

 such that 
\begin_inset Formula $0<\tau<1$
\end_inset

.
\end_layout

\begin_layout Standard
To find the steepest-descent minimizer 
\begin_inset Formula $\delta x_{u}^{\left(k\right)}$
\end_inset

, we perform line search in the gradient direction on the approximate function
 
\begin_inset Formula $M$
\end_inset

,
\begin_inset Formula 
\begin{equation}
\delta x_{u}^{\left(k\right)}=\frac{-g^{\left(k\right)\t}g^{\left(k\right)}}{g^{\left(k\right)\t}G^{\left(k\right)}g^{\left(k\right)}}g^{\left(k\right)}\label{eq:steepest-descent-point}
\end{equation}

\end_inset


\end_layout

\begin_layout Standard
Thus, mathematically, we can write the dogleg update 
\begin_inset Formula $\delta x_{d}^{\left(k\right)}$
\end_inset

 as
\begin_inset Formula 
\[
\delta x_{d}^{\left(k\right)}=\begin{cases}
-\frac{\Delta}{\norm{g^{\left(k\right)}}}g^{\left(k\right)}\text{,} & \Delta<\norm{\delta x_{u}^{\left(k\right)}}\\
\left(1-\tau^{\left(k\right)}\right)\delta x_{u}^{\left(k\right)}+\tau^{\left(k\right)}\delta x_{n}^{\left(k\right)}\text{,} & \norm{\delta x_{u}^{\left(k\right)}}<\Delta<\norm{\delta x_{n}^{\left(k\right)}}\\
\delta x_{n}^{\left(k\right)}\text{,} & \norm{\delta x_{n}^{\left(k\right)}}<\Delta
\end{cases}
\]

\end_inset


\end_layout

\begin_layout Section
Working with 
\begin_inset Formula $M$
\end_inset

 as a Bayes' Net
\end_layout

\begin_layout Standard
When we have already eliminated a factor graph into a Bayes' Net, we have
 the same quadratic error function 
\begin_inset Formula $M^{\left(k\right)}\left(\delta x\right)$
\end_inset

, but it is in a different form:
\begin_inset Formula 
\[
M^{\left(k\right)}\left(\delta x\right)=\frac{1}{2}\norm{Rx-d}^{2}\text{,}
\]

\end_inset

where 
\begin_inset Formula $R$
\end_inset

 is an upper-triangular matrix (stored as a set of sparse block Gaussian
 conditionals in GTSAM), and 
\begin_inset Formula $d$
\end_inset

 is the r.h.s.
 vector.
 The gradient and Hessian of 
\begin_inset Formula $M$
\end_inset

 are then
\begin_inset Formula 
\begin{align*}
g^{\left(k\right)} & =R^{\t}\left(Rx-d\right)\\
G^{\left(k\right)} & =R^{\t}R
\end{align*}

\end_inset


\end_layout

\begin_layout Standard
In GTSAM, because the Bayes' Net is not dense, we evaluate Eq.
\begin_inset space ~
\end_inset


\begin_inset CommandInset ref
LatexCommand ref
reference "eq:steepest-descent-point"

\end_inset

 in an efficient way.
 Rewriting the denominator (leaving out the 
\begin_inset Formula $\left(k\right)$
\end_inset

 superscript) as
\begin_inset Formula 
\[
g^{\t}Gg=\sum_{i}\left(R_{i}g\right)^{\t}\left(R_{i}g\right)\text{,}
\]

\end_inset

where 
\begin_inset Formula $i$
\end_inset

 indexes over the conditionals in the Bayes' Net (corresponding to blocks
 of rows of 
\begin_inset Formula $R$
\end_inset

) exploits the sparse structure of the Bayes' Net, because it is easy to
 only include the variables involved in each 
\begin_inset Formula $i^{\text{th}}$
\end_inset

 conditional when multiplying them by the corresponding elements of 
\begin_inset Formula $g$
\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset CommandInset bibtex
LatexCommand bibtex
bibfiles "trustregion"
options "plain"

\end_inset


\end_layout

\end_body
\end_document