Tutorial

Introduction - Image Restoration - Problem Formulation - Regularization - Execution


Introduction

This tutorial describes how to solve an optimization problem with the forward-backward primal-dual algorithm (FBPD), which is able to $$ \operatorname*{minimize}_{x\in\mathbb{R}^N} \; f(x) + g(x) + h(Lx), $$ where Under some technical assumptions, it can be show that the sequence $(x_i)_{i\in\mathbb{N}}$ generated by $$ \left\lfloor \begin{aligned} x_{i+1} &= \operatorname{prox}_{\tau \, f} \Big(x_i - \tau \big(\nabla g(x_i) + L^\top y_i\big)\Big)\\[0.5em] y_{i+1} &= \operatorname{prox}_{\sigma \, h^*} \Big( y_i+\sigma \, L\big(2x_{i+1}-x_i\big)\Big) \end{aligned} \right. $$ converges to a solution to the above problem, provided that $\tau>0$ and $\sigma>0$ are such that $$\tau \left(\frac{\beta}{2} + \sigma \|L^\top L\|\right) < 1.$$

A generic implementation of FBPD is given below (the function inputs will be clarified later).


function [x, it, time, crit] = FBPD(x, f, g, h, opt)

% default inputs
if nargin < 2 || isempty(f), f.fun = @(x) 0; f.prox = @(x,gamma) x;                                                   end
if nargin < 3 || isempty(g), g.fun = @(x) 0; g.grad = @(x)       0; g.beta   = 0;                                     end
if nargin < 4 || isempty(h), h.fun = @(y) 0; h.prox = @(y,gamma) y; h.dir_op = @(x) x; h.adj_op = @(y) y; h.beta = 1; end
if nargin < 5 || isempty(opt), opt.tol = 1e-4; opt.iter = 500;                                                        end

% select the step-sizes
tau = 2 / (g.beta+2);
sigma = (1/tau - g.beta/2) / h.beta;

% initialize the dual solution
y = h.dir_op(x);

% execute the algorithm
time = zeros(1, opt.iter);
crit = zeros(1, opt.iter);
hdl = waitbar(0, 'Running FBPD...');
for it = 1:opt.iter
    
    tic;
    
    % primal forward-backward step
    x_old = x;
    x = x - tau * ( g.grad(x) + h.adj_op(y) );
    x = f.prox(x, tau);
    
    % dual forward-backward step
    y = y + sigma * h.dir_op(2*x - x_old);
    y = y - sigma * h.prox(y/sigma, 1/sigma);   

    % time and criterion
    time(it) = toc;
    crit(it) = f.fun(x) + g.fun(x) + h.fun(h.dir_op(x));
           
    % stopping rule
    if norm( x(:) - x_old(:) ) < opt.tol * norm( x_old(:) ) && it > 10
        break;
    end
    
    waitbar(it/opt.iter, hdl);
end

close(hdl);
crit = crit(1:it);
time = cumsum(time(1:it));

import numpy as np
import time as time


class EmptyFunction:
    def fun(x):      return 0
    def grad(x):     return 0
    def prox(x,tau): return x
    def dir_op(x):   return x
    def adj_op(x):   return x
    

def FBPD(x_init, f=None, g=None, h=None, opt=None):

    # default inputs
    if f   is None: f = EmptyFunction()
    if g   is None: g = EmptyFunction()
    if h   is None: h = EmptyFunction()
    if opt is None: opt = {'tol': 1e-4, 'iter': 500}

    # algorithmic parameters
    tol      = opt['tol']
    max_iter = opt['iter']
    
    # step-sizes
    tau   = 2.0 / (g.beta + 2.0);
    sigma = (1.0/tau - g.beta/2.0) / h.beta;

    # initialization
    x = x_init
    y = h.dir_op(x);

    print('Running FBPD...');
    
    timing = np.zeros(max_iter)
    criter = np.zeros(max_iter)

    # algorithm loop
    for it in range(0, max_iter):
    
        t = time.time()
    
        # primal forward-backward step
        x_old = x;
        x = x - tau * ( g.grad(x) + h.adj_op(y) );
        x = f.prox(x, tau);
    
        # dual forward-backward step
        y = y + sigma * h.dir_op(2*x - x_old);
        y = y - sigma * h.prox(y/sigma, 1/sigma);   

        # time and criterion
        timing[it] = time.time() - t
        criter[it] = f.fun(x) + g.fun(x) + h.fun(h.dir_op(x));
           
        # stopping rule
        if np.linalg.norm(x - x_old) < tol * np.linalg.norm(x_old) and it > 10:
           break
    
        print(str(it)+'out of'+str(max_iter)+'iterations')

    criter = criter[0:it+1];
    timing = np.cumsum(timing[0:it+1]);
    
    return x, it, timing, criter

Image Restoration

The goal of image restoration is to recover the visual content of a corrupted image through the inversion of the corresponding degradation process. In this context, a popular task consists of recovering an image $\overline{x} \in \mathbb{R}^N$ as close as possible to some observations $z \in \mathbb{R}^K$ generated as $$ z = A\overline{x} + b $$ where For the purpose of this tutorial, the degraded image is generated as follows, with $K=N$.


% original image
x_bar = double( imread('firemen.jpg') );

% blur operator
psf = fspecial('average', 3);

% noisy image
rng('default');
z = imfilter(x_bar, psf) + 20 * randn( size(x_bar) );

% visualization
figure; imshow(x_bar/255,[]); title('Original image');
figure; imshow(    z/255,[]); title('Noisy image');

import scipy.misc as misc
import scipy.ndimage.filters as fil
import matplotlib.pyplot as plt

# original image
x_bar = misc.imread('firemen.jpg');
x_bar = x_bar.astype(np.float64)

# blur operator
psf = (3, 3, 1)

# noisy image
z = fil.uniform_filter(x_bar, psf) + 20 * np.random.randn(*x_bar.shape);

# visualization
plt.imshow(x_bar/255)
plt.title('Original image')
plt.figure()
plt.imshow(np.clip(z/255,0,1))
plt.title('Noisy image')


Problem Formulation

To recover $\overline{x}$ from $z$, one can follow a variational approach that aims at $$ \operatorname*{minimize}_{x\in[0,255]^N} \quad \underbrace{\frac{1}{2}\|Ax-z\|_2^2}_{\textrm{Data fidelity}} \quad + \underbrace{h(L x)}_{\textrm{Regularization}} $$ This formulation reverts to the general convex optimization problem given in the introduction by setting As for the data fidelity term, FBPD needs the following information: This is illustrated in the code below.

% constraint
f.prox = @(x,tau) project_box(x, 0, 255);

% data fidelity
A_dir  = @(x) imfilter(x, psf);
A_adj  = @(x) imfilter(x, rot90(psf,2));  % WARNING: 'psf' must be a (2n+1)-by-(2n+1) matrix
g.grad = @(x) A_adj(A_dir(x) - z);
g.beta = sum(abs(psf(:)));

% criteria
f.fun = @(x) indicator_box(x, 0, 255);
g.fun = @(x) sum(sum(sum((A_dir(x)-z).^2)));

from BoxConstraint import *

class LeastSquares:
    
    z     = None
    psf   = None
    beta  = None
    
    def __init__(self, z, psf):
        self.z    = z
        self.psf  = psf
        self.beta = np.prod(psf)
        
    def A_dir(self, x):
        return fil.uniform_filter(x, self.psf)
    
    def A_adj(self, x):
        return fil.uniform_filter(x, self.psf);     # WARNING: filter dimensions must be odd
    
    def grad(self, x):
        return self.A_adj( self.A_dir(x) - self.z )
    
    def fun(self, x):
        p = self.A_dir(x)
        return np.sum(np.square(p-z))

# constraint
f = BoxConstraint(0, 255);

# data fidelity
g = LeastSquares(z, psf)

Regularization

The total variation (TV) regularization is defined as: $$ \operatorname{TV}(x) = \lambda \sum_{\ell=1}^N \left(\big|x^{(\ell)}-x^{(n_{\ell,1})}\big|^2 + \big|x^{(\ell)}-x^{(n_{\ell,2})}\big|^2\right)^{1/2} $$ where $\lambda>0$, and $(n_{\ell,1},n_{\ell,2})\in\{1,\dots,N\}^2$ denote the positions of the horizontal/vertical nearest neighbors of $x^{(\ell)}$.

This penalty can be plugged into the problem formulation by setting:

$$ y = L x = \left[ \begin{aligned} &x^{(1)}-x^{(n_{1,1})}\\ &x^{(1)}-x^{(n_{1,2})}\\ &\quad\;\; \vdots\\ &x^{(N)}-x^{(n_{N,1})}\\ &x^{(N)}-x^{(n_{N,2})}\\ \end{aligned} \right] \begin{aligned} &\left.\vphantom{ \begin{aligned} &x^{(1)}-x^{(n_{1,1})}\\ &x^{(1)}-x^{(n_{1,2})}\\ \end{aligned} }\right\} \; y_1\in\mathbb{R}^2\\ &\qquad\vdots\\ &\left.\vphantom{ \begin{aligned} &x^{(N)}-x^{(n_{N,1})}\\ &x^{(N)}-x^{(n_{N,2})}\\ \end{aligned} }\right\} \; y_N\in\mathbb{R}^2\\ \end{aligned} $$ and $$ h(y) = \sum_{\ell=1}^N \lambda \|y_\ell\|_2. $$ Consequently, FBPD needs the implementation of For the implementation of $\operatorname{prox}_h$, note that h.dir_op generates a 4D matrix in which the gradient vector of each pixel is stored along the 4th dimension.


% forward finite differences (with Neumann boundary conditions)
hor_forw = @(x) [x(:,2:end,:)-x(:,1:end-1,:), zeros(size(x,1),1,size(x,3))]; % horizontal
ver_forw = @(x) [x(2:end,:,:)-x(1:end-1,:,:); zeros(1,size(x,2),size(x,3))]; % vertical

% backward finite differences (with Neumann boundary conditions)
hor_back = @(x) [-x(:,1,:), x(:,1:end-2,:)-x(:,2:end-1,:), x(:,end-1,:)];    % horizontal
ver_back = @(x) [-x(1,:,:); x(1:end-2,:,:)-x(2:end-1,:,:); x(end-1,:,:)];    % vertical

% direct and adjoint operators
h.dir_op = @(x) cat( 4, hor_forw(x), ver_forw(x) );
h.adj_op = @(y) hor_back( y(:,:,:,1) ) + ver_back( y(:,:,:,2) );

% operator norm
h.beta = 8;

% regularization parameter
lambda = 5;

% proximity operator
h.prox = @(y,gamma) prox_L2(y, gamma*lambda, 4);

% criterion
h.fun = @(y) fun_L2(y, lambda, 4);

from L2_Norm import *

def hor_forward(x):
    """ Horizontal forward finite differences (with Neumann boundary conditions) """
    hor = np.zeros_like(x)
    hor[:,:-1,:] = x[:,1:,:] - x[:,:-1,:]
    return hor

def ver_forward(x):
    """ Vertical forward finite differences (with Neumann boundary conditions) """
    ver = np.zeros_like(x)
    ver[:-1,:,:] = x[1:,:,:] - x[:-1,:,:]
    return ver

def hor_backward(x):
    """ Horizontal backward finite differences (with Neumann boundary conditions) """
    Nr, Nc, Nb = x.shape
    zer = np.zeros((Nr,1,Nb))
    xxx = x[:,:-1,:]
    return np.concatenate((zer,xxx), 1) - np.concatenate((xxx,zer), 1)

def ver_backward(x):
    """ Vertical backward finite differences (with Neumann boundary conditions) """
    Nr, Nc, Nb = x.shape
    zer = np.zeros((1,Nc,Nb))
    xxx = x[:-1,:,:]
    return np.concatenate((zer,xxx), 0) - np.concatenate((xxx,zer), 0)


class TotalVariation(L2_Norm):

    beta = 8
    
    def __init__(self, gamma):
        L2_Norm.__init__(self, gamma, 3)
    
    def dir_op(self, x):
        h = hor_forward(x)
        v = ver_forward(x)
        return np.stack((h,v), 3)
    
    def adj_op(self, y):
        h = hor_backward( y[:,:,:,0] )
        v = ver_backward( y[:,:,:,1] )
        return h + v
    
    #def fun(self, x)  --> inherited from L2_Norm
    #def prox(self, x) --> inherited from L2_Norm


# hyperparameter
gamma = 5;

# regularization
h = TotalVariation(gamma)

Execution

The information needed by FBPD is now complete, and the optimization method can be executed.

% minimization
[x, it, time, crit] = FBPD(z, f, g, h);

% PSRN
psnr = 10 * log10( 255^2 / mean((x(:)-x_bar(:)).^2) );

% visualization
figure; imshow(x/255,[]); title(['Restored image - PSNR: ' num2str(round(psnr,2))])
figure; plot(time, crit); title('Convergence plot'); xlabel('seconds'); ylabel('criterion')

# minimization
x, it, time, crit = FBPD(z, f, g, h);

# PSNR
psnr = 10 * np.log10( 255*255 / np.mean(np.square(x-x_bar)) )

# visualization
plt.imshow(x/255); 
plt.title( 'Restored image - PSNR: ' + str(np.round(psnr,2)) )
plt.figure()
plt.plot(time, crit) 
plt.title('Convergence plot')
plt.xlabel('seconds')
plt.ylabel('criterion')