mirror of
				https://github.com/AxioDL/metaforce.git
				synced 2025-10-26 08:50:25 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			211 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			211 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- c++ -*- (enables emacs c++ mode) */
 | |
| /*===========================================================================
 | |
| 
 | |
|  Copyright (C) 2004-2017 Yves Renard
 | |
| 
 | |
|  This file is a part of GetFEM++
 | |
| 
 | |
|  GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
 | |
|  under  the  terms  of the  GNU  Lesser General Public License as published
 | |
|  by  the  Free Software Foundation;  either version 3 of the License,  or
 | |
|  (at your option) any later version along with the GCC Runtime Library
 | |
|  Exception either version 3.1 or (at your option) any later version.
 | |
|  This program  is  distributed  in  the  hope  that it will be useful,  but
 | |
|  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 | |
|  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 | |
|  License and GCC Runtime Library Exception for more details.
 | |
|  You  should  have received a copy of the GNU Lesser General Public License
 | |
|  along  with  this program;  if not, write to the Free Software Foundation,
 | |
|  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 | |
| 
 | |
|  As a special exception, you  may use  this file  as it is a part of a free
 | |
|  software  library  without  restriction.  Specifically,  if   other  files
 | |
|  instantiate  templates  or  use macros or inline functions from this file,
 | |
|  or  you compile this  file  and  link  it  with other files  to produce an
 | |
|  executable, this file  does  not  by itself cause the resulting executable
 | |
|  to be covered  by the GNU Lesser General Public License.  This   exception
 | |
|  does not  however  invalidate  any  other  reasons why the executable file
 | |
|  might be covered by the GNU Lesser General Public License.
 | |
| 
 | |
| ===========================================================================*/
 | |
| 
 | |
| /**@file gmm_solver_bfgs.h 
 | |
|    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
 | |
|    @date October 14 2004.
 | |
|    @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
 | |
|  */
 | |
| #ifndef GMM_BFGS_H
 | |
| #define GMM_BFGS_H
 | |
| 
 | |
| #include "gmm_kernel.h"
 | |
| #include "gmm_iter.h"
 | |
| 
 | |
| namespace gmm {
 | |
| 
 | |
|   // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
 | |
|   // Quasi Newton method for optimization problems.
 | |
|   // with Wolfe Line search.
 | |
| 
 | |
| 
 | |
|   // delta[k] = x[k+1] - x[k]
 | |
|   // gamma[k] = grad f(x[k+1]) - grad f(x[k])
 | |
|   // H[0] = I
 | |
|   // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
 | |
|   // DFP  : zeta[k] = H[k] gamma[k]
 | |
|   // tau[k] = gamma[k]^T zeta[k]
 | |
|   // rho[k] = 1 / gamma[k]^T delta[k]
 | |
|   // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
 | |
|   //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
 | |
|   // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T 
 | |
|   //                 - (1/tau[k])zeta[k] zeta[k]^T 
 | |
| 
 | |
|   // Object representing the inverse of the Hessian
 | |
|   template <typename VECTOR> struct bfgs_invhessian {
 | |
|     
 | |
|     typedef typename linalg_traits<VECTOR>::value_type T;
 | |
|     typedef typename number_traits<T>::magnitude_type R;
 | |
| 
 | |
|     std::vector<VECTOR> delta, gamma, zeta;
 | |
|     std::vector<T> tau, rho;
 | |
|     int version;
 | |
| 
 | |
|     template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
 | |
|       copy(X, Y);
 | |
|       for (size_type k = 0 ; k < delta.size(); ++k) {
 | |
| 	T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
 | |
| 	switch (version) {
 | |
| 	case 0 : // BFGS
 | |
| 	  add(scaled(zeta[k], rho[k]*xdelta), Y);
 | |
| 	  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
 | |
| 	  break;
 | |
| 	case 1 : // DFP
 | |
| 	  add(scaled(delta[k], rho[k]*xdelta), Y);
 | |
| 	  add(scaled(zeta[k], -xzeta/tau[k]), Y);
 | |
| 	  break;
 | |
| 	}
 | |
|       }
 | |
|     }
 | |
|     
 | |
|     void restart(void) {
 | |
|       delta.resize(0); gamma.resize(0); zeta.resize(0); 
 | |
|       tau.resize(0); rho.resize(0);
 | |
|     }
 | |
|     
 | |
|     template<typename VECT1, typename VECT2>
 | |
|     void update(const VECT1 &deltak, const VECT2 &gammak) {
 | |
|       T vsp = vect_sp(deltak, gammak);
 | |
|       if (vsp == T(0)) return;
 | |
|       size_type N = vect_size(deltak), k = delta.size();
 | |
|       VECTOR Y(N);
 | |
|       hmult(gammak, Y);
 | |
|       delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
 | |
|       tau.resize(k+1); rho.resize(k+1);
 | |
|       resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N); 
 | |
|       gmm::copy(deltak, delta[k]);
 | |
|       gmm::copy(gammak, gamma[k]);
 | |
|       rho[k] = R(1) / vsp;
 | |
|       if (version == 0)
 | |
| 	add(delta[k], scaled(Y, -1), zeta[k]);
 | |
|       else
 | |
| 	gmm::copy(Y, zeta[k]);
 | |
|       tau[k] = vect_sp(gammak,  zeta[k]);
 | |
|     }
 | |
|     
 | |
|     bfgs_invhessian(int v = 0) { version = v; }
 | |
|   };
 | |
| 
 | |
| 
 | |
|   template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
 | |
|   void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
 | |
| 	    int restart, iteration& iter, int version = 0,
 | |
| 	    double lambda_init=0.001, double print_norm=1.0) {
 | |
| 
 | |
|     typedef typename linalg_traits<VECTOR>::value_type T;
 | |
|     typedef typename number_traits<T>::magnitude_type R;
 | |
|     
 | |
|     bfgs_invhessian<VECTOR> invhessian(version);
 | |
|     VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
 | |
|     grad(x, r);
 | |
|     R lambda = lambda_init, valx = f(x), valy;
 | |
|     int nb_restart(0);
 | |
|     
 | |
|     if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
 | |
|     while (! iter.finished_vect(r)) {
 | |
| 
 | |
|       invhessian.hmult(r, d); gmm::scale(d, T(-1));
 | |
|       
 | |
|       // Wolfe Line search
 | |
|       R derivative = gmm::vect_sp(r, d);    
 | |
|       R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
 | |
|       bool unbounded = true, blocked = false, grad_computed = false;
 | |
|       
 | |
|       for(;;) {
 | |
| 	add(x, scaled(d, lambda), y);
 | |
| 	valy = f(y);
 | |
| 	if (iter.get_noisy() >= 2) {
 | |
| 	  cout.precision(15);
 | |
| 	  cout << "Wolfe line search, lambda = " << lambda 
 | |
|  	       << " value = " << valy /print_norm << endl;
 | |
| // 	       << " derivative = " << derivative
 | |
| // 	       << " lambda min = " << lambda_min << " lambda max = "
 | |
| // 	       << lambda_max << endl; getchar();
 | |
| 	}
 | |
| 	if (valy <= valx + m1 * lambda * derivative) {
 | |
| 	  grad(y, r2); grad_computed = true;
 | |
| 	  T derivative2 = gmm::vect_sp(r2, d);
 | |
| 	  if (derivative2 >= m2*derivative) break;
 | |
| 	  lambda_min = lambda;
 | |
| 	}
 | |
| 	else {
 | |
| 	  lambda_max = lambda;
 | |
| 	  unbounded = false;
 | |
| 	}
 | |
| 	if (unbounded) lambda *= R(10);
 | |
| 	else  lambda = (lambda_max + lambda_min) / R(2);
 | |
| 	if (lambda == lambda_max || lambda == lambda_min) break;
 | |
| 	// valy <= R(2)*valx replaced by
 | |
| 	// valy <= valx + gmm::abs(derivative)*lambda_init
 | |
| 	// for compatibility with negative values (08.24.07).
 | |
| 	if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
 | |
| 	    (lambda < R(lambda_init*1E-8) ||
 | |
| 	     (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
 | |
| 	{ blocked = true; lambda = lambda_init; break; }
 | |
|       }
 | |
| 
 | |
|       // Rank two update
 | |
|       ++iter;
 | |
|       if (!grad_computed) grad(y, r2);
 | |
|       gmm::add(scaled(r2, -1), r);
 | |
|       if ((iter.get_iteration() % restart) == 0 || blocked) { 
 | |
| 	if (iter.get_noisy() >= 1) cout << "Restart\n";
 | |
| 	invhessian.restart();
 | |
| 	if (++nb_restart > 10) {
 | |
| 	  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
 | |
| 	  return;
 | |
| 	}
 | |
|       }
 | |
|       else {
 | |
| 	invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
 | |
| 	nb_restart = 0;
 | |
|       }
 | |
|       copy(r2, r); copy(y, x); valx = valy;
 | |
|       if (iter.get_noisy() >= 1)
 | |
| 	cout << "BFGS value " << valx/print_norm << "\t";
 | |
|     }
 | |
| 
 | |
|   }
 | |
| 
 | |
| 
 | |
|   template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
 | |
|   inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
 | |
| 	    int restart, iteration& iter, int version = 1) {
 | |
|     bfgs(f, grad, x, restart, iter, version);
 | |
| 
 | |
|   }
 | |
| 
 | |
| 
 | |
| }
 | |
| 
 | |
| #endif 
 | |
| 
 |