From 5149128b60a750cc4acf97399d2201834ac3a584 Mon Sep 17 00:00:00 2001
From: Jack Andersen <jackoalan@gmail.com>
Date: Mon, 16 Oct 2017 19:51:53 -1000
Subject: [PATCH] DCLN cooking and various bug fixes

---
 DataSpec/DNACommon/CMakeLists.txt             |    1 +
 DataSpec/DNACommon/DeafBabe.cpp               |   46 +-
 DataSpec/DNACommon/OBBTreeBuilder.cpp         |  256 ++
 DataSpec/DNACommon/OBBTreeBuilder.hpp         |   18 +
 DataSpec/DNAMP1/DCLN.hpp                      |   80 +-
 DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp  |    2 +-
 DataSpec/SpecBase.cpp                         |    6 +
 DataSpec/SpecBase.hpp                         |    3 +
 DataSpec/SpecMP1.cpp                          |   11 +
 DataSpec/SpecMP2.cpp                          |    6 +
 DataSpec/SpecMP3.cpp                          |    6 +
 Editor/ViewManager.cpp                        |    1 +
 GMM-LICENSE                                   |   13 +
 Runtime/Camera/CBallCamera.cpp                |    6 +-
 Runtime/Graphics/CBooRenderer.hpp             |    2 +
 Runtime/Graphics/CModel.hpp                   |   18 +-
 Runtime/Graphics/CModelBoo.cpp                |   88 +-
 .../Graphics/Shaders/CModelShadersGLSL.cpp    |    2 +-
 .../Graphics/Shaders/CModelShadersHLSL.cpp    |    2 +-
 .../Graphics/Shaders/CModelShadersMetal.cpp   |    4 +-
 .../Shaders/CParticleSwooshShadersGLSL.cpp    |    2 +-
 .../Shaders/CParticleSwooshShadersHLSL.cpp    |    2 +-
 .../Shaders/CParticleSwooshShadersMetal.cpp   |    2 +-
 Runtime/IMain.hpp                             |    3 +-
 Runtime/MP1/MP1.cpp                           |   65 +-
 Runtime/MP1/MP1.hpp                           |    5 +
 Runtime/World/CGameArea.cpp                   |   38 +
 Runtime/World/CGameArea.hpp                   |    3 +
 gmm/gmm.h                                     |   54 +
 gmm/gmm_MUMPS_interface.h                     |  355 +++
 gmm/gmm_algobase.h                            |  228 ++
 gmm/gmm_blas.h                                | 2221 +++++++++++++++++
 gmm/gmm_blas_interface.h                      |  948 +++++++
 gmm/gmm_condition_number.h                    |  147 ++
 gmm/gmm_conjugated.h                          |  398 +++
 gmm/gmm_def.h                                 | 1123 +++++++++
 gmm/gmm_dense_Householder.h                   |  317 +++
 gmm/gmm_dense_lu.h                            |  250 ++
 gmm/gmm_dense_matrix_functions.h              |  302 +++
 gmm/gmm_dense_qr.h                            |  789 ++++++
 gmm/gmm_dense_sylvester.h                     |  174 ++
 gmm/gmm_domain_decomp.h                       |  165 ++
 gmm/gmm_except.h                              |  328 +++
 gmm/gmm_inoutput.h                            | 1176 +++++++++
 gmm/gmm_interface.h                           | 1068 ++++++++
 gmm/gmm_interface_bgeot.h                     |   83 +
 gmm/gmm_iter.h                                |  162 ++
 gmm/gmm_iter_solvers.h                        |  111 +
 gmm/gmm_kernel.h                              |   55 +
 gmm/gmm_lapack_interface.h                    |  470 ++++
 gmm/gmm_least_squares_cg.h                    |   96 +
 gmm/gmm_matrix.h                              | 1199 +++++++++
 gmm/gmm_modified_gram_schmidt.h               |  127 +
 gmm/gmm_opt.h                                 |  128 +
 gmm/gmm_precond.h                             |   65 +
 gmm/gmm_precond_diagonal.h                    |  132 +
 gmm/gmm_precond_ildlt.h                       |  241 ++
 gmm/gmm_precond_ildltt.h                      |  174 ++
 gmm/gmm_precond_ilu.h                         |  280 +++
 gmm/gmm_precond_ilut.h                        |  263 ++
 gmm/gmm_precond_ilutp.h                       |  284 +++
 gmm/gmm_precond_mr_approx_inverse.h           |  149 ++
 gmm/gmm_range_basis.h                         |  499 ++++
 gmm/gmm_real_part.h                           |  605 +++++
 gmm/gmm_ref.h                                 |  526 ++++
 gmm/gmm_scaled.h                              |  434 ++++
 gmm/gmm_solver_Schwarz_additive.h             |  805 ++++++
 gmm/gmm_solver_bfgs.h                         |  210 ++
 gmm/gmm_solver_bicgstab.h                     |  160 ++
 gmm/gmm_solver_cg.h                           |  180 ++
 gmm/gmm_solver_constrained_cg.h               |  165 ++
 gmm/gmm_solver_gmres.h                        |  173 ++
 gmm/gmm_solver_idgmres.h                      |  805 ++++++
 gmm/gmm_solver_qmr.h                          |  210 ++
 gmm/gmm_std.h                                 |  424 ++++
 gmm/gmm_sub_index.h                           |  224 ++
 gmm/gmm_sub_matrix.h                          |  406 +++
 gmm/gmm_sub_vector.h                          |  560 +++++
 gmm/gmm_superlu_interface.h                   |  410 +++
 gmm/gmm_transposed.h                          |  244 ++
 gmm/gmm_tri_solve.h                           |  222 ++
 gmm/gmm_vector.h                              | 1571 ++++++++++++
 gmm/gmm_vector_to_matrix.h                    |  340 +++
 hecl                                          |    2 +-
 84 files changed, 23876 insertions(+), 52 deletions(-)
 create mode 100644 DataSpec/DNACommon/OBBTreeBuilder.cpp
 create mode 100644 DataSpec/DNACommon/OBBTreeBuilder.hpp
 create mode 100644 GMM-LICENSE
 create mode 100644 gmm/gmm.h
 create mode 100644 gmm/gmm_MUMPS_interface.h
 create mode 100644 gmm/gmm_algobase.h
 create mode 100644 gmm/gmm_blas.h
 create mode 100644 gmm/gmm_blas_interface.h
 create mode 100644 gmm/gmm_condition_number.h
 create mode 100644 gmm/gmm_conjugated.h
 create mode 100644 gmm/gmm_def.h
 create mode 100644 gmm/gmm_dense_Householder.h
 create mode 100644 gmm/gmm_dense_lu.h
 create mode 100644 gmm/gmm_dense_matrix_functions.h
 create mode 100644 gmm/gmm_dense_qr.h
 create mode 100644 gmm/gmm_dense_sylvester.h
 create mode 100644 gmm/gmm_domain_decomp.h
 create mode 100644 gmm/gmm_except.h
 create mode 100644 gmm/gmm_inoutput.h
 create mode 100644 gmm/gmm_interface.h
 create mode 100644 gmm/gmm_interface_bgeot.h
 create mode 100644 gmm/gmm_iter.h
 create mode 100644 gmm/gmm_iter_solvers.h
 create mode 100644 gmm/gmm_kernel.h
 create mode 100644 gmm/gmm_lapack_interface.h
 create mode 100644 gmm/gmm_least_squares_cg.h
 create mode 100644 gmm/gmm_matrix.h
 create mode 100644 gmm/gmm_modified_gram_schmidt.h
 create mode 100644 gmm/gmm_opt.h
 create mode 100644 gmm/gmm_precond.h
 create mode 100644 gmm/gmm_precond_diagonal.h
 create mode 100644 gmm/gmm_precond_ildlt.h
 create mode 100644 gmm/gmm_precond_ildltt.h
 create mode 100644 gmm/gmm_precond_ilu.h
 create mode 100644 gmm/gmm_precond_ilut.h
 create mode 100644 gmm/gmm_precond_ilutp.h
 create mode 100644 gmm/gmm_precond_mr_approx_inverse.h
 create mode 100644 gmm/gmm_range_basis.h
 create mode 100644 gmm/gmm_real_part.h
 create mode 100644 gmm/gmm_ref.h
 create mode 100644 gmm/gmm_scaled.h
 create mode 100644 gmm/gmm_solver_Schwarz_additive.h
 create mode 100644 gmm/gmm_solver_bfgs.h
 create mode 100644 gmm/gmm_solver_bicgstab.h
 create mode 100644 gmm/gmm_solver_cg.h
 create mode 100644 gmm/gmm_solver_constrained_cg.h
 create mode 100644 gmm/gmm_solver_gmres.h
 create mode 100644 gmm/gmm_solver_idgmres.h
 create mode 100644 gmm/gmm_solver_qmr.h
 create mode 100644 gmm/gmm_std.h
 create mode 100644 gmm/gmm_sub_index.h
 create mode 100644 gmm/gmm_sub_matrix.h
 create mode 100644 gmm/gmm_sub_vector.h
 create mode 100644 gmm/gmm_superlu_interface.h
 create mode 100644 gmm/gmm_transposed.h
 create mode 100644 gmm/gmm_tri_solve.h
 create mode 100644 gmm/gmm_vector.h
 create mode 100644 gmm/gmm_vector_to_matrix.h

diff --git a/DataSpec/DNACommon/CMakeLists.txt b/DataSpec/DNACommon/CMakeLists.txt
index 576397aa5..cb12614a9 100644
--- a/DataSpec/DNACommon/CMakeLists.txt
+++ b/DataSpec/DNACommon/CMakeLists.txt
@@ -34,6 +34,7 @@ set(DNACOMMON_SOURCES
     BabeDead.hpp BabeDead.cpp
     RigInverter.hpp RigInverter.cpp
     AROTBuilder.hpp AROTBuilder.cpp
+    OBBTreeBuilder.hpp OBBTreeBuilder.cpp
     Tweaks/ITweak.hpp
     Tweaks/TweakWriter.hpp
     Tweaks/ITweakGame.hpp
diff --git a/DataSpec/DNACommon/DeafBabe.cpp b/DataSpec/DNACommon/DeafBabe.cpp
index eaf384e5e..1f7ce6a94 100644
--- a/DataSpec/DNACommon/DeafBabe.cpp
+++ b/DataSpec/DNACommon/DeafBabe.cpp
@@ -83,16 +83,40 @@ template void DeafBabeSendToBlender<DNAMP1::DeafBabe>(hecl::BlenderConnection::P
 template void DeafBabeSendToBlender<DNAMP2::DeafBabe>(hecl::BlenderConnection::PyOutStream& os, const DNAMP2::DeafBabe& db, bool isDcln, atInt32 idx);
 template void DeafBabeSendToBlender<DNAMP1::DCLN::Collision>(hecl::BlenderConnection::PyOutStream& os, const DNAMP1::DCLN::Collision& db, bool isDcln, atInt32 idx);
 
+template<class DEAFBABE> // overload selected when DEAFBABE is DNAMP1::DeafBabe or DNAMP2::DeafBabe
+static void PopulateAreaFields(DEAFBABE& db,
+    const hecl::BlenderConnection::DataStream::ColMesh& colMesh,
+    const zeus::CAABox& fullAABB,
+    std::enable_if_t<std::is_same<DEAFBABE, DNAMP1::DeafBabe>::value ||
+                     std::is_same<DEAFBABE, DNAMP2::DeafBabe>::value, int>* = 0)
+{
+    AROTBuilder builder; // builds the tree data stored in db.bspTree / db.bspSize
+    auto octree = builder.buildCol(colMesh, db.rootNodeType);
+    static_cast<std::unique_ptr<atUint8[]>&>(db.bspTree) = std::move(octree.first);
+    db.bspSize = octree.second;
+    // header fields; length is taken from binarySize() after the tree has been attached
+    db.unk1 = 0x1000000;
+    db.length = db.binarySize(0) - 8;
+    db.magic = 0xDEAFBABE;
+    db.version = 3;
+    db.aabb[0] = fullAABB.min;
+    db.aabb[1] = fullAABB.max;
+}
+
+template<class DEAFBABE> // overload selected when DEAFBABE is DNAMP1::DCLN::Collision
+static void PopulateAreaFields(DEAFBABE& db,
+    const hecl::BlenderConnection::DataStream::ColMesh& colMesh, // not read by this overload
+    const zeus::CAABox& fullAABB, // not read by this overload
+    std::enable_if_t<std::is_same<DEAFBABE, DNAMP1::DCLN::Collision>::value, int>* = 0)
+{
+    db.magic = 0xDEAFBABE;
+    db.version = 2;
+    db.memSize = 0; // zeroed here; DCLN::Cook assigns memSize from the OBB tree afterwards
+}
+
 template<class DEAFBABE>
 void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh)
 {
-    {
-        AROTBuilder builder;
-        auto octree = builder.buildCol(colMesh, db.rootNodeType);
-        static_cast<std::unique_ptr<atUint8[]>&>(db.bspTree) = std::move(octree.first);
-        db.bspSize = octree.second;
-    }
-
     db.materials.reserve(colMesh.materials.size());
     for (const hecl::BlenderConnection::DataStream::ColMesh::Material& mat : colMesh.materials)
     {
@@ -186,15 +210,11 @@ void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataS
     db.triMatsCount = colMesh.trianges.size();
     db.triangleEdgesCount = colMesh.trianges.size() * 3;
 
-    db.unk1 = 0x1000000;
-    db.length = db.binarySize(0) - 8;
-    db.magic = 0xDEAFBABE;
-    db.version = 3;
-    db.aabb[0] = fullAABB.min;
-    db.aabb[1] = fullAABB.max;
+    PopulateAreaFields(db, colMesh, fullAABB);
 }
 
 template void DeafBabeBuildFromBlender<DNAMP1::DeafBabe>(DNAMP1::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);
 template void DeafBabeBuildFromBlender<DNAMP2::DeafBabe>(DNAMP2::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);
+template void DeafBabeBuildFromBlender<DNAMP1::DCLN::Collision>(DNAMP1::DCLN::Collision& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);
 
 }
diff --git a/DataSpec/DNACommon/OBBTreeBuilder.cpp b/DataSpec/DNACommon/OBBTreeBuilder.cpp
new file mode 100644
index 000000000..dce9fb3e0
--- /dev/null
+++ b/DataSpec/DNACommon/OBBTreeBuilder.cpp
@@ -0,0 +1,256 @@
+#include <athena/Types.hpp>
+#include "OBBTreeBuilder.hpp"
+#include "zeus/CTransform.hpp"
+#include "DataSpec/DNAMP1/DCLN.hpp"
+#include "gmm/gmm.h"
+
+namespace DataSpec
+{
+
+using ColMesh = hecl::BlenderConnection::DataStream::ColMesh;
+
+struct FittedOBB // result of fitting an oriented bounding box to a set of triangles
+{
+    zeus::CTransform xf; // basis holds the box axes, origin holds the box center
+    zeus::CVector3f he; // half of the box extent along each axis
+};
+
+static std::vector<int> MakeRootTriangleIndex(const ColMesh& mesh)
+{
+    // identity index: entry t refers to triangle t of the mesh
+    std::vector<int> all(mesh.trianges.size());
+    for (size_t t = 0; t < all.size(); ++t)
+        all[t] = int(t);
+    return all;
+}
+
+static std::unordered_set<uint32_t> GetTriangleVerts(const ColMesh& mesh, int triIdx)
+{
+    const ColMesh::Triangle& tri = mesh.trianges[triIdx];
+    std::unordered_set<uint32_t> vertSet;
+    for (int e = 0; e < 3; ++e) // both endpoints of each of the three edges, deduplicated
+    {
+        const auto& edge = mesh.edges[tri.edges[e]];
+        vertSet.insert(edge.verts[0]);
+        vertSet.insert(edge.verts[1]);
+    }
+    return vertSet;
+}
+
+// Builds an OBB whose axes are the eigenvectors of the covariance matrix C and
+// whose extents contain every vertex of the triangles listed in index.
+static FittedOBB BuildFromCovarianceMatrix(gmm::dense_matrix<float>& C,
+                                           const ColMesh& mesh, const std::vector<int>& index)
+{
+    FittedOBB ret;
+
+    // extract the eigenvalues and eigenvectors from C
+    gmm::dense_matrix<float> eigvec(3,3);
+    std::vector<float> eigval(3);
+    gmm::symmetric_qr_algorithm(C, eigval, eigvec);
+
+    // take the right, up and forward vectors from the columns of eigvec
+    zeus::CVector3f r(eigvec(0,0), eigvec(1,0), eigvec(2,0));
+    zeus::CVector3f u(eigvec(0,1), eigvec(1,1), eigvec(2,1));
+    zeus::CVector3f f(eigvec(0,2), eigvec(1,2), eigvec(2,2));
+    r.normalize(); u.normalize(), f.normalize();
+
+    // set the rotation matrix using the eigenvectors
+    ret.xf.basis[0][0]=r.x; ret.xf.basis[1][0]=u.x; ret.xf.basis[2][0]=f.x;
+    ret.xf.basis[0][1]=r.y; ret.xf.basis[1][1]=u.y; ret.xf.basis[2][1]=f.y;
+    ret.xf.basis[0][2]=r.z; ret.xf.basis[1][2]=u.z; ret.xf.basis[2][2]=f.z;
+
+    // now build the bounding box extents in the rotated frame
+    zeus::CVector3f minim(1e10f, 1e10f, 1e10f), maxim(-1e10f, -1e10f, -1e10f);
+    for (int triIdx : index)
+    {
+        std::unordered_set<uint32_t> verts = GetTriangleVerts(mesh, triIdx);
+        for (uint32_t v : verts)
+        {
+            const zeus::CVector3f& p = mesh.verts[v].val;
+            zeus::CVector3f p_prime(r.dot(p), u.dot(p), f.dot(p));
+            minim = zeus::min(minim, p_prime);
+            maxim = zeus::max(maxim, p_prime);
+        }
+    }
+
+    // set the center of the OBB to be the average of the
+    // minimum and maximum (mapped through the basis), and the
+    // extents to be half of the difference between them
+    zeus::CVector3f center = (maxim + minim) * 0.5f;
+    ret.xf.origin = ret.xf.basis * center;
+    ret.he = (maxim - minim) * 0.5f;
+
+    return ret;
+}
+
+// Fits an OBB to the triangles of mesh selected by index (indices into
+// mesh.trianges). Accumulates the area-weighted centroid and second moments
+// of the triangles, forms the symmetric 3x3 covariance matrix from them and
+// passes it to BuildFromCovarianceMatrix() to orient and size the box.
+// NOTE(review): if no triangle contributes area (empty index or only
+// degenerate triangles) Am stays zero and the divisions below are undefined.
+static FittedOBB FitOBB(const ColMesh& mesh, const std::vector<int>& index)
+{
+    float Ai, Am=0.0;
+    zeus::CVector3f mu, mui;
+    gmm::dense_matrix<float> C(3,3);
+    float cxx=0.0, cxy=0.0, cxz=0.0, cyy=0.0, cyz=0.0, czz=0.0;
+
+    // loop over the triangles to accumulate the area-weighted
+    // mean location and second moments
+    for (int i : index)
+    {
+        std::unordered_set<uint32_t> verts = GetTriangleVerts(mesh, i);
+        if (verts.size() < 3) continue; // degenerate: fewer than 3 distinct vertices, no area
+        auto it = verts.begin();
+        zeus::CVector3f p = mesh.verts[*it++].val;
+        zeus::CVector3f q = mesh.verts[*it++].val;
+        zeus::CVector3f r = mesh.verts[*it++].val;
+        mui = (p+q+r)/3.f;
+        Ai = (q-p).cross(r-p).magnitude() / 2.f;
+        mu += mui*Ai;
+        Am += Ai;
+        // these bits set the c terms to Am*E[xx], Am*E[xy], Am*E[xz]....
+        cxx += ( 9.0*mui.x*mui.x + p.x*p.x + q.x*q.x + r.x*r.x )*(Ai/12.0);
+        cxy += ( 9.0*mui.x*mui.y + p.x*p.y + q.x*q.y + r.x*r.y )*(Ai/12.0);
+        cxz += ( 9.0*mui.x*mui.z + p.x*p.z + q.x*q.z + r.x*r.z )*(Ai/12.0);
+        cyy += ( 9.0*mui.y*mui.y + p.y*p.y + q.y*q.y + r.y*r.y )*(Ai/12.0);
+        cyz += ( 9.0*mui.y*mui.z + p.y*p.z + q.y*q.z + r.y*r.z )*(Ai/12.0);
+        czz += ( 9.0*mui.z*mui.z + p.z*p.z + q.z*q.z + r.z*r.z )*(Ai/12.0);
+    }
+    // divide out the Am fraction from the average position and
+    // covariance terms
+    mu = mu / Am;
+    cxx /= Am; cxy /= Am; cxz /= Am; cyy /= Am; cyz /= Am; czz /= Am;
+
+    // now subtract off the E[x]*E[x], E[x]*E[y], ... terms
+    cxx -= mu.x*mu.x; cxy -= mu.x*mu.y; cxz -= mu.x*mu.z;
+    cyy -= mu.y*mu.y; cyz -= mu.y*mu.z; czz -= mu.z*mu.z;
+
+    // now build the covariance matrix; it must be symmetric for the eigen-solver
+    C(0,0)=cxx; C(0,1)=cxy; C(0,2)=cxz;
+    C(1,0)=cxy; C(1,1)=cyy; C(1,2)=cyz;
+    C(2,0)=cxz; C(2,1)=cyz; C(2,2)=czz;
+
+    // set the obb parameters from the covariance matrix
+    return BuildFromCovarianceMatrix(C, mesh, index);
+}
+
+template <typename Node>
+static void MakeLeaf(const ColMesh& mesh, const std::vector<int>& index, Node& n)
+{
+    // a leaf drops any children and stores the three edge indices of each of its triangles
+    n.left.reset();
+    n.right.reset();
+    n.isLeaf = true;
+
+    n.leafData = std::make_unique<typename Node::LeafData>();
+    auto& leaf = *n.leafData;
+    leaf.edgeIndexCount = atUint32(index.size() * 3);
+    leaf.edgeIndices.reserve(leaf.edgeIndexCount);
+    for (int triIdx : index)
+        for (int e = 0; e < 3; ++e)
+            leaf.edgeIndices.push_back(mesh.trianges[triIdx].edges[e]);
+}
+
+template <typename Node> // recursively builds the OBB subtree for the triangles listed in index
+static std::unique_ptr<Node> RecursiveMakeNode(const ColMesh& mesh, const std::vector<int>& index)
+{
+    // fit an OBB around every triangle referenced by index
+    FittedOBB obb = FitOBB(mesh, index);
+
+    // make results row-major and also invert the rotation basis
+    obb.xf.basis.transpose();
+    // pack basis[i] into xf[i], with origin[i] stored in the fourth component
+    std::unique_ptr<Node> n = std::make_unique<Node>();
+    for (int i = 0; i < 3; ++i)
+    {
+        n->xf[i] = zeus::CVector4f{obb.xf.basis[i]};
+        n->xf[i].vec[3] = obb.xf.origin[i];
+    }
+    n->halfExtent = obb.he;
+
+    // terminate branch when the product of the half-extents drops below 1.0
+    if (obb.he[0] * obb.he[1] * obb.he[2] < 1.f)
+    {
+        MakeLeaf(mesh, index, *n);
+        return n;
+    }
+
+    n->isLeaf = false;
+    // for each box axis c, bucket the triangles by which side of the box center they touch
+    std::vector<int> indexNeg[3];
+    std::vector<int> indexPos[3];
+    for (int c = 0; c < 3; ++c)
+    {
+        // negative side: triangles with at least one vertex whose box-space component c is < 0
+        indexNeg[c].reserve(index.size());
+        for (int i : index)
+        {
+            std::unordered_set<uint32_t> verts = GetTriangleVerts(mesh, i);
+            for (uint32_t vtx : verts)
+            {
+                zeus::CVector3f v = mesh.verts[vtx].val;
+                v = obb.xf.basis * (v - obb.xf.origin);
+                if (v[c] < 0.f)
+                {
+                    indexNeg[c].push_back(i);
+                    break;
+                }
+            }
+        }
+
+        // positive side: at least one vertex with component c >= 0 (a triangle may land in both lists)
+        indexPos[c].reserve(index.size());
+        for (int i : index)
+        {
+            std::unordered_set<uint32_t> verts = GetTriangleVerts(mesh, i);
+            for (uint32_t vtx : verts)
+            {
+                zeus::CVector3f v = mesh.verts[vtx].val;
+                v = obb.xf.basis * (v - obb.xf.origin);
+                if (v[c] >= 0.f)
+                {
+                    indexPos[c].push_back(i);
+                    break;
+                }
+            }
+        }
+    }
+    // pick the axis whose larger side is smallest, requiring it to be under 3/4 of the input
+    size_t idxMin = index.size();
+    int minComp = -1;
+    for (int c = 0; c < 3; ++c)
+    {
+        size_t test = std::max(indexNeg[c].size(), indexPos[c].size());
+        if (test < idxMin && test < index.size() * 3 / 4)
+        {
+            minComp = c;
+            idxMin = test;
+        }
+    }
+    // no axis met the split criterion: keep all triangles in a single leaf
+    if (minComp == -1)
+    {
+        MakeLeaf(mesh, index, *n);
+        return n;
+    }
+
+    n->left = RecursiveMakeNode<Node>(mesh, indexNeg[minComp]);
+    n->right = RecursiveMakeNode<Node>(mesh, indexPos[minComp]);
+
+    return n;
+}
+
+template <typename Node>
+std::unique_ptr<Node> OBBTreeBuilder::buildCol(const ColMesh& mesh)
+{
+    // start from an index covering every triangle and subdivide recursively
+    return RecursiveMakeNode<Node>(mesh, MakeRootTriangleIndex(mesh));
+}
+
+template std::unique_ptr<DNAMP1::DCLN::Collision::Node>
+OBBTreeBuilder::buildCol<DNAMP1::DCLN::Collision::Node>(const ColMesh& mesh);
+
+}
diff --git a/DataSpec/DNACommon/OBBTreeBuilder.hpp b/DataSpec/DNACommon/OBBTreeBuilder.hpp
new file mode 100644
index 000000000..612ac894d
--- /dev/null
+++ b/DataSpec/DNACommon/OBBTreeBuilder.hpp
@@ -0,0 +1,18 @@
+#ifndef DNACOMMON_OBBTREEBUILDER_HPP
+#define DNACOMMON_OBBTREEBUILDER_HPP
+
+#include "DNACommon.hpp"
+
+namespace DataSpec
+{
+
+struct OBBTreeBuilder // static helper that builds an OBB tree from a collision mesh
+{
+    using ColMesh = hecl::BlenderConnection::DataStream::ColMesh;
+    template <typename Node> // Node: tree node type to build (instantiated for DNAMP1::DCLN::Collision::Node)
+    static std::unique_ptr<Node> buildCol(const ColMesh& mesh); // returns the root node of the tree
+};
+
+}
+
+#endif // DNACOMMON_OBBTREEBUILDER_HPP
diff --git a/DataSpec/DNAMP1/DCLN.hpp b/DataSpec/DNAMP1/DCLN.hpp
index 9a227b539..b73cb29cc 100644
--- a/DataSpec/DNAMP1/DCLN.hpp
+++ b/DataSpec/DNAMP1/DCLN.hpp
@@ -1,8 +1,10 @@
 #ifndef __DNAMP1_DCLN_HPP__
 #define __DNAMP1_DCLN_HPP__
 
+#include <athena/Types.hpp>
 #include "../DNACommon/DeafBabe.hpp"
 #include "../DNACommon/PAK.hpp"
+#include "../DNACommon/OBBTreeBuilder.hpp"
 #include "DNAMP1.hpp"
 #include "DeafBabe.hpp"
 
@@ -13,6 +15,8 @@ namespace DNAMP1
 
 struct DCLN : BigDNA
 {
+    using Mesh = hecl::BlenderConnection::DataStream::ColMesh;
+
     DECL_DNA
     Value<atUint32> colCount;
     struct Collision : BigDNA
@@ -40,19 +44,20 @@ struct DCLN : BigDNA
         Value<atUint32> vertCount;
         Vector<atVec3f, DNA_COUNT(vertCount)> verts;
 
-        struct LeafData : BigDNA
-        {
-            DECL_DNA
-            Value<atUint32> edgeIndexCount;
-            Vector<atUint16, DNA_COUNT(edgeIndexCount)> edgeIndices;
-            size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; }
-        };
-
         struct Node : BigDNA
         {
             Delete _d;
+
+            struct LeafData : BigDNA
+            {
+                DECL_DNA
+                Value<atUint32> edgeIndexCount;
+                Vector<atUint16, DNA_COUNT(edgeIndexCount)> edgeIndices;
+                size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; }
+            };
+
             Value<atVec4f> xf[3];
-            Value<atVec3f> origin;
+            Value<atVec3f> halfExtent;
             Value<bool> isLeaf;
             std::unique_ptr<LeafData> leafData;
             std::unique_ptr<Node> left;
@@ -63,7 +68,7 @@ struct DCLN : BigDNA
                 xf[0] = __dna_reader.readVec4fBig();
                 xf[1] = __dna_reader.readVec4fBig();
                 xf[2] = __dna_reader.readVec4fBig();
-                origin = __dna_reader.readVec3fBig();
+                halfExtent = __dna_reader.readVec3fBig();
                 isLeaf = __dna_reader.readBool();
                 if (isLeaf)
                 {
@@ -84,7 +89,7 @@ struct DCLN : BigDNA
                 __dna_writer.writeVec4fBig(xf[0]);
                 __dna_writer.writeVec4fBig(xf[1]);
                 __dna_writer.writeVec4fBig(xf[2]);
-                __dna_writer.writeVec3fBig(origin);
+                __dna_writer.writeVec3fBig(halfExtent);
                 __dna_writer.writeBool(isLeaf);
                 if (isLeaf && leafData)
                     leafData->write(__dna_writer);
@@ -121,6 +126,30 @@ struct DCLN : BigDNA
 
                 return (ret + 3) & ~3;
             }
+
+            void sendToBlender(hecl::BlenderConnection::PyOutStream& os) const
+            { // emits a cube-drawn empty object for this node's box, then recurses into child nodes
+                os.format("obj = bpy.data.objects.new('%s', None)\n"
+                          "obj.empty_draw_type = 'CUBE'\n"
+                          "bpy.context.scene.objects.link(obj)\n"
+                          "mtx = Matrix(((%f,%f,%f,%f),(%f,%f,%f,%f),(%f,%f,%f,%f),(0.0,0.0,0.0,1.0)))\n"
+                          "mtxd = mtx.decompose()\n"
+                          "obj.rotation_mode = 'QUATERNION'\n"
+                          "obj.location = mtxd[0]\n"
+                          "obj.rotation_quaternion = mtxd[1]\n"
+                          "obj.scale = (%f,%f,%f)\n", isLeaf ? "leaf" : "branch",
+                          xf[0].vec[0], xf[0].vec[1], xf[0].vec[2], xf[0].vec[3],
+                          xf[1].vec[0], xf[1].vec[1], xf[1].vec[2], xf[1].vec[3],
+                          xf[2].vec[0], xf[2].vec[1], xf[2].vec[2], xf[2].vec[3],
+                          halfExtent.vec[0], halfExtent.vec[1], halfExtent.vec[2]);
+                if (isLeaf)
+                    os << "obj.show_name = True\n";
+                // null-check children (as write() does for leafData) so a childless branch is not dereferenced
+                if (!isLeaf && left)
+                    left->sendToBlender(os);
+                if (!isLeaf && right)
+                    right->sendToBlender(os);
+            }
         };
         Node root;
         size_t getMemoryUsage()
@@ -141,7 +170,8 @@ struct DCLN : BigDNA
         hecl::BlenderConnection::PyOutStream os = conn.beginPythonOut(true);
         os.format("import bpy\n"
                   "import bmesh\n"
-                  "from mathutils import Vector\n"
+                  "from mathutils import Vector, Matrix\n"
+
                   "\n"
                   "bpy.context.scene.name = '%s'\n"
                   "# Clear Scene\n"
@@ -154,7 +184,10 @@ struct DCLN : BigDNA
         DeafBabe::BlenderInit(os);
         atInt32 idx = 0;
         for (const Collision& col : collision)
+        {
             DeafBabeSendToBlender(os, col, true, idx++);
+            col.root.sendToBlender(os);
+        }
         os.centerView();
         os.close();
     }
@@ -171,12 +204,33 @@ struct DCLN : BigDNA
         DCLN dcln;
         dcln.read(rs);
         hecl::BlenderConnection& conn = btok.getBlenderConnection();
-        if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::Mesh))
+        if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::ColMesh))
             return false;
 
         dcln.sendToBlender(conn, pakRouter.getBestEntryName(entry, false));
         return conn.saveBlend();
     }
+
+    static bool Cook(const hecl::ProjectPath& outPath,
+                     const hecl::ProjectPath& inPath,
+                     const std::vector<Mesh>& meshes,
+                     hecl::BlenderConnection* conn = nullptr)
+    {
+        // one Collision entry per input mesh: collision geometry plus its OBB tree
+        DCLN cooked;
+        cooked.colCount = atUint32(meshes.size());
+        for (const Mesh& srcMesh : meshes)
+        {
+            cooked.collision.emplace_back();
+            Collision& entry = cooked.collision.back();
+            DeafBabeBuildFromBlender(entry, srcMesh);
+            entry.root = std::move(*OBBTreeBuilder::buildCol<Collision::Node>(srcMesh));
+            entry.memSize = atUint32(entry.root.getMemoryUsage());
+        }
+        athena::io::FileWriter writer(outPath.getAbsolutePath());
+        cooked.write(writer);
+        return true;
+    }
 };
 
 }
diff --git a/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp b/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp
index 371ee9165..c0c42441e 100644
--- a/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp
+++ b/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp
@@ -22,7 +22,7 @@ struct CameraHint : IScriptObject
     {
         DECL_YAML
         Value<atUint32> propertyCount;
-        Value<bool> unknown1; // 0x1
+        Value<bool> calculateCamPos; // 0x1
         Value<bool> chaseAllowed; // 0x2
         Value<bool> boostAllowed; // 0x4
         Value<bool> obscureAvoidance; // 0x8
diff --git a/DataSpec/SpecBase.cpp b/DataSpec/SpecBase.cpp
index cfcf36cd8..c52d9e873 100644
--- a/DataSpec/SpecBase.cpp
+++ b/DataSpec/SpecBase.cpp
@@ -249,6 +249,12 @@ void SpecBase::doCook(const hecl::ProjectPath& path, const hecl::ProjectPath& co
             cookMesh(cookedPath, path, ds, fast, btok, progress);
             break;
         }
+        case hecl::BlenderConnection::BlendType::ColMesh:
+        {
+            hecl::BlenderConnection::DataStream ds = conn.beginData();
+            cookColMesh(cookedPath, path, ds, fast, btok, progress);
+            break;
+        }
         case hecl::BlenderConnection::BlendType::Actor:
         {
             hecl::BlenderConnection::DataStream ds = conn.beginData();
diff --git a/DataSpec/SpecBase.hpp b/DataSpec/SpecBase.hpp
index d9c8bb115..2c93382dd 100644
--- a/DataSpec/SpecBase.hpp
+++ b/DataSpec/SpecBase.hpp
@@ -71,6 +71,9 @@ struct SpecBase : hecl::Database::IDataSpec
     virtual void cookMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                           BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                           FCookProgress progress)=0;
+    virtual void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                             BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                             FCookProgress progress)=0;
     virtual void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                            BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                            FCookProgress progress)=0;
diff --git a/DataSpec/SpecMP1.cpp b/DataSpec/SpecMP1.cpp
index 44618df66..b1a943a09 100644
--- a/DataSpec/SpecMP1.cpp
+++ b/DataSpec/SpecMP1.cpp
@@ -10,6 +10,7 @@
 #include "DNAMP1/STRG.hpp"
 #include "DNAMP1/SCAN.hpp"
 #include "DNAMP1/CMDL.hpp"
+#include "DNAMP1/DCLN.hpp"
 #include "DNAMP1/MREA.hpp"
 #include "DNAMP1/ANCS.hpp"
 #include "DNAMP1/AGSC.hpp"
@@ -555,6 +556,8 @@ struct SpecMP1 : SpecBase
             {
             case hecl::BlenderConnection::BlendType::Mesh:
                 return {SBIG('CMDL'), path.hash().val32()};
+            case hecl::BlenderConnection::BlendType::ColMesh:
+                return {SBIG('DCLN'), path.hash().val32()};
             case hecl::BlenderConnection::BlendType::Actor:
                 if (path.getAuxInfo().size())
                 {
@@ -728,6 +731,14 @@ struct SpecMP1 : SpecBase
             DNAMP1::CMDL::Cook(out, in, mesh);
     }
 
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast,
+                     hecl::BlenderToken& btok, FCookProgress progress)
+    {
+        std::vector<ColMesh> colMeshes = ds.compileColMeshes();
+        ds.close(); // finished with the Blender data stream before cooking
+        DNAMP1::DCLN::Cook(out, in, colMeshes);
+    }
+
     void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast,
                    hecl::BlenderToken& btok, FCookProgress progress)
     {
diff --git a/DataSpec/SpecMP2.cpp b/DataSpec/SpecMP2.cpp
index 17b709a72..f9cddac9d 100644
--- a/DataSpec/SpecMP2.cpp
+++ b/DataSpec/SpecMP2.cpp
@@ -329,6 +329,12 @@ struct SpecMP2 : SpecBase
     {
     }
 
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                     BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                     FCookProgress progress)
+    { // no-op: this spec performs no collision-mesh cooking and leaves ds untouched
+    }
+
     void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                    BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                    FCookProgress progress)
diff --git a/DataSpec/SpecMP3.cpp b/DataSpec/SpecMP3.cpp
index bc8ba7836..b9e52f66a 100644
--- a/DataSpec/SpecMP3.cpp
+++ b/DataSpec/SpecMP3.cpp
@@ -523,6 +523,12 @@ struct SpecMP3 : SpecBase
     {
     }
 
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                     BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                     FCookProgress progress)
+    { // no-op: this spec performs no collision-mesh cooking and leaves ds untouched
+    }
+
     void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                    BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                    FCookProgress progress)
diff --git a/Editor/ViewManager.cpp b/Editor/ViewManager.cpp
index 5bdbc89e1..628831a76 100644
--- a/Editor/ViewManager.cpp
+++ b/Editor/ViewManager.cpp
@@ -37,6 +37,7 @@ void ViewManager::BuildTestPART()
 void ViewManager::InitMP1(MP1::CMain& main)
 {
     main.Init(m_fileStoreManager, m_mainWindow.get(), m_voiceEngine.get(), *m_amuseAllocWrapper);
+    main.WarmupShaders();
 }
 
 void ViewManager::TestGameView::resized(const boo::SWindowRect& root, const boo::SWindowRect& sub)
diff --git a/GMM-LICENSE b/GMM-LICENSE
new file mode 100644
index 000000000..65bf0a958
--- /dev/null
+++ b/GMM-LICENSE
@@ -0,0 +1,13 @@
+GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+under  the  terms  of the  GNU  Lesser General Public License as published
+by  the  Free  Software  Foundation;  either version 3 of the License,  or
+(at your option)  any  later  version  along  with the GCC Runtime Library
+Exception either version 3.1 or (at your option) any later version.
+This program  is  distributed  in  the  hope  that it will be useful,  but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License and the GCC Runtime Library Exception for more details.
+You  should  have received a copy of the GNU Lesser General Public License
+along   with    this    program    (see  GNU_GPL_V3,    GNU_LGPL_V3    and
+GNU_GCC_RUNTIME_EXCEPTION files);  if  not,  write  to  the  Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
\ No newline at end of file
diff --git a/Runtime/Camera/CBallCamera.cpp b/Runtime/Camera/CBallCamera.cpp
index d1514edf6..8a56f6f35 100644
--- a/Runtime/Camera/CBallCamera.cpp
+++ b/Runtime/Camera/CBallCamera.cpp
@@ -2423,9 +2423,9 @@ void CBallCamera::ApplyCameraHint(CStateManager& mgr)
             zeus::CVector3f camPos = mgr.GetPlayer().GetBallPosition() + hint->GetHint().GetBallToCam();
             if ((hint->GetHint().GetOverrideFlags() & 0x1) != 0)
             {
-                float f30 = hint->GetHint().GetBallToCam().toVec2f().magnitude();
-                zeus::CVector3f x23c = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized();
-                camPos = FindDesiredPosition(f30, hint->GetHint().GetBallToCam().z, x23c, mgr, false);
+                float distance = hint->GetHint().GetBallToCam().toVec2f().magnitude();
+                zeus::CVector3f camToBall = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized();
+                camPos = FindDesiredPosition(distance, hint->GetHint().GetBallToCam().z, camToBall, mgr, false);
             }
             TeleportCamera(zeus::lookAt(camPos, x1d8_lookPos), mgr);
             break;
diff --git a/Runtime/Graphics/CBooRenderer.hpp b/Runtime/Graphics/CBooRenderer.hpp
index afdf5e508..bd3741e25 100644
--- a/Runtime/Graphics/CBooRenderer.hpp
+++ b/Runtime/Graphics/CBooRenderer.hpp
@@ -53,6 +53,8 @@ public:
 class CBooRenderer : public IRenderer
 {
     friend class CBooModel;
+    friend class CModel;
+    friend class CGameArea;
     friend class CWorldTransManager;
     friend class CMorphBallShadow;
 
diff --git a/Runtime/Graphics/CModel.hpp b/Runtime/Graphics/CModel.hpp
index 74381ce09..6d3b7161e 100644
--- a/Runtime/Graphics/CModel.hpp
+++ b/Runtime/Graphics/CModel.hpp
@@ -73,6 +73,7 @@ struct CBooSurface
 class CBooModel
 {
     friend class CModel;
+    friend class CGameArea;
     friend class CBooRenderer;
     friend class CMetroidModelInstance;
     friend class CSkinnedModel;
@@ -151,6 +152,8 @@ private:
     void DrawNormalSurfaces(const CModelFlags& flags) const;
     void DrawSurfaces(const CModelFlags& flags) const;
     void DrawSurface(const CBooSurface& surf, const CModelFlags& flags) const;
+    void WarmupDrawSurfaces() const;
+    void WarmupDrawSurface(const CBooSurface& surf) const;
 
     static zeus::CVector3f g_PlayerPosition;
     static float g_ModSeconds;
@@ -177,6 +180,7 @@ public:
     void RemapMaterialData(SShader& shader);
     bool TryLockTextures() const;
     void UnlockTextures() const;
+    void SyncLoadTextures() const;
     void Touch(int shaderIdx) const;
     void VerifyCurrentShader(int shaderIdx);
     boo::IGraphicsBufferD* UpdateUniformData(const CModelFlags& flags,
@@ -214,15 +218,8 @@ public:
 
     static boo::ITexture* g_shadowMap;
     static zeus::CTransform g_shadowTexXf;
-    static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf)
-    {
-        g_shadowMap = map;
-        g_shadowTexXf = texXf;
-    }
-    static void DisableShadowMaps()
-    {
-        g_shadowMap = nullptr;
-    }
+    static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf);
+    static void DisableShadowMaps();
 };
 
 class CModel
@@ -270,6 +267,9 @@ public:
     zeus::CVector3f GetPoolNormal(size_t idx) const;
     void ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf,
                           const std::vector<std::pair<zeus::CVector3f, zeus::CVector3f>>& vn) const;
+
+    void _WarmupShaders();
+    static void WarmupShaders(const SObjectTag& cmdlTag);
 };
 
 CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag,
diff --git a/Runtime/Graphics/CModelBoo.cpp b/Runtime/Graphics/CModelBoo.cpp
index d2dba2be0..9bbdf46c1 100644
--- a/Runtime/Graphics/CModelBoo.cpp
+++ b/Runtime/Graphics/CModelBoo.cpp
@@ -126,6 +126,16 @@ void CBooModel::EnsureViewDepStateCached(const CBooModel& model, const CBooSurfa
 boo::ITexture* CBooModel::g_shadowMap = nullptr;
 zeus::CTransform CBooModel::g_shadowTexXf;
 
+void CBooModel::EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf)
+{
+    g_shadowMap = map;      /* static member: remembers the texture passed by the caller */
+    g_shadowTexXf = texXf;  /* static member: remembers the transform passed with it */
+}
+void CBooModel::DisableShadowMaps()
+{
+    g_shadowMap = nullptr;  /* only the texture pointer is cleared; g_shadowTexXf keeps its last value */
+}
+
 CBooModel::~CBooModel()
 {
     if (m_prev)
@@ -488,6 +498,16 @@ void CBooModel::UnlockTextures() const
     const_cast<CBooModel*>(this)->x40_24_texturesLoaded = false;
 }
 
+void CBooModel::SyncLoadTextures() const
+{
+    if (!x40_24_texturesLoaded) /* nothing to do once the loaded flag is already set */
+    {
+        for (TCachedToken<CTexture>& tex : const_cast<std::vector<TCachedToken<CTexture>>&>(x1c_textures))
+            tex.GetObj(); /* NOTE(review): presumably forces a blocking load of the texture -- confirm against TCachedToken */
+        const_cast<CBooModel*>(this)->x40_24_texturesLoaded = true; /* method is const, so the flag is written through const_cast */
+    }
+}
+
 void CBooModel::DrawFlat(ESurfaceSelection sel, EExtendedShader extendedIdx) const
 {
     const CBooSurface* surf;
@@ -571,6 +591,39 @@ void CBooModel::DrawSurface(const CBooSurface& surf, const CModelFlags& flags) c
     CGraphics::DrawArrayIndexed(surf.m_data.idxStart, surf.m_data.idxCount);
 }
 
+void CBooModel::WarmupDrawSurfaces() const
+{
+    const CBooSurface* surf = x38_firstUnsortedSurface; /* first pass: linked list starting at x38_firstUnsortedSurface */
+    while (surf)
+    {
+        WarmupDrawSurface(*surf);
+        surf = surf->m_next;
+    }
+
+    surf = x3c_firstSortedSurface; /* second pass: linked list starting at x3c_firstSortedSurface */
+    while (surf)
+    {
+        WarmupDrawSurface(*surf);
+        surf = surf->m_next;
+    }
+}
+
+void CBooModel::WarmupDrawSurface(const CBooSurface& surf) const
+{
+    if (m_uniUpdateCount == 0 || m_uniUpdateCount > m_instances.size()) /* count is used 1-based below: 0 would index slot -1 */
+        return;
+    const ModelInstance& inst = m_instances[m_uniUpdateCount-1]; /* instance slot selected by the current update count */
+
+    for (const std::vector<boo::IShaderDataBinding*>& extendeds : inst.m_shaderDataBindings)
+    {
+        for (boo::IShaderDataBinding* binding : extendeds)
+        {
+            CGraphics::SetShaderDataBinding(binding); /* every binding of the instance gets one draw call */
+            CGraphics::DrawArrayIndexed(surf.m_data.idxStart, std::min(u32(3), surf.m_data.idxCount)); /* at most 3 indices per draw */
+        }
+    }
+}
+
 void CBooModel::UVAnimationBuffer::ProcessAnimation(u8*& bufOut, const UVAnimation& anim)
 {
     zeus::CMatrix4f& texMtxOut = reinterpret_cast<zeus::CMatrix4f&>(*bufOut);
@@ -1041,6 +1094,9 @@ CModel::CModel(std::unique_ptr<u8[]>&& in, u32 /* dataLen */, IObjectStore* stor
 
     m_gfxToken = CGraphics::CommitResources([&](boo::IGraphicsDataFactory::Context& ctx) -> bool
     {
+        /* Index buffer is always static */
+        m_ibo = ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount);
+
         if (!m_hmdlMeta.bankCount)
         {
             /* Non-skinned models use static vertex buffers shared with CBooModel instances */
@@ -1056,8 +1112,6 @@ CModel::CModel(std::unique_ptr<u8[]>&& in, u32 /* dataLen */, IObjectStore* stor
             memmove(m_dynamicVertexData.get(), vboData, vboSz);
         }
 
-        /* Index buffer is always static */
-        m_ibo = ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount);
         return true;
     });
 
@@ -1172,6 +1226,36 @@ void CModel::ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf,
     vertBuf->unmap();
 }
 
+void CModel::_WarmupShaders()
+{
+    CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity()); /* renderer's x220_sphereRamp texture is installed as the shadow map */
+    CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState(); /* projection, view and model state are saved here and restored after the loop */
+    zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix;
+    zeus::CTransform backupModel = CGraphics::g_GXModelMatrix;
+    CGraphics::SetModelMatrix(zeus::CTransform::Translate(-m_aabb.center())); /* translate by the negated AABB center */
+    CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f));
+    CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f);
+    CModelFlags defaultFlags; /* default-constructed flags are used for every pass */
+    for (CBooModel::SShader& shader : x18_matSets) /* one warmup pass per entry of x18_matSets */
+    {
+        GetInstance().RemapMaterialData(shader);
+        GetInstance().SyncLoadTextures();
+        GetInstance().UpdateUniformData(defaultFlags, nullptr, nullptr);
+        GetInstance().WarmupDrawSurfaces();
+    }
+    CGraphics::SetProjectionState(backupProj);
+    CGraphics::SetViewPointMatrix(backupViewPoint);
+    CGraphics::SetModelMatrix(backupModel);
+    CBooModel::DisableShadowMaps(); /* clears the shadow map unconditionally; a previously set map is not restored */
+}
+
+void CModel::WarmupShaders(const SObjectTag& cmdlTag)
+{
+    TToken<CModel> model = g_SimplePool->GetObj(cmdlTag); /* token is a local and is destroyed when this function returns */
+    CModel* modelObj = model.GetObj(); /* NOTE(review): dereferenced below without a null check -- confirm GetObj() cannot return null */
+    modelObj->_WarmupShaders();
+}
+
 CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag,
                                std::unique_ptr<u8[]>&& in, u32 len,
                                const urde::CVParamTransfer& vparms,
diff --git a/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp b/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp
index 6457c5712..ec5090ad0 100644
--- a/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp
@@ -93,7 +93,7 @@ static const char* LightingShadowGLSL =
 "                   lights[0].angAtt[1] * angDot +\n"
 "                   lights[0].angAtt[0];\n"
 "    ret += lights[0].color * clamp(angAtt, 0.0, 1.0) * att * clamp(dot(normalize(-delta), mvNormIn.xyz), 0.0, 1.0) *\n"
-"           texture(extTex0, vtf.extTcgs[0]).r;\n"
+"           texture(extTex7, vtf.extTcgs[0]).r;\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
diff --git a/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp b/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp
index c5f113295..0a2b87d4e 100644
--- a/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp
@@ -92,7 +92,7 @@ static const char* LightingShadowHLSL =
 "                   lights[0].angAtt[1] * angDot +\n"
 "                   lights[0].angAtt[0];\n"
 "    ret += lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n"
-"           extTex0.Sample(clampSamp, vtf.extTcgs[0]).r;\n"
+"           extTex7.Sample(clampSamp, vtf.extTcgs[0]).r;\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
diff --git a/Runtime/Graphics/Shaders/CModelShadersMetal.cpp b/Runtime/Graphics/Shaders/CModelShadersMetal.cpp
index bc79754ec..57a7805fd 100644
--- a/Runtime/Graphics/Shaders/CModelShadersMetal.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersMetal.cpp
@@ -79,7 +79,7 @@ static const char* LightingShadowMetal =
 "};\n"
 "\n"
 "static float4 EXTLightingShadowFunc(constant LightingUniform& lu, float4 mvPosIn, float4 mvNormIn,\n"
-"                                    thread VertToFrag& vtf, texture2d<float> extTex0)\n"
+"                                    thread VertToFrag& vtf, texture2d<float> extTex7)\n"
 "{\n"
 "    float4 ret = lu.ambient;\n"
 "    \n"
@@ -93,7 +93,7 @@ static const char* LightingShadowMetal =
 "                   lu.lights[0].angAtt[1] * angDot +\n"
 "                   lu.lights[0].angAtt[0];\n"
 "    ret += lu.lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n"
-"           extTex0.sample(clampSamp, vtf.extTcgs0);\n"
+"           extTex7.sample(clampSamp, vtf.extTcgs0);\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp
index e9f59e739..88fe99026 100644
--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp
@@ -27,7 +27,7 @@ BOO_GLSL_BINDING_HEAD
 "void main()\n"
 "{\n"
 "    vtf.color = colorIn;\n"
-"    vtf.uv = uvIn;\n"
+"    vtf.uv = uvIn.xy;\n"
 "    gl_Position = mvp * vec4(posIn.xyz, 1.0);\n"
 "}\n";
 
diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp
index 3d7f1e985..64e1d3810 100644
--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp
@@ -29,7 +29,7 @@ static const char* VS =
 "{\n"
 "    VertToFrag vtf;\n"
 "    vtf.color = v.colorIn;\n"
-"    vtf.uv = v.uvIn;\n"
+"    vtf.uv = v.uvIn.xy;\n"
 "    vtf.pos = mul(mvp, float4(v.posIn.xyz, 1.0));\n"
 "    return vtf;\n"
 "}\n";
diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp
index 0d1ef934c..f775ff8d9 100644
--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp
@@ -31,7 +31,7 @@ static const char* VS =
 "{\n"
 "    VertToFrag vtf;\n"
 "    vtf.color = v.colorIn;\n"
-"    vtf.uv = v.uvIn;\n"
+"    vtf.uv = v.uvIn.xy;\n"
 "    vtf.pos = su.mvp * float4(v.posIn.xyz, 1.0);\n"
 "    return vtf;\n"
 "}\n";
diff --git a/Runtime/IMain.hpp b/Runtime/IMain.hpp
index 7fd01bb8a..eb9671202 100644
--- a/Runtime/IMain.hpp
+++ b/Runtime/IMain.hpp
@@ -40,9 +40,10 @@ public:
     virtual void Draw()=0;
     virtual bool Proc()=0;
     virtual void Shutdown()=0;
-    virtual boo::IWindow* GetMainWindow() const=0;
+    virtual boo::IWindow* GetMainWindow() const= 0;
     virtual void SetFlowState(EFlowState) = 0;
     virtual size_t GetExpectedIdSize() const = 0;
+    virtual void WarmupShaders() = 0;
 };
 }
 
diff --git a/Runtime/MP1/MP1.cpp b/Runtime/MP1/MP1.cpp
index 265b189ca..f06ef2519 100644
--- a/Runtime/MP1/MP1.cpp
+++ b/Runtime/MP1/MP1.cpp
@@ -17,14 +17,14 @@
 #include "Graphics/Shaders/CFluidPlaneShader.hpp"
 #include "Graphics/Shaders/CAABoxShader.hpp"
 #include "Graphics/Shaders/CWorldShadowShader.hpp"
-#include "Character/CCharLayoutInfo.hpp"
+#include "Graphics/Shaders/CParticleSwooshShaders.hpp"
 #include "Audio/CStreamAudioManager.hpp"
 #include "CGBASupport.hpp"
-#include "CBasics.hpp"
 #include "Audio/CAudioGroupSet.hpp"
 
 namespace urde
 {
+URDE_DECL_SPECIALIZE_SHADER(CParticleSwooshShaders)
 URDE_DECL_SPECIALIZE_SHADER(CThermalColdFilter)
 URDE_DECL_SPECIALIZE_SHADER(CThermalHotFilter)
 URDE_DECL_SPECIALIZE_SHADER(CSpaceWarpFilter)
@@ -223,6 +223,7 @@ CMain::BooSetter::BooSetter(boo::IGraphicsDataFactory* factory,
                             boo::ITextureR* spareTex)
 {
     CGraphics::InitializeBoo(factory, cmdQ, spareTex);
+    TShader<CParticleSwooshShaders>::Initialize();
     TShader<CThermalColdFilter>::Initialize();
     TShader<CThermalHotFilter>::Initialize();
     TShader<CSpaceWarpFilter>::Initialize();
@@ -318,8 +319,40 @@ void CMain::Init(const hecl::Runtime::FileStoreManager& storeMgr,
     //CStreamAudioManager::Start(false, "Audio/rui_samusL.dsp|Audio/rui_samusR.dsp", 0x7f, true, 1.f, 1.f);
 }
 
+static logvisor::Module WarmupLog("Shader Warmup");
+
+void CMain::WarmupShaders()
+{
+    if (m_warmupTags.size()) /* a warmup list is already queued; do not rebuild it */
+        return;
+
+    size_t modelCount = 0; /* counts CMDL and MREA tags only */
+    g_ResFactory->EnumerateResources([&](const SObjectTag& tag) /* first enumeration only counts, so reserve() below is exact */
+    {
+        if (tag.type == FOURCC('CMDL') || tag.type == FOURCC('MREA'))
+            ++modelCount;
+        return true;
+    });
+    m_warmupTags.reserve(modelCount);
+
+    g_ResFactory->EnumerateResources([&](const SObjectTag& tag) /* second enumeration collects the matching tags */
+    {
+        if (tag.type == FOURCC('CMDL') || tag.type == FOURCC('MREA'))
+            m_warmupTags.push_back(tag);
+        return true;
+    });
+
+    m_warmupIt = m_warmupTags.begin(); /* CMain::Draw() advances this iterator through the list */
+
+    WarmupLog.report(logvisor::Info, "Began warmup of %" PRISize " objects", modelCount);
+}
+
 bool CMain::Proc()
 {
+    // Warmup cycle overrides update
+    if (m_warmupTags.size())
+        return false;
+
     CGBASupport::GlobalPoll();
     x164_archSupport->UpdateTicks();
     x164_archSupport->Update();
@@ -340,6 +373,33 @@ bool CMain::Proc()
 
 void CMain::Draw()
 {
+    // Warmup cycle overrides draw: while tags are queued, this call only performs warmup work
+    if (m_warmupTags.size())
+    {
+        auto startTime = std::chrono::steady_clock::now(); // the time budget of this call is measured from here
+        while (m_warmupIt != m_warmupTags.end())
+        {
+            WarmupLog.report(logvisor::Info, "Warming %.4s %08X", m_warmupIt->type.getChars(), m_warmupIt->id.Value());
+
+            if (m_warmupIt->type == FOURCC('CMDL'))
+                CModel::WarmupShaders(*m_warmupIt);
+            else if (m_warmupIt->type == FOURCC('MREA'))
+                CGameArea::WarmupShaders(*m_warmupIt);
+            ++m_warmupIt;
+
+            // Stop once more than 12 ms have elapsed (approximately 3/4 frame of warmups)
+            auto curTime = std::chrono::steady_clock::now();
+            if (std::chrono::duration_cast<std::chrono::milliseconds>(curTime - startTime).count() > 12)
+                break;
+        }
+        if (m_warmupIt == m_warmupTags.end())
+        {
+            m_warmupTags = std::vector<SObjectTag>(); // an empty list ends warmup mode; Proc() and Draw() both test size()
+            WarmupLog.report(logvisor::Info, "Finished warmup");
+        }
+        return; // regular drawing is skipped for any call that entered the warmup branch
+    }
 
     x164_archSupport->Draw();
 }
 
@@ -359,6 +419,7 @@ void CMain::Shutdown()
 {
     x164_archSupport.reset();
     ShutdownSubsystems();
+    TShader<CParticleSwooshShaders>::Shutdown();
     TShader<CThermalColdFilter>::Shutdown();
     TShader<CThermalHotFilter>::Shutdown();
     TShader<CSpaceWarpFilter>::Shutdown();
diff --git a/Runtime/MP1/MP1.hpp b/Runtime/MP1/MP1.hpp
index b9ee98d07..7b3c90232 100644
--- a/Runtime/MP1/MP1.hpp
+++ b/Runtime/MP1/MP1.hpp
@@ -240,6 +240,10 @@ private:
 
     boo::IWindow* m_mainWindow = nullptr;
 
+    // Warmup state
+    std::vector<SObjectTag> m_warmupTags;
+    std::vector<SObjectTag>::iterator m_warmupIt;
+
     void InitializeSubsystems(const hecl::Runtime::FileStoreManager& storeMgr);
 
 public:
@@ -259,6 +263,7 @@ public:
               boo::IWindow* window,
               boo::IAudioVoiceEngine* voiceEngine,
               amuse::IBackendVoiceAllocator& backend);
+    void WarmupShaders();
     bool Proc();
     void Draw();
     void Shutdown();
diff --git a/Runtime/World/CGameArea.cpp b/Runtime/World/CGameArea.cpp
index 9bcc0937a..80b611b77 100644
--- a/Runtime/World/CGameArea.cpp
+++ b/Runtime/World/CGameArea.cpp
@@ -411,6 +411,39 @@ CGameArea::CGameArea(CInputStream& in, int idx, int mlvlVersion)
     xec_totalResourcesSize += g_ResFactory->ResourceSize(SObjectTag{FOURCC('MREA'), x84_mrea});
 }
 
+CGameArea::CGameArea(CAssetId mreaId)
+: x84_mrea(mreaId)
+{
+    while (StartStreamingMainArea()) {} /* call repeatedly until StartStreamingMainArea() returns false */
+
+    for (auto& req : xf8_loadTransactions) /* block on every queued load transaction */
+        req->WaitForComplete();
+
+    MREAHeader header = VerifyHeader();
+    x12c_postConstructed->x4c_insts.reserve(header.modelCount); /* NOTE(review): assumes x12c_postConstructed is already allocated at this point -- confirm */
+
+    FillInStaticGeometry();
+
+    CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity()); /* renderer's x220_sphereRamp texture is installed as the shadow map */
+    CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState(); /* projection, view and model state are saved here and restored after the loop */
+    zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix;
+    zeus::CTransform backupModel = CGraphics::g_GXModelMatrix;
+    CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f));
+    CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f);
+    CModelFlags defaultFlags; /* default-constructed flags are used for every instance */
+    for (CMetroidModelInstance& inst : x12c_postConstructed->x4c_insts) /* one warmup pass per model instance */
+    {
+        CGraphics::SetModelMatrix(zeus::CTransform::Translate(-inst.x34_aabb.center())); /* translate by the negated AABB center of this instance */
+        inst.m_instance->SyncLoadTextures();
+        inst.m_instance->UpdateUniformData(defaultFlags, nullptr, nullptr);
+        inst.m_instance->WarmupDrawSurfaces();
+    }
+    CGraphics::SetProjectionState(backupProj);
+    CGraphics::SetViewPointMatrix(backupViewPoint);
+    CGraphics::SetModelMatrix(backupModel);
+    CBooModel::DisableShadowMaps(); /* clears the shadow map unconditionally; a previously set map is not restored */
+}
+
 bool CGameArea::IGetScriptingMemoryAlways() const
 {
     return false;
@@ -1192,5 +1225,10 @@ bool CGameArea::CAreaObjectList::IsQualified(const CEntity& ent)
 {
     return (ent.GetAreaIdAlways() == x200c_areaIdx);
 }
+void CGameArea::WarmupShaders(const SObjectTag& mreaTag)
+{
+    // Calling this version of the constructor performs warmup implicitly
+    CGameArea area(mreaTag.id); // local object; it is destroyed again when this function returns
+}
 
 }
diff --git a/Runtime/World/CGameArea.hpp b/Runtime/World/CGameArea.hpp
index 78c02ac80..cbe2bb63f 100644
--- a/Runtime/World/CGameArea.hpp
+++ b/Runtime/World/CGameArea.hpp
@@ -289,6 +289,7 @@ private:
 public:
 
     CGameArea(CInputStream& in, int idx, int mlvlVersion);
+    CGameArea(CAssetId mreaId); // Warmup constructor
 
     bool IsFinishedOccluding() const;
     void ReadDependencyList();
@@ -372,6 +373,8 @@ public:
     CObjectList& GetAreaObjects() const { return *GetPostConstructed()->x10c0_areaObjs.get(); }
 
     CGameArea* GetNext() const { return x130_next; }
+
+    static void WarmupShaders(const SObjectTag& mreaTag);
 };
 
 }
diff --git a/gmm/gmm.h b/gmm/gmm.h
new file mode 100644
index 000000000..feeb299fa
--- /dev/null
+++ b/gmm/gmm.h
@@ -0,0 +1,54 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Include common gmm files.
+*/
+#ifndef GMM_H__
+#define GMM_H__
+
+#include "gmm_kernel.h"
+#include "gmm_dense_lu.h"
+#include "gmm_dense_qr.h"
+
+#include "gmm_iter_solvers.h"
+#include "gmm_condition_number.h"
+#include "gmm_inoutput.h"
+
+#include "gmm_lapack_interface.h"
+#include "gmm_superlu_interface.h"
+#include "gmm_range_basis.h"
+
+#include "gmm_domain_decomp.h"
+
+#endif //  GMM_H__
diff --git a/gmm/gmm_MUMPS_interface.h b/gmm/gmm_MUMPS_interface.h
new file mode 100644
index 000000000..bc68777fc
--- /dev/null
+++ b/gmm/gmm_MUMPS_interface.h
@@ -0,0 +1,355 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Julien Pommier
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_MUMPS_interface.h
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>,
+   @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+   @date December 8, 2005.
+   @brief Interface with MUMPS (LU direct solver for sparse matrices).
+*/
+#if defined(GMM_USES_MUMPS) || defined(HAVE_DMUMPS_C_H)
+
+#ifndef GMM_MUMPS_INTERFACE_H
+#define GMM_MUMPS_INTERFACE_H
+
+#include "gmm_kernel.h"
+
+
+extern "C" {
+
+#include <smumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <dmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <cmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <zmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+
+}
+
+namespace gmm {
+
+#define ICNTL(I) icntl[(I)-1]
+#define INFO(I) info[(I)-1]
+#define INFOG(I) infog[(I)-1]
+#define RINFOG(I) rinfog[(I)-1]
+
+  template <typename T> struct ij_sparse_matrix { // triplet storage: parallel arrays irn / jcn / a
+    std::vector<int> irn; // first index of each stored entry, 1-based
+    std::vector<int> jcn; // second index of each stored entry, 1-based
+    std::vector<T> a;     // value of each stored entry
+    bool sym;             // when true, store() keeps only entries with ir >= jc
+    
+    template <typename L> void store(const L& l, size_type i) { // appends the entries of one row/column l having index i
+       typename linalg_traits<L>::const_iterator it = vect_const_begin(l),
+         ite = vect_const_end(l);
+       for (; it != ite; ++it) {
+         int ir = (int)i + 1, jc = (int)it.index() + 1; // shift both indices from 0-based to 1-based
+         if (*it != T(0) && (!sym || ir >= jc)) // skip exact zeros; honour the sym filter
+         { irn.push_back(ir); jcn.push_back(jc); a.push_back(*it); }
+       }
+    }
+
+    template <typename L> void build_from(const L& l, row_major) { // row-major overload: store row by row
+      for (size_type i = 0; i < mat_nrows(l); ++i)
+        store(mat_const_row(l, i), i);
+    }
+
+    template <typename L> void build_from(const L& l, col_major) { // column-major overload: store column by column
+      for (size_type i = 0; i < mat_ncols(l); ++i)
+        store(mat_const_col(l, i), i);
+      irn.swap(jcn); // store() put the column index in irn; swap so irn/jcn are (row, column) again
+    }
+
+    template <typename L> ij_sparse_matrix(const L& A, bool sym_) { // builds the triplets from A
+      size_type nz = nnz(A);
+      sym = sym_; // must be set before build_from(), because store() reads it
+      irn.reserve(nz); jcn.reserve(nz); a.reserve(nz);
+      build_from(A,  typename principal_orientation_type<typename
+                 linalg_traits<L>::sub_orientation>::potype()); // tag value selects the row_major or col_major overload
+    }
+  };
+
+  /* ********************************************************************* */
+  /*   MUMPS solve interface                                               */
+  /* ********************************************************************* */
+
+  template <typename T> struct mumps_interf {}; // primary template is empty; only the specializations below provide members
+
+  template <> struct mumps_interf<float> { // float -> SMUMPS_STRUC_C / smumps_c
+    typedef SMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef float value_type;
+
+    static void mumps_c(MUMPS_STRUC_C &id) { smumps_c(&id); }
+  };
+
+  template <> struct mumps_interf<double> { // double -> DMUMPS_STRUC_C / dmumps_c
+    typedef DMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef double value_type;
+    static void mumps_c(MUMPS_STRUC_C &id) { dmumps_c(&id); }
+  };
+
+  template <> struct mumps_interf<std::complex<float> > { // std::complex<float> -> CMUMPS_STRUC_C / cmumps_c
+    typedef CMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef mumps_complex value_type; // element type the callers cast their data pointers to
+    static void mumps_c(MUMPS_STRUC_C &id) { cmumps_c(&id); }
+  };
+
+  template <> struct mumps_interf<std::complex<double> > { // std::complex<double> -> ZMUMPS_STRUC_C / zmumps_c
+    typedef ZMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef mumps_double_complex value_type; // element type the callers cast their data pointers to
+    static void mumps_c(MUMPS_STRUC_C &id) { zmumps_c(&id); }
+  };
+
+
+  template <typename MUMPS_STRUCT>
+  static inline bool mumps_error_check(MUMPS_STRUCT &id) { // true when INFO(1) >= 0; false for INFO(1) == -6 or -10
+    if (id.INFO(1) < 0) {
+      switch (id.INFO(1)) {
+        case -2: // NOTE(review): no break after the asserts below -- relies on GMM_ASSERT1(false, ...) not returning; confirm
+          GMM_ASSERT1(false, "Solve with MUMPS failed: NZ = " << id.INFO(2)
+                      << " is out of range");
+        case -6 : case -10 : // the only codes reported as a warning plus a false return
+          GMM_WARNING1("Solve with MUMPS failed: matrix is singular");
+          return false;
+        case -9:
+          GMM_ASSERT1(false, "Solve with MUMPS failed: error "
+                      << id.INFO(1) << ", increase ICNTL(14)");
+        case -13 :
+          GMM_ASSERT1(false, "Solve with MUMPS failed: not enough memory");
+        default : // every other negative code
+          GMM_ASSERT1(false, "Solve with MUMPS failed with error "
+                      << id.INFO(1));
+      }
+    }
+    return true;
+  }
+
+
+  /** MUMPS solve interface: copies B into a work vector, runs MUMPS, copies the work vector into X_
+   *  Works only with sparse or skyline matrices. Returns the result of mumps_error_check().
+   */
+  template <typename MAT, typename VECTX, typename VECTB>
+  bool MUMPS_solve(const MAT &A, const VECTX &X_, const VECTB &B,
+                   bool sym = false, bool distributed = false) {
+    VECTX &X = const_cast<VECTX &>(X_); // X_ is taken by const reference but is the output; it is written via X
+
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename mumps_interf<T>::value_type MUMPS_T;
+    GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix");
+  
+    std::vector<T> rhs(gmm::vect_size(B)); gmm::copy(B, rhs); // work copy of B handed to MUMPS
+
+    ij_sparse_matrix<T> AA(A, sym); // 1-based (row, column, value) triplets of A
+  
+    const int JOB_INIT = -1;
+    const int JOB_END = -2;
+    const int USE_COMM_WORLD = -987654;
+
+    typename mumps_interf<T>::MUMPS_STRUC_C id;
+
+    int rank(0); // stays 0 unless GMM_USES_MPI is defined
+#ifdef GMM_USES_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+    
+    id.job = JOB_INIT;
+    id.par = 1;
+    id.sym = sym ? 2 : 0;
+    id.comm_fortran = USE_COMM_WORLD;
+    mumps_interf<T>::mumps_c(id);
+    
+    if (rank == 0 || distributed) {
+      id.n = int(gmm::mat_nrows(A));
+      if (distributed) { // distributed input uses the *_loc fields
+        id.nz_loc = int(AA.irn.size());
+        id.irn_loc = &(AA.irn[0]); // NOTE(review): indexes element 0 of the triplet vectors -- assumes A has at least one stored entry; confirm
+        id.jcn_loc = &(AA.jcn[0]);
+        id.a_loc = (MUMPS_T*)(&(AA.a[0]));
+      } else {
+        id.nz = int(AA.irn.size());
+        id.irn = &(AA.irn[0]);
+        id.jcn = &(AA.jcn[0]);
+        id.a = (MUMPS_T*)(&(AA.a[0]));
+      }
+      if (rank == 0) // the right-hand side is attached on rank 0 only
+        id.rhs = (MUMPS_T*)(&(rhs[0]));
+    }
+
+    id.ICNTL(1) = -1; // output stream for error messages
+    id.ICNTL(2) = -1; // output stream for other messages
+    id.ICNTL(3) = -1; // output stream for global information
+    id.ICNTL(4) = 0;  // verbosity level
+
+    if (distributed)
+      id.ICNTL(5) = 0;  // assembled input matrix (default)
+
+    id.ICNTL(14) += 80; /* small boost to the workspace size, as we have encountered some problems
+                           that did not fit in the default settings of MUMPS.
+                           By default, ICNTL(14) = 15 or 20.
+                        */
+    //cout << "ICNTL(14): " << id.ICNTL(14) << "\n";
+
+    if (distributed)
+      id.ICNTL(18) = 3; // strategy for distributed input matrix
+
+    // id.ICNTL(22) = 1;   /* enables out-of-core support */
+
+    id.job = 6; // NOTE(review): presumably analysis + factorization + solve in one call -- confirm against the MUMPS user guide
+    mumps_interf<T>::mumps_c(id);
+    bool ok = mumps_error_check(id); // false only when MUMPS reported INFO(1) == -6 or -10
+
+    id.job = JOB_END;
+    mumps_interf<T>::mumps_c(id);
+
+#ifdef GMM_USES_MPI
+    MPI_Bcast(&(rhs[0]),id.n,gmm::mpi_type(T()),0,MPI_COMM_WORLD); // rank 0 broadcasts rhs to the other ranks
+#endif
+
+    gmm::copy(rhs, X); // rhs is copied out even when ok is false; presumably MUMPS overwrote rhs with the solution -- confirm
+
+    return ok;
+
+  }
+
+
+
+  /** MUMPS solve interface for distributed matrices 
+   *  Works only with sparse or skyline matrices
+   */
+  template <typename MAT, typename VECTX, typename VECTB>
+  bool MUMPS_distributed_matrix_solve(const MAT &A, const VECTX &X_,
+                                      const VECTB &B, bool sym = false) {
+    return MUMPS_solve(A, X_, B, sym, true); // same call with distributed = true
+  }
+
+
+
+  template<typename T>
+  inline T real_or_complex(std::complex<T> a) { return a.real(); } // complex argument (by value): returns the real part only
+  template<typename T>
+  inline T real_or_complex(T &a) { return a; } // non-const lvalue argument: returned unchanged
+
+
+  /** Evaluate matrix determinant with MUMPS; INFOG(34) is returned through the exponent argument
+   *  Works only with sparse or skyline matrices
+   */
+  template <typename MAT, typename T = typename linalg_traits<MAT>::value_type>
+  T MUMPS_determinant(const MAT &A, int &exponent,
+                      bool sym = false, bool distributed = false) {
+    exponent = 0;
+    typedef typename mumps_interf<T>::value_type MUMPS_T;
+    typedef typename number_traits<T>::magnitude_type R;
+    GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix");
+  
+    ij_sparse_matrix<T> AA(A, sym); // 1-based (row, column, value) triplets of A
+  
+    const int JOB_INIT = -1;
+    const int JOB_END = -2;
+    const int USE_COMM_WORLD = -987654;
+
+    typename mumps_interf<T>::MUMPS_STRUC_C id;
+
+    int rank(0); // stays 0 unless GMM_USES_MPI is defined
+#ifdef GMM_USES_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+    
+    id.job = JOB_INIT;
+    id.par = 1;
+    id.sym = sym ? 2 : 0;
+    id.comm_fortran = USE_COMM_WORLD;
+    mumps_interf<T>::mumps_c(id);
+    
+    if (rank == 0 || distributed) {
+      id.n = int(gmm::mat_nrows(A));
+      if (distributed) { // distributed input uses the *_loc fields
+        id.nz_loc = int(AA.irn.size());
+        id.irn_loc = &(AA.irn[0]); // NOTE(review): indexes element 0 of the triplet vectors -- assumes A has at least one stored entry; confirm
+        id.jcn_loc = &(AA.jcn[0]);
+        id.a_loc = (MUMPS_T*)(&(AA.a[0]));
+      } else {
+        id.nz = int(AA.irn.size());
+        id.irn = &(AA.irn[0]);
+        id.jcn = &(AA.jcn[0]);
+        id.a = (MUMPS_T*)(&(AA.a[0]));
+      }
+    }
+
+    id.ICNTL(1) = -1; // output stream for error messages
+    id.ICNTL(2) = -1; // output stream for other messages
+    id.ICNTL(3) = -1; // output stream for global information
+    id.ICNTL(4) = 0;  // verbosity level
+
+    if (distributed)
+      id.ICNTL(5) = 0;  // assembled input matrix (default)
+
+//    id.ICNTL(14) += 80; // small boost to the workspace size 
+
+    if (distributed)
+      id.ICNTL(18) = 3; // strategy for distributed input matrix
+
+    id.ICNTL(31) = 1;   // only factorization, no solution to follow
+    id.ICNTL(33) = 1;   // request determinant calculation
+
+    id.job = 4; // analysis (job=1) + factorization (job=2)
+    mumps_interf<T>::mumps_c(id);
+    mumps_error_check(id); // the boolean result (false for INFO(1) == -6 or -10) is not used here
+
+    T det = real_or_complex(std::complex<R>(id.RINFOG(12),id.RINFOG(13))); // NOTE(review): a temporary only binds to the by-value std::complex overload, which returns a.real(); for complex T the RINFOG(13) part looks dropped -- confirm
+    exponent = id.INFOG(34);
+
+    id.job = JOB_END;
+    mumps_interf<T>::mumps_c(id);
+
+    return det;
+  }
+
+#undef ICNTL
+#undef INFO
+#undef INFOG
+#undef RINFOG
+
+}
+
+  
+#endif // GMM_MUMPS_INTERFACE_H
+
+#endif // GMM_USES_MUMPS
diff --git a/gmm/gmm_algobase.h b/gmm/gmm_algobase.h
new file mode 100644
index 000000000..64a859da1
--- /dev/null
+++ b/gmm/gmm_algobase.h
@@ -0,0 +1,228 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2000-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_algobase.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 28, 2000.
+   @brief Miscellaneous algorithms on containers.
+*/
+
+#ifndef GMM_ALGOBASE_H__
+#define GMM_ALGOBASE_H__
+#include "gmm_std.h"
+#include "gmm_except.h"
+#include <functional>
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Definition of comparison classes                                      */
+  /* returning an int.                                                     */
+  /* ********************************************************************* */
+  
+  template <class T>
+    struct less : public std::binary_function<T, T, int> {
+    inline int operator()(const T& x, const T& y) const
+    { return (x < y) ? -1 : ((y < x) ? 1 : 0); }
+  };
+
+  template<> struct less<int> : public std::binary_function<int, int, int>
+  { int operator()(int x, int y) const { return x-y; } };
+  template<> struct less<char> : public std::binary_function<char, char, int>
+  { int operator()(char x, char y) const { return int(x-y); } };
+  template<> struct less<short> : public std::binary_function<short,short,int>
+  { int operator()(short x, short y) const { return int(x-y); } };
+  template<> struct less<unsigned char>
+     : public std::binary_function<unsigned char, unsigned char, int> {
+    int operator()(unsigned char x, unsigned char y) const
+    { return int(x)-int(y); }
+  };
+  
+
+  template <class T>
+    struct greater : public std::binary_function<T, T, int> {
+    inline int operator()(const T& x, const T& y) const
+    { return (y < x) ? -1 : ((x < y) ? 1 : 0); }
+  };
+
+  template<> struct greater<int> : public std::binary_function<int, int, int>
+  { int operator()(int x, int y) const { return y-x; } };
+  template<> struct greater<char> : public std::binary_function<char,char,int>
+  { int operator()(char x, char y) const { return int(y-x); } };
+  template<> struct greater<short>
+      : public std::binary_function<short, short, int>
+  { int operator()(short x, short y) const { return int(y-x); } };
+  template<> struct greater<unsigned char>
+    : public std::binary_function<unsigned char, unsigned char, int> {
+    int operator()(unsigned char x, unsigned char y) const
+      { return int(y)-int(x); }
+  };
+
+  template <typename T> inline T my_abs(T a) { if (a < T(0)) return T(-a); return a; } // absolute value
+  
+  template <class T> // three-way ascending compare; |x - y| <= eps counts as equal
+    struct approx_less : public std::binary_function<T, T, int> {
+    double eps; // absolute tolerance
+    approx_less(double e = 1E-13) : eps(e) {}
+    inline int operator()(const T &x, const T &y) const {
+      return (my_abs(x - y) <= eps) ? 0 : ((x < y) ? -1 : 1); }
+  };
+
+  template <class T> // three-way descending compare; |x - y| <= eps counts as equal
+    struct approx_greater : public std::binary_function<T, T, int> {
+    double eps; // absolute tolerance
+    approx_greater(double e = 1E-13) : eps(e) {}
+    inline int operator()(const T &x, const T &y) const {
+      return (my_abs(x - y) <= eps) ? 0 : ((x > y) ? -1 : 1); }
+  };
+
+  /* Three-way lexicographic comparison of [b1,e1) and [b2,e2) with the three-way
+     comparator c; when one range is a strict prefix, the shorter one sorts first. */
+  template<class ITER1, class ITER2, class COMP>
+    int lexicographical_compare(ITER1 b1, const ITER1 &e1,
+                                ITER2 b2, const ITER2 &e2, const COMP &c)  {
+    while (b1 != e1 && b2 != e2) {
+      const int r = c(*b1, *b2); if (r != 0) return r; ++b1; ++b2;
+    }
+    return (b1 != e1) ? 1 : ((b2 != e2) ? -1 : 0);
+  }
+
+  template<class CONT, class COMP = gmm::less<typename CONT::value_type> >
+    struct lexicographical_less : public std::binary_function<CONT, CONT, int>
+  { // three-way lexicographic comparison of two containers, element by element
+    COMP c; // three-way comparator applied to pairs of elements
+    int operator()(const CONT &x, const CONT &y) const {
+      return gmm::lexicographical_compare(x.begin(), x.end(),
+					  y.begin(), y.end(), c);
+    }
+    lexicographical_less(const COMP &d = COMP()) { c = d; }
+  };
+
+  template<class CONT, class COMP = gmm::less<typename CONT::value_type> >
+  struct lexicographical_greater
+    : public std::binary_function<CONT, CONT, int> { // reversed lexicographic order
+    COMP c; // element comparator (the default is still gmm::less)
+    int operator()(const CONT &x, const CONT &y) const {
+      return -gmm::lexicographical_compare(x.begin(), x.end(), // sign is flipped
+					   y.begin(), y.end(), c);
+    }
+    lexicographical_greater(const COMP &d = COMP()) { c = d; }
+  };
+  
+
+  /* ********************************************************************* */
+  /* "Virtual" iterators on sequences.                                     */
+  /* The class T represent a class of sequence.                            */
+  /* ********************************************************************* */
+
+  template<class T> struct sequence_iterator { // iterator over the values of T itself:
+    // dereferencing yields the counter Un, incrementing applies ++ to it.
+    typedef T             value_type;
+    typedef value_type*   pointer;
+    typedef value_type&   reference;
+    typedef const value_type& const_reference;
+    typedef std::forward_iterator_tag iterator_category;
+
+    T Un; // current term of the sequence
+
+    sequence_iterator(T U0 = T(0)) { Un = U0; }
+
+    sequence_iterator &operator ++()
+    { ++Un; return *this; }
+    sequence_iterator operator ++(int) // returns the old value; delegates to prefix ++
+    { sequence_iterator tmp = *this; ++(*this); return tmp; }
+
+    const_reference operator *() const { return Un; }
+    reference operator *() { return Un; }
+
+    bool operator ==(const sequence_iterator &i) const { return (i.Un==Un);}
+    bool operator !=(const sequence_iterator &i) const { return (i.Un!=Un);}
+  };
+
+  /* ********************************************************************* */
+  /* generic algorithms.                                                   */
+  /* ********************************************************************* */
+
+  template <class ITER1, class SIZE, class ITER2> // copies `count` elements starting at first
+  ITER2 copy_n(ITER1 first, SIZE count, ITER2 result) {
+    while (count > 0) { *result = *first; ++first; ++result; --count; }
+    return result; // one past the last element written
+  }
+
+  template<class ITER> // arithmetic mean of [first, last); asserts the range is non-empty
+    typename std::iterator_traits<ITER>::value_type
+      mean_value(ITER first, const ITER &last) {
+    GMM_ASSERT2(first != last, "mean value of empty container");
+    typename std::iterator_traits<ITER>::value_type total = *first;
+    size_t count = 1;
+    for (++first; first != last; ++first, ++count) total += *first;
+    total /= float(count); // the element count is converted to float for the division
+    return total;
+  }
+
+  template<class CONT> // mean of all elements of c, via the iterator overload
+    typename CONT::value_type
+  mean_value(const CONT &c) { return mean_value(c.begin(), c.end()); }
+
+  /* Component-wise bounding box (pmin, pmax) of the points in [first, last). */
+  template<class ITER>
+    void minmax_box(typename std::iterator_traits<ITER>::value_type &pmin,
+                    typename std::iterator_traits<ITER>::value_type &pmax,
+                    ITER first, const ITER &last) {
+    typedef typename std::iterator_traits<ITER>::value_type PT;
+    if (first != last) { pmin = pmax = *first; ++first; } // empty range: pmin/pmax untouched
+    for ( ; first != last; ++first) { // advance to the next point on every pass
+      typename PT::const_iterator b = (*first).begin(), e = (*first).end();
+      typename PT::iterator b1 = pmin.begin(), b2 = pmax.begin();
+      for ( ; b != e; ++b, ++b1, ++b2) // shrink pmin / grow pmax coordinate by coordinate
+        { *b1 = std::min(*b1, *b); *b2 = std::max(*b2, *b); }
+    }
+  }
+
+  template<typename VEC> struct sorted_indexes_aux { // comparator on indices into v
+    const VEC &v; // the values being compared; held by reference, not copied
+  public:
+    sorted_indexes_aux(const VEC& v_) : v(v_) {}
+    template <typename IDX>
+    bool operator()(const IDX &ia, const IDX &ib) const
+    { return v[ia] < v[ib]; } // ia goes first when it designates the smaller value
+  };
+
+  template<typename VEC, typename IVEC> // iv := indices of v ordered by increasing v[i]
+  void sorted_indexes(const VEC &v, IVEC &iv) {
+    iv.clear(); iv.resize(v.size());
+    for (size_t i=0; i < v.size(); ++i) iv[i] = i; // start from the identity 0..size-1
+    std::sort(iv.begin(), iv.end(), sorted_indexes_aux<VEC>(v)); // std::sort: ties keep no particular order
+  }
+
+}
+
+
+#endif /* GMM_ALGOBASE_H__ */
diff --git a/gmm/gmm_blas.h b/gmm/gmm_blas.h
new file mode 100644
index 000000000..b23735559
--- /dev/null
+++ b/gmm/gmm_blas.h
@@ -0,0 +1,2221 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_blas.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Basic linear algebra functions.
+*/
+
+#ifndef GMM_BLAS_H__
+#define GMM_BLAS_H__
+
+#include "gmm_scaled.h"
+#include "gmm_transposed.h"
+#include "gmm_conjugated.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		                                         		  */
+  /*		Generic algorithms                           		  */
+  /*		                                         		  */
+  /* ******************************************************************** */
+
+
+  /* ******************************************************************** */
+  /*		Miscellaneous                           		  */
+  /* ******************************************************************** */
+
+  /** clear (fill with zeros) a vector or matrix. */
+  template <typename L> inline void clear(L &l)
+  { linalg_traits<L>::do_clear(l); }
+  /** @cond DOXY_SHOW_ALL_FUNCTIONS 
+      skip all these redundant definitions in doxygen documentation..
+   */
+  template <typename L> inline void clear(const L &l)
+  { linalg_traits<L>::do_clear(linalg_const_cast(l)); }
+
+  ///@endcond
+  /** count the number of non-zero entries of a vector or matrix. */  template <typename L> inline size_type nnz(const L& l)
+  { return nnz(l, typename linalg_traits<L>::linalg_type()); }
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  template <typename L> inline size_type nnz(const L& l, abstract_vector) {
+    size_type count(0); // one per entry visited by the vector's const iterator
+    for (auto cur = vect_const_begin(l), last = vect_const_end(l);
+         cur != last; ++cur) ++count;
+    return count;
+  }
+
+  template <typename L> inline size_type nnz(const L& l, abstract_matrix) {
+    return nnz(l,  typename principal_orientation_type<typename
+	       linalg_traits<L>::sub_orientation>::potype());
+  }
+
+  template <typename L> inline size_type nnz(const L& l, row_major) {
+    size_type total(0); // accumulates the per-row counts
+    const size_type nrows = mat_nrows(l);
+    for (size_type r = 0; r != nrows; ++r) total += nnz(mat_const_row(l, r));
+    return total;
+  }
+
+  template <typename L> inline size_type nnz(const L& l, col_major) {
+    size_type total(0); // accumulates the per-column counts
+    const size_type ncols = mat_ncols(l);
+    for (size_type c = 0; c != ncols; ++c) total += nnz(mat_const_col(l, c));
+    return total;
+  }
+
+  ///@endcond
+
+
+  /** fill a vector or matrix with x (a zero x is handled by gmm::clear alone). */
+  template <typename L> inline
+  void fill(L& l, typename gmm::linalg_traits<L>::value_type x) {
+    typedef typename gmm::linalg_traits<L>::value_type T;
+    if (x == T(0)) gmm::clear(l); // clear() already zeroes l: no element-wise pass needed
+    else fill(l, x, typename linalg_traits<L>::linalg_type());
+  }
+
+  template <typename L> inline
+  void fill(const L& l, typename gmm::linalg_traits<L>::value_type x) {
+    fill(linalg_const_cast(l), x);
+  }
+
+  template <typename L> inline // to be optimized for dense vectors ...
+  void fill(L& l,  typename gmm::linalg_traits<L>::value_type x,
+		   abstract_vector) {
+    for (size_type i = 0; i < vect_size(l); ++i) l[i] = x;
+  }
+
+  template <typename L> inline // to be optimized for dense matrices ...
+  void fill(L& l, typename gmm::linalg_traits<L>::value_type x,
+		   abstract_matrix) {
+    for (size_type i = 0; i < mat_nrows(l); ++i)
+      for (size_type j = 0; j < mat_ncols(l); ++j)
+	l(i,j) = x;
+  }
+
+  /** fill a vector or matrix with random value (uniform [-1,1]). */
+  template <typename L> inline void fill_random(L& l)
+  { fill_random(l, typename linalg_traits<L>::linalg_type()); }
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  template <typename L> inline void fill_random(const L& l) {
+    fill_random(linalg_const_cast(l),
+		typename linalg_traits<L>::linalg_type());
+  }
+
+  template <typename L> inline void fill_random(L& l, abstract_vector) {
+    for (size_type i = 0; i < vect_size(l); ++i)
+      l[i] = gmm::random(typename linalg_traits<L>::value_type());
+  }
+
+  template <typename L> inline void fill_random(L& l, abstract_matrix) {
+    for (size_type i = 0; i < mat_nrows(l); ++i)
+      for (size_type j = 0; j < mat_ncols(l); ++j)
+	l(i,j) = gmm::random(typename linalg_traits<L>::value_type());
+  }
+
+  ///@endcond
+  /** fill a vector or matrix with random value.
+      @param l a vector or matrix.
+      @param cfill probability of a non-zero value.
+  */
+  template <typename L> inline void fill_random(L& l, double cfill)
+  { fill_random(l, cfill, typename linalg_traits<L>::linalg_type()); }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  template <typename L> inline void fill_random(const L& l, double cfill) {
+    fill_random(linalg_const_cast(l), cfill,
+		typename linalg_traits<L>::linalg_type());
+  }
+
+  template <typename L> inline // sets about cfill * size currently-zero entries of l to random values
+  void fill_random(L& l, double cfill, abstract_vector) {
+    typedef typename linalg_traits<L>::value_type T;
+    size_type ntot = std::min(vect_size(l), // number of entries to set, capped at the size
+			      size_type(double(vect_size(l))*cfill) + 1);
+    for (size_type nb = 0; nb < ntot;) { // nb only advances when an entry is written
+      size_type i = gmm::irandom(vect_size(l));
+      if (l[i] == T(0)) { // entries that are already non-zero are left alone
+	l[i] = gmm::random(typename linalg_traits<L>::value_type());
+	++nb;
+      }
+    } // NOTE(review): cannot terminate if l has fewer than ntot zero entries - confirm callers pass a cleared vector
+  }
+
+  template <typename L> inline
+  void fill_random(L& l, double cfill, abstract_matrix) {
+    fill_random(l, cfill, typename principal_orientation_type<typename
+		linalg_traits<L>::sub_orientation>::potype());
+  }
+
+  template <typename L> inline
+  void fill_random(L& l, double cfill, row_major) {
+    for (size_type i=0; i < mat_nrows(l); ++i) fill_random(mat_row(l,i),cfill);
+  }
+
+  template <typename L> inline
+  void fill_random(L& l, double cfill, col_major) {
+    for (size_type j=0; j < mat_ncols(l); ++j) fill_random(mat_col(l,j),cfill);
+  }
+
+  /* resize a vector */
+  template <typename V> inline
+  void resize(V &v, size_type n, linalg_false)
+  { linalg_traits<V>::resize(v, n); }
+
+  template <typename V> inline
+  void resize(V &, size_type , linalg_modifiable)
+  { GMM_ASSERT1(false, "You cannot resize a reference"); }
+
+  template <typename V> inline
+  void resize(V &, size_type , linalg_const)
+  { GMM_ASSERT1(false, "You cannot resize a reference"); }
+
+  ///@endcond
+  /** resize a vector. */
+   template <typename V> inline
+  void resize(V &v, size_type n) {
+    resize(v, n, typename linalg_traits<V>::is_reference());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /** resize a matrix **/
+  template <typename M> inline
+  void resize(M &v, size_type m, size_type n, linalg_false) {
+    linalg_traits<M>::resize(v, m, n);
+  }
+
+  template <typename M> inline
+  void resize(M &, size_type, size_type, linalg_modifiable)
+  { GMM_ASSERT1(false, "You cannot resize a reference"); }
+
+  template <typename M> inline
+  void resize(M &, size_type, size_type, linalg_const)
+  { GMM_ASSERT1(false, "You cannot resize a reference"); }
+
+  ///@endcond 
+  /** resize a matrix */
+  template <typename M> inline
+  void resize(M &v, size_type m, size_type n)
+  { resize(v, m, n, typename linalg_traits<M>::is_reference()); }
+  ///@cond
+
+  template <typename M> inline
+  void reshape(M &v, size_type m, size_type n, linalg_false)
+  { linalg_traits<M>::reshape(v, m, n); }
+
+  template <typename M> inline
+  void reshape(M &, size_type, size_type, linalg_modifiable)
+  { GMM_ASSERT1(false, "You cannot reshape a reference"); }
+
+  template <typename M> inline
+  void reshape(M &, size_type, size_type, linalg_const)
+  { GMM_ASSERT1(false, "You cannot reshape a reference"); }
+
+  ///@endcond 
+  /** reshape a matrix */
+  template <typename M> inline
+  void reshape(M &v, size_type m, size_type n)
+  { reshape(v, m, n, typename linalg_traits<M>::is_reference()); }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  
+
+  /* ******************************************************************** */
+  /*		Scalar product                             		  */
+  /* ******************************************************************** */
+
+  ///@endcond
+  /** scalar product between two vectors */
+  template <typename V1, typename V2> inline
+  typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2) {
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch, "
+                << vect_size(v1) << " !=" << vect_size(v2));
+    return vect_sp(v1, v2,
+		   typename linalg_traits<V1>::storage_type(), 
+		   typename linalg_traits<V2>::storage_type());
+  }
+
+  /** scalar product between two vectors, using a matrix.
+      @param ps the matrix of the scalar product.
+      @param v1 the first vector
+      @param v2 the second vector
+  */
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp(const MATSP &ps, const V1 &v1, const V2 &v2) {
+    return vect_sp_with_mat(ps, v1, v2,
+			    typename linalg_traits<MATSP>::sub_orientation());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2, row_major) {
+    return vect_sp_with_matr(ps, v1, v2, 
+			     typename linalg_traits<V2>::storage_type());
+  }
+
+  // res = sum over the stored entries (i, x) of v2 of vect_sp(row i of ps, v1) * x.
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
+                      abstract_sparse) {
+    GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
+                vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
+    typename linalg_traits<V2>::const_iterator
+      it = vect_const_begin(v2), ite = vect_const_end(v2);
+    typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
+    for (; it != ite; ++it) // it.index() selects the row of ps matching this entry of v2
+      res += vect_sp(mat_const_row(ps, it.index()), v1)* (*it);
+    return res;
+  }
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
+		      abstract_skyline)
+  { return vect_sp_with_matr(ps, v1, v2, abstract_sparse()); }
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
+		      abstract_dense) {
+    GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
+                vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
+    typename linalg_traits<V2>::const_iterator
+      it = vect_const_begin(v2), ite = vect_const_end(v2);
+    typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
+    for (size_type i = 0; it != ite; ++i, ++it)
+      res += vect_sp(mat_const_row(ps, i), v1) * (*it);
+    return res;
+  }
+
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+  vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,row_and_col)
+  { return vect_sp_with_mat(ps, v1, v2, row_major()); }
+
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+  vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,col_major){
+    return vect_sp_with_matc(ps, v1, v2,
+			     typename linalg_traits<V1>::storage_type());
+  }
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
+		      abstract_sparse) {
+    GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
+                vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
+    typename linalg_traits<V1>::const_iterator
+      it = vect_const_begin(v1), ite = vect_const_end(v1);
+    typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
+    for (; it != ite; ++it)
+      res += vect_sp(mat_const_col(ps, it.index()), v2) * (*it);
+    return res;
+  }
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
+		      abstract_skyline)
+  { return vect_sp_with_matc(ps, v1, v2, abstract_sparse()); }
+
+  template <typename MATSP, typename V1, typename V2> inline
+    typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
+		      abstract_dense) {
+    GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
+                vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
+    typename linalg_traits<V1>::const_iterator
+      it = vect_const_begin(v1), ite = vect_const_end(v1);
+    typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
+    for (size_type i = 0; it != ite; ++i, ++it)
+      res += vect_sp(mat_const_col(ps, i), v2) * (*it);
+    return res;
+  }
+
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+  vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,col_and_row)
+  { return vect_sp_with_mat(ps, v1, v2, col_major()); }
+
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+  vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,
+		   abstract_null_type) {
+    typename temporary_vector<V1>::vector_type w(mat_nrows(ps));
+    GMM_WARNING2("Warning, a temporary is used in scalar product\n");
+    mult(ps, v1, w); 
+    return vect_sp(w, v2);
+  }
+
+  template <typename IT1, typename IT2> inline // sum of (*it) * (*it2) while it walks [it, ite)
+  typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
+                                  typename std::iterator_traits<IT2>::value_type>::T
+  vect_sp_dense_(IT1 it, IT1 ite, IT2 it2) {
+    typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
+      typename std::iterator_traits<IT2>::value_type>::T acc(0);
+    while (it != ite) { acc += (*it) * (*it2); ++it; ++it2; }
+    return acc;
+  }
+  
+  template <typename IT1, typename V> inline
+    typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
+				    typename linalg_traits<V>::value_type>::T
+    vect_sp_sparse_(IT1 it, IT1 ite, const V &v) {
+      typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
+	typename linalg_traits<V>::value_type>::T res(0);
+    for (; it != ite; ++it) res += (*it) * v[it.index()];
+    return res;
+  }
+
+  template <typename V1, typename V2> inline
+  typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_dense) {
+    return vect_sp_dense_(vect_const_begin(v1), vect_const_end(v1),
+			  vect_const_begin(v2));
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_dense) {
+    typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
+      ite =  vect_const_end(v1);
+    typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2);
+    return vect_sp_dense_(it1, ite, it2 + it1.index());
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_skyline) {
+    typename linalg_traits<V2>::const_iterator it1 = vect_const_begin(v2),
+      ite =  vect_const_end(v2);
+    typename linalg_traits<V1>::const_iterator it2 = vect_const_begin(v1);
+    return vect_sp_dense_(it1, ite, it2 + it1.index());
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_skyline) {
+    typedef typename strongest_value_type<V1,V2>::value_type T;
+    auto it1 = vect_const_begin(v1), ite1 =  vect_const_end(v1);
+    auto it2 = vect_const_begin(v2), ite2 =  vect_const_end(v2);
+    size_type n = std::min(ite1.index(), ite2.index());
+    size_type l = std::max(it1.index(), it2.index());
+
+    if (l < n) {
+      size_type m = l - it1.index(), p = l - it2.index(), q = m + n - l;
+      return vect_sp_dense_(it1+m, it1+q, it2 + p);
+    }
+    return T(0);
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+  vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_dense) {
+    return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_sparse, abstract_skyline) {
+    return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_sparse) {
+    return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2, abstract_dense,abstract_sparse) {
+    return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
+  }
+
+
+  template <typename V1, typename V2> inline
+  typename strongest_value_type<V1,V2>::value_type
+  vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_true) { // linalg_true: both vectors report index_sorted
+    typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
+      ite1 = vect_const_end(v1);
+    typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2),
+      ite2 = vect_const_end(v2);
+    typename strongest_value_type<V1,V2>::value_type res(0);
+    // Walk both entry lists together; only indices present in both contribute.
+    while (it1 != ite1 && it2 != ite2) {
+      if (it1.index() == it2.index())
+	{ res += (*it1) * *it2; ++it1; ++it2; }
+      else if (it1.index() < it2.index()) ++it1; else ++it2; // advance whichever side is behind
+    }
+    return res;
+  }
+
+  template <typename V1, typename V2> inline
+  typename strongest_value_type<V1,V2>::value_type
+  vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_false) {
+    return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
+  }
+
+  template <typename V1, typename V2> inline
+    typename strongest_value_type<V1,V2>::value_type
+    vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_sparse) {
+    return vect_sp_sparse_sparse(v1, v2,
+	    typename linalg_and<typename linalg_traits<V1>::index_sorted,
+	    typename linalg_traits<V2>::index_sorted>::bool_type());
+  }
+  
+  /* ******************************************************************** */
+  /*		Hermitian product                             		  */
+  /* ******************************************************************** */
+  ///@endcond
+  /** Hermitian product. */
+  template <typename V1, typename V2>
+  inline typename strongest_value_type<V1,V2>::value_type
+  vect_hp(const V1 &v1, const V2 &v2)
+  { return vect_sp(v1, conjugated(v2)); }
+
+  /** Hermitian product with a matrix. */
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+    vect_hp(const MATSP &ps, const V1 &v1, const V2 &v2) {
+    return vect_sp(ps, v1, gmm::conjugated(v2));
+  }
+
+  /* ******************************************************************** */
+  /*		Trace of a matrix                             		  */
+  /* ******************************************************************** */
+  
+  /** Trace of a matrix: sum of the diagonal entries m(i,i), with i ranging
+      below min(nrows, ncols). */
+   template <typename M>
+   typename linalg_traits<M>::value_type
+   mat_trace(const M &m) {
+     const size_type ndiag = std::min(mat_nrows(m), mat_ncols(m));
+     typename linalg_traits<M>::value_type sum(0);
+     for (size_type k = 0; k != ndiag; ++k) sum += m(k,k);
+     return sum;
+  }
+
+  /* ******************************************************************** */
+  /*		Euclidean norm                             		  */
+  /* ******************************************************************** */
+
+  /** squared Euclidean norm of a vector: the sum of gmm::abs_sqr over the
+      entries visited by the vector's const iterator. */
+  template <typename V>
+  typename number_traits<typename linalg_traits<V>::value_type>
+  ::magnitude_type
+  vect_norm2_sqr(const V &v) {
+    typename number_traits<typename linalg_traits<V>::value_type>
+      ::magnitude_type acc(0);
+    for (auto cur = vect_const_begin(v), last = vect_const_end(v);
+         cur != last; ++cur) acc += gmm::abs_sqr(*cur);
+    return acc;
+  }
+
+  /** Euclidean norm of a vector. */
+  template <typename V> inline
+   typename number_traits<typename linalg_traits<V>::value_type>
+   ::magnitude_type 
+  vect_norm2(const V &v)
+  { return sqrt(vect_norm2_sqr(v)); }
+  
+
+  /** squared Euclidean distance between two vectors (result type is the magnitude type of V1's values) */
+  template <typename V1, typename V2> inline
+   typename number_traits<typename linalg_traits<V1>::value_type>
+   ::magnitude_type
+  vect_dist2_sqr(const V1 &v1, const V2 &v2) { // not fully optimized 
+    typedef typename linalg_traits<V1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    auto it1 = vect_const_begin(v1), ite1 = vect_const_end(v1);
+    auto it2 = vect_const_begin(v2), ite2 = vect_const_end(v2);
+    size_type k1(0), k2(0); // number of entries consumed so far on each side
+    R res(0);
+    while (it1 != ite1 && it2 != ite2) { // walk both entry lists together, ordered by index
+      size_type i1 = index_of_it(it1, k1, // index_of_it is defined elsewhere; presumably the entry's position - confirm
+				 typename linalg_traits<V1>::storage_type());
+      size_type i2 = index_of_it(it2, k2,
+				 typename linalg_traits<V2>::storage_type());
+      // same index: squared difference; otherwise the lone entry counts in full
+      if (i1 == i2) {
+	res += gmm::abs_sqr(*it2 - *it1); ++it1; ++k1; ++it2; ++k2;
+      }
+      else if (i1 < i2) {
+	res += gmm::abs_sqr(*it1); ++it1; ++k1; 
+      }
+      else  {
+	res += gmm::abs_sqr(*it2); ++it2; ++k2; 
+      }
+    }
+    while (it1 != ite1) { res += gmm::abs_sqr(*it1); ++it1; } // leftover entries of v1
+    while (it2 != ite2) { res += gmm::abs_sqr(*it2); ++it2; } // leftover entries of v2
+    return res;
+  }
+ 
+  /** Euclidean distance between two vectors: sqrt of vect_dist2_sqr(v1, v2). */
+  template <typename V1, typename V2> inline
+  typename number_traits<typename linalg_traits<V1>::value_type>::magnitude_type
+  vect_dist2(const V1 &v1, const V2 &v2) {
+    return sqrt(vect_dist2_sqr(v1, v2));
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Row-oriented variant: adds up vect_norm2_sqr of every row of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_euclidean_norm_sqr(const M &m, row_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type total(0);
+    for (size_type r = 0; r < mat_nrows(m); ++r)
+      total += vect_norm2_sqr(mat_const_row(m, r));
+    return total;
+  }
+
+  // Column-oriented variant: adds up vect_norm2_sqr of every column of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_euclidean_norm_sqr(const M &m, col_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type total(0);
+    for (size_type c = 0; c < mat_ncols(m); ++c)
+      total += vect_norm2_sqr(mat_const_col(m, c));
+    return total;
+  }
+  ///@endcond
+  /** Squared Euclidean norm of a matrix.
+      Dispatches on the principal orientation of M's sub_orientation. */
+  template <typename M> inline
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_euclidean_norm_sqr(const M &m) {
+    typedef typename linalg_traits<M>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient;
+    return mat_euclidean_norm_sqr(m, orient());
+  }
+
+  /** Euclidean norm of a matrix: gmm::sqrt of mat_euclidean_norm_sqr(m). */
+  template <typename M> inline
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_euclidean_norm(const M &m) {
+    return gmm::sqrt(mat_euclidean_norm_sqr(m));
+  }
+
+  /* ******************************************************************** */
+  /*		vector norm1                                    	  */
+  /* ******************************************************************** */
+  /** 1-norm of a vector: sum of gmm::abs over the entries visited in v. */
+  template <typename V>
+  typename number_traits<typename linalg_traits<V>::value_type>::magnitude_type
+  vect_norm1(const V &v) {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    auto cur = vect_const_begin(v), last = vect_const_end(v);
+    R total(0);
+    while (cur != last) { total += gmm::abs(*cur); ++cur; }
+    return total;
+  }
+
+  /* ******************************************************************** */
+  /*		vector Infinity norm                              	  */
+  /* ******************************************************************** */
+  /** Infinity norm of a vector: largest gmm::abs among the entries of v. */
+  template <typename V>
+  typename number_traits<typename linalg_traits<V>::value_type>::magnitude_type
+  vect_norminf(const V &v) {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    auto cur = vect_const_begin(v), last = vect_const_end(v);
+    R peak(0);
+    while (cur != last) { peak = std::max(peak, gmm::abs(*cur)); ++cur; }
+    return peak;
+  }
+
+  /* ******************************************************************** */
+  /*		matrix norm1                                    	  */
+  /* ******************************************************************** */
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Column-oriented variant: the largest vect_norm1 among the columns of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norm1(const M &m, col_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type peak(0);
+    for (size_type c = 0; c < mat_ncols(m); ++c)
+      peak = std::max(peak, vect_norm1(mat_const_col(m, c)));
+    return peak;
+  }
+
+  // Row-oriented variant: per-index sums of gmm::abs are built row by row.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norm1(const M &m, row_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename linalg_traits<M>::storage_type store_type;
+    typedef typename linalg_traits<M>::const_sub_row_type row_type;
+    std::vector<R> col_sums(mat_ncols(m));
+    for (size_type r = 0; r < mat_nrows(m); ++r) {
+      row_type cur_row = mat_const_row(m, r);
+      auto e = vect_const_begin(cur_row), e_end = vect_const_end(cur_row);
+      for (size_type pos = 0; e != e_end; ++e, ++pos)
+        col_sums[index_of_it(e, pos, store_type())] += gmm::abs(*e);
+    }
+    return vect_norminf(col_sums);
+  }
+
+  // col_and_row orientation: served by the col_major implementation
+  // (m is forwarded with a col_major tag).
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norm1(const M &m, col_and_row) { return mat_norm1(m, col_major()); }
+
+  // row_and_col orientation: served by the col_major implementation
+  // (m is forwarded with a col_major tag).
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norm1(const M &m, row_and_col) { return mat_norm1(m, col_major()); }
+  ///@endcond
+  /** 1-norm of a matrix (dispatch on linalg_traits<M>::sub_orientation). */
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norm1(const M &m) {
+    typedef typename linalg_traits<M>::sub_orientation orient;
+    return mat_norm1(m, orient());
+  }
+
+
+  /* ******************************************************************** */
+  /*		matrix Infinity norm                              	  */
+  /* ******************************************************************** */
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Row-oriented variant: the largest vect_norm1 among the rows of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norminf(const M &m, row_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type peak(0);
+    for (size_type r = 0; r < mat_nrows(m); ++r)
+      peak = std::max(peak, vect_norm1(mat_const_row(m, r)));
+    return peak;
+  }
+
+  // Column-oriented variant: per-index sums of gmm::abs, column by column.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norminf(const M &m, col_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename linalg_traits<M>::storage_type store_type;
+    typedef typename linalg_traits<M>::const_sub_col_type col_type;
+    std::vector<R> row_sums(mat_nrows(m));
+    for (size_type c = 0; c < mat_ncols(m); ++c) {
+      col_type cur_col = mat_const_col(m, c);
+      auto e = vect_const_begin(cur_col), e_end = vect_const_end(cur_col);
+      for (size_type pos = 0; e != e_end; ++e, ++pos)
+        row_sums[index_of_it(e, pos, store_type())] += gmm::abs(*e);
+    }
+    return vect_norminf(row_sums);
+  }
+
+  // col_and_row orientation: served by the row_major implementation
+  // (m is forwarded with a row_major tag).
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norminf(const M &m, col_and_row) { return mat_norminf(m, row_major()); }
+
+  // row_and_col orientation: served by the row_major implementation
+  // (m is forwarded with a row_major tag).
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norminf(const M &m, row_and_col) { return mat_norminf(m, row_major()); }
+  ///@endcond
+  /** Infinity-norm of a matrix (dispatch on M's sub_orientation). */
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_norminf(const M &m) {
+    typedef typename linalg_traits<M>::sub_orientation orient;
+    return mat_norminf(m, orient());
+  }
+
+  /* ******************************************************************** */
+  /*		Max norm for matrices                              	  */
+  /* ******************************************************************** */
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Row-oriented variant: the largest vect_norminf among the rows of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_maxnorm(const M &m, row_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type peak(0);
+    for (size_type r = 0; r < mat_nrows(m); ++r)
+      peak = std::max(peak, vect_norminf(mat_const_row(m, r)));
+    return peak;
+  }
+
+  // Column-oriented variant: the largest vect_norminf among the columns of m.
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_maxnorm(const M &m, col_major) {
+    typedef typename linalg_traits<M>::value_type T;
+    typename number_traits<T>::magnitude_type peak(0);
+    for (size_type c = 0; c < mat_ncols(m); ++c)
+      peak = std::max(peak, vect_norminf(mat_const_col(m, c)));
+    return peak;
+  }
+  ///@endcond
+  /** Max-norm of a matrix.
+      Dispatches on the principal orientation of M's sub_orientation. */
+  template <typename M>
+  typename number_traits<typename linalg_traits<M>::value_type>::magnitude_type
+  mat_maxnorm(const M &m) {
+    typedef typename linalg_traits<M>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient;
+    return mat_maxnorm(m, orient());
+  }
+
+  /* ******************************************************************** */
+  /*		Clean                                    		  */
+  /* ******************************************************************** */
+  /** Clean a vector or matrix (replace near-zero entries with zeroes).   */
+  
+  template <typename L> inline void clean(L &l, double threshold);
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Dense storage: entries with gmm::abs below threshold are set to T(0).
+  template <typename L, typename T>
+  void clean(L &l, double threshold, abstract_dense, T) {
+    typedef typename number_traits<T>::magnitude_type R;
+    for (auto e = vect_begin(l), e_end = vect_end(l); e != e_end; ++e)
+      if (gmm::abs(*e) < R(threshold)) *e = T(0);
+  }
+
+  // Skyline storage is cleaned with the dense algorithm.
+  template <typename L, typename T> void
+  clean(L &l, double threshold, abstract_skyline, T)
+  { gmm::clean(l, threshold, abstract_dense(), T()); }
+  // Sparse storage: small entries are listed first, then zeroed via l[...].
+  template <typename L, typename T>
+  void clean(L &l, double threshold, abstract_sparse, T) {
+    typedef typename number_traits<T>::magnitude_type R;
+    std::vector<size_type> small;
+    for (auto e = vect_begin(l), e_end = vect_end(l); e != e_end; ++e)
+      if (gmm::abs(*e) < R(threshold)) small.push_back(e.index());
+    for (size_type idx : small) l[idx] = T(0);
+  }
+  
+  // Dense complex storage: real and imaginary parts are cleaned separately.
+  template <typename L, typename T>
+  void clean(L &l, double threshold, abstract_dense, std::complex<T>) {
+    for (auto e = vect_begin(l), e_end = vect_end(l); e != e_end; ++e) {
+      if (gmm::abs((*e).real()) < T(threshold))
+        *e = std::complex<T>(T(0), (*e).imag());
+      if (gmm::abs((*e).imag()) < T(threshold))
+        *e = std::complex<T>((*e).real(), T(0));
+    }
+  }
+
+  // Skyline complex storage is cleaned with the dense complex algorithm.
+  template <typename L, typename T> void
+  clean(L &l, double threshold, abstract_skyline, std::complex<T>)
+  { gmm::clean(l, threshold, abstract_dense(), std::complex<T>()); }
+  // Sparse complex storage: a part below threshold is zeroed in place; when
+  // both parts are small the index is recorded and reset through l[...] later.
+  template <typename L, typename T>
+  void clean(L &l, double threshold, abstract_sparse, std::complex<T>) {
+    std::vector<size_type> both_small;
+    for (auto e = vect_begin(l), e_end = vect_end(l); e != e_end; ++e) {
+      const bool re_small = gmm::abs((*e).real()) < T(threshold);
+      const bool im_small = gmm::abs((*e).imag()) < T(threshold);
+      if (re_small && im_small) both_small.push_back(e.index());
+      else if (re_small) *e = std::complex<T>(T(0), (*e).imag());
+      else if (im_small) *e = std::complex<T>((*e).real(), T(0));
+    }
+    for (size_type idx : both_small) l[idx] = std::complex<T>(T(0),T(0));
+  }
+
+  template <typename L> inline
+  void clean(L &l, double threshold, abstract_vector) {
+    typedef typename linalg_traits<L>::storage_type store_type;
+    typedef typename linalg_traits<L>::value_type T;
+    gmm::clean(l, threshold, store_type(), T()); // vectors: tag dispatch
+  }
+  template <typename L> inline void clean(const L &l, double threshold);
+
+  template <typename L> void clean(L &l, double threshold, row_major) {
+    size_type r = 0;      // every row is cleaned as a vector
+    while (r < mat_nrows(l)) { gmm::clean(mat_row(l, r), threshold); ++r; }
+  }
+
+  template <typename L> void clean(L &l, double threshold, col_major) {
+    size_type c = 0;      // every column is cleaned as a vector
+    while (c < mat_ncols(l)) { gmm::clean(mat_col(l, c), threshold); ++c; }
+  }
+
+  template <typename L> inline
+  void clean(L &l, double threshold, abstract_matrix) {
+    // Matrices: dispatch on the principal orientation of L.
+    typedef typename linalg_traits<L>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient;
+    gmm::clean(l, threshold, orient());
+  }
+  template <typename L> inline        // entry point: linalg_type dispatch
+  void clean(L &l, double threshold)
+  { clean(l, threshold, typename linalg_traits<L>::linalg_type()); }
+  template <typename L> inline   // const overload: forwards linalg_const_cast(l)
+  void clean(const L &l, double threshold)
+  { gmm::clean(linalg_const_cast(l), threshold); }
+  /* ******************************************************************** */
+  /*		Copy                                    		  */
+  /* ******************************************************************** */
+  ///@endcond
+  /** Copy vectors or matrices. Nothing is done when l1 and l2 have the same
+      address; a warning is emitted when same_origin(l1,l2) holds.
+      @param l1 source vector or matrix.
+      @param l2 destination.
+  */
+  template <typename L1, typename L2> inline
+  void copy(const L1& l1, L2& l2) {
+    if ((const void *)(&l1) == (const void *)(&l2)) return;
+    if (same_origin(l1,l2))
+      GMM_WARNING2("Warning : a conflict is possible in copy\n");
+    typedef typename linalg_traits<L1>::linalg_type kind1;
+    typedef typename linalg_traits<L2>::linalg_type kind2;
+    copy(l1, l2, kind1(), kind2());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Const-destination overload: forwards linalg_const_cast(l2) to copy.
+  template <typename L1, typename L2> inline void
+  copy(const L1& l1, const L2& l2) { copy(l1, linalg_const_cast(l2)); }
+  template <typename L1, typename L2> inline
+  void copy(const L1& l1, L2& l2, abstract_vector, abstract_vector) {
+    typedef typename linalg_traits<L1>::storage_type src_store;
+    typedef typename linalg_traits<L2>::storage_type dst_store;
+    GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, "
+                << vect_size(l1) << " !=" << vect_size(l2));
+    copy_vect(l1, l2, src_store(), dst_store()); // storage-type dispatch
+  }
+  // Matrix copy: an empty l1 returns at once, before the dimension check.
+  template <typename L1, typename L2> inline
+  void copy(const L1& l1, L2& l2, abstract_matrix, abstract_matrix) {
+    const size_type m = mat_nrows(l1), n = mat_ncols(l1);
+    if (m == 0 || n == 0) return;
+    GMM_ASSERT2(n==mat_ncols(l2) && m==mat_nrows(l2), "dimensions mismatch");
+    copy_mat(l1, l2, typename linalg_traits<L1>::sub_orientation(),
+             typename linalg_traits<L2>::sub_orientation());
+  }
+  // Const-destination overload: v2 is const_cast to V2& and forwarded.
+  template <typename V1, typename V2, typename C1, typename C2>
+  inline void copy_vect(const V1 &v1, const V2 &v2, C1, C2)
+  { copy_vect(v1, const_cast<V2 &>(v2), C1(), C2()); }
+
+  // Copies l1 into l2 one row at a time; the row count is read from l1.
+  template <typename L1, typename L2>
+  void copy_mat_by_row(const L1& l1, L2& l2) {
+    for (size_type r = 0, row_count = mat_nrows(l1); r < row_count; ++r)
+      copy(mat_const_row(l1, r), mat_row(l2, r));
+  }
+
+  // Copies l1 into l2 one column at a time; the column count is read
+  // from l1 once, before the loop.
+  template <typename L1, typename L2>
+  void copy_mat_by_col(const L1 &l1, L2 &l2) {
+    for (size_type c = 0, col_count = mat_ncols(l1); c < col_count; ++c)
+      copy(mat_const_col(l1, c), mat_col(l2, c));
+  }
+
+  // row_major -> row_major: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_major, row_major)
+  { copy_mat_by_row(l1, l2); }
+  // row_major -> row_and_col: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_major, row_and_col)
+  { copy_mat_by_row(l1, l2); }
+  // row_and_col -> row_and_col: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_and_col, row_and_col)
+  { copy_mat_by_row(l1, l2); }
+  // row_and_col -> row_major: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_and_col, row_major)
+  { copy_mat_by_row(l1, l2); }
+  // col_and_row -> row_major: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_and_row, row_major)
+  { copy_mat_by_row(l1, l2); }
+  // row_major -> col_and_row: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_major, col_and_row)
+  { copy_mat_by_row(l1, l2); }
+  // col_and_row -> row_and_col: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_and_row, row_and_col)
+  { copy_mat_by_row(l1, l2); }
+  // row_and_col -> col_and_row: forwarded to copy_mat_by_row.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_and_col, col_and_row)
+  { copy_mat_by_row(l1, l2); }
+  // col_major -> col_major: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_major, col_major)
+  { copy_mat_by_col(l1, l2); }
+  // col_major -> col_and_row: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_major, col_and_row)
+  { copy_mat_by_col(l1, l2); }
+  // col_major -> row_and_col: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_major, row_and_col)
+  { copy_mat_by_col(l1, l2); }
+  // row_and_col -> col_major: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, row_and_col, col_major)
+  { copy_mat_by_col(l1, l2); }
+  // col_and_row -> col_major: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_and_row, col_major)
+  { copy_mat_by_col(l1, l2); }
+  // col_and_row -> col_and_row: forwarded to copy_mat_by_col.
+  template <typename L1, typename L2>
+  inline void copy_mat(const L1& l1, L2& l2, col_and_row, col_and_row)
+  { copy_mat_by_col(l1, l2); }
+  template <typename L1, typename L2> inline
+  void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
+    typedef typename linalg_traits<L1>::storage_type store_type;
+    copy_mat_mixed_rc(l1, l2, i, store_type()); // storage-type dispatch
+  }
+  // Sparse source: each visited entry is assigned to l2(i, its index).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(i, e.index()) = *e; ++e; }
+  }
+
+  // Skyline source: each visited entry is assigned to l2(i, its index).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(i, e.index()) = *e; ++e; }
+  }
+
+  // Dense source: the k-th visited entry of l1 is assigned to l2(i, k).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type col = 0; e != e_end; ++col, ++e) l2(i, col) = *e;
+  }
+  template <typename L1, typename L2> inline
+  void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
+    typedef typename linalg_traits<L1>::storage_type store_type;
+    copy_mat_mixed_cr(l1, l2, i, store_type()); // storage-type dispatch
+  }
+  // Sparse source: each visited entry is assigned to l2(its index, i).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(e.index(), i) = *e; ++e; }
+  }
+  // Skyline source: each visited entry is assigned to l2(its index, i).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(e.index(), i) = *e; ++e; }
+  }
+  // Dense source: the k-th visited entry of l1 is assigned to l2(k, i).
+  template <typename L1, typename L2>
+  void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type row = 0; e != e_end; ++row, ++e) l2(row, i) = *e;
+  }
+  // row_major -> col_major: l2 is cleared, then filled from each row of l1.
+  template <typename L1, typename L2>
+  void copy_mat(const L1& l1, L2& l2, row_major, col_major) {
+    clear(l2);
+    for (size_type r = 0, row_count = mat_nrows(l1); r < row_count; ++r)
+      copy_mat_mixed_rc(mat_const_row(l1, r), l2, r);
+  }
+  
+  // col_major -> row_major: l2 is cleared, then filled from each column of l1.
+  template <typename L1, typename L2>
+  void copy_mat(const L1& l1, L2& l2, col_major, row_major) {
+    clear(l2);
+    for (size_type c = 0, col_count = mat_ncols(l1); c < col_count; ++c)
+      copy_mat_mixed_cr(mat_const_col(l1, c), l2, c);
+  }
+  
+  template <typename L1, typename L2> inline   // dense -> dense: std::copy
+  void copy_vect(const L1 &l1, L2 &l2, abstract_dense, abstract_dense) {
+    auto first = vect_const_begin(l1), last = vect_const_end(l1);
+    std::copy(first, last, vect_begin(l2));
+  }
+  // Skyline to skyline: l2 is cleared, then receives the span of l1 that
+  // runs from its first to its last non-zero entry (nothing if l1 is zero).
+  template <typename L1, typename L2> inline // to be optimised ?
+  void copy_vect(const L1 &l1, L2 &l2, abstract_skyline, abstract_skyline) {
+    auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
+    while (it1 != ite1 && *it1 == typename linalg_traits<L1>::value_type(0))
+      ++it1; // skip leading zeros
+    // Cleared unconditionally so that an all-zero l1 really zeroes l2.
+    clear(l2);
+    if (ite1 - it1 > 0) {
+      auto it2 = vect_begin(l2), ite2 = vect_end(l2);
+      while (*(ite1-1) == typename linalg_traits<L1>::value_type(0)) ite1--;
+      if (it2 == ite2) { // l2 has an empty entry range
+        l2[it1.index()] = *it1; ++it1; // first kept entry
+        l2[ite1.index()-1] = *(ite1-1); --ite1; // last kept entry
+        if (it1 < ite1) // entries in between, if any
+          { it2 = vect_begin(l2); ++it2; std::copy(it1, ite1, it2); }
+      }
+      else {
+        ptrdiff_t m = it1.index() - it2.index(); // offset of l1's span in l2
+        if (m >= 0 && ite1.index() <= ite2.index()) // span fits l2's range
+          std::copy(it1, ite1, it2 + m);
+        else { // write the overhanging end(s) via l2[...], then re-read l2
+          if (m < 0) l2[it1.index()] = *it1;
+          if (ite1.index() > ite2.index()) l2[ite1.index()-1] = *(ite1-1);
+          it2 = vect_begin(l2); ite2 = vect_end(l2);
+          m = it1.index() - it2.index();
+          std::copy(it1, ite1, it2 + m);
+        }
+      }
+    }
+  }
+  // Sparse to dense: l2 is cleared, then each visited entry is written at
+  // its index.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
+    clear(l2);
+    for (auto e = vect_const_begin(l1); e != vect_const_end(l1); ++e) l2[e.index()] = *e;
+  }
+  // Sparse to skyline: l2 is cleared, then each visited entry is written.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
+    clear(l2);
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2[e.index()] = *e; ++e; }
+  }
+  // Skyline to dense: zeros up to the first index of l1, then the entries
+  // of l1, then zeros up to the end of l2. An empty l1 just clears l2.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
+    typedef typename linalg_traits<L1>::value_type T;
+    auto src = vect_const_begin(l1), src_end = vect_const_end(l1);
+    if (src == src_end) { gmm::clear(l2); return; }
+
+    auto dst = vect_begin(l2), dst_end = vect_end(l2);
+    const size_type first = src.index();
+    // zero prefix, entries of l1, zero suffix
+    for (size_type k = 0; k < first; ++k, ++dst) *dst = T(0);
+    for (; src != src_end; ++src, ++dst) *dst = *src;
+    for (; dst != dst_end; ++dst) *dst = T(0);
+  }
+    
+  // Sparse to sparse: l2 is cleared and receives the entries of l1 that
+  // compare different from zero; zero-valued entries are not written.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
+    typedef typename linalg_traits<L1>::value_type T;
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    clear(l2);
+    while (e != e_end) {
+      if (*e != T(0)) l2[e.index()] = *e;
+      ++e;
+    }
+  }
+  
+  // Dense to sparse: l2 is cleared, then only non-zero entries are written.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
+    typedef typename linalg_traits<L1>::value_type T;
+    clear(l2);
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type pos = 0; e != e_end; ++e, ++pos)
+      if (*e != T(0)) l2[pos] = *e;
+  }
+  // Dense to skyline: l2 is cleared, then only non-zero entries are written.
+  template <typename L1, typename L2> // to be optimised ...
+  void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
+    typedef typename linalg_traits<L1>::value_type T;
+    clear(l2);
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type pos = 0; e != e_end; ++e, ++pos)
+      if (*e != T(0)) l2[pos] = *e;
+  }
+  
+  // Skyline to sparse: l2 is cleared, then only non-zero entries are written.
+  template <typename L1, typename L2>
+  void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
+    typedef typename linalg_traits<L1>::value_type T;
+    clear(l2);
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (; e != e_end; ++e)
+      if (*e != T(0)) l2[e.index()] = *e;
+  }
+  /* ******************************************************************** */
+  /*   Matrix and vector addition                                         */
+  /*   algorithms are built in order to avoid some conflicts with         */
+  /*   repeated arguments or with overlapping parts of the same object.   */
+  /*   In the latter case, conflicts are still possible.                  */
+  /* ******************************************************************** */
+  ///@endcond
+  /** Add two vectors or matrices; dispatches on the kind of l2.
+      @param l1 operand added to l2.
+      @param l2 contains on output, l2+l1. */
+  template <typename L1, typename L2> inline
+  void add(const L1& l1, L2& l2) {
+    typedef typename linalg_traits<L2>::linalg_type kind;
+    add_spec(l1, l2, kind());
+  }
+  ///@cond
+
+  // Const-destination overload: forwards linalg_const_cast(l2) to add.
+  template <typename L1, typename L2> inline void
+  add(const L1& l1, const L2& l2) { add(l1, linalg_const_cast(l2)); }
+  template <typename L1, typename L2> inline
+  void add_spec(const L1& l1, L2& l2, abstract_vector) {
+    typedef typename linalg_traits<L1>::storage_type store1;
+    typedef typename linalg_traits<L2>::storage_type store2;
+    GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, "
+                << vect_size(l1) << " !=" << vect_size(l2));
+    add(l1, l2, store1(), store2()); // vectors: storage-type dispatch
+  }
+  template <typename L1, typename L2> inline
+  void add_spec(const L1& l1, L2& l2, abstract_matrix) {
+    typedef typename linalg_traits<L1>::sub_orientation orient1;
+    typedef typename linalg_traits<L2>::sub_orientation orient2;
+    GMM_ASSERT2(mat_nrows(l1)==mat_nrows(l2) && mat_ncols(l1)==mat_ncols(l2),
+                "dimensions mismatch l1 is " << mat_nrows(l1) << "x"
+                << mat_ncols(l1) << " and l2 is " << mat_nrows(l2)
+                << "x" << mat_ncols(l2));
+    add(l1, l2, orient1(), orient2()); // matrices: orientation dispatch
+  }
+  // row_major into row_major: rows are paired up and added one by one.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, row_major, row_major) {
+    auto src = mat_row_begin(l1), src_end = mat_row_end(l1);
+    auto dst = mat_row_begin(l2);
+    for ( ; src != src_end; ++src, ++dst)
+      add(linalg_traits<L1>::row(src), linalg_traits<L2>::row(dst));
+  }
+  // col_major into col_major: columns are paired up and added one by one.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, col_major, col_major) {
+    auto src = mat_col_const_begin(l1), src_end = mat_col_const_end(l1);
+    typename linalg_traits<L2>::col_iterator dst = mat_col_begin(l2);
+    for ( ; src != src_end; ++src, ++dst)
+      add(linalg_traits<L1>::col(src), linalg_traits<L2>::col(dst));
+  }
+  template <typename L1, typename L2> inline
+  void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
+    typedef typename linalg_traits<L1>::storage_type store_type;
+    add_mat_mixed_rc(l1, l2, i, store_type()); // storage-type dispatch
+  }
+  // Sparse source: each visited entry is added to l2(i, its index).
+  template <typename L1, typename L2>
+  void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(i, e.index()) += *e; ++e; }
+  }
+  // Skyline source: each visited entry is added to l2(i, its index).
+  template <typename L1, typename L2>
+  void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(i, e.index()) += *e; ++e; }
+  }
+  // Dense source: the k-th visited entry of l1 is added to l2(i, k).
+  template <typename L1, typename L2>
+  void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type col = 0; e != e_end; ++col, ++e) l2(i, col) += *e;
+  }
+  template <typename L1, typename L2> inline
+  void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
+    typedef typename linalg_traits<L1>::storage_type store_type;
+    add_mat_mixed_cr(l1, l2, i, store_type()); // storage-type dispatch
+  }
+  // Sparse source: each visited entry is added to l2(its index, i).
+  template <typename L1, typename L2>
+  void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(e.index(), i) += *e; ++e; }
+  }
+  // Skyline source: each visited entry is added to l2(its index, i).
+  template <typename L1, typename L2>
+  void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    while (e != e_end) { l2(e.index(), i) += *e; ++e; }
+  }
+  // Dense source: the k-th visited entry of l1 is added to l2(k, i).
+  template <typename L1, typename L2>
+  void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
+    auto e = vect_const_begin(l1), e_end = vect_const_end(l1);
+    for (size_type row = 0; e != e_end; ++row, ++e) l2(row, i) += *e;
+  }
+  // row_major into col_major: each row of l1 is added by add_mat_mixed_rc.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, row_major, col_major) {
+    for (size_type r = 0, row_count = mat_nrows(l1); r < row_count; ++r)
+      add_mat_mixed_rc(mat_const_row(l1, r), l2, r);
+  }
+  
+  // col_major into row_major: each column of l1 is added by add_mat_mixed_cr.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, col_major, row_major) {
+    for (size_type c = 0, col_count = mat_ncols(l1); c < col_count; ++c)
+      add_mat_mixed_cr(mat_const_col(l1, c), l2, c);
+  }
+
+  // row_and_col into row_major: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_and_col, row_major)
+  { add(l1, l2, row_major(), row_major()); }
+  // row_and_col into row_and_col: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_and_col, row_and_col)
+  { add(l1, l2, row_major(), row_major()); }
+  // row_and_col into col_and_row: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_and_col, col_and_row)
+  { add(l1, l2, row_major(), row_major()); }
+  // col_and_row into row_and_col: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_and_row, row_and_col)
+  { add(l1, l2, row_major(), row_major()); }
+  // row_major into row_and_col: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_major, row_and_col)
+  { add(l1, l2, row_major(), row_major()); }
+  // col_and_row into row_major: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_and_row, row_major)
+  { add(l1, l2, row_major(), row_major()); }
+  // row_major into col_and_row: forwarded to the row_major/row_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_major, col_and_row)
+  { add(l1, l2, row_major(), row_major()); }
+  // row_and_col into col_major: forwarded to the col_major/col_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, row_and_col, col_major)
+  { add(l1, l2, col_major(), col_major()); }
+  // col_major into row_and_col: forwarded to the col_major/col_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_major, row_and_col)
+  { add(l1, l2, col_major(), col_major()); }
+  // col_and_row into col_major: forwarded to the col_major/col_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_and_row, col_major)
+  { add(l1, l2, col_major(), col_major()); }
+  // col_and_row into col_and_row: forwarded to the col_major/col_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_and_row, col_and_row)
+  { add(l1, l2, col_major(), col_major()); }
+  // col_major into col_and_row: forwarded to the col_major/col_major case.
+  template <typename L1, typename L2>
+  inline void add(const L1& l1, L2& l2, col_major, col_and_row)
+  { add(l1, l2, col_major(), col_major()); }
+  ///@endcond
+  /** Addition of two vectors/matrices; dispatches on the kind of l2.
+      @param l1 first operand.
+      @param l2 second operand.
+      @param l3 contains l1+l2 on output. */
+  template <typename L1, typename L2, typename L3> inline
+  void add(const L1& l1, const L2& l2, L3& l3) {
+    typedef typename linalg_traits<L2>::linalg_type kind;
+    add_spec(l1, l2, l3, kind());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Const-destination overload: forwards linalg_const_cast(l3) to add.
+  template <typename L1, typename L2, typename L3>
+  inline void add(const L1& l1, const L2& l2, const L3& l3)
+  { add(l1, l2, linalg_const_cast(l3)); }
+  template <typename L1, typename L2, typename L3> inline
+  void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_matrix) {
+    copy(l2, l3); add(l1, l3);   // matrices: l3 = l2, then l3 += l1
+  }
+  // Vectors: when l3 has the address of l1 or of l2 the two-argument add is
+  // used on the other operand; otherwise dispatch on the three storage types.
+  template <typename L1, typename L2, typename L3> inline
+  void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
+    GMM_ASSERT2(vect_size(l1) == vect_size(l2) &&
+                vect_size(l1) == vect_size(l3), "dimensions mismatch");
+    const void *out = (const void *)(&l3);
+    if ((const void *)(&l1) == out) { add(l2, l3); return; }
+    if ((const void *)(&l2) == out) { add(l1, l3); return; }
+    add(l1, l2, l3, typename linalg_traits<L1>::storage_type(),
+        typename linalg_traits<L2>::storage_type(),
+        typename linalg_traits<L3>::storage_type());
+  }
+
+  // Writes *it1 + *it2 into [it3, ite), advancing the three iterators together.
+  template <typename IT1, typename IT2, typename IT3>
+  void add_full_(IT1 it1, IT2 it2, IT3 it3, IT3 ite) {
+    while (it3 != ite) { *it3 = *it1 + *it2; ++it1; ++it2; ++it3; }
+  }
+  // Copies the it2 sequence into [it3, ite3), then adds every entry of
+  // [it1, ite1) at offset it1.index() from it3.
+  template <typename IT1, typename IT2, typename IT3>
+  void add_almost_full_(IT1 it1, IT1 ite1, IT2 it2, IT3 it3, IT3 ite3) {
+    for (IT3 out = it3; out != ite3; ++out, ++it2) *out = *it2;
+    while (it1 != ite1) { *(it3 + it1.index()) += *it1; ++it1; }
+  }
+
+  // Zero-fills [it3, ite3), assigns the entries of [it1, ite1) at their
+  // index() offsets, then adds those of [it2, ite2) the same way.
+  template <typename IT1, typename IT2, typename IT3>
+  void add_to_full_(IT1 it1, IT1 ite1, IT2 it2, IT2 ite2, IT3 it3, IT3 ite3) {
+    typedef typename std::iterator_traits<IT3>::value_type T;
+    for (IT3 out = it3; out != ite3; ++out) *out = T(0);
+    while (it1 != ite1) { *(it3 + it1.index()) = *it1; ++it1; }
+    while (it2 != ite2) { *(it3 + it2.index()) += *it2; ++it2; }
+  }
+  
+  // add(l1, l2, l3) for three dense vectors: one pass over l3 via add_full_().
+  template <typename L1, typename L2, typename L3> inline
+  void add(const L1& l1, const L2& l2, L3& l3,
+           abstract_dense, abstract_dense, abstract_dense) {
+    add_full_(vect_const_begin(l1), vect_const_begin(l2), vect_begin(l3), vect_end(l3));
+  }
+  
+  // Generic fallback for add(v1, v2, v3): copy l2 into l3, then accumulate l1.
+  // Needs to be specialized to optimize particular additions.
+  template <typename L1, typename L2, typename L3,
+            typename ST1, typename ST2, typename ST3> inline
+  void add(const L1& l1, const L2& l2, L3& l3, ST1, ST2, ST3)
+  { copy(l2, l3); add(l1, l3, ST1(), ST3()); }
+
+  // add(l1, l2, l3) with sparse l1 and dense l2, l3: handled by add_almost_full_().
+  template <typename L1, typename L2, typename L3> inline
+  void add(const L1& l1, const L2& l2, L3& l3, abstract_sparse, abstract_dense, abstract_dense) {
+    add_almost_full_(vect_const_begin(l1), vect_const_end(l1), vect_const_begin(l2),
+                     vect_begin(l3), vect_end(l3));
+  }
+  
+  // Dense l1 + sparse l2 into dense l3: swap the operands and reuse the sparse/dense/dense overload.
+  template <typename L1, typename L2, typename L3> inline
+  void add(const L1& l1, const L2& l2, L3& l3, abstract_dense, abstract_sparse, abstract_dense)
+  { add(l2, l1, l3, abstract_sparse(), abstract_dense(), abstract_dense()); }
+  
+  // add(l1, l2, l3) with sparse l1, l2 and dense l3: handled by add_to_full_().
+  template <typename L1, typename L2, typename L3> inline
+  void add(const L1& l1, const L2& l2, L3& l3,
+           abstract_sparse, abstract_sparse, abstract_dense) {
+    add_to_full_(vect_const_begin(l1), vect_const_end(l1),
+                 vect_const_begin(l2), vect_const_end(l2), vect_begin(l3), vect_end(l3));
+  }
+
+
+  // Sparse + sparse into sparse, selected by the linalg_true tag (both operands
+  // index-sorted, see the caller): walks the two index sequences in step.
+  template <typename L1, typename L2, typename L3>
+  void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_true) {
+    auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
+    auto it2 = vect_const_begin(l2), ite2 = vect_const_end(l2);
+    clear(l3);
+    while (it1 != ite1 && it2 != ite2) {
+      ptrdiff_t d = it1.index() - it2.index();   // sign tells which index is smaller
+      if (d == 0) { l3[it1.index()] = *it1 + *it2; ++it1; ++it2; }
+      else if (d < 0) { l3[it1.index()] += *it1; ++it1; }
+      else { l3[it2.index()] += *it2; ++it2; }
+    }
+    // At most one of the two operands still has entries left.
+    for (; it1 != ite1; ++it1) l3[it1.index()] += *it1;
+    for (; it2 != ite2; ++it2) l3[it2.index()] += *it2;
+  }
+
+  // Unsorted operands (linalg_false): l3 = l2, then accumulate l1 (was add(l2, l3), i.e. 2*l2 with l1 ignored).
+  template <typename L1, typename L2, typename L3>
+  void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_false) { copy(l2, l3); add(l1, l3); }
+  
+  // add(l1, l2, l3) for three sparse vectors: tag is linalg_and of the index_sorted traits of L1 and L2.
+  template <typename L1, typename L2, typename L3>
+  void add(const L1& l1, const L2& l2, L3& l3, abstract_sparse, abstract_sparse, abstract_sparse) {
+    typedef typename linalg_and<typename linalg_traits<L1>::index_sorted,
+                                typename linalg_traits<L2>::index_sorted>::bool_type both_sorted;
+    add_spspsp(l1, l2, l3, both_sorted());
+  }
+
+  // l2 += l1 for two dense vectors; the loop is bounded by the range of l2.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_dense, abstract_dense) {
+    auto src = vect_const_begin(l1);
+    for (auto dst = vect_begin(l2), last = vect_end(l2); dst != last; ++dst, ++src) *dst += *src;
+  }
+  // l2 += l1 for a dense l1 and a skyline l2; leading and trailing zeros of l1 are ignored.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
+    typedef typename linalg_traits<L1>::value_type T;
+    // i1 is advanced together with it1; ie1 starts at vect_size(l1) and follows ite1 while trimming.
+    auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
+    size_type i1 = 0, ie1 = vect_size(l1);
+    while (it1 != ite1 && *it1 == T(0)) { ++it1; ++i1; }  // skip leading zeros
+    if (it1 != ite1) {  // l1 holds at least one non-zero entry
+      auto it2 = vect_begin(l2), ite2 = vect_end(l2);
+      while (ie1 && *(ite1-1) == T(0)) { ite1--; --ie1; }  // skip trailing zeros
+      // First non-zero of l1 lies before the iterated range of l2, or that range is empty:
+      if (it2 == ite2 || i1 < it2.index()) {
+	l2[i1] = *it1; ++i1; ++it1;  // written through operator[] instead of the iterator
+	if (it1 == ite1) return;
+	it2 = vect_begin(l2); ite2 = vect_end(l2);  // re-read: presumably the write changed l2's range - verify
+      }
+      if (ie1 > ite2.index()) {  // last non-zero of l1 lies past the end of l2's range
+	--ite1; l2[ie1 - 1] = *ite1;
+	it2 = vect_begin(l2);
+      }
+      it2 += i1 - it2.index();  // move it2 to position i1
+      for (; it1 != ite1; ++it1, ++it2) { *it2 += *it1; }
+    }
+  }
+
+
+  // l2 += l1 for skyline l1 and dense l2: the iterated entries of l1 are
+  // added to l2 starting at offset it1.index().
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
+    auto src = vect_const_begin(l1), src_end = vect_const_end(l1);
+    if (src == src_end) return;   // empty range: nothing to add
+    auto dst = vect_begin(l2);
+    for (dst += src.index(); src != src_end; ++dst, ++src) *dst += *src;
+  }
+
+  
+  // l2 += l1 for sparse l1 and dense l2: each iterated entry of l1 is added to l2[index].
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
+    for (auto e = vect_const_begin(l1), last = vect_const_end(l1); e != last; ++e) l2[e.index()] += *e;
+  }
+  
+  // l2 += l1 for two sparse vectors: each iterated entry of l1 is added to l2[index].
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
+    for (auto e = vect_const_begin(l1), last = vect_const_end(l1); e != last; ++e) l2[e.index()] += *e;
+  }
+
+  // l2 += l1 for sparse l1 and skyline l2: each iterated entry of l1 is added to l2[index].
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
+    for (auto e = vect_const_begin(l1), last = vect_const_end(l1); e != last; ++e) l2[e.index()] += *e;
+  }
+
+
+  // l2 += l1 for skyline l1 and sparse l2: zero entries of l1 are skipped.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
+    typedef typename linalg_traits<L1>::value_type T1;
+    for (auto e = vect_const_begin(l1), last = vect_const_end(l1); e != last; ++e)
+      if (*e != T1(0)) l2[e.index()] += *e;
+  }
+  // l2 += l1 for two skyline vectors; zeros at both ends of l1's iterated range are ignored.
+  template <typename L1, typename L2>
+  void add(const L1& l1, L2& l2, abstract_skyline, abstract_skyline) {
+    typedef typename linalg_traits<L1>::value_type T1;
+    typedef typename linalg_traits<L2>::value_type T2;
+    // [it1, ite1) is narrowed below to run from the first to the last non-zero of l1.
+    auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
+    // Skip leading zeros; when nothing is left there is nothing to add.
+    while (it1 != ite1 && *it1 == T1(0)) ++it1;
+    if (ite1 != it1) {
+      auto it2 = vect_begin(l2), ite2 = vect_end(l2);
+      while (*(ite1-1) == T1(0)) ite1--;  // drop trailing zeros; stops at the non-zero found above
+      if (it2 == ite2 || it1.index() < it2.index()) {
+	l2[it1.index()] = T2(0);  // presumably widens l2's stored range downwards - verify
+	it2 = vect_begin(l2); ite2 = vect_end(l2);
+      }
+      if (ite1.index() > ite2.index()) {
+	l2[ite1.index() - 1] = T2(0);  // same assumption for the upper end
+	it2 = vect_begin(l2);
+      }
+      it2 += it1.index() - it2.index();  // move it2 to the index of the first non-zero of l1
+      for (; it1 != ite1; ++it1, ++it2) *it2 += *it1;
+    }
+  }
+  
+  template <typename L1, typename L2>   // l2 += l1, dense l1 into sparse l2: zeros of l1 never touch l2
+  void add(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
+    auto src = vect_const_begin(l1), src_end = vect_const_end(l1);
+    for (size_type pos = 0; src != src_end; ++src, ++pos)
+      if (*src != typename linalg_traits<L1>::value_type(0)) l2[pos] += *src;
+  }
+
+  /* ******************************************************************** */
+  /*		Matrix-vector mult                                    	  */
+  /* ******************************************************************** */
+  ///@endcond
+  /** matrix-vector or matrix-matrix product.
+      The variant is picked from the linalg_type tag of L2.
+      @param l1 a matrix.
+      @param l2 a vector or matrix.
+      @param l3 the product l1*l2.
+  */
+  template <typename L1, typename L2, typename L3> inline
+  void mult(const L1& l1, const L2& l2, L3& l3)
+  { typedef typename linalg_traits<L2>::linalg_type kind_tag; mult_dispatch(l1, l2, l3, kind_tag()); }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Overload taking l3 by const reference: forwards linalg_const_cast(l3) to mult().
+  template <typename L1, typename L2, typename L3> inline
+  void mult(const L1& l1, const L2& l2, const L3& l3) { mult(l1, l2, linalg_const_cast(l3)); }
+
+  // Matrix-vector product l3 = l1*l2. An l1 with no rows or no columns clears l3;
+  // when l2 and l3 share an origin the product is built in a temporary and copied.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<L1>::sub_orientation>::potype orient;
+    size_type m = mat_nrows(l1), n = mat_ncols(l1);
+    if (!m || !n) { gmm::clear(l3); return; }
+    GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
+    if (same_origin(l2, l3)) {
+      GMM_WARNING2("Warning, A temporary is used for mult\n");
+      typename temporary_vector<L3>::vector_type temp(vect_size(l3));
+      mult_spec(l1, l2, temp, orient());
+      copy(temp, l3);
+    } else mult_spec(l1, l2, l3, orient());
+  }
+
+  // Row-wise l3 = l1*l2 for sparse l3: clear l3, then store only the non-zero row products.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
+    typedef typename linalg_traits<L3>::value_type value_t;
+    clear(l3);
+    for (size_type r = 0, nrows = mat_nrows(l1); r < nrows; ++r) {
+      value_t dot = vect_sp(mat_const_row(l1, r), l2);
+      if (dot != value_t(0)) l3[r] = dot;
+    }
+  }
+
+  // Row-wise l3 = l1*l2 for skyline l3: clear l3, then store only the non-zero row products.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
+    typedef typename linalg_traits<L3>::value_type value_t;
+    clear(l3);
+    for (size_type r = 0, nrows = mat_nrows(l1); r < nrows; ++r) {
+      value_t dot = vect_sp(mat_const_row(l1, r), l2);
+      if (dot != value_t(0)) l3[r] = dot;
+    }
+  }
+
+  // Row-wise l3 = l1*l2 for dense l3: each entry is the scalar product of the matching row of l1 with l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
+    typedef typename linalg_traits<L1>::storage_type row_storage;
+    typedef typename linalg_traits<L2>::storage_type vec_storage;
+    typename linalg_traits<L3>::iterator out = vect_begin(l3), out_end = vect_end(l3);
+    for (auto row_it = mat_row_const_begin(l1); out != out_end; ++out, ++row_it)
+      *out = vect_sp(linalg_traits<L1>::row(row_it), l2, row_storage(), vec_storage());
+  }
+
+  // Column-wise l3 = l1*l2 for dense l2: l3 is cleared, then column c of l1 scaled by l2[c] is added.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
+    clear(l3);
+    for (size_type c = 0, ncols = mat_ncols(l1); c < ncols; ++c)
+      add(scaled(mat_const_col(l1, c), l2[c]), l3);
+  }
+
+  // Column-wise l3 = l1*l2 for sparse l2: each non-zero entry of l2 adds its scaled column of l1 to l3.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
+    typedef typename linalg_traits<L2>::value_type value_t;
+    clear(l3);
+    for (auto e = vect_const_begin(l2), last = vect_const_end(l2); e != last; ++e)
+      if (*e != value_t(0)) add(scaled(mat_const_col(l1, e.index()), *e), l3);
+  }
+
+  // Column-wise l3 = l1*l2 for skyline l2: each non-zero entry of l2 adds its scaled column of l1 to l3.
+  template <typename L1, typename L2, typename L3>
+  void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
+    typedef typename linalg_traits<L2>::value_type value_t;
+    clear(l3);
+    for (auto e = vect_const_begin(l2), last = vect_const_end(l2); e != last; ++e)
+      if (*e != value_t(0)) add(scaled(mat_const_col(l1, e.index()), *e), l3);
+  }
+
+  // row_major tag: multiply row by row, dispatching on the storage type of the result l3.
+  template <typename L1, typename L2, typename L3> inline void mult_spec(const L1& l1, const L2& l2, L3& l3, row_major)
+  { typedef typename linalg_traits<L3>::storage_type st3; mult_by_row(l1, l2, l3, st3()); }
+
+  // col_major tag: multiply column by column, dispatching on the storage type of the operand l2.
+  template <typename L1, typename L2, typename L3> inline void mult_spec(const L1& l1, const L2& l2, L3& l3, col_major)
+  { typedef typename linalg_traits<L2>::storage_type st2; mult_by_col(l1, l2, l3, st2()); }
+
+  // abstract_null_type tag (no row/column orientation): defer to mult_ind() on the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline void mult_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
+  { typedef typename linalg_traits<L1>::storage_type st1; mult_ind(l1, l2, l3, st1()); }
+
+  // Fallback for abstract_indirect storage: unconditionally triggers GMM_ASSERT1.
+  template <typename L1, typename L2, typename L3>
+  void mult_ind(const L1& l1, const L2& l2, L3& l3, abstract_indirect)
+  { GMM_ASSERT1(false, "gmm::mult(m, ., .) undefined for this kind of matrix"); }
+
+  // l4 = l1*l2 + l3. The aliasing test now precedes the copy of l3 into l4, so an
+  // l2 sharing its origin with l4 is saved to a temporary before l4 is overwritten.
+  template <typename L1, typename L2, typename L3, typename L4> inline
+  void mult(const L1& l1, const L2& l2, const L3& l3, L4& l4) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<L1>::sub_orientation>::potype orient;
+    size_type m = mat_nrows(l1), n = mat_ncols(l1);
+    if (!m || !n) { gmm::copy(l3, l4); return; }   // empty product: l4 = l3
+    GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l4), "dimensions mismatch");
+    if (!same_origin(l2, l4)) { copy(l3, l4); mult_add_spec(l1, l2, l4, orient()); }
+    else {
+      GMM_WARNING2("Warning, A temporary is used for mult\n");
+      typename temporary_vector<L2>::vector_type temp(vect_size(l2));
+      copy(l2, temp);   // keep the operand before l4 is written
+      copy(l3, l4);
+      mult_add_spec(l1, temp, l4, orient());
+    }
+  }
+
+  // Overload taking l4 by const reference: forwards linalg_const_cast(l4) to mult().
+  template <typename L1, typename L2, typename L3, typename L4> inline
+  void mult(const L1& l1, const L2& l2, const L3& l3, const L4& l4) { mult(l1, l2, l3, linalg_const_cast(l4)); }
+
+  ///@endcond
+  /** Multiply-accumulate. l3 += l1*l2; a temporary copy of l2 is used when l2 and l3 share an origin. */
+  template <typename L1, typename L2, typename L3> inline
+  void mult_add(const L1& l1, const L2& l2, L3& l3) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<L1>::sub_orientation>::potype orient;
+    size_type m = mat_nrows(l1), n = mat_ncols(l1);
+    if (!m || !n) return;   // empty product: l3 is left unchanged
+    GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
+    if (!same_origin(l2, l3)) mult_add_spec(l1, l2, l3, orient());
+    else {
+      GMM_WARNING2("Warning, A temporary is used for mult\n");
+      // The temporary stands in for l2, so its type is derived from L2 (as in
+      // the four-argument mult) rather than from the result type L3.
+      typename temporary_vector<L2>::vector_type temp(vect_size(l2));
+      copy(l2, temp);
+      mult_add_spec(l1, temp, l3, orient());
+    }
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Overload taking l3 by const reference: forwards linalg_const_cast(l3) to mult_add().
+  template <typename L1, typename L2, typename L3> inline
+  void mult_add(const L1& l1, const L2& l2, const L3& l3) { mult_add(l1, l2, linalg_const_cast(l3)); }
+
+  // Row-wise l3 += l1*l2 for sparse l3: only non-zero row products are accumulated.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
+    typedef typename linalg_traits<L3>::value_type value_t;
+    for (size_type r = 0, nrows = mat_nrows(l1); r < nrows; ++r) {
+      value_t dot = vect_sp(mat_const_row(l1, r), l2);
+      if (dot != value_t(0)) l3[r] += dot;
+    }
+  }
+
+  // Row-wise l3 += l1*l2 for skyline l3: only non-zero row products are accumulated.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
+    typedef typename linalg_traits<L3>::value_type value_t;
+    for (size_type r = 0, nrows = mat_nrows(l1); r < nrows; ++r) {
+      value_t dot = vect_sp(mat_const_row(l1, r), l2);
+      if (dot != value_t(0)) l3[r] += dot;
+    }
+  }
+
+  // Row-wise l3 += l1*l2 for dense l3: each entry gains the scalar product of the matching row of l1 with l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
+    auto out = vect_begin(l3), out_end = vect_end(l3);
+    for (auto row_it = mat_row_const_begin(l1); out != out_end; ++out, ++row_it)
+      *out += vect_sp(linalg_traits<L1>::row(row_it), l2);
+  }
+
+  // Column-wise l3 += l1*l2 for dense l2: column c of l1, scaled by l2[c], is added to l3.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
+    for (size_type c = 0, ncols = mat_ncols(l1); c < ncols; ++c)
+      add(scaled(mat_const_col(l1, c), l2[c]), l3);
+  }
+
+  // Column-wise l3 += l1*l2 for sparse l2: zero entries of l2 are skipped.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
+    typedef typename linalg_traits<L2>::value_type value_t;
+    for (auto e = vect_const_begin(l2), last = vect_const_end(l2); e != last; ++e)
+      if (*e != value_t(0)) add(scaled(mat_const_col(l1, e.index()), *e), l3);
+  }
+
+  // Column-wise l3 += l1*l2 for skyline l2: zero entries of l2 are skipped.
+  template <typename L1, typename L2, typename L3>
+  void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
+    typedef typename linalg_traits<L2>::value_type value_t;
+    for (auto e = vect_const_begin(l2), last = vect_const_end(l2); e != last; ++e)
+      if (*e != value_t(0)) add(scaled(mat_const_col(l1, e.index()), *e), l3);
+  }
+
+  // row_major tag: accumulate row by row, dispatching on the storage type of l3.
+  template <typename L1, typename L2, typename L3> inline void mult_add_spec(const L1& l1, const L2& l2, L3& l3, row_major)
+  { typedef typename linalg_traits<L3>::storage_type st3; mult_add_by_row(l1, l2, l3, st3()); }
+
+  // col_major tag: accumulate column by column, dispatching on the storage type of l2.
+  template <typename L1, typename L2, typename L3> inline void mult_add_spec(const L1& l1, const L2& l2, L3& l3, col_major)
+  { typedef typename linalg_traits<L2>::storage_type st2; mult_add_by_col(l1, l2, l3, st2()); }
+
+  // abstract_null_type tag (no row/column orientation): defer to mult_ind() on the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline void mult_add_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
+  { typedef typename linalg_traits<L1>::storage_type st1; mult_ind(l1, l2, l3, st1()); }
+
+  // Product with the transpose of l1: forwards gmm::transposed(l1), l2 and l3 to mult().
+  template <typename L1, typename L2, typename L3>
+  void transposed_mult(const L1& l1, const L2& l2, const L3& l3) { mult(gmm::transposed(l1), l2, l3); }
+
+
+  /* ******************************************************************** */
+  /*		Matrix-matrix mult                                    	  */
+  /* ******************************************************************** */
+  
+  // Empty tag types naming the matrix-matrix algorithm; mult_t<...>::t yields one of them.
+  struct g_mult {};  // generic mult, less optimized
+  struct c_mult {};  // col x col -> col mult
+  struct r_mult {};  // row x row -> row mult
+  struct rcmult {};  // row x col -> col mult
+  struct crmult {};  // col x row -> row mult
+  // mult_t<SO1, SO2, SO3>::t maps the sub_orientation tags of l1, l2 and of the
+  // destination to an algorithm tag; DEFMU__ abbreviates one full specialization per row.
+  template<typename SO1, typename SO2, typename SO3> struct mult_t;
+  #define DEFMU__ template<> struct mult_t
+  DEFMU__<row_major  , row_major  , row_major  > { typedef r_mult t; };
+  DEFMU__<row_major  , row_major  , col_major  > { typedef g_mult t; };
+  DEFMU__<row_major  , row_major  , col_and_row> { typedef r_mult t; };
+  DEFMU__<row_major  , row_major  , row_and_col> { typedef r_mult t; };
+  DEFMU__<row_major  , col_major  , row_major  > { typedef rcmult t; };
+  DEFMU__<row_major  , col_major  , col_major  > { typedef rcmult t; };
+  DEFMU__<row_major  , col_major  , col_and_row> { typedef rcmult t; };
+  DEFMU__<row_major  , col_major  , row_and_col> { typedef rcmult t; };
+  DEFMU__<row_major  , col_and_row, row_major  > { typedef r_mult t; };
+  DEFMU__<row_major  , col_and_row, col_major  > { typedef rcmult t; };
+  DEFMU__<row_major  , col_and_row, col_and_row> { typedef rcmult t; };
+  DEFMU__<row_major  , col_and_row, row_and_col> { typedef rcmult t; };
+  DEFMU__<row_major  , row_and_col, row_major  > { typedef r_mult t; };
+  DEFMU__<row_major  , row_and_col, col_major  > { typedef rcmult t; };
+  DEFMU__<row_major  , row_and_col, col_and_row> { typedef r_mult t; };
+  DEFMU__<row_major  , row_and_col, row_and_col> { typedef r_mult t; };
+  DEFMU__<col_major  , row_major  , row_major  > { typedef crmult t; };
+  DEFMU__<col_major  , row_major  , col_major  > { typedef g_mult t; };
+  DEFMU__<col_major  , row_major  , col_and_row> { typedef crmult t; };
+  DEFMU__<col_major  , row_major  , row_and_col> { typedef crmult t; };
+  DEFMU__<col_major  , col_major  , row_major  > { typedef g_mult t; };
+  DEFMU__<col_major  , col_major  , col_major  > { typedef c_mult t; };
+  DEFMU__<col_major  , col_major  , col_and_row> { typedef c_mult t; };
+  DEFMU__<col_major  , col_major  , row_and_col> { typedef c_mult t; };
+  DEFMU__<col_major  , col_and_row, row_major  > { typedef crmult t; };
+  DEFMU__<col_major  , col_and_row, col_major  > { typedef c_mult t; };
+  DEFMU__<col_major  , col_and_row, col_and_row> { typedef c_mult t; };
+  DEFMU__<col_major  , col_and_row, row_and_col> { typedef c_mult t; };
+  DEFMU__<col_major  , row_and_col, row_major  > { typedef crmult t; };
+  DEFMU__<col_major  , row_and_col, col_major  > { typedef c_mult t; };
+  DEFMU__<col_major  , row_and_col, col_and_row> { typedef c_mult t; };
+  DEFMU__<col_major  , row_and_col, row_and_col> { typedef c_mult t; };
+  DEFMU__<col_and_row, row_major  , row_major  > { typedef r_mult t; };
+  DEFMU__<col_and_row, row_major  , col_major  > { typedef c_mult t; };
+  DEFMU__<col_and_row, row_major  , col_and_row> { typedef r_mult t; };
+  DEFMU__<col_and_row, row_major  , row_and_col> { typedef r_mult t; };
+  DEFMU__<col_and_row, col_major  , row_major  > { typedef rcmult t; };
+  DEFMU__<col_and_row, col_major  , col_major  > { typedef c_mult t; };
+  DEFMU__<col_and_row, col_major  , col_and_row> { typedef c_mult t; };
+  DEFMU__<col_and_row, col_major  , row_and_col> { typedef c_mult t; };
+  DEFMU__<col_and_row, col_and_row, row_major  > { typedef r_mult t; };
+  DEFMU__<col_and_row, col_and_row, col_major  > { typedef c_mult t; };
+  DEFMU__<col_and_row, col_and_row, col_and_row> { typedef c_mult t; };
+  DEFMU__<col_and_row, col_and_row, row_and_col> { typedef c_mult t; };
+  DEFMU__<col_and_row, row_and_col, row_major  > { typedef r_mult t; };
+  DEFMU__<col_and_row, row_and_col, col_major  > { typedef c_mult t; };
+  DEFMU__<col_and_row, row_and_col, col_and_row> { typedef c_mult t; };
+  DEFMU__<col_and_row, row_and_col, row_and_col> { typedef r_mult t; };
+  DEFMU__<row_and_col, row_major  , row_major  > { typedef r_mult t; };
+  DEFMU__<row_and_col, row_major  , col_major  > { typedef c_mult t; };
+  DEFMU__<row_and_col, row_major  , col_and_row> { typedef r_mult t; };
+  DEFMU__<row_and_col, row_major  , row_and_col> { typedef r_mult t; };
+  DEFMU__<row_and_col, col_major  , row_major  > { typedef rcmult t; };
+  DEFMU__<row_and_col, col_major  , col_major  > { typedef c_mult t; };
+  DEFMU__<row_and_col, col_major  , col_and_row> { typedef c_mult t; };
+  DEFMU__<row_and_col, col_major  , row_and_col> { typedef c_mult t; };
+  DEFMU__<row_and_col, col_and_row, row_major  > { typedef rcmult t; };
+  DEFMU__<row_and_col, col_and_row, col_major  > { typedef rcmult t; };
+  DEFMU__<row_and_col, col_and_row, col_and_row> { typedef rcmult t; };
+  DEFMU__<row_and_col, col_and_row, row_and_col> { typedef rcmult t; };
+  DEFMU__<row_and_col, row_and_col, row_major  > { typedef r_mult t; };
+  DEFMU__<row_and_col, row_and_col, col_major  > { typedef c_mult t; };
+  DEFMU__<row_and_col, row_and_col, col_and_row> { typedef r_mult t; };
+  DEFMU__<row_and_col, row_and_col, row_and_col> { typedef r_mult t; };
+
+  // Matrix-matrix product l3 = l1*l2. The algorithm tag comes from mult_t applied
+  // to the sub_orientation of both operands and of the destination; a temporary
+  // matrix is the destination when l3 shares an origin with l1 or l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_matrix) {
+    typedef typename temporary_matrix<L3>::matrix_type temp_mat_type;
+    typedef typename linalg_traits<L1>::sub_orientation so1;
+    typedef typename linalg_traits<L2>::sub_orientation so2;
+    size_type n = mat_ncols(l1);
+    if (n == 0) { gmm::clear(l3); return; }
+    GMM_ASSERT2(n == mat_nrows(l2) && mat_nrows(l1) == mat_nrows(l3) &&
+                mat_ncols(l2) == mat_ncols(l3), "dimensions mismatch");
+    if (!same_origin(l2, l3) && !same_origin(l1, l3)) {
+      typedef typename linalg_traits<L3>::sub_orientation so3;
+      mult_spec(l1, l2, l3, typename mult_t<so1, so2, so3>::t());
+      return;
+    }
+    GMM_WARNING2("A temporary is used for mult");
+    temp_mat_type temp(mat_nrows(l3), mat_ncols(l3));
+    typedef typename linalg_traits<temp_mat_type>::sub_orientation so_temp;
+    mult_spec(l1, l2, temp, typename mult_t<so1, so2, so_temp>::t());
+    copy(temp, l3);
+  }
+
+  // Completely generic but inefficient: triple loop over element access,
+  // l3(i, j) = sum over k of l1(i, k)*l2(k, j).
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, g_mult) {
+    typedef typename linalg_traits<L3>::value_type value_t;
+    GMM_WARNING2("Inefficient generic matrix-matrix mult is used");
+    for (size_type row = 0; row < mat_nrows(l3); ++row)
+      for (size_type col = 0; col < mat_ncols(l3); ++col) {
+        value_t acc(0);
+        for (size_type k = 0; k < mat_nrows(l2); ++k) acc += l1(row, k)*l2(k, col);
+        l3(row, col) = acc;
+      }
+  }
+
+  // row x col matrix-matrix mult
+
+  // col_major tag: copy l1 into a temporary_col_matrix, then multiply that copy by l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, col_major) {
+    typename temporary_col_matrix<L1>::matrix_type l1_copy(mat_nrows(l1), mat_ncols(l1));
+    copy(l1, l1_copy);
+    mult(l1_copy, l2, l3);
+  }
+
+  // row_major tag: copy l2 into a temporary_row_matrix, then multiply l1 by that copy.
+  template <typename L1, typename L2, typename L3>
+  void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, row_major) {
+    typename temporary_row_matrix<L2>::matrix_type l2_copy(mat_nrows(l2), mat_ncols(l2));
+    copy(l2, l2_copy);
+    mult(l1, l2_copy, l3);
+  }
+
+  // Row matrix times column matrix. Two sparse operands go through a temporary;
+  // otherwise l3(i,j) is the scalar product of row i of l1 with column j of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, rcmult) {
+    if (is_sparse(l1) && is_sparse(l2)) {
+      GMM_WARNING3("Inefficient row matrix - col matrix mult for "
+                   "sparse matrices, using temporary");
+      typedef typename principal_orientation_type<typename
+        linalg_traits<L3>::sub_orientation>::potype orient;
+      mult_row_col_with_temp(l1, l2, l3, orient());
+      return;
+    }
+    auto col_first = linalg_traits<L2>::col_begin(l2), col_it = col_first,
+         col_last = linalg_traits<L2>::col_end(l2);
+    for (size_type i = 0, nrows = mat_nrows(l1); i < nrows; ++i) {
+      typename linalg_traits<L1>::const_sub_row_type r1 = mat_const_row(l1, i);
+      size_type j = 0;
+      for (col_it = col_first; col_it != col_last; ++col_it, ++j)
+        l3(i,j) = vect_sp(r1, linalg_traits<L2>::col(col_it));
+    }
+  }
+
+  // row - row matrix-matrix mult
+
+  // r_mult entry point: refine the dispatch with the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult)
+  { typedef typename linalg_traits<L1>::storage_type st1; mult_spec(l1, l2, l3, r_mult(), st1()); }
+
+  // Row-oriented product with dense l1: row i of l3 is the sum over j of
+  // l1(i, j) times row j of l2 (zero entries of l1 are not skipped).
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_dense) {
+    // optimizable
+    clear(l3);
+    const size_type out_rows = mat_nrows(l3), inner = mat_nrows(l2);
+    for (size_type i = 0; i < out_rows; ++i)
+      for (size_type j = 0; j < inner; ++j)
+        add(scaled(mat_const_row(l2, j), l1(i, j)), mat_row(l3, i));
+  }
+
+  // Row-oriented product with sparse l1: for each iterated entry (i, k) of l1,
+  // row k of l2 scaled by that entry is added to row i of l3.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_sparse) {
+    // optimizable
+    clear(l3);
+    for (size_type i = 0, out_rows = mat_nrows(l3); i < out_rows; ++i) {
+      typename linalg_traits<L1>::const_sub_row_type row_i = mat_const_row(l1, i);
+      for (auto e = vect_const_begin(row_i), last = vect_const_end(row_i); e != last; ++e)
+        add(scaled(mat_const_row(l2, e.index()), *e), mat_row(l3, i));
+    }
+  }
+
+  // Skyline l1 reuses the sparse row-oriented overload.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_skyline) { mult_spec(l1, l2, l3, r_mult(), abstract_sparse()); }
+
+  // col - col matrix-matrix mult
+
+  // c_mult entry point: refine the dispatch with the storage type and sub_orientation of l2.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult) {
+    mult_spec(l1, l2, l3, c_mult(), typename linalg_traits<L2>::storage_type(), typename linalg_traits<L2>::sub_orientation());
+  }
+
+
+  // Column-oriented product with dense l2: column i of l3 is cleared, then for
+  // every non-zero l2(j, i) column j of l1 scaled by it is added.
+  template <typename L1, typename L2, typename L3, typename ORIEN>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult, abstract_dense, ORIEN) {
+    typedef typename linalg_traits<L2>::value_type value_t;
+    const size_type out_cols = mat_ncols(l3), inner = mat_ncols(l1);
+    for (size_type i = 0; i < out_cols; ++i) {
+      clear(mat_col(l3, i));
+      for (size_type j = 0; j < inner; ++j) {
+        value_t coeff = l2(j, i);
+        if (coeff != value_t(0)) add(scaled(mat_const_col(l1, j), coeff), mat_col(l3, i));
+      }
+    }
+  }
+
+  // Column-oriented product with sparse l2: for each iterated entry (k, i) of
+  // l2, column k of l1 scaled by that entry is added to column i of l3.
+  template <typename L1, typename L2, typename L3, typename ORIEN>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
+                 abstract_sparse, ORIEN) {
+    // optimizable
+    clear(l3);
+    for (size_type i = 0, out_cols = mat_ncols(l3); i < out_cols; ++i) {
+      typename linalg_traits<L2>::const_sub_col_type col_i = mat_const_col(l2, i);
+      for (auto e = vect_const_begin(col_i), last = vect_const_end(col_i); e != last; ++e)
+        add(scaled(mat_const_col(l1, e.index()), *e), mat_col(l3, i));
+    }
+  }
+
+  // Column-oriented product for a row_major sparse l2, read element-wise: col i of l3 += l2(j, i) * col j of l1.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult, abstract_sparse, row_major) {
+    typedef typename linalg_traits<L2>::value_type T;
+    GMM_WARNING3("Inefficient matrix-matrix mult for sparse matrices");
+    clear(l3);
+    size_type mm = mat_nrows(l2), nn = mat_ncols(l3);   // j runs over rows of l2, i over columns of l3
+    for (size_type i = 0; i < nn; ++i)
+      for (size_type j = 0; j < mm; ++j) {
+        T a = l2(j, i);   // was l2(i,j): the transposed entry, out of range when l2 is not square
+        if (a != T(0)) add(scaled(mat_const_col(l1, j), a), mat_col(l3, i));
+      }
+  }
+
+  // Skyline l2 reuses the sparse column-oriented overloads, keeping the orientation tag.
+  template <typename L1, typename L2, typename L3, typename ORIEN> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult, abstract_skyline, ORIEN)
+  { mult_spec(l1, l2, l3, c_mult(), abstract_sparse(), ORIEN()); }
+
+  
+  // col - row matrix-matrix mult
+
+  // crmult entry point: refine the dispatch with the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult)
+  { typedef typename linalg_traits<L1>::storage_type st1; mult_spec(l1, l2, l3, crmult(), st1()); }
+
+
+  // Column matrix times row matrix, dense l1: row j of l3 accumulates l1(j, i) times row i of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_dense) {
+    // optimizable
+    clear(l3);
+    const size_type inner = mat_ncols(l1), out_rows = mat_nrows(l1);
+    for (size_type i = 0; i < inner; ++i)
+      for (size_type j = 0; j < out_rows; ++j)
+        add(scaled(mat_const_row(l2, i), l1(j, i)), mat_row(l3, j));
+  }
+
+  // Column matrix times row matrix, sparse l1: each iterated entry (r, i) of
+  // l1 adds row i of l2, scaled by that entry, to row r of l3.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_sparse) {
+    // optimizable
+    clear(l3);
+    for (size_type i = 0, inner = mat_ncols(l1); i < inner; ++i) {
+      typename linalg_traits<L1>::const_sub_col_type col_i = mat_const_col(l1, i);
+      for (auto e = vect_const_begin(col_i), last = vect_const_end(col_i); e != last; ++e)
+        add(scaled(mat_const_row(l2, i), *e), mat_row(l3, e.index()));
+    }
+  }
+
+  // Skyline l1 reuses the sparse col x row overload.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_skyline) { mult_spec(l1, l2, l3, crmult(), abstract_sparse()); }
+  
+
+  /* ******************************************************************** */
+  /*		Symmetry test.                                     	  */
+  /* ******************************************************************** */
+
+  ///@endcond
+  /** test if A is symmetric.
+      @param A a matrix.
+      @param tol a threshold; a negative value (the default) is replaced by
+             default_tol(R()) * mat_maxnorm(A).
+      @return false when A is not square, else the storage-specific result.
+  */
+  template <typename MAT> inline
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol = magnitude_of_linalg(MAT)(-1)) {
+    typedef magnitude_of_linalg(MAT) R;
+    if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
+    return (mat_nrows(A) == mat_ncols(A)) && is_symmetric(A, tol, typename linalg_traits<MAT>::storage_type());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Dense symmetry test: every strictly-lower entry A(i, j) must match A(j, i) to within tol.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_dense) {
+    const size_type dim = mat_nrows(A);
+    for (size_type i = 1; i < dim; ++i)
+      for (size_type j = 0; j < i; ++j)
+        if (gmm::abs(A(i, j)-A(j, i)) > tol) return false;
+    return true;
+  }
+
+  // Sparse symmetry test: dispatch on the principal orientation tag of A.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_sparse) {
+    typedef typename principal_orientation_type<typename linalg_traits<MAT>::sub_orientation>::potype orient;
+    return is_symmetric(A, tol, orient());
+  }
+
+  // Row-wise symmetry test: each iterated entry (i, k) of row i is compared
+  // with A(k, i) read through operator().
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol, row_major) {
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_row_type row_i = mat_const_row(A, i);
+      for (auto e = vect_const_begin(row_i), last = vect_const_end(row_i); e != last; ++e)
+        if (gmm::abs(*e - A(e.index(), i)) > tol) return false;
+    }
+    return true;
+  }
+
+  // Column-wise symmetry test: each iterated entry (k, i) of column i is
+  // compared with A(i, k) read through operator().
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol, col_major) {
+    for (size_type i = 0; i < mat_ncols(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_col_type col_i = mat_const_col(A, i);
+      for (auto e = vect_const_begin(col_i), last = vect_const_end(col_i); e != last; ++e)
+        if (gmm::abs(*e - A(i, e.index())) > tol) return false;
+    }
+    return true;
+  }
+
+  // Skyline storage reuses the sparse symmetry test.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_skyline)
+  { return is_symmetric(A, tol, abstract_sparse()); }
+
+  ///@endcond
+  /** test if A is Hermitian.
+      @param A a matrix.
+      @param tol a threshold; a negative value (the default) is replaced by
+             default_tol(R()) * mat_maxnorm(A).
+      @return false when A is not square, else the storage-specific result.
+  */
+  template <typename MAT> inline
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol = magnitude_of_linalg(MAT)(-1)) {
+    typedef magnitude_of_linalg(MAT) R;
+    if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
+    return (mat_nrows(A) == mat_ncols(A)) && is_hermitian(A, tol, typename linalg_traits<MAT>::storage_type());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Dense Hermitian test: every strictly-lower A(i, j) must match gmm::conj(A(j, i)) to within tol.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_dense) {
+    const size_type dim = mat_nrows(A);
+    for (size_type i = 1; i < dim; ++i)
+      for (size_type j = 0; j < i; ++j)
+        if (gmm::abs(A(i, j)-gmm::conj(A(j, i))) > tol) return false;
+    return true;
+  }
+
+  // Sparse Hermitian test: dispatch on the principal orientation tag of A.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_sparse) {
+    typedef typename principal_orientation_type<typename linalg_traits<MAT>::sub_orientation>::potype orient;
+    return is_hermitian(A, tol, orient());
+  }
+
+  // Row-wise Hermitian test: the conjugate of each iterated entry (i, k) of
+  // row i is compared with A(k, i) read through operator().
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol, row_major) {
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_row_type row_i = mat_const_row(A, i);
+      for (auto e = vect_const_begin(row_i), last = vect_const_end(row_i); e != last; ++e)
+        if (gmm::abs(gmm::conj(*e) - A(e.index(), i)) > tol) return false;
+    }
+    return true;
+  }
+
+  // Column-wise Hermitian test: the conjugate of each iterated entry (k, i) of
+  // column i is compared with A(i, k) read through operator().
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol, col_major) {
+    for (size_type i = 0; i < mat_ncols(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_col_type col_i = mat_const_col(A, i);
+      for (auto e = vect_const_begin(col_i), last = vect_const_end(col_i); e != last; ++e)
+        if (gmm::abs(gmm::conj(*e) - A(i, e.index())) > tol) return false;
+    }
+    return true;
+  }
+
+  // Skyline storage reuses the sparse Hermitian test.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol, abstract_skyline)
+  { return is_hermitian(A, tol, abstract_sparse()); }
+  ///@endcond
+}
+
+
+#endif //  GMM_BLAS_H__
diff --git a/gmm/gmm_blas_interface.h b/gmm/gmm_blas_interface.h
new file mode 100644
index 000000000..c41ae95d3
--- /dev/null
+++ b/gmm/gmm_blas_interface.h
@@ -0,0 +1,948 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_blas_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 7, 2003.
+   @brief gmm interface for fortran BLAS.
+*/
+
+#if defined(GETFEM_USES_BLAS) || defined(GMM_USES_BLAS) \
+  || defined(GMM_USES_LAPACK) || defined(GMM_USES_ATLAS)
+
+#ifndef GMM_BLAS_INTERFACE_H
+#define GMM_BLAS_INTERFACE_H
+
+#include "gmm_blas.h"
+#include "gmm_interface.h"
+#include "gmm_matrix.h"
+
+namespace gmm {
+
+  // Use ./configure --enable-blas-interface to activate this interface.
+
+#define GMMLAPACK_TRACE(f) 
+  // #define GMMLAPACK_TRACE(f) cout << "function " << f << " called" << endl;
+
+  /* ********************************************************************* */
+  /* Operations interfaced for T = float, double, std::complex<float>      */
+  /*    or std::complex<double> :                                          */
+  /*                                                                       */
+  /* vect_norm2(std::vector<T>)                                            */
+  /*                                                                       */
+  /* vect_sp(std::vector<T>, std::vector<T>)                               */
+  /* vect_sp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_sp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_sp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* vect_hp(std::vector<T>, std::vector<T>)                               */
+  /* vect_hp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_hp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_hp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* add(std::vector<T>, std::vector<T>)                                   */
+  /* add(scaled(std::vector<T>, a), std::vector<T>)                        */ 
+  /*                                                                       */
+  /* mult(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)               */
+  /* mult(transposed(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, transposed(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(transposed(dense_matrix<T>), transposed(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /* mult(conjugated(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, conjugated(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(conjugated(dense_matrix<T>), conjugated(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>)                 */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)         */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /*                                                                       */
+  /* mult_add(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
+  /* mult_add(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)     */
+  /* mult_add(transposed(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /* mult_add(conjugated(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>, std::vector<T>) */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>,         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(dense_matrix<T>, std::vector<T>, scaled(std::vector<T>),         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), scaled(std::vector<T>), */
+  /*   std::vector<T>)                                                     */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /*                                                                       */
+  /* lower_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* upper_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* lower_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* lower_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /*                                                                       */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>, std::vector<T>)      */
+  /* rank_one_update(dense_matrix<T>, scaled(std::vector<T>),              */
+  /*                                  std::vector<T>)                      */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>,                      */
+  /*                                  scaled(std::vector<T>))              */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /* Basic defines.                                                        */
+  /* ********************************************************************* */
+
+# define BLAS_S float
+# define BLAS_D double
+# define BLAS_C std::complex<float>
+# define BLAS_Z std::complex<double>
+
+  /* ********************************************************************* */
+  /* BLAS functions used (Fortran-style symbols, arguments by address).    */
+  /* ********************************************************************* */
+  extern "C" { // only daxpy_ and dgemm_ carry a full prototype
+    void daxpy_(const int *n, const double *alpha, const double *x,
+                const int *incx, double *y, const int *incy);
+    void dgemm_(const char *tA, const char *tB, const int *m,
+                const int *n, const int *k, const double *alpha,
+                const double *A, const int *ldA, const double *B,
+                const int *ldB, const double *beta, double *C,
+                const int *ldC);
+    void sgemm_(...); void cgemm_(...); void zgemm_(...); // (...): unchecked args
+    void sgemv_(...); void dgemv_(...); void cgemv_(...); void zgemv_(...);
+    void strsv_(...); void dtrsv_(...); void ctrsv_(...); void ztrsv_(...);
+    void saxpy_(...); /*void daxpy_(...); */void caxpy_(...); void zaxpy_(...);
+    BLAS_S sdot_ (...); BLAS_D ddot_ (...);
+    BLAS_C cdotu_(...); BLAS_Z zdotu_(...);
+    BLAS_C cdotc_(...); BLAS_Z zdotc_(...);
+    BLAS_S snrm2_(...); BLAS_D dnrm2_(...);
+    BLAS_S scnrm2_(...); BLAS_D dznrm2_(...);
+    void  sger_(...); void  dger_(...); void  cgerc_(...); void  zgerc_(...); 
+  }
+
+#if 1
+
+  /* ********************************************************************* */
+  /* vect_norm2(x): Euclidean norm, BLAS [sd]nrm2_ / scnrm2_ / dznrm2_.    */
+  /* data() rather than &x[0] keeps an empty x (n == 0) well defined.      */
+  /* ********************************************************************* */
+# define nrm2_interface(param1, trans1, blas_name, base_type)              \
+  inline number_traits<base_type >::magnitude_type                         \
+  vect_norm2(param1(base_type)) {                                          \
+    GMMLAPACK_TRACE("nrm2_interface");                                     \
+    int inc(1), n(int(vect_size(x))); trans1(base_type);                   \
+    return blas_name(&n, x.data(), &inc);                                  \
+  }
+
+# define nrm2_p1(base_type) const std::vector<base_type > &x
+# define nrm2_trans1(base_type)
+
+  nrm2_interface(nrm2_p1, nrm2_trans1, snrm2_ , BLAS_S)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dnrm2_ , BLAS_D)
+  nrm2_interface(nrm2_p1, nrm2_trans1, scnrm2_, BLAS_C)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dznrm2_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* vect_sp(x, y): unconjugated dot product, BLAS [sd]dot_ / [cz]dotu_.   */
+  /* data() rather than &v[0] keeps empty vectors (n == 0) well defined.   */
+  /* ********************************************************************* */
+# define dot_interface(param1, trans1, mult1, param2, trans2, mult2,       \
+                       blas_name, base_type)                               \
+  inline base_type vect_sp(param1(base_type), param2(base_type)) {         \
+    GMMLAPACK_TRACE("dot_interface");                                      \
+    trans1(base_type); trans2(base_type); int inc(1), n(int(vect_size(y)));\
+    return mult1 mult2 blas_name(&n, x.data(), &inc, y.data(), &inc);      \
+  }
+
+# define dot_p1(base_type) const std::vector<base_type > &x
+# define dot_trans1(base_type)
+# define dot_p1_s(base_type)                                               \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dot_trans1_s(base_type)                                           \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+# define dot_p2(base_type) const std::vector<base_type > &y
+# define dot_trans2(base_type)
+# define dot_p2_s(base_type)                                               \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dot_trans2_s(base_type)                                           \
+         std::vector<base_type > &y =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));      \
+         base_type b(y_.r)
+
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2, dot_trans2, (BLAS_S),
+		sdot_ , BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2, dot_trans2, (BLAS_D),
+		ddot_ , BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2, dot_trans2, (BLAS_C),
+		cdotu_, BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2, dot_trans2, (BLAS_Z),
+		zdotu_, BLAS_Z)
+  
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_S),
+		sdot_ ,BLAS_S)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_D),
+		ddot_ ,BLAS_D)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_C),
+		cdotu_,BLAS_C)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_Z),
+		zdotu_,BLAS_Z)
+  
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2_s, dot_trans2_s, b*,
+		sdot_ ,BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2_s, dot_trans2_s, b*,
+		ddot_ ,BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2_s, dot_trans2_s, b*,
+		cdotu_,BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2_s, dot_trans2_s, b*,
+		  zdotu_,BLAS_Z)
+
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,sdot_ ,
+		BLAS_S)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,ddot_ ,
+		BLAS_D)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,cdotu_,
+		BLAS_C)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,zdotu_,
+		BLAS_Z)
+
+
+  /* ********************************************************************* */
+  /* vect_hp(x, y): hermitian product, BLAS [sd]dot_ / [cz]dotc_.          */
+  /* NOTE(review): BLAS dotc conjugates x; confirm generic vect_hp agrees. */
+  /* ********************************************************************* */
+# define dotc_interface(param1, trans1, mult1, param2, trans2, mult2,      \
+                        blas_name, base_type)                              \
+  inline base_type vect_hp(param1(base_type), param2(base_type)) {         \
+    GMMLAPACK_TRACE("dotc_interface"); trans1(base_type); trans2(base_type);\
+    int inc(1), n(int(vect_size(y))); /* n == 0 is fine: data() below */   \
+    return mult1 mult2 blas_name(&n, x.data(), &inc, y.data(), &inc);      \
+  }
+
+# define dotc_p1(base_type) const std::vector<base_type > &x
+# define dotc_trans1(base_type)
+# define dotc_p1_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dotc_trans1_s(base_type)                                          \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+# define dotc_p2(base_type) const std::vector<base_type > &y
+# define dotc_trans2(base_type)
+# define dotc_p2_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dotc_trans2_s(base_type)                                          \
+         std::vector<base_type > &y =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));      \
+         base_type b(gmm::conj(y_.r))
+
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2, dotc_trans2,
+		 (BLAS_S),sdot_ ,BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2, dotc_trans2,
+		 (BLAS_D),ddot_ ,BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2, dotc_trans2,
+		 (BLAS_C),cdotc_,BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2, dotc_trans2,
+		 (BLAS_Z),zdotc_,BLAS_Z)
+  
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_S),sdot_, BLAS_S)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_D),ddot_ , BLAS_D)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_C),cdotc_, BLAS_C)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_Z),zdotc_, BLAS_Z)
+  
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2_s, dotc_trans2_s,
+		 b*,sdot_ , BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2_s, dotc_trans2_s,
+		 b*,ddot_ , BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2_s, dotc_trans2_s,
+		 b*,cdotc_, BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2_s, dotc_trans2_s,
+		   b*,zdotc_, BLAS_Z)
+
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,sdot_ ,
+		 BLAS_S)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,ddot_ ,
+		 BLAS_D)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,cdotc_,
+		 BLAS_C)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,zdotc_,
+		 BLAS_Z)
+
+  /* ********************************************************************* */
+  /* add(x, y).                                                            */
+  /* ********************************************************************* */
+
+# define axpy_interface(param1, trans1, blas_name, base_type)              \
+  /* y += a * x through BLAS axpy; nothing is done when y is empty. */     \
+  inline void add(param1(base_type), std::vector<base_type > &y) {         \
+    GMMLAPACK_TRACE("axpy_interface");                                     \
+    int len(int(vect_size(y))), stride(1); trans1(base_type);              \
+    if (len != 0) blas_name(&len, &a, &x[0], &stride, &y[0], &stride);     \
+  }
+
+# define axpy_p1(base_type) const std::vector<base_type > &x
+# define axpy_trans1(base_type) base_type a(1)
+# define axpy_p1_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define axpy_trans1_s(base_type)                                          \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+  axpy_interface(axpy_p1, axpy_trans1, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1, axpy_trans1, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1, axpy_trans1, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1, axpy_trans1, zaxpy_, BLAS_Z)
+  
+  axpy_interface(axpy_p1_s, axpy_trans1_s, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, zaxpy_, BLAS_Z)
+  
+
+  /* ********************************************************************* */
+  /* mult_add(A, x, z): z <- op(A) * (alpha x) + z through BLAS gemv.      */
+  /* An empty A contributes nothing, so z must then be left untouched.     */
+  /* ********************************************************************* */
+# define gemv_interface(param1, trans1, param2, trans2, blas_name,         \
+                        base_type, orien)                                  \
+  inline void mult_add_spec(param1(base_type), param2(base_type),          \
+              std::vector<base_type > &z, orien) {                         \
+    GMMLAPACK_TRACE("gemv_interface");                                     \
+    trans1(base_type); trans2(base_type); base_type beta(1);               \
+    int m(int(mat_nrows(A))), lda(m), n(int(mat_ncols(A))), inc(1);        \
+    /* beta == 1: an empty product adds nothing, so z is left as it is. */ \
+    if (m && n) blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc,  \
+                          &beta, &z[0], &inc);                             \
+  }
+
+  // First parameter: gem_p1_* is the signature, gem_trans1_* binds A and t.
+# define gem_p1_n(base_type)  const dense_matrix<base_type > &A
+# define gem_trans1_n(base_type) const char t = 'N'
+# define gem_p1_t(base_type)                                               \
+         const transposed_col_ref<dense_matrix<base_type > *> &A_
+# define gem_trans1_t(base_type) dense_matrix<base_type > &A =             \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));     \
+         const char t = 'T'
+# define gem_p1_tc(base_type)                                              \
+         const transposed_col_ref<const dense_matrix<base_type > *> &A_
+# define gem_p1_c(base_type)                                               \
+         const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_
+# define gem_trans1_c(base_type) dense_matrix<base_type > &A =             \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));     \
+         const char t = 'C'
+
+  // Second parameter: gemv_p2_* is the signature, gemv_trans2_* binds x, alpha.
+# define gemv_p2_n(base_type)  const std::vector<base_type > &x
+# define gemv_trans2_n(base_type) base_type alpha(1)
+# define gemv_p2_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define gemv_trans2_s(base_type) std::vector<base_type > &x =             \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type alpha(x_.r)
+
+  // Z <- AX + Z.
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, col_major)
+
+  // Z <- transposed(A)X + Z.
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- transposed(const A)X + Z.
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- conjugated(A)X + Z.
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+
+  // Z <- A scaled(X) + Z.
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, col_major)
+
+  // Z <- transposed(A) scaled(X) + Z.
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- transposed(const A) scaled(X) + Z.
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- conjugated(A) scaled(X) + Z.
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* mult(A, x, y).                                                        */
+  /* ********************************************************************* */
+  
+# define gemv_interface2(param1, trans1, param2, trans2, blas_name,        \
+                         base_type, orien)                                 \
+  /* z <- op(A) * (alpha x) with beta == 0; z is cleared if A is empty. */ \
+  inline void mult_spec(param1(base_type), param2(base_type),              \
+                        std::vector<base_type > &z, orien) {               \
+    GMMLAPACK_TRACE("gemv_interface2");                                    \
+    trans1(base_type); trans2(base_type); base_type zero(0);               \
+    int rows(int(mat_nrows(A))), cols(int(mat_ncols(A))), ld(rows), one(1);\
+    if (rows == 0 || cols == 0) { gmm::clear(z); return; }                 \
+    blas_name(&t, &rows, &cols, &alpha, &A(0,0), &ld, &x[0], &one, &zero,  \
+              &z[0], &one);                                                \
+  }
+
+  // Y <- AX.
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, col_major)
+
+  // Y <- transposed(A)X.
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- transposed(const A)X.
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- conjugated(A)X.
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+
+  // Y <- A scaled(X).
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, col_major)
+
+  // Y <- transposed(A) scaled(X).
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- transposed(const A) scaled(X).
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- conjugated(A) scaled(X).
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* Rank one update.                                                      */
+  /* ********************************************************************* */
+
+# define ger_interface(blas_name, base_type)                               \
+  /* Rank-one update of A from V and W via BLAS ger/gerc, alpha == 1. */   \
+  inline void rank_one_update(const dense_matrix<base_type > &A,           \
+                              const std::vector<base_type > &V,            \
+                              const std::vector<base_type > &W) {          \
+    GMMLAPACK_TRACE("ger_interface");                                      \
+    int nr(int(mat_nrows(A))), nc(int(mat_ncols(A))), ld(nr);              \
+    int inc_v(1), inc_w(1); base_type unit(1);                             \
+    if (nr && nc) blas_name(&nr, &nc, &unit, &V[0], &inc_v, &W[0], &inc_w, \
+                            &A(0,0), &ld);                                 \
+  }
+
+  ger_interface(sger_, BLAS_S)
+  ger_interface(dger_, BLAS_D)
+  ger_interface(cgerc_, BLAS_C)
+  ger_interface(zgerc_, BLAS_Z)
+
+# define ger_interface_sn(blas_name, base_type)                            \
+  /* Scaled first vector: gemv_trans2_s yields x and its factor alpha. */  \
+  inline void rank_one_update(const dense_matrix<base_type > &A,           \
+                              gemv_p2_s(base_type),                        \
+                              const std::vector<base_type > &W) {          \
+    GMMLAPACK_TRACE("ger_interface"); gemv_trans2_s(base_type);            \
+    int nr(int(mat_nrows(A))), nc(int(mat_ncols(A))), ld(nr);              \
+    int inc_x(1), inc_w(1);                                                \
+    if (nr && nc) blas_name(&nr, &nc, &alpha, &x[0], &inc_x, &W[0], &inc_w,\
+                            &A(0,0), &ld);                                 \
+  }
+
+  ger_interface_sn(sger_, BLAS_S)
+  ger_interface_sn(dger_, BLAS_D)
+  ger_interface_sn(cgerc_, BLAS_C)
+  ger_interface_sn(zgerc_, BLAS_Z)
+
+# define ger_interface_ns(blas_name, base_type) /* W side via gemv_p2_s */ \
+  inline void rank_one_update(const dense_matrix<base_type > &A,	   \
+			      const std::vector<base_type > &V,		   \
+			      gemv_p2_s(base_type)) {			   \
+    GMMLAPACK_TRACE("ger_interface");                                      \
+    gemv_trans2_s(base_type); /* expected to introduce x and alpha */      \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));		   \
+    int incx = 1, incy = 1;						   \
+    base_type al2 = gmm::conj(alpha); /* conjugated scale goes to BLAS */  \
+    if (m && n)               /* no BLAS call for an empty matrix */       \
+      blas_name(&m, &n, &al2, &V[0], &incx, &x[0], &incy, &A(0,0), &lda);  \
+  }
+
+  ger_interface_ns(sger_, BLAS_S)
+  ger_interface_ns(dger_, BLAS_D)
+  ger_interface_ns(cgerc_, BLAS_C)
+  ger_interface_ns(zgerc_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* dense matrix x dense matrix multiplication: C = A * B through xGEMM.  */
+  /* ********************************************************************* */
+
+# define gemm_interface_nn(blas_name, base_type)                           \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+            const dense_matrix<base_type > &B,                             \
+            dense_matrix<base_type > &C, c_mult) {                         \
+    GMMLAPACK_TRACE("gemm_interface_nn");                                  \
+    const char t = 'N';       /* neither operand is transposed */          \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));  		   \
+    int n(int(mat_ncols(B)));						   \
+    int ldb = k, ldc = m;                                                  \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &t, &m, &n, &k, &alpha,                                \
+	          &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);     \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_nn(sgemm_, BLAS_S)
+  gemm_interface_nn(dgemm_, BLAS_D)
+  gemm_interface_nn(cgemm_, BLAS_C)
+  gemm_interface_nn(zgemm_, BLAS_Z)
+  
+  /* ********************************************************************* */
+  /* transposed(dense matrix) x dense matrix multiplication.               */
+  /* ********************************************************************* */
+
+# define gemm_interface_tn(blas_name, base_type, is_const)                 \
+  inline void mult_spec(                                                   \
+         const transposed_col_ref<is_const<base_type > *> &A_,\
+         const dense_matrix<base_type > &B,                                \
+         dense_matrix<base_type > &C, rcmult) {                            \
+    GMMLAPACK_TRACE("gemm_interface_tn");                                  \
+    dense_matrix<base_type > &A  /* matrix underlying the view A_ */       \
+         = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));   \
+    const char t = 'T', u = 'N';  /* A enters transposed, B as stored */   \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B)));  \
+    int lda = k, ldb = k, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	          &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);     \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_tn(sgemm_, BLAS_S, dense_matrix)
+  gemm_interface_tn(dgemm_, BLAS_D, dense_matrix)
+  gemm_interface_tn(cgemm_, BLAS_C, dense_matrix)
+  gemm_interface_tn(zgemm_, BLAS_Z, dense_matrix)
+  gemm_interface_tn(sgemm_, BLAS_S, const dense_matrix) // views of const data
+  gemm_interface_tn(dgemm_, BLAS_D, const dense_matrix)
+  gemm_interface_tn(cgemm_, BLAS_C, const dense_matrix)
+  gemm_interface_tn(zgemm_, BLAS_Z, const dense_matrix)
+
+  /* ********************************************************************* */
+  /* dense matrix x transposed(dense matrix) multiplication.               */
+  /* ********************************************************************* */
+
+# define gemm_interface_nt(blas_name, base_type, is_const)                 \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+		     const transposed_col_ref<is_const<base_type > *> &B_, \
+         dense_matrix<base_type > &C, r_mult) {                            \
+    GMMLAPACK_TRACE("gemm_interface_nt");                                  \
+    dense_matrix<base_type > &B  /* matrix underlying the view B_ */       \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'N', u = 'T';  /* A as stored, B enters transposed */   \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));               \
+    int n(int(mat_nrows(B)));						   \
+    int ldb = n, ldc = m;                                                  \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_nt(sgemm_, BLAS_S, dense_matrix)
+  gemm_interface_nt(dgemm_, BLAS_D, dense_matrix)
+  gemm_interface_nt(cgemm_, BLAS_C, dense_matrix)
+  gemm_interface_nt(zgemm_, BLAS_Z, dense_matrix)
+  gemm_interface_nt(sgemm_, BLAS_S, const dense_matrix) // views of const data
+  gemm_interface_nt(dgemm_, BLAS_D, const dense_matrix)
+  gemm_interface_nt(cgemm_, BLAS_C, const dense_matrix)
+  gemm_interface_nt(zgemm_, BLAS_Z, const dense_matrix)
+
+  /* ********************************************************************* */
+  /* transposed(dense matrix) x transposed(dense matrix) multiplication.   */
+  /* ********************************************************************* */
+
+# define gemm_interface_tt(blas_name, base_type, isA_const, isB_const)     \
+  inline void mult_spec(                                                   \
+	       const transposed_col_ref<isA_const <base_type > *> &A_,	   \
+               const transposed_col_ref<isB_const <base_type > *> &B_,	   \
+	       dense_matrix<base_type > &C, r_mult) {			   \
+    GMMLAPACK_TRACE("gemm_interface_tt");                                  \
+    dense_matrix<base_type > &A  /* matrix underlying the view A_ */       \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+    dense_matrix<base_type > &B  /* matrix underlying the view B_ */       \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'T', u = 'T';  /* both operands enter transposed */     \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_nrows(B)));  \
+    int lda = k, ldb = n, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, const dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, const dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, const dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, const dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, const dense_matrix)
+
+
+  /* ********************************************************************* */
+  /* conjugated(dense matrix) x dense matrix multiplication.               */
+  /* ********************************************************************* */
+
+# define gemm_interface_cn(blas_name, base_type)                           \
+  inline void mult_spec(                                                   \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_,\
+      const dense_matrix<base_type > &B,                                   \
+      dense_matrix<base_type > &C, rcmult) {                               \
+    GMMLAPACK_TRACE("gemm_interface_cn");                                  \
+    dense_matrix<base_type > &A  /* matrix underlying the view A_ */       \
+          = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));  \
+    const char t = 'C', u = 'N';  /* A conjugate-transposed, B stored */   \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B)));  \
+    int lda = k, ldb = k, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_cn(sgemm_, BLAS_S)
+  gemm_interface_cn(dgemm_, BLAS_D)
+  gemm_interface_cn(cgemm_, BLAS_C)
+  gemm_interface_cn(zgemm_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* dense matrix x conjugated(dense matrix) multiplication.               */
+  /* ********************************************************************* */
+
+# define gemm_interface_nc(blas_name, base_type)                           \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &B_,\
+      dense_matrix<base_type > &C, c_mult, row_major) {                    \
+    GMMLAPACK_TRACE("gemm_interface_nc");                                  \
+    dense_matrix<base_type > &B  /* matrix underlying the view B_ */       \
+         = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));   \
+    const char t = 'N', u = 'C';  /* A stored, B conjugate-transposed */   \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));               \
+    int n(int(mat_nrows(B))), ldb = n, ldc = m;				   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_nc(sgemm_, BLAS_S)
+  gemm_interface_nc(dgemm_, BLAS_D)
+  gemm_interface_nc(cgemm_, BLAS_C)
+  gemm_interface_nc(zgemm_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* conjugated(dense matrix) x conjugated(dense matrix) multiplication.   */
+  /* ********************************************************************* */
+
+# define gemm_interface_cc(blas_name, base_type)                           \
+  inline void mult_spec(                                                   \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_,\
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &B_,\
+      dense_matrix<base_type > &C, r_mult) {                               \
+    GMMLAPACK_TRACE("gemm_interface_cc");                                  \
+    dense_matrix<base_type > &A  /* matrix underlying the view A_ */       \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+    dense_matrix<base_type > &B  /* matrix underlying the view B_ */       \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'C', u = 'C';  /* both enter conjugate-transposed */    \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), lda = k;               \
+    int n(int(mat_nrows(B))), ldb = n, ldc = m;				   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);       /* a zero dimension: C is just cleared */    \
+  }
+
+  gemm_interface_cc(sgemm_, BLAS_S)
+  gemm_interface_cc(dgemm_, BLAS_D)
+  gemm_interface_cc(cgemm_, BLAS_C)
+  gemm_interface_cc(zgemm_, BLAS_Z)
+   
+  /* ********************************************************************* */
+  /* Tri solve: x is overwritten by the solution, through BLAS xTRSV.      */
+  /* ********************************************************************* */
+
+# define trsv_interface(f_name, loru, param1, trans1, blas_name, base_type)\
+  inline void f_name(param1(base_type), std::vector<base_type > &x,        \
+                              size_type k, bool is_unit) {                 \
+    GMMLAPACK_TRACE("trsv_interface");                                     \
+    loru; trans1(base_type); char d = is_unit ? 'U' : 'N';                 \
+    int lda(int(mat_nrows(A))), inc(1), n = int(k); /* order k */          \
+    if (lda) blas_name(&l, &t, &d, &n, &A(0,0), &lda, &x[0], &inc);        \
+  }
+
+# define trsv_upper const char l = 'U'  /* triangle flag handed to BLAS */
+# define trsv_lower const char l = 'L'  /* triangle flag handed to BLAS */
+
+  // X <- LOWER(A)^{-1}X.
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(A)^{-1}X.
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- LOWER(transposed(A))^{-1}X, hence the 'U' flag on the stored matrix.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(transposed(A))^{-1}X, hence the 'L' flag on the stored matrix.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+
+  // X <- LOWER(transposed(const A))^{-1}X, same flags as the non-const case.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(transposed(const A))^{-1}X, same flags as the non-const case.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+
+  // X <- LOWER(conjugated(A))^{-1}X, hence the 'U' flag on the stored matrix.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(conjugated(A))^{-1}X, hence the 'L' flag on the stored matrix.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 ztrsv_, BLAS_Z)
+  
+#endif
+}
+
+#endif // GMM_BLAS_INTERFACE_H
+
+#endif // GMM_USES_BLAS
diff --git a/gmm/gmm_condition_number.h b/gmm/gmm_condition_number.h
new file mode 100644
index 000000000..0dac20e6b
--- /dev/null
+++ b/gmm/gmm_condition_number.h
@@ -0,0 +1,147 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Julien Pommier
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_condition_number.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>, Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+   @date August 27, 2003.
+   @brief computation of the condition number of dense matrices.
+*/
+#ifndef GMM_CONDITION_NUMBER_H__
+#define GMM_CONDITION_NUMBER_H__
+
+#include "gmm_dense_qr.h"
+
+namespace gmm {
+
+  /** computation of the condition number of dense matrices using SVD.
+
+      Uses symmetric_qr_algorithm => dense matrices only. Both outputs are
+      reset to 0 on entry, so they are defined on every return path.
+      @param M a matrix.
+      @param emin receives the smallest eigenvalue magnitude.
+      @param emax receives the largest eigenvalue magnitude.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  condition_number(const MAT& M, 
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emin,
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emax) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    emax = emin = R(0); // defined even if the early return below is taken
+    // Added because of errors in complex with zero det
+    if (sizeof(T) != sizeof(R) && gmm::abs(gmm::lu_det(M)) == R(0))
+      return  gmm::default_max(R());
+      
+    size_type m = mat_nrows(M), n = mat_ncols(M);
+    // eig holds eigenvalues of M (Hermitian) or of the block matrix B below.
+    std::vector<R> eig(m+n);
+
+    if (m+n == 0) return R(0);
+    if (is_hermitian(M)) {
+      eig.resize(m);
+      gmm::symmetric_qr_algorithm(M, eig);
+    }
+    else {
+      dense_matrix<T> B(m+n, m+n); // not very efficient ??
+      gmm::copy(conjugated(M), sub_matrix(B, sub_interval(m, n), sub_interval(0, m)));
+      gmm::copy(M, sub_matrix(B, sub_interval(0, m),
+					  sub_interval(m, n)));
+      gmm::symmetric_qr_algorithm(B, eig);
+    }
+    emin = emax = gmm::abs(eig[0]);
+    for (size_type i = 1; i < eig.size(); ++i) {
+      R e = gmm::abs(eig[i]); 
+      emin = std::min(emin, e);
+      emax = std::max(emax, e);
+    }
+    // emin == 0: report gmm::default_max instead of dividing by zero.
+    if (emin == R(0)) return gmm::default_max(R());
+    return emax / emin;
+  }
+
+  /** Convenience overload: same value, the extreme magnitudes are dropped. */
+  template <typename MAT>
+  typename number_traits<typename linalg_traits<MAT>::value_type>::magnitude_type
+  condition_number(const MAT& M) {
+    typedef typename linalg_traits<MAT>::value_type value_t;
+    typename number_traits<value_t>::magnitude_type smallest, largest;
+    return condition_number(M, smallest, largest);
+  }
+
+  /** abs(trace(G)) before lu_inverse(G) times abs(trace(G)) after it. */
+  template <typename MAT>
+  typename number_traits<typename linalg_traits<MAT>::value_type>::magnitude_type
+  Frobenius_condition_number_sqr(const MAT& M) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    const size_type nr = mat_nrows(M), nc = mat_ncols(M);
+    dense_matrix<T> G(std::min(nr, nc), std::min(nr, nc));
+    if (nr < nc) mult(M, gmm::conjugated(M), G);  // nr x nr product
+    else         mult(gmm::conjugated(M), M, G);  // nc x nc product
+    const R trace_before = abs(mat_trace(G));
+    lu_inverse(G);  // the trace taken below is that of G after this call
+    return trace_before*abs(mat_trace(G));
+  }
+
+  template <typename MAT>
+  typename number_traits<typename linalg_traits<MAT>::value_type>::magnitude_type
+  Frobenius_condition_number(const MAT& M) {  // sqrt of the squared variant
+    return sqrt(Frobenius_condition_number_sqr(M));
+  }
+
+  /** estimation of the condition number (TO BE DONE...): for the time
+      being this simply forwards to condition_number(). */
+  template <typename MAT>
+  typename number_traits<typename linalg_traits<MAT>::value_type>::magnitude_type
+  condest(const MAT& M,
+          typename number_traits<typename
+          linalg_traits<MAT>::value_type>::magnitude_type& emin,
+          typename number_traits<typename
+          linalg_traits<MAT>::value_type>::magnitude_type& emax) {
+    // No cheaper estimator yet: emin/emax are filled by the exact routine.
+    return condition_number(M, emin, emax);
+  }
+  
+  /** Convenience overload: same value, the extreme magnitudes are dropped. */
+  template <typename MAT>
+  typename number_traits<typename linalg_traits<MAT>::value_type>::magnitude_type
+  condest(const MAT& M) {
+    typedef typename linalg_traits<MAT>::value_type value_t;
+    typename number_traits<value_t>::magnitude_type smallest, largest;
+    return condest(M, smallest, largest);
+  }
+}
+
+#endif
diff --git a/gmm/gmm_conjugated.h b/gmm/gmm_conjugated.h
new file mode 100644
index 000000000..1e3e7fc61
--- /dev/null
+++ b/gmm/gmm_conjugated.h
@@ -0,0 +1,398 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_conjugated.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 18, 2003.
+   @brief handle conjugation of complex matrices/vectors.
+*/
+#ifndef GMM_CONJUGATED_H__
+#define GMM_CONJUGATED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*		Conjugated references on vectors            		   */
+  /* ********************************************************************* */
+
+  template <typename IT> struct conjugated_const_iterator {
+    typedef typename std::iterator_traits<IT>::value_type      value_type;
+    typedef typename std::iterator_traits<IT>::pointer         pointer;
+    typedef typename std::iterator_traits<IT>::reference       reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+    iterator_category;
+
+    IT it; // wrapped iterator; elements read through it come back conjugated
+
+    conjugated_const_iterator() {}
+    conjugated_const_iterator(const IT &base) : it(base) {}
+    // Position queries and moves are delegated to the wrapped iterator.
+    inline size_type index() const { return it.index(); }
+    conjugated_const_iterator &operator ++() { ++it; return *this; }
+    conjugated_const_iterator &operator --() { --it; return *this; }
+    conjugated_const_iterator operator ++(int)
+    { conjugated_const_iterator before(*this); ++it; return before; }
+    conjugated_const_iterator operator --(int)
+    { conjugated_const_iterator before(*this); --it; return before; }
+    conjugated_const_iterator &operator +=(difference_type n)
+    { it += n; return *this; }
+    conjugated_const_iterator &operator -=(difference_type n)
+    { it -= n; return *this; }
+    conjugated_const_iterator operator +(difference_type n) const
+    { conjugated_const_iterator moved(*this); moved += n; return moved; }
+    conjugated_const_iterator operator -(difference_type n) const
+    { conjugated_const_iterator moved(*this); moved -= n; return moved; }
+    difference_type operator -(const conjugated_const_iterator &rhs) const
+    { return difference_type(it - rhs.it); }
+    // Element access returns the conjugate by value, never a reference.
+    value_type operator  *() const { return gmm::conj(*it); }
+    value_type operator [](size_type pos) const { return gmm::conj(it[pos]); }
+
+    bool operator ==(const conjugated_const_iterator &rhs) const
+    { return (rhs.it == it); }
+    bool operator !=(const conjugated_const_iterator &rhs) const
+    { return (rhs.it != it); }
+    bool operator < (const conjugated_const_iterator &rhs) const
+    { return (it < rhs.it); }
+  };
+
+  /** Read-only view of a vector whose elements are returned conjugated. */
+  template <typename V> struct conjugated_vector_const_ref {
+    typedef conjugated_vector_const_ref<V> this_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename linalg_traits<V>::const_iterator iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // const range of the viewed vector
+    const origin_type *origin;  // what linalg_origin() reported for it
+    size_type size_;
+
+    conjugated_vector_const_ref(const V &src)
+      : begin_(vect_const_begin(src)), end_(vect_const_end(src)),
+        origin(linalg_origin(src)), size_(vect_size(src)) {}
+
+    reference operator[](size_type idx) const
+    { return gmm::conj(linalg_traits<V>::access(origin, begin_, end_, idx)); }
+  };
+
+  /** Traits of the conjugated vector view: constant, values by copy. */
+  template <typename V> struct linalg_traits<conjugated_vector_const_ref<V> > {
+    typedef conjugated_vector_const_ref<V> this_type;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef linalg_const is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef value_type reference;
+    typedef abstract_null_type iterator;
+    typedef conjugated_const_iterator<typename
+                   linalg_traits<V>::const_iterator> const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type size(const this_type &ref) { return ref.size_; }
+    static iterator begin(this_type &ref) { return iterator(ref.begin_); }
+    static iterator end(this_type &ref) { return iterator(ref.end_); }
+    static const_iterator begin(const this_type &ref)
+    { return const_iterator(ref.begin_); }
+    static const_iterator end(const this_type &ref)
+    { return const_iterator(ref.end_); }
+    static value_type access(const origin_type *o, const const_iterator &first,
+                             const const_iterator &last, size_type idx)
+    { return gmm::conj(linalg_traits<V>::access(o, first.it, last.it, idx)); }
+    static const origin_type* origin(const this_type &ref) { return ref.origin; }
+  };
+
+  template<typename V> std::ostream &operator <<
+    (std::ostream &os, const conjugated_vector_const_ref<V> &view)
+  { gmm::write(os, view); return os; }  // printing is delegated to gmm::write
+
+  /* ********************************************************************* */
+  /*		Conjugated references on matrices            		   */
+  /* ********************************************************************* */
+
+  template <typename M> struct conjugated_row_const_iterator {
+    typedef conjugated_row_const_iterator<M> iterator;
+    typedef typename linalg_traits<M>::const_row_iterator ITER;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it; // row iterator of the underlying matrix
+
+    conjugated_row_const_iterator() {}
+    conjugated_row_const_iterator(const ITER &base) : it(base) {}
+
+    iterator &operator ++() { it++; return *this; }
+    iterator &operator --() { it--; return *this; }
+    iterator operator ++(int) { iterator old(*this); it++; return old; }
+    iterator operator --(int) { iterator old(*this); it--; return old; }
+    iterator &operator +=(difference_type n) { it += n; return *this; }
+    iterator &operator -=(difference_type n) { it -= n; return *this; }
+    iterator operator +(difference_type n) const
+    { iterator moved(*this); moved += n; return moved; }
+    iterator operator -(difference_type n) const
+    { iterator moved(*this); moved -= n; return moved; }
+    difference_type operator -(const iterator &rhs) const
+    { return it - rhs.it; }
+
+    // Access hands back the wrapped iterator itself (no conjugation here).
+    ITER operator *() const { return it; }
+    ITER operator [](int n) { return it + n; }
+
+    bool operator ==(const iterator &rhs) const { return (it == rhs.it); }
+    bool operator !=(const iterator &rhs) const { return !(rhs == *this); }
+    bool operator < (const iterator &rhs) const { return (it < rhs.it); }
+  };
+
+  template <typename M> struct  conjugated_row_matrix_const_ref { // conjugate-transposed read-only view built on the rows of M
+    
+    typedef conjugated_row_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::const_row_iterator iterator;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_; // row range [mat_row_begin(m), mat_row_end(m)) of the wrapped matrix
+    const origin_type *origin;
+    size_type nr, nc; // dimensions of the view, swapped with respect to the wrapped matrix
+
+    conjugated_row_matrix_const_ref(const M &m)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} // nr <- ncols(m), nc <- nrows(m)
+
+    value_type operator()(size_type i, size_type j) const // view(i, j) = conj of entry i in row j of m
+    { return gmm::conj(linalg_traits<M>::access(begin_+j, i)); }
+  };
+
+  template<typename M> std::ostream &operator <<
+  (std::ostream &o, const conjugated_row_matrix_const_ref<M>& m)
+  { gmm::write(o,m); return o; }
+
+
+  template <typename M> struct conjugated_col_const_iterator {
+    typedef conjugated_col_const_iterator<M> iterator;
+    typedef typename linalg_traits<M>::const_col_iterator ITER;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it;
+
+    iterator operator ++(int) { iterator tmp = *this; it++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; it--; return tmp; }
+    iterator &operator ++()   { it++; return *this; }
+    iterator &operator --()   { it--; return *this; }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+
+    ITER operator *() const { return it; }
+    ITER operator [](int i) { return it + i; }
+
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+
+    conjugated_col_const_iterator(void) {}
+    conjugated_col_const_iterator(const ITER &i) : it(i) { }
+
+  };
+
+  template <typename M> struct  conjugated_col_matrix_const_ref { // conjugate-transposed read-only view built on the columns of M
+    
+    typedef conjugated_col_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::const_col_iterator iterator;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_; // column range [mat_col_begin(m), mat_col_end(m)) of the wrapped matrix
+    const origin_type *origin;
+    size_type nr, nc; // dimensions of the view, swapped with respect to the wrapped matrix
+
+    conjugated_col_matrix_const_ref(const M &m)
+      : begin_(mat_col_begin(m)), end_(mat_col_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} // nr <- ncols(m), nc <- nrows(m)
+
+    value_type operator()(size_type i, size_type j) const // view(i, j) = conj of entry j in column i of m
+    { return gmm::conj(linalg_traits<M>::access(begin_+i, j)); }
+  };
+
+
+
+  template<typename M> std::ostream &operator <<
+  (std::ostream &o, const conjugated_col_matrix_const_ref<M>& m)
+  { gmm::write(o,m); return o; }
+
+
+  template <typename L, typename SO> struct conjugated_return__ {
+    typedef conjugated_row_matrix_const_ref<L> return_type;
+  };
+  template <typename L> struct conjugated_return__<L, col_major> {
+    typedef conjugated_col_matrix_const_ref<L> return_type;
+  };
+  template <typename L, typename T, typename LT> struct conjugated_return_ {
+    typedef const L & return_type;
+  };
+  template <typename L, typename T>
+  struct conjugated_return_<L, std::complex<T>, abstract_vector> {
+    typedef conjugated_vector_const_ref<L> return_type;
+  };
+  template <typename L, typename T>
+  struct conjugated_return_<L, T, abstract_matrix> {
+    typedef typename conjugated_return__<L,
+    typename principal_orientation_type<typename
+    linalg_traits<L>::sub_orientation>::potype
+    >::return_type return_type;
+  };
+  template <typename L> struct conjugated_return {
+    typedef typename
+    conjugated_return_<L, typename linalg_traits<L>::value_type,
+		       typename linalg_traits<L>::linalg_type		       
+		       >::return_type return_type;
+  };
+
+  ///@endcond
+  /** return a conjugated view of the input matrix or vector. */
+  template <typename L> inline
+  typename conjugated_return<L>::return_type
+  conjugated(const L &v) {
+    return conjugated(v, typename linalg_traits<L>::value_type(),
+		      typename linalg_traits<L>::linalg_type());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  template <typename L, typename T, typename LT> inline
+  const L & conjugated(const L &v, T, LT) { return v; }
+
+  template <typename L, typename T> inline
+  conjugated_vector_const_ref<L> conjugated(const L &v, std::complex<T>,
+					    abstract_vector)
+  { return conjugated_vector_const_ref<L>(v); }
+
+  template <typename L, typename T> inline
+  typename conjugated_return__<L,
+    typename principal_orientation_type<typename
+    linalg_traits<L>::sub_orientation>::potype>::return_type
+  conjugated(const L &v, T, abstract_matrix) {
+    return conjugated(v, typename principal_orientation_type<typename
+		      linalg_traits<L>::sub_orientation>::potype());
+  }
+
+  template <typename L> inline
+  conjugated_row_matrix_const_ref<L> conjugated(const L &v, row_major)
+  { return conjugated_row_matrix_const_ref<L>(v); }
+
+  template <typename L> inline
+  conjugated_col_matrix_const_ref<L> conjugated(const L &v, col_major)
+  { return conjugated_col_matrix_const_ref<L>(v); }
+
+  template <typename M>
+  struct linalg_traits<conjugated_row_matrix_const_ref<M> > {
+    typedef conjugated_row_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_row_type>::t vector_type;
+    typedef conjugated_vector_const_ref<vector_type> sub_col_type;
+    typedef conjugated_vector_const_ref<vector_type> const_sub_col_type;
+    typedef conjugated_row_const_iterator<M> col_iterator;
+    typedef conjugated_row_const_iterator<M> const_col_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type row_iterator;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static inline size_type ncols(const this_type &m) { return m.nc; }
+    static inline size_type nrows(const this_type &m) { return m.nr; }
+    static inline const_sub_col_type col(const const_col_iterator &it)
+    { return conjugated(linalg_traits<M>::row(it.it)); }
+    static inline const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m.begin_); }
+    static inline const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m.end_); }
+    static inline const origin_type* origin(const this_type &m)
+    { return m.origin; }
+    static value_type access(const const_col_iterator &it, size_type i)
+    { return gmm::conj(linalg_traits<M>::access(it.it, i)); }
+  };
+  
+  template <typename M>
+  struct linalg_traits<conjugated_col_matrix_const_ref<M> > {
+    typedef conjugated_col_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_col_type>::t vector_type;
+    typedef conjugated_vector_const_ref<vector_type> sub_row_type;
+    typedef conjugated_vector_const_ref<vector_type> const_sub_row_type;
+    typedef conjugated_col_const_iterator<M> row_iterator;
+    typedef conjugated_col_const_iterator<M> const_row_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type col_iterator;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static inline size_type nrows(const this_type &m) { return m.nr; }
+    static inline size_type ncols(const this_type &m) { return m.nc; }
+    static inline const_sub_row_type row(const const_row_iterator &it)
+    { return conjugated(linalg_traits<M>::col(it.it)); }
+    static inline const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m.begin_); }
+    static inline const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m.end_); }
+    static inline const origin_type* origin(const this_type &m)
+    { return m.origin; }
+    static value_type access(const const_row_iterator &it, size_type i)
+    { return gmm::conj(linalg_traits<M>::access(it.it, i)); }
+  };
+  
+  ///@endcond
+  
+
+}
+
+#endif //  GMM_CONJUGATED_H__
diff --git a/gmm/gmm_def.h b/gmm/gmm_def.h
new file mode 100644
index 000000000..603c57b69
--- /dev/null
+++ b/gmm/gmm_def.h
@@ -0,0 +1,1123 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_def.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Basic definitions and tools of GMM.
+*/
+#ifndef GMM_DEF_H__
+#define GMM_DEF_H__
+
+#include "gmm_ref.h"
+#include <complex>
+
+#ifndef M_PI
+# define	M_E		2.7182818284590452354       /* e          */
+# define	M_LOG2E		1.4426950408889634074       /* 1/ln(2)    */
+# define	M_LOG10E	0.43429448190325182765      /* 1/ln(10)   */
+# define	M_LN2		0.69314718055994530942      /* ln(2)      */
+# define	M_LN10		2.30258509299404568402      /* ln(10)     */
+# define	M_PI		3.14159265358979323846      /* pi         */
+# define	M_PI_2		1.57079632679489661923      /* pi/2       */
+# define	M_PI_4		0.78539816339744830962      /* pi/4       */
+# define	M_1_PI		0.31830988618379067154      /* 1/pi       */
+# define	M_2_PI		0.63661977236758134308      /* 2/pi       */
+# define	M_2_SQRTPI	1.12837916709551257390      /* 2/sqrt(pi) */
+# define	M_SQRT2		1.41421356237309504880      /* sqrt(2)    */
+# define	M_SQRT1_2	0.70710678118654752440      /* sqrt(2)/2  */
+#endif 
+
+#ifndef M_PIl
+# define M_PIl       3.1415926535897932384626433832795029L  /* pi         */
+# define M_PI_2l     1.5707963267948966192313216916397514L  /* pi/2       */
+# define M_PI_4l     0.7853981633974483096156608458198757L  /* pi/4       */
+# define M_1_PIl     0.3183098861837906715377675267450287L  /* 1/pi       */
+# define M_2_PIl     0.6366197723675813430755350534900574L  /* 2/pi       */
+# define M_2_SQRTPIl 1.1283791670955125738961589031215452L  /* 2/sqrt(pi) */
+#endif
+
+namespace gmm {
+
+  typedef size_t size_type;
+
+  /* ******************************************************************** */
+  /*		Specifier types                             		  */
+  /* ******************************************************************** */
+  /* not perfectly null, required by aCC 3.33                             */
+  struct abstract_null_type { 
+    abstract_null_type(int=0) {}
+    template <typename A,typename B,typename C> void operator()(A,B,C) {}
+  }; // placeholder type that specifies a lack of information.
+
+  struct linalg_true {};
+  struct linalg_false {};
+
+  template <typename V, typename W> struct linalg_and
+  { typedef linalg_false bool_type; };  // default: false unless both operands are linalg_true
+  template <> struct linalg_and<linalg_true, linalg_true>
+  { typedef linalg_true bool_type; };
+  template <typename V, typename W> struct linalg_or
+  { typedef linalg_true bool_type; };   // default: true unless both operands are linalg_false
+  template <> struct linalg_or<linalg_false, linalg_false>
+  { typedef linalg_false bool_type; };  // was wrongly specializing linalg_and, leaving false||false == true
+
+  struct linalg_const {};       // A reference is either linalg_const,
+  struct linalg_modifiable {};  //  linalg_modifiable or linalg_false.
+
+  struct abstract_vector {};    // The object is a vector
+  struct abstract_matrix {};    // The object is a matrix
+  
+  struct abstract_sparse {};    // sparse matrix or vector
+  struct abstract_skyline {};   // 'sky-line' matrix or vector
+  struct abstract_dense {};     // dense matrix or vector
+  struct abstract_indirect {};  // matrix given by the product with a vector
+
+  struct row_major {};          // matrix with a row access.
+  struct col_major {};          // matrix with a column access
+  struct row_and_col {};        // both accesses but row preference
+  struct col_and_row {};        // both accesses but column preference
+
+  template <typename T> struct transposed_type;
+  template<> struct transposed_type<row_major>   {typedef col_major   t_type;};
+  template<> struct transposed_type<col_major>   {typedef row_major   t_type;};
+  template<> struct transposed_type<row_and_col> {typedef col_and_row t_type;};
+  template<> struct transposed_type<col_and_row> {typedef row_and_col t_type;};
+
+  template <typename T> struct principal_orientation_type
+  { typedef abstract_null_type potype; }; // any tag other than the four below maps to the null type
+  template<> struct principal_orientation_type<row_major>
+  { typedef row_major potype; };
+  template<> struct principal_orientation_type<col_major>
+  { typedef col_major potype; };
+  template<> struct principal_orientation_type<row_and_col>
+  { typedef row_major potype; }; // both accesses, row preference -> row_major
+  template<> struct principal_orientation_type<col_and_row>
+  { typedef col_major potype; }; // both accesses, column preference -> col_major
+
+  //  template <typename V> struct linalg_traits;
+  template <typename V> struct linalg_traits {    
+    typedef abstract_null_type this_type;
+    typedef abstract_null_type linalg_type;
+    typedef abstract_null_type value_type;
+    typedef abstract_null_type is_reference;
+    typedef abstract_null_type& reference;
+    typedef abstract_null_type* iterator;
+    typedef const abstract_null_type* const_iterator;
+    typedef abstract_null_type index_sorted;
+    typedef abstract_null_type storage_type;
+    typedef abstract_null_type origin_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type sub_orientation;    
+  };
+
+  template <typename PT, typename V> struct vect_ref_type;
+  template <typename P, typename V> struct vect_ref_type<P *, V> {
+    typedef typename linalg_traits<V>::reference access_type;
+    typedef typename linalg_traits<V>::iterator iterator;
+  };
+  template <typename P, typename V> struct vect_ref_type<const P *, V> {
+    typedef typename linalg_traits<V>::value_type access_type;
+    typedef typename linalg_traits<V>::const_iterator iterator;
+  };
+  
+  template <typename PT> struct const_pointer;
+  template <typename P> struct const_pointer<P *>
+  { typedef const P* pointer; };
+  template <typename P> struct const_pointer<const P *>
+  { typedef const P* pointer; };
+
+  template <typename PT> struct modifiable_pointer;
+  template <typename P> struct modifiable_pointer<P *>
+  { typedef P* pointer; };
+  template <typename P> struct modifiable_pointer<const P *>
+  { typedef P* pointer; };
+
+  template <typename R> struct const_reference;
+  template <typename R> struct const_reference<R &>
+  { typedef const R &reference; };
+  template <typename R> struct const_reference<const R &>
+  { typedef const R  &reference; };
+
+
+  inline bool is_sparse(abstract_sparse)   { return true;  }
+  inline bool is_sparse(abstract_dense)    { return false; }
+  inline bool is_sparse(abstract_skyline)  { return true;  }
+  inline bool is_sparse(abstract_indirect) { return false; }
+
+  template <typename L> inline bool is_sparse(const L &) 
+  { return is_sparse(typename linalg_traits<L>::storage_type()); }
+
+  inline bool is_row_matrix_(row_major)     { return true;  }
+  inline bool is_row_matrix_(col_major)     { return false; }
+  inline bool is_row_matrix_(row_and_col)   { return true;  }
+  inline bool is_row_matrix_(col_and_row)   { return true;  }
+
+  template <typename L> inline bool is_row_matrix(const L &) 
+  { return is_row_matrix_(typename linalg_traits<L>::sub_orientation()); }
+
+  inline bool is_col_matrix_(row_major)     { return false; }
+  inline bool is_col_matrix_(col_major)     { return true;  }
+  inline bool is_col_matrix_(row_and_col)   { return true;  }
+  inline bool is_col_matrix_(col_and_row)   { return true;  }
+
+  template <typename L> inline bool is_col_matrix(const L &) 
+  { return is_col_matrix_(typename linalg_traits<L>::sub_orientation()); }
+
+  inline bool is_col_matrix(row_major) { return false; }
+  inline bool is_col_matrix(col_major) { return true; }
+  inline bool is_row_matrix(row_major) { return true; }
+  inline bool is_row_matrix(col_major) { return false; }
+
+  template <typename L> inline bool is_const_reference(L) { return false; }
+  inline bool is_const_reference(linalg_const) { return true; }  
+
+
+  template <typename T> struct is_gmm_interfaced_ {
+    typedef linalg_true result;
+  };
+  
+  template<> struct is_gmm_interfaced_<abstract_null_type> {
+    typedef linalg_false result;
+  };
+  
+  template <typename T> struct is_gmm_interfaced { // linalg_false when linalg_traits<T>::this_type is abstract_null_type (the unspecialized traits), linalg_true otherwise
+    typedef typename is_gmm_interfaced_<typename gmm::linalg_traits<T>::this_type >::result result;
+  };
+
+  /* ******************************************************************** */
+  /* Original type from a pointer or a reference.                         */
+  /* ******************************************************************** */
+
+  template <typename V> struct org_type            { typedef V t; };
+  template <typename V> struct org_type<V *>       { typedef V t; };
+  template <typename V> struct org_type<const V *> { typedef V t; };
+  template <typename V> struct org_type<V &>       { typedef V t; };
+  template <typename V> struct org_type<const V &> { typedef V t; };
+
+  /* ******************************************************************** */
+  /*  Types to deal with const object representing a modifiable reference */
+  /* ******************************************************************** */
+  
+  template <typename PT, typename R> struct mref_type_ 
+  { typedef abstract_null_type return_type; };
+  template <typename L, typename R> struct mref_type_<L *, R>
+  { typedef typename org_type<L>::t & return_type; };
+  template <typename L, typename R> struct mref_type_<const L *, R>
+  { typedef const typename org_type<L>::t & return_type; };
+  template <typename L> struct mref_type_<L *, linalg_const>
+  { typedef const typename org_type<L>::t & return_type; };
+  template <typename L> struct mref_type_<const L *, linalg_const>
+  { typedef const typename org_type<L>::t & return_type; };
+  template <typename L> struct mref_type_<const L *, linalg_modifiable>
+  { typedef typename org_type<L>::t & return_type; };
+  template <typename L> struct mref_type_<L *, linalg_modifiable>
+  { typedef typename org_type<L>::t & return_type; };
+
+  template <typename PT> struct mref_type {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename mref_type_<PT, 
+      typename linalg_traits<L>::is_reference>::return_type return_type;
+  };
+
+  template <typename L> typename mref_type<const L *>::return_type 
+  linalg_cast(const L &l)
+  { return const_cast<typename mref_type<const L *>::return_type>(l); }
+
+  template <typename L> typename mref_type<L *>::return_type linalg_cast(L &l)
+  { return const_cast<typename mref_type<L *>::return_type>(l); }
+
+  template <typename L, typename R> struct cref_type_
+  { typedef abstract_null_type return_type; };
+  template <typename L> struct cref_type_<L, linalg_modifiable>
+  { typedef typename org_type<L>::t & return_type; };
+  template <typename L> struct cref_type {
+    typedef typename cref_type_<L, 
+      typename linalg_traits<L>::is_reference>::return_type return_type;
+  };
+
+  template <typename L> typename cref_type<L>::return_type 
+  linalg_const_cast(const L &l)
+  { return const_cast<typename cref_type<L>::return_type>(l); }
+
+
+  // To be used to select between a reference or a const reference for
+  // the return type of a function
+  // select_return<C1, C2, L *> return C1 if L is a const reference,
+  //                                   C2 otherwise.
+  // select_return<C1, C2, const L *> return C2 if L is a modifiable reference
+  //                                         C1 otherwise. 
+  template <typename C1, typename C2, typename REF> struct select_return_ {
+    typedef abstract_null_type return_type;
+  };
+  template <typename C1, typename C2, typename L>
+  struct select_return_<C1, C2, const L &> { typedef C1 return_type; };
+  template <typename C1, typename C2, typename L>
+  struct select_return_<C1, C2, L &> { typedef C2 return_type; };
+  template <typename C1, typename C2, typename PT> struct select_return {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return_<C1, C2, 
+      typename mref_type<PT>::return_type>::return_type return_type;
+  };
+
+  
+  // To be used to select between a reference or a const reference inside
+  // a structure or a linalg_traits
+  // select_ref<C1, C2, L *> return C1 if L is a const reference,
+  //                                C2 otherwise.
+  // select_ref<C1, C2, const L *> return C1 in any case. 
+  template <typename C1, typename C2, typename REF> struct select_ref_
+  { typedef abstract_null_type ref_type; };
+  template <typename C1, typename C2, typename L>
+  struct select_ref_<C1, C2, const L &> { typedef C1 ref_type; };
+  template <typename C1, typename C2, typename L>
+  struct select_ref_<C1, C2, L &> { typedef C2 ref_type; };
+  template <typename C1, typename C2, typename PT> struct select_ref {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_ref_<C1, C2, 
+      typename mref_type<PT>::return_type>::ref_type ref_type;
+  };
+  template <typename C1, typename C2, typename L>
+  struct select_ref<C1, C2, const L *>
+  { typedef C1 ref_type; };
+
+
+  template<typename R> struct is_a_reference_
+  { typedef linalg_true reference; };
+  template<> struct is_a_reference_<linalg_false>
+  { typedef linalg_false reference; };
+
+  template<typename L> struct is_a_reference {
+    typedef typename is_a_reference_<typename linalg_traits<L>::is_reference>
+      ::reference reference;
+  };
+
+
+  template <typename L> inline bool is_original_linalg(const L &) 
+  { return is_original_linalg(typename is_a_reference<L>::reference()); }
+  inline bool is_original_linalg(linalg_false) { return true; }
+  inline bool is_original_linalg(linalg_true) { return false; }
+
+
+  template <typename PT> struct which_reference 
+  { typedef abstract_null_type is_reference; };
+  template <typename PT> struct which_reference<PT *>
+  { typedef linalg_modifiable is_reference; };
+  template <typename PT> struct which_reference<const PT *>
+  { typedef linalg_const is_reference; };
+
+
+  template <typename C1, typename C2, typename R> struct select_orientation_
+  { typedef abstract_null_type return_type; };
+  template <typename C1, typename C2>
+  struct select_orientation_<C1, C2, row_major>
+  { typedef C1 return_type; };
+  template <typename C1, typename C2>
+  struct select_orientation_<C1, C2, col_major>
+  { typedef C2 return_type; };
+  template <typename C1, typename C2, typename L> struct select_orientation {
+    typedef typename select_orientation_<C1, C2,
+      typename principal_orientation_type<typename
+      linalg_traits<L>::sub_orientation>::potype>::return_type return_type;
+  };
+  
+  /* ******************************************************************** */
+  /*		Operations on scalars                         		  */
+  /* ******************************************************************** */
+
+  template <typename T> inline T sqr(T a) { return T(a * a); }
+  template <typename T> inline T abs(T a) { return (a < T(0)) ? T(-a) : a; }
+  template <typename T> inline T abs(std::complex<T> a)
+  { T x = a.real(), y = a.imag(); return T(::sqrt(x*x+y*y)); }
+  template <typename T> inline T abs_sqr(T a) { return T(a*a); }
+  template <typename T> inline T abs_sqr(std::complex<T> a)
+  { return gmm::sqr(a.real()) + gmm::sqr(a.imag()); }
+  template <typename T> inline T pos(T a) { return (a < T(0)) ? T(0) : a; }
+  template <typename T> inline T neg(T a) { return (a < T(0)) ? T(-a) : T(0); }
+  template <typename T> inline T sgn(T a) { return (a < T(0)) ? T(-1) : T(1); }
+  template <typename T> inline T Heaviside(T a) { return (a < T(0)) ? T(0) : T(1); }
+  inline double random() { return double(rand())/(RAND_MAX+0.5); }
+  template <typename T> inline T random(T)
+  { return T(rand()*2.0)/(T(RAND_MAX)+T(1)/T(2)) - T(1); }
+  template <typename T> inline std::complex<T> random(std::complex<T>)
+  { return std::complex<T>(gmm::random(T()), gmm::random(T())); }
+  template <typename T> inline T irandom(T max)
+  { return T(gmm::random() * double(max)); }
+  template <typename T> inline T conj(T a) { return a; }
+  template <typename T> inline std::complex<T> conj(std::complex<T> a)
+  { return std::conj(a); }
+  template <typename T> inline T real(T a) { return a; }
+  template <typename T> inline T real(std::complex<T> a) { return a.real(); }
+  template <typename T> inline T imag(T ) { return T(0); }
+  template <typename T> inline T imag(std::complex<T> a) { return a.imag(); }  
+  template <typename T> inline T sqrt(T a) { return T(::sqrt(a)); }
+  template <typename T> inline std::complex<T> sqrt(std::complex<T> a) { // complex square root with non-negative real part
+    T x = a.real(), y = a.imag();
+    if (x == T(0)) { // purely imaginary (or zero) input: result is t * (1 +/- i) with t = sqrt(|y| / 2)
+      T t = T(::sqrt(gmm::abs(y) / T(2)));
+      return std::complex<T>(t, y < T(0) ? -t : t);
+    }
+    T t = T(::sqrt(T(2) * (gmm::abs(a) + gmm::abs(x)))), u = t / T(2); // t = sqrt(2 (|a| + |x|)) > 0 here since x != 0
+    return x > T(0) ? std::complex<T>(u, y / t) // x > 0: real part u, imaginary part y / t
+      : std::complex<T>(gmm::abs(y) / t, y < T(0) ? -u : u); // x < 0: real part |y| / t, imaginary part takes the sign of y
+  }
+  using std::swap;
+
+
+  template <typename T> struct number_traits {
+    typedef T magnitude_type;
+  };
+ 
+  template <typename T> struct number_traits<std::complex<T> > {
+    typedef T magnitude_type;
+  };
+
+  template <typename T> inline T conj_product(T a, T b) { return a * b; }
+  template <typename T> inline
+  std::complex<T> conj_product(std::complex<T> a, std::complex<T> b)
+  { return std::conj(a) * b; } // to be optimized ?
+
+  template <typename T> inline bool is_complex(T) { return false; }
+  template <typename T> inline bool is_complex(std::complex<T> )
+  { return true; }
+
+# define magnitude_of_linalg(M) typename number_traits<typename \
+                    linalg_traits<M>::value_type>::magnitude_type
+  
+  /* ******************************************************************** */
+  /*  types promotion                                                     */
+  /* ******************************************************************** */
+
+  /* should be completed for more specific cases <unsigned int, float> etc */
+  template <typename T1, typename T2, bool c>
+  struct strongest_numeric_type_aux {
+    typedef T1 T;
+  };
+  template <typename T1, typename T2>
+  struct strongest_numeric_type_aux<T1,T2,false> {
+    typedef T2 T;
+  };
+
+  template <typename T1, typename T2>
+  struct strongest_numeric_type {
+    typedef typename
+    strongest_numeric_type_aux<T1,T2,(sizeof(T1)>sizeof(T2))>::T T;
+  };
+  template <typename T1, typename T2>
+  struct strongest_numeric_type<T1,std::complex<T2> > {
+    typedef typename number_traits<T1>::magnitude_type R1;
+    typedef std::complex<typename strongest_numeric_type<R1,T2>::T > T;
+  };
+  template <typename T1, typename T2>
+  struct strongest_numeric_type<std::complex<T1>,T2 > {
+    typedef typename number_traits<T2>::magnitude_type R2;
+    typedef std::complex<typename strongest_numeric_type<T1,R2>::T > T;
+  };
+  template <typename T1, typename T2> 
+  struct strongest_numeric_type<std::complex<T1>,std::complex<T2> > {
+    typedef std::complex<typename strongest_numeric_type<T1,T2>::T > T;
+  };
+
+  template<> struct strongest_numeric_type<int,float>   { typedef float T;  };
+  template<> struct strongest_numeric_type<float,int>   { typedef float T;  };
+  template<> struct strongest_numeric_type<long,float>  { typedef float T;  };
+  template<> struct strongest_numeric_type<float,long>  { typedef float T;  };
+  template<> struct strongest_numeric_type<long,double> { typedef double T; };
+  template<> struct strongest_numeric_type<double,long> { typedef double T; };
+
+  template <typename V1, typename V2>
+  struct strongest_value_type {
+    typedef typename
+    strongest_numeric_type<typename linalg_traits<V1>::value_type,
+			   typename linalg_traits<V2>::value_type>::T
+    value_type;
+  };
+  template <typename V1, typename V2, typename V3>
+  struct strongest_value_type3 {
+    typedef typename
+    strongest_value_type<V1, typename
+			 strongest_value_type<V2,V3>::value_type>::value_type
+    value_type;
+  };
+
+  
+
+  /* ******************************************************************** */
+  /*		Basic vectors used                         		  */
+  /* ******************************************************************** */
+  
+  template<typename T> struct dense_vector_type 
+  { typedef std::vector<T> vector_type; };
+
+  template <typename T> class wsvector;
+  template <typename T> class rsvector;
+  template <typename T> class dsvector;
+  template<typename T> struct sparse_vector_type 
+  { typedef wsvector<T> vector_type; };
+
+  template <typename T> class slvector;
+  template <typename T> class dense_matrix;
+  template <typename VECT> class row_matrix;
+  template <typename VECT> class col_matrix;
+  
+
+  /* ******************************************************************** */
+  /*   Selects a temporary vector type                                    */
+  /*   V if V is a valid vector type,                                     */
+  /*   wsvector if V is a reference to a sparse vector,                   */
+  /*   std::vector if V is a reference to a dense vector.                 */
+  /* ******************************************************************** */
+
+  
+  template <typename R, typename S, typename L, typename V>
+  struct temporary_vector_ {
+    typedef abstract_null_type vector_type;
+  };
+  template <typename V, typename L>
+  struct temporary_vector_<linalg_true, abstract_sparse, L, V>
+  { typedef wsvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V, typename L>
+  struct temporary_vector_<linalg_true, abstract_skyline, L, V>
+  { typedef slvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V, typename L>
+  struct temporary_vector_<linalg_true, abstract_dense, L, V>
+  { typedef std::vector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename S, typename V>
+  struct temporary_vector_<linalg_false, S, abstract_vector, V>
+  { typedef V vector_type; };
+  template <typename V>
+  struct temporary_vector_<linalg_false, abstract_dense, abstract_matrix, V>
+  { typedef std::vector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_vector_<linalg_false, abstract_sparse, abstract_matrix, V>
+  { typedef wsvector<typename linalg_traits<V>::value_type> vector_type; };
+
+  template <typename V> struct temporary_vector {
+    typedef typename temporary_vector_<typename is_a_reference<V>::reference,
+				       typename linalg_traits<V>::storage_type,
+				       typename linalg_traits<V>::linalg_type,
+				       V>::vector_type vector_type;
+  };
+
+  /* ******************************************************************** */
+  /*   Selects a temporary matrix type                                    */
+  /*   M if M is a valid matrix type,                                     */
+  /*   row_matrix<wsvector> if M is a reference to a sparse matrix,       */
+  /*   dense_matrix if M is a reference to a dense matrix.                */
+  /* ******************************************************************** */
+
+  
+  template <typename R, typename S, typename L, typename V>
+  struct temporary_matrix_ { typedef abstract_null_type matrix_type; };
+  template <typename V, typename L>
+  struct temporary_matrix_<linalg_true, abstract_sparse, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef row_matrix<wsvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_matrix_<linalg_true, abstract_skyline, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef row_matrix<slvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_matrix_<linalg_true, abstract_dense, L, V>
+  { typedef dense_matrix<typename linalg_traits<V>::value_type> matrix_type; };
+  template <typename S, typename V>
+  struct temporary_matrix_<linalg_false, S, abstract_matrix, V>
+  { typedef V matrix_type; };
+
+  template <typename V> struct temporary_matrix {
+    typedef typename temporary_matrix_<typename is_a_reference<V>::reference,
+				       typename linalg_traits<V>::storage_type,
+				       typename linalg_traits<V>::linalg_type,
+				       V>::matrix_type matrix_type;
+  };
+
+  
+  template <typename S, typename L, typename V>
+  struct temporary_col_matrix_ { typedef abstract_null_type matrix_type; };
+  template <typename V, typename L>
+  struct temporary_col_matrix_<abstract_sparse, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef col_matrix<wsvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_col_matrix_<abstract_skyline, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef col_matrix<slvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_col_matrix_<abstract_dense, L, V>
+  { typedef dense_matrix<typename linalg_traits<V>::value_type> matrix_type; };
+
+  template <typename V> struct temporary_col_matrix {
+    typedef typename temporary_col_matrix_<
+      typename linalg_traits<V>::storage_type,
+      typename linalg_traits<V>::linalg_type,
+      V>::matrix_type matrix_type;
+  };
+
+
+
+
+  template <typename S, typename L, typename V>
+  struct temporary_row_matrix_ { typedef abstract_null_type matrix_type; };
+  template <typename V, typename L>
+  struct temporary_row_matrix_<abstract_sparse, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef row_matrix<wsvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_row_matrix_<abstract_skyline, L, V> {
+    typedef typename linalg_traits<V>::value_type T;
+    typedef row_matrix<slvector<T> > matrix_type;
+  };
+  template <typename V, typename L>
+  struct temporary_row_matrix_<abstract_dense, L, V>
+  { typedef dense_matrix<typename linalg_traits<V>::value_type> matrix_type; };
+
+  template <typename V> struct temporary_row_matrix {
+    typedef typename temporary_row_matrix_<
+      typename linalg_traits<V>::storage_type,
+      typename linalg_traits<V>::linalg_type,
+      V>::matrix_type matrix_type;
+  };
+
+
+
+  /* ******************************************************************** */
+  /*   Selects a temporary dense vector type                              */
+  /*   V if V is a valid dense vector type,                               */
+  /*   std::vector if V is a reference or another type of vector          */
+  /* ******************************************************************** */
+
+  template <typename R, typename S, typename V>
+  struct temporary_dense_vector_ { typedef abstract_null_type vector_type; };
+  template <typename S, typename V>
+  struct temporary_dense_vector_<linalg_true, S, V>
+  { typedef std::vector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_dense_vector_<linalg_false, abstract_sparse, V>
+  { typedef std::vector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_dense_vector_<linalg_false, abstract_skyline, V>
+  { typedef std::vector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_dense_vector_<linalg_false, abstract_dense, V>
+  { typedef V vector_type; };
+
+  template <typename V> struct temporary_dense_vector {
+    typedef typename temporary_dense_vector_<typename
+    is_a_reference<V>::reference,
+    typename linalg_traits<V>::storage_type, V>::vector_type vector_type;
+  };
+
+  /* ******************************************************************** */
+  /*   Selects a temporary sparse vector type                             */
+  /*   V if V is a valid sparse vector type,                              */
+  /*   wsvector if V is a reference or another type of vector             */
+  /* ******************************************************************** */
+
+  template <typename R, typename S, typename V>
+  struct temporary_sparse_vector_ { typedef abstract_null_type vector_type; };
+  template <typename S, typename V>
+  struct temporary_sparse_vector_<linalg_true, S, V>
+  { typedef wsvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_sparse_vector_<linalg_false, abstract_sparse, V>
+  { typedef V vector_type; };
+  template <typename V>
+  struct temporary_sparse_vector_<linalg_false, abstract_dense, V>
+  { typedef wsvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_sparse_vector_<linalg_false, abstract_skyline, V>
+  { typedef wsvector<typename linalg_traits<V>::value_type> vector_type; };
+
+  template <typename V> struct temporary_sparse_vector {
+    typedef typename temporary_sparse_vector_<typename
+    is_a_reference<V>::reference,
+    typename linalg_traits<V>::storage_type, V>::vector_type vector_type;
+  };
+
+  /* ******************************************************************** */
+  /*   Selects a temporary sky-line vector type                           */
+  /*   V if V is a valid sky-line vector type,                            */
+  /*   slvector if V is a reference or another type of vector             */
+  /* ******************************************************************** */
+
+  template <typename R, typename S, typename V>
+  struct temporary_skyline_vector_
+  { typedef abstract_null_type vector_type; };
+  template <typename S, typename V>
+  struct temporary_skyline_vector_<linalg_true, S, V>
+  { typedef slvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_skyline_vector_<linalg_false, abstract_skyline, V>
+  { typedef V vector_type; };
+  template <typename V>
+  struct temporary_skyline_vector_<linalg_false, abstract_dense, V>
+  { typedef slvector<typename linalg_traits<V>::value_type> vector_type; };
+  template <typename V>
+  struct temporary_skyline_vector_<linalg_false, abstract_sparse, V>
+  { typedef slvector<typename linalg_traits<V>::value_type> vector_type; };
+
+  template <typename V> struct temporary_skylines_vector {
+    typedef typename temporary_skyline_vector_<typename
+    is_a_reference<V>::reference,
+    typename linalg_traits<V>::storage_type, V>::vector_type vector_type;
+  };
+
+  /* ********************************************************************* */
+  /*  Definition & Comparison of origins.                                  */
+  /* ********************************************************************* */
+
+  template <typename L> 
+  typename select_return<const typename linalg_traits<L>::origin_type *,
+			 typename linalg_traits<L>::origin_type *,
+			 L *>::return_type
+  linalg_origin(L &l)
+  { return linalg_traits<L>::origin(linalg_cast(l)); }
+
+  template <typename L> 
+  typename select_return<const typename linalg_traits<L>::origin_type *,
+			 typename linalg_traits<L>::origin_type *,
+			 const L *>::return_type
+  linalg_origin(const L &l)
+  { return linalg_traits<L>::origin(linalg_cast(l)); }
+
+  template <typename PT1, typename PT2>
+  bool same_porigin(PT1, PT2) { return false; }
+
+  template <typename PT>
+  bool same_porigin(PT pt1, PT pt2) { return (pt1 == pt2); }
+
+  template <typename L1, typename L2>
+  bool same_origin(const L1 &l1, const L2 &l2)
+  { return same_porigin(linalg_origin(l1), linalg_origin(l2)); }
+
+
+  /* ******************************************************************** */
+  /*		Miscellaneous                           		  */
+  /* ******************************************************************** */
+
+  template <typename V> inline size_type vect_size(const V &v)
+  { return linalg_traits<V>::size(v); }
+
+  template <typename MAT> inline size_type mat_nrows(const MAT &m)
+  { return linalg_traits<MAT>::nrows(m); }
+
+  template <typename MAT> inline size_type mat_ncols(const MAT &m)
+  { return linalg_traits<MAT>::ncols(m); }
+
+
+  template <typename V> inline
+  typename select_return<typename linalg_traits<V>::const_iterator,
+           typename linalg_traits<V>::iterator, V *>::return_type
+  vect_begin(V &v)
+  { return linalg_traits<V>::begin(linalg_cast(v)); }
+
+  template <typename V> inline
+  typename select_return<typename linalg_traits<V>::const_iterator,
+	   typename linalg_traits<V>::iterator, const V *>::return_type
+  vect_begin(const V &v)
+  { return linalg_traits<V>::begin(linalg_cast(v)); }
+
+  template <typename V> inline
+  typename linalg_traits<V>::const_iterator
+  vect_const_begin(const V &v)
+  { return linalg_traits<V>::begin(v); }
+
+  template <typename V> inline
+  typename select_return<typename linalg_traits<V>::const_iterator,
+    typename linalg_traits<V>::iterator, V *>::return_type
+  vect_end(V &v)
+  { return linalg_traits<V>::end(linalg_cast(v)); }
+
+  template <typename V> inline
+  typename select_return<typename linalg_traits<V>::const_iterator,
+    typename linalg_traits<V>::iterator, const V *>::return_type
+  vect_end(const V &v)
+  { return linalg_traits<V>::end(linalg_cast(v)); }
+
+  template <typename V> inline
+  typename linalg_traits<V>::const_iterator
+  vect_const_end(const V &v)
+  { return linalg_traits<V>::end(v); }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_row_iterator,
+    typename linalg_traits<M>::row_iterator, M *>::return_type
+  mat_row_begin(M &m) { return linalg_traits<M>::row_begin(linalg_cast(m)); }
+  
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_row_iterator,
+    typename linalg_traits<M>::row_iterator, const M *>::return_type
+  mat_row_begin(const M &m)
+  { return linalg_traits<M>::row_begin(linalg_cast(m)); }
+  
+  template <typename M> inline typename linalg_traits<M>::const_row_iterator
+  mat_row_const_begin(const M &m)
+  { return linalg_traits<M>::row_begin(m); }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_row_iterator,
+    typename linalg_traits<M>::row_iterator, M *>::return_type
+  mat_row_end(M &v) {
+    return linalg_traits<M>::row_end(linalg_cast(v));
+  }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_row_iterator,
+    typename linalg_traits<M>::row_iterator, const M *>::return_type
+  mat_row_end(const M &v) {
+    return linalg_traits<M>::row_end(linalg_cast(v));
+  }
+
+  template <typename M> inline
+  typename linalg_traits<M>::const_row_iterator
+  mat_row_const_end(const M &v)
+  { return linalg_traits<M>::row_end(v); }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_col_iterator,
+    typename linalg_traits<M>::col_iterator, M *>::return_type
+  mat_col_begin(M &v) {
+    return linalg_traits<M>::col_begin(linalg_cast(v));
+  }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_col_iterator,
+    typename linalg_traits<M>::col_iterator, const M *>::return_type
+  mat_col_begin(const M &v) {
+    return linalg_traits<M>::col_begin(linalg_cast(v));
+  }
+
+  template <typename M> inline
+  typename linalg_traits<M>::const_col_iterator
+  mat_col_const_begin(const M &v)
+  { return linalg_traits<M>::col_begin(v); }
+
+  template <typename M> inline
+  typename linalg_traits<M>::const_col_iterator
+  mat_col_const_end(const M &v)
+  { return linalg_traits<M>::col_end(v); }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_col_iterator,
+                         typename linalg_traits<M>::col_iterator,
+                         M *>::return_type
+  mat_col_end(M &m)
+  { return linalg_traits<M>::col_end(linalg_cast(m)); }
+
+  template <typename M> inline
+  typename select_return<typename linalg_traits<M>::const_col_iterator,
+                         typename linalg_traits<M>::col_iterator,
+                         const M *>::return_type
+  mat_col_end(const M &m)
+  { return linalg_traits<M>::col_end(linalg_cast(m)); }
+
+  template <typename MAT> inline
+  typename select_return<typename linalg_traits<MAT>::const_sub_row_type,
+                         typename linalg_traits<MAT>::sub_row_type,
+                         const MAT *>::return_type
+  mat_row(const MAT &m, size_type i)
+  { return linalg_traits<MAT>::row(mat_row_begin(m) + i); }
+
+  template <typename MAT> inline
+  typename select_return<typename linalg_traits<MAT>::const_sub_row_type,
+                         typename linalg_traits<MAT>::sub_row_type,
+                         MAT *>::return_type
+  mat_row(MAT &m, size_type i)
+  { return linalg_traits<MAT>::row(mat_row_begin(m) + i); }
+
+  template <typename MAT> inline
+  typename linalg_traits<MAT>::const_sub_row_type
+  mat_const_row(const MAT &m, size_type i)
+  { return linalg_traits<MAT>::row(mat_row_const_begin(m) + i); }
+
+  template <typename MAT> inline
+  typename select_return<typename linalg_traits<MAT>::const_sub_col_type,
+                         typename linalg_traits<MAT>::sub_col_type,
+                         const MAT *>::return_type
+  mat_col(const MAT &m, size_type i)
+  { return linalg_traits<MAT>::col(mat_col_begin(m) + i); }
+
+
+  template <typename MAT> inline
+  typename select_return<typename linalg_traits<MAT>::const_sub_col_type,
+                         typename linalg_traits<MAT>::sub_col_type,
+                         MAT *>::return_type
+  mat_col(MAT &m, size_type i)
+  { return linalg_traits<MAT>::col(mat_col_begin(m) + i); }
+  
+  template <typename MAT> inline
+  typename linalg_traits<MAT>::const_sub_col_type
+  mat_const_col(const MAT &m, size_type i)
+  { return linalg_traits<MAT>::col(mat_col_const_begin(m) + i); }
+  
+  /* ********************************************************************* */
+  /* Set to begin and set to end for iterators on non-const sparse vectors*/
+  /* ********************************************************************* */
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &it, ORG o, VECT *, linalg_false)
+  { it = vect_begin(*o); }
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &it, ORG o, const VECT *, linalg_false) 
+  { it = vect_const_begin(*o); }
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &it, ORG o, VECT *, linalg_false)
+  { it = vect_end(*o); }
+  
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &it, ORG o, const VECT *, linalg_false)
+  { it = vect_const_end(*o); }
+
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &, ORG, VECT *, linalg_const) { }
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &, ORG, const VECT *, linalg_const) { }
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &, ORG, VECT *, linalg_const) { }
+  
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &, ORG, const VECT *, linalg_const) { }
+  // linalg_modifiable overloads: the iterator argument is never assigned.
+  // v = 0 only resets the by-value pointer (unused-param guard? TODO confirm).
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &, ORG, VECT *v, linalg_modifiable)
+  { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; }
+
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_begin(IT &, ORG, const VECT *v, linalg_modifiable)
+  { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; }
+ 
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &, ORG, VECT *v, linalg_modifiable)
+  { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; }
+  
+  template <typename IT, typename ORG, typename VECT> inline
+  void set_to_end(IT &, ORG, const VECT *v, linalg_modifiable)
+  { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; }
+
+  /* ******************************************************************** */
+  /*		General index for certain algorithms.         		  */
+  /* ******************************************************************** */
+
+  template<class IT> 
+  size_type index_of_it(const IT &it, size_type, abstract_sparse)
+  { return it.index(); }
+  template<class IT> 
+  size_type index_of_it(const IT &it, size_type, abstract_skyline)
+  { return it.index(); }
+  template<class IT> 
+  size_type index_of_it(const IT &, size_type k, abstract_dense)
+  { return k; }
+
+  /* ********************************************************************* */
+  /* Numeric limits.                                                       */
+  /* ********************************************************************* */
+  
+  template<typename T> inline T default_tol(T) {
+    using namespace std;
+    // Default tolerance for T (epsilon if known); set once, thread-safely.
+    static const T tol = []() -> T {
+      if (numeric_limits<T>::is_specialized)
+	return numeric_limits<T>::epsilon();
+      T guess(10);
+      int i=int(sizeof(T)/4); while(i-- > 0) guess*=T(1E-8); // per 4 bytes
+      GMM_WARNING1("The numeric type " << typeid(T).name()
+		    << " has no numeric_limits defined !!\n"
+		    << "Taking " << guess << " as default tolerance");
+      return guess;
+    }();
+    return tol;
+  }
+  template<typename T> inline T default_tol(std::complex<T>)
+  { return default_tol(T()); }
+
+  template<typename T> inline T default_min(T) {
+    using namespace std;
+    // Default minimum for T: numeric_limits<T>::min(), or 0 (with a warning)
+    // when numeric_limits is not specialised.  Set once, thread-safely,
+    // by the static initialisation instead of patching a sentinel value.
+    static const T mi = []() -> T {
+      if (numeric_limits<T>::is_specialized)
+	return std::numeric_limits<T>::min();
+      GMM_WARNING1("The numeric type " << typeid(T).name()
+		    << " has no numeric_limits defined !!\n"
+		    << "Taking 0 as default minimum");
+      return T(0);
+    }();
+    return mi;
+  }
+  template<typename T> inline T default_min(std::complex<T>)
+  { return default_min(T()); }
+
+  template<typename T> inline T default_max(T) {
+    using namespace std;
+    // Default maximum for T: numeric_limits<T>::max(), or 1 (with a warning)
+    // when numeric_limits is not specialised.  Set once, thread-safely,
+    // by the static initialisation instead of patching a sentinel value.
+    static const T ma = []() -> T {
+      if (numeric_limits<T>::is_specialized)
+	return std::numeric_limits<T>::max();
+      GMM_WARNING1("The numeric type " << typeid(T).name()
+		    << " has no numeric_limits defined !!\n"
+		    << "Taking 1 as default maximum !");
+      return T(1);
+    }();
+    return ma;
+  }
+  template<typename T> inline T default_max(std::complex<T>)
+  { return default_max(T()); }
+
+  
+  /*
+    use safe_divide to avoid NaNs when dividing very small complex
+    numbers, for example
+    std::complex<float>(1e-23,1e-30)/std::complex<float>(1e-23,1e-30)
+  */
+  template<typename T> inline T safe_divide(T a, T b) { return a/b; }
+  template<typename T> inline std::complex<T>
+  safe_divide(std::complex<T> a, std::complex<T> b) {
+    const T scale = std::max(gmm::abs(b.real()), gmm::abs(b.imag()));
+    const std::complex<T> num(a.real()/scale, a.imag()/scale);
+    const std::complex<T> den(b.real()/scale, b.imag()/scale);
+    return num / den;  // both operands were divided by scale beforehand
+  }
+
+
+  /* ******************************************************************** */
+  /*		Write                                   		  */
+  /* ******************************************************************** */
+
+  template <typename T> struct cast_char_type { typedef T return_type; };
+  template <> struct cast_char_type<signed char> { typedef int return_type; };
+  template <> struct cast_char_type<unsigned char>
+  { typedef unsigned int return_type; };
+  template <typename T> inline typename cast_char_type<T>::return_type
+  cast_char(const T &c) { return typename cast_char_type<T>::return_type(c); }
+
+
+  template <typename L> inline void write(std::ostream &o, const L &l)
+  { write(o, l, typename linalg_traits<L>::linalg_type()); }
+
+  template <typename L> void write(std::ostream &o, const L &l,
+				       abstract_vector) {
+    o << "vector(" << vect_size(l) << ") [";
+    write(o, l, typename linalg_traits<L>::storage_type());
+    o << " ]";
+  }
+
+  template <typename L> void write(std::ostream &o, const L &l,
+				       abstract_sparse) {
+    typedef typename linalg_traits<L>::const_iterator const_iterator;
+    const_iterator cur = vect_const_begin(l), last = vect_const_end(l);
+    for (; cur != last; ++cur)  // one " (r<index>, <value>)" per entry
+      o << " (r" << cur.index() << ", " << cast_char(*cur) << ")";
+  }
+
+  template <typename L> void write(std::ostream &o, const L &l,
+				       abstract_dense) {
+    typedef typename linalg_traits<L>::const_iterator const_iterator;
+    const_iterator cur = vect_const_begin(l), last = vect_const_end(l);
+    for (bool first = true; cur != last; ++cur, first = false)
+      o << (first ? " " : ", ") << cast_char(*cur);  // e.g. " a, b, c"
+  }
+
+  template <typename L> void write(std::ostream &o, const L &l,
+				       abstract_skyline) {
+    typedef typename linalg_traits<L>::const_iterator const_iterator;
+    const_iterator cur = vect_const_begin(l), last = vect_const_end(l);
+    if (cur == last) return;  // nothing stored: nothing is printed
+    // "<r+k>" gives the index of the first entry, then the values follow.
+    o << "<r+" << cur.index() << ">";
+    o << " " << cast_char(*cur); ++cur;
+    for (; cur != last; ++cur) o << ", " << cast_char(*cur);
+  }
+
+  template <typename L> inline void write(std::ostream &o, const L &l,
+				       abstract_matrix) {
+    write(o, l, typename linalg_traits<L>::sub_orientation());
+  }
+
+
+  template <typename L> void write(std::ostream &o, const L &l,
+				       row_major) {
+    typedef typename linalg_traits<L>::storage_type storage;
+    const size_type nr = mat_nrows(l);  // header line, then one row per line
+    o << "matrix(" << nr << ", " << mat_ncols(l) << ")" << endl;
+    for (size_type r = 0; r != nr; ++r) {
+      o << "("; write(o, mat_const_row(l, r), storage()); o << " )\n";
+    }
+  }
+
+  template <typename L> inline
+  void write(std::ostream &o, const L &l, row_and_col) 
+  { write(o, l, row_major()); }
+
+  template <typename L> inline
+  void write(std::ostream &o, const L &l, col_and_row)
+  { write(o, l, row_major()); }
+
+  template <typename L> void write(std::ostream &o, const L &l, col_major) {
+    o << "matrix(" << mat_nrows(l) << ", " << mat_ncols(l) << ")" << endl;
+    for (size_type i = 0; i < mat_nrows(l); ++i) {
+      o << "(";  // printed row by row, through element access l(i,j)
+      if (is_sparse(l)) { // not optimized ...
+	for (size_type j = 0; j < mat_ncols(l); ++j)
+	  if (l(i,j) != typename linalg_traits<L>::value_type(0)) 
+	    o << " (r" << j << ", " << cast_char(l(i,j)) << ")";
+      } else {  // cast_char: char entries print as numbers, as for vectors
+	if (mat_ncols(l) != 0) o << ' ' << cast_char(l(i, 0));
+	for (size_type j = 1; j < mat_ncols(l); ++j)
+	  o << ", " << cast_char(l(i, j));
+      }
+      o << " )\n";
+    }
+  }
+
+}
+
+#endif //  GMM_DEF_H__
diff --git a/gmm/gmm_dense_Householder.h b/gmm/gmm_dense_Householder.h
new file mode 100644
index 000000000..4dcb3cd24
--- /dev/null
+++ b/gmm/gmm_dense_Householder.h
@@ -0,0 +1,317 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_Householder.h
+   @author Caroline Lecalvez <Caroline.Lecalvez@gmm.insa-toulouse.fr>
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Householder for dense matrices.
+*/
+
+#ifndef GMM_DENSE_HOUSEHOLDER_H
+#define GMM_DENSE_HOUSEHOLDER_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Rank one update  (complex and real version)                        */
+  /* ********************************************************************* */
+
+  // Row-wise kernel: adds conj_product(y[j], x[i]) to every entry A(i,j).
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_one_update(Matrix &A, const VecX& x, const VecY& y, row_major) {
+    typedef typename linalg_traits<Matrix>::value_type T;
+    typedef typename linalg_traits<Matrix>::sub_row_type row_type;
+    typedef typename linalg_traits<typename org_type<row_type>::t>::iterator row_iter;
+    const size_type nrows = mat_nrows(A);
+    GMM_ASSERT2(nrows <= vect_size(x) && mat_ncols(A) <= vect_size(y),
+                "dimensions mismatch");
+    typename linalg_traits<VecX>::const_iterator px = vect_const_begin(x);
+    for (size_type r = 0; r < nrows; ++r, ++px) {
+      row_type row = mat_row(A, r);
+      row_iter dst = vect_begin(row), dst_end = vect_end(row);
+      typename linalg_traits<VecY>::const_iterator py = vect_const_begin(y);
+      T xr = *px;                       // x[r] is constant along the row
+      for (; dst != dst_end; ++dst, ++py) *dst += conj_product(*py, xr);
+    }
+  }
+
+  // Column-wise kernel: adds conj_product(y[j], x[i]) to every entry A(i,j).
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_one_update(Matrix &A, const VecX& x, const VecY& y, col_major) {
+    typedef typename linalg_traits<Matrix>::value_type T;
+    typedef typename linalg_traits<Matrix>::sub_col_type col_type;
+    typedef typename linalg_traits<typename org_type<col_type>::t>::iterator col_iter;
+    const size_type ncols = mat_ncols(A);
+    GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && ncols <= vect_size(y),
+                "dimensions mismatch");
+    typename linalg_traits<VecY>::const_iterator py = vect_const_begin(y);
+    for (size_type c = 0; c < ncols; ++c, ++py) {
+      col_type col = mat_col(A, c);
+      col_iter dst = vect_begin(col), dst_end = vect_end(col);
+      typename linalg_traits<VecX>::const_iterator px = vect_const_begin(x);
+      T yc = *py;                       // y[c] is constant along the column
+      for (; dst != dst_end; ++dst, ++px) *dst += conj_product(yc, *px);
+    }
+  }
+
+  ///@endcond
+  /** Rank one update of A by x and y; A is modified in place despite the const ref. */
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_one_update(const Matrix &AA, const VecX& x, const VecY& y) {
+    typedef typename linalg_traits<Matrix>::sub_orientation orient;
+    rank_one_update(const_cast<Matrix&>(AA), x, y,
+                    typename principal_orientation_type<orient>::potype());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Rank two update  (complex and real version)                        */
+  /* ********************************************************************* */
+
+  // Row-wise kernel: A(i,j) += conj_product(y[j], x[i]) + conj_product(x[j], y[i]).
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_two_update(Matrix &A, const VecX& x, const VecY& y, row_major) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename linalg_traits<Matrix>::sub_row_type row_type;
+    typedef typename linalg_traits<typename org_type<row_type>::t>::iterator row_iter;
+    const size_type nrows = mat_nrows(A);
+    GMM_ASSERT2(nrows <= vect_size(x) && mat_ncols(A) <= vect_size(y),
+                "dimensions mismatch");
+    typename linalg_traits<VecX>::const_iterator x_row = vect_const_begin(x);
+    typename linalg_traits<VecY>::const_iterator y_row = vect_const_begin(y);
+    for (size_type r = 0; r < nrows; ++r, ++x_row, ++y_row) {
+      row_type row = mat_row(A, r);
+      row_iter dst = vect_begin(row), dst_end = vect_end(row);
+      typename linalg_traits<VecX>::const_iterator x_col = vect_const_begin(x);
+      typename linalg_traits<VecY>::const_iterator y_col = vect_const_begin(y);
+      value_type xr = *x_row, yr = *y_row;
+      for (; dst != dst_end; ++dst, ++y_col, ++x_col)
+        *dst += conj_product(*y_col, xr) + conj_product(*x_col, yr);
+    }
+  }
+
+  // Column-wise kernel: A(i,j) += conj_product(y[j], x[i]) + conj_product(x[j], y[i]).
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_two_update(Matrix &A, const VecX& x, const VecY& y, col_major) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename linalg_traits<Matrix>::sub_col_type col_type;
+    typedef typename linalg_traits<typename org_type<col_type>::t>::iterator col_iter;
+    const size_type ncols = mat_ncols(A);
+    GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && ncols <= vect_size(y),
+                "dimensions mismatch");
+    typename linalg_traits<VecX>::const_iterator x_col = vect_const_begin(x);
+    typename linalg_traits<VecY>::const_iterator y_col = vect_const_begin(y);
+    for (size_type c = 0; c < ncols; ++c, ++y_col, ++x_col) {
+      col_type col = mat_col(A, c);
+      col_iter dst = vect_begin(col), dst_end = vect_end(col);
+      typename linalg_traits<VecX>::const_iterator x_row = vect_const_begin(x);
+      typename linalg_traits<VecY>::const_iterator y_row = vect_const_begin(y);
+      value_type yc = *y_col, xc = *x_col;
+      for (; dst != dst_end; ++dst, ++x_row, ++y_row)
+        *dst += conj_product(yc, *x_row) + conj_product(xc, *y_row);
+    }
+  }
+
+  ///@endcond
+  /** Rank two update of A by x and y; A is modified in place despite the const ref. */
+  template <typename Matrix, typename VecX, typename VecY> inline
+  void rank_two_update(const Matrix &AA, const VecX& x, const VecY& y) {
+    typedef typename linalg_traits<Matrix>::sub_orientation orient;
+    rank_two_update(const_cast<Matrix&>(AA), x, y,
+                    typename principal_orientation_type<orient>::potype());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Householder vector computation (complex and real version)          */
+  /* ********************************************************************* */
+
+  template <typename VECT> void house_vector(const VECT &VV) {  // rescales VV in place, then sets VV[0] = 1
+    VECT &V = const_cast<VECT &>(VV);  // the argument is written to despite the const reference
+    typedef typename linalg_traits<VECT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    R mu = vect_norm2(V), abs_v0 = gmm::abs(V[0]);  // reads V[0]: V must not be empty
+    if (mu != R(0))                                 // a zero-norm vector is left unscaled
+      gmm::scale(V, (abs_v0 == R(0)) ? T(R(1) / mu)
+                 : (safe_divide(T(abs_v0), V[0]) / (abs_v0 + mu)));
+    if (gmm::real(V[vect_size(V)-1]) * R(0) != R(0)) gmm::clear(V);  // x*0 != 0 only for NaN/Inf: reset V
+    V[0] = T(1);
+  }
+
+  template <typename VECT> void house_vector_last(const VECT &VV) {
+    VECT &V = const_cast<VECT &>(VV);   // V is rescaled in place through the const ref
+    typedef typename linalg_traits<VECT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    // Mirror of house_vector() pivoting on the last entry, which ends up equal to 1.
+    size_type m = vect_size(V);
+    R mu = vect_norm2(V), abs_v0 = gmm::abs(V[m-1]);   // reads V[m-1]: V must not be empty
+    if (mu != R(0))     // safe_divide, as in house_vector(), instead of a raw division
+      gmm::scale(V, (abs_v0 == R(0)) ? T(R(1) / mu)
+                 : (safe_divide(T(abs_v0), V[m-1]) / (abs_v0 + mu)));
+    if (gmm::real(V[0]) * R(0) != R(0)) gmm::clear(V);  // x*0 != 0 only for NaN/Inf: reset V
+    V[m-1] = T(1);
+  }
+
+  /* ********************************************************************* */
+  /*    Householder updates  (complex and real version)                    */
+  /* ********************************************************************* */
+
+  // Left-multiplies A in place by the reflector stored in V; W is scratch storage.
+  template <typename MAT, typename VECT1, typename VECT2> inline
+  void row_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    MAT &A = const_cast<MAT &>(AA);
+    VECT2 &W = const_cast<VECT2 &>(WW);
+    T beta = T(R(-2) / vect_norm2_sqr(V));
+    gmm::mult(conjugated(A), scaled(V, beta), W);   // W = conjugated(A) * (beta V)
+    rank_one_update(A, V, W);
+  }
+
+  // Right-multiplies A in place by the reflector stored in V; W is scratch storage.
+  template <typename MAT, typename VECT1, typename VECT2> inline
+  void col_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    MAT &A = const_cast<MAT &>(AA);
+    VECT2 &W = const_cast<VECT2 &>(WW);
+    T beta = T(R(-2) / vect_norm2_sqr(V));
+    gmm::mult(A, scaled(V, beta), W);               // W = A * (beta V)
+    rank_one_update(A, W, V);
+  }
+
+  ///@endcond
+
+  /* ********************************************************************* */
+  /*    Hessenberg reduction with Householder.                             */
+  /* ********************************************************************* */
+
+  template <typename MAT1, typename MAT2>  // A is reduced in place; Q is only touched when compute_Q
+  void Hessenberg_reduction(const MAT1& AA, const MAT2 &QQ, bool compute_Q){
+    MAT1& A = const_cast<MAT1&>(AA); MAT2& Q = const_cast<MAT2&>(QQ);  // both are written to
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    if (compute_Q) gmm::copy(identity_matrix(), Q);  // Q starts from the identity
+    size_type n = mat_nrows(A); if (n < 2) return;   // fewer than 2 rows: no reflection to apply
+    std::vector<value_type> v(n), w(n);              // v: reflector, w: scratch for the updates
+    sub_interval SUBK(0,n);  // presumably (first, length), i.e. all n indices - confirm
+    for (size_type k = 1; k+1 < n; ++k) {
+      sub_interval SUBI(k, n-k), SUBJ(k-1,n-k+1);
+      v.resize(n-k);
+      for (size_type j = k; j < n; ++j) v[j-k] = A(j, k-1);  // column k-1, rows k..n-1
+      house_vector(v);
+      row_house_update(sub_matrix(A, SUBI, SUBJ), v, sub_vector(w, SUBJ));  // reflector from the left
+      col_house_update(sub_matrix(A, SUBK, SUBI), v, w);                    // reflector from the right
+      // is it possible to "unify" the two on the common part of the matrix?
+      if (compute_Q) col_house_update(sub_matrix(Q, SUBK, SUBI), v, w);     // accumulate into Q
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Householder tridiagonalization for symmetric matrices              */
+  /* ********************************************************************* */
+
+  template <typename MAT1, typename MAT2>  // A is overwritten in place; Q is updated when compute_q
+  void Householder_tridiagonalization(const MAT1 &AA, const MAT2 &QQ,
+                                      bool compute_q) {
+    MAT1 &A = const_cast<MAT1 &>(AA); MAT2 &Q = const_cast<MAT2 &>(QQ);  // both are written to
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A); if (n < 2) return;  // fewer than 2 rows: nothing to do
+    std::vector<T> v(n), p(n), w(n), ww(n);         // ww is only scratch for the Q update
+    sub_interval SUBK(0,n);
+
+    for (size_type k = 1; k+1 < n; ++k) { // not optimized ...
+      sub_interval SUBI(k, n-k);
+      v.resize(n-k); p.resize(n-k); w.resize(n-k);
+      for (size_type l = k; l < n; ++l)   // save column k-1 below the diagonal, then zero it and its mirror row
+        { v[l-k] = w[l-k] = A(l, k-1); A(l, k-1) = A(k-1, l) = T(0); }
+      house_vector(v);
+      R norm = vect_norm2_sqr(v);
+      A(k-1, k) = gmm::conj(A(k, k-1) = w[0] - T(2)*v[0]*vect_hp(w, v)/norm);  // sets A(k,k-1) and its conjugate mirror
+
+      gmm::mult(sub_matrix(A, SUBI), gmm::scaled(v, T(-2) / norm), p);  // p = A[SUBI] * (-2/norm) v
+      gmm::add(p, gmm::scaled(v, -vect_hp(v, p) / norm), w);            // w = p - (vect_hp(v,p)/norm) v
+      rank_two_update(sub_matrix(A, SUBI), v, w);
+      // it should be possible to compute only the upper or lower part
+
+      if (compute_q) col_house_update(sub_matrix(Q, SUBK, SUBI), v, ww);  // Q is used as passed in (not reset here)
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Real and complex Givens rotations                                  */
+  /* ********************************************************************* */
+
+  template <typename T> void Givens_rotation(T a, T b, T &c, T &s) {  // computes c and s from a and b
+    typedef typename number_traits<T>::magnitude_type R;
+    R aa = gmm::abs(a), bb = gmm::abs(b);
+    if (bb == R(0)) { c = T(1); s = T(0);   return; }  // b == 0: c = 1, s = 0
+    if (aa == R(0)) { c = T(0); s = b / bb; return; }  // a == 0: c = 0, s = b/|b|
+    if (bb > aa)   // divide by the operand of larger magnitude, so that |t| <= 1
+      { T t = -safe_divide(a,b); s = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(t)))); c = s * t; }
+    else
+      { T t = -safe_divide(b,a); c = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(t)))); s = c * t; }
+  }
+
+  // Apply Q* v: (x, y) <- (conj(c) x - conj(s) y,  c y + s x), from the incoming values.
+  template <typename T> inline void Apply_Givens_rotation_left(T &x, T &y, T c, T s) {
+    T x0 = x, y0 = y;  x = gmm::conj(c)*x0 - gmm::conj(s)*y0;  y = c*y0 + s*x0;
+  }
+
+  // Apply v^T Q: (x, y) <- (c x - s y,  conj(c) y + conj(s) x), from the incoming values.
+  template <typename T> inline void Apply_Givens_rotation_right(T &x, T &y, T c, T s) {
+    T x0 = x, y0 = y;  x = c*x0 - s*y0;  y = gmm::conj(c)*y0 + gmm::conj(s)*x0;
+  }
+
+  template <typename MAT, typename T>     // rotates rows i and k of the matrix in place
+  void row_rot(const MAT &AA, T c, T s, size_type i, size_type k) {
+    MAT &target = const_cast<MAT &>(AA);  // could be specialized for row matrices
+    for (size_type col = 0; col < mat_ncols(target); ++col)
+      Apply_Givens_rotation_left(target(i, col), target(k, col), c, s);
+  }
+
+  template <typename MAT, typename T>     // rotates columns i and k of the matrix in place
+  void col_rot(const MAT &AA, T c, T s, size_type i, size_type k) {
+    MAT &target = const_cast<MAT &>(AA);  // could be specialized for column matrices
+    for (size_type row = 0; row < mat_nrows(target); ++row)
+      Apply_Givens_rotation_right(target(row, i), target(row, k), c, s);
+  }
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_lu.h b/gmm/gmm_dense_lu.h
new file mode 100644
index 000000000..5107abebf
--- /dev/null
+++ b/gmm/gmm_dense_lu.h
@@ -0,0 +1,250 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of lu.h from MTL.
+// See http://osl.iu.edu/research/mtl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_dense_lu.h
+   @author  Andrew Lumsdaine, Jeremy G. Siek, Lie-Quan Lee, Y. Renard
+   @date June 5, 2003.
+   @brief LU factorizations and determinant computation for dense matrices.
+*/
+#ifndef GMM_DENSE_LU_H
+#define GMM_DENSE_LU_H
+
+#include "gmm_dense_Householder.h"
+#include "gmm_opt.h"
+
+namespace gmm {
+
+
+  /** LU Factorization of a general (dense) matrix (real or complex).
+
+  This is the outer product (a level-2 operation) form of the LU
+  factorization with pivoting, equivalent to LAPACK's dgetf2. Also see
+  "Matrix Computations" 3rd Ed. by Golub and Van Loan, section 3.2.5 (page 115).
+  A is overwritten by its factors. ipvt must hold at least min(M, N)
+  entries; its pivot indices start from 1 for LAPACK (Fortran) compatibility.
+  Returns 0, or j+1 when the pivot search of column j only finds zeros.
+  NOTE(review): the last diagonal entry is never tested, so 0 does not prove invertibility.
+  */
+  template <typename DenseMatrix, typename Pvector>
+  size_type lu_factor(DenseMatrix& A, Pvector& ipvt) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    typedef typename linalg_traits<Pvector>::value_type int_T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type info(0), i, j, jp, M(mat_nrows(A)), N(mat_ncols(A));
+    size_type NN = std::min(M, N);
+    // Nothing to factor; also keeps ipvt[NN-1] below from being written with NN == 0.
+    if (NN == 0) return info;
+    std::vector<T> c(M), r(N);
+    // ipvt[NN-1] is written below, so NN entries are required (NN-1 is not enough).
+    GMM_ASSERT2(ipvt.size() >= NN, "IPVT too small");
+    for (i = 0; i+1 < NN; ++i) ipvt[i] = int_T(i);
+
+    for (j = 0; j+1 < NN; ++j) {
+      R max = gmm::abs(A(j,j)); jp = j;
+      for (i = j+1; i < M; ++i)                  /* find pivot.          */
+        if (gmm::abs(A(i,j)) > max) { jp = i; max = gmm::abs(A(i,j)); }
+      ipvt[j] = int_T(jp + 1);
+
+      if (max == R(0)) { info = j + 1; break; }  // whole pivot column is zero: stop
+      if (jp != j) for (i = 0; i < N; ++i) std::swap(A(jp, i), A(j, i));
+
+      for (i = j+1; i < M; ++i) { A(i, j) /= A(j,j); c[i-j-1] = -A(i, j); }
+      for (i = j+1; i < N; ++i) r[i-j-1] = A(j, i);  // avoid the copy ?
+      rank_one_update(sub_matrix(A, sub_interval(j+1, M-j-1),
+                                 sub_interval(j+1, N-j-1)), c, conjugated(r));
+    }
+    ipvt[NN-1] = int_T(NN);                      // last row is never exchanged
+    return info;
+  }
+  
+  /** LU Solve : Solve equation Ax=b, given an LU factored matrix.*/
+  //  Thanks to Valient Gough for this routine!
+  template <typename DenseMatrix, typename VectorB, typename VectorX,
+            typename Pvector>
+  void lu_solve(const DenseMatrix &LU, const Pvector& pvector,
+                VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    copy(b, x);
+    const size_type npiv = pvector.size();
+    for (size_type row = 0; row < npiv; ++row) {   // replay the row exchanges on x
+      const size_type swapped = pvector[row] - 1;  // pivots are stored 1-based
+      if (swapped != row) { T held = x[row]; x[row] = x[swapped]; x[swapped] = held; }
+    }
+    lower_tri_solve(LU, x, true);    // solve  Ax = b  ->  LUx = b  ->  Ux = L^-1 b.
+    upper_tri_solve(LU, x, false);
+  }
+
+  // Solves Ax=b on a private copy of A: factor the copy, then call the factored-form solver.
+  template <typename DenseMatrix, typename VectorB, typename VectorX>
+  void lu_solve(const DenseMatrix &A, VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    dense_matrix<T> factors(mat_nrows(A), mat_ncols(A));  gmm::copy(A, factors);
+    std::vector<int> pivots(mat_nrows(A));
+    const size_type info = lu_factor(factors, pivots);
+    GMM_ASSERT1(!info, "Singular system, pivot = " << info);
+    lu_solve(factors, pivots, x, b);
+  }
+  
+  // Transposed solve: triangular solves on transposed(LU), then exchanges in reverse order.
+  template <typename DenseMatrix, typename VectorB, typename VectorX,
+            typename Pvector>
+  void lu_solve_transposed(const DenseMatrix &LU, const Pvector& pvector,
+                           VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    copy(b, x);
+    lower_tri_solve(transposed(LU), x, false);
+    upper_tri_solve(transposed(LU), x, true);
+    for (size_type next = pvector.size(); next > 0; --next) {
+      const size_type row = next - 1, swapped = pvector[row] - 1;  // pivots are 1-based
+      if (row != swapped) { T held = x[row]; x[row] = x[swapped]; x[swapped] = held; }
+    }
+  }
+
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Column i of AInv is the lu_solve solution for the i-th unit vector.
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector, DenseMatrix& AInv, col_major) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    const size_type dim = pvector.size();
+    std::vector<T> unit(dim, T(0)), solution(dim);
+    for (size_type col = 0; col < dim; ++col) {
+      unit[col] = T(1);                       // unit now holds e_col
+      lu_solve(LU, pvector, solution, unit);
+      copy(solution, mat_col(AInv, col));
+      unit[col] = T(0);                       // back to the zero vector
+    }
+  }
+
+  // Row i of AInv is the lu_solve_transposed solution for the i-th unit vector.
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector,
+                  DenseMatrix& AInv, row_major) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    const size_type dim = pvector.size();
+    std::vector<T> unit(dim, T(0)), solution(dim);
+    // To be optimized: the first triangular solve could use a reduced system, and
+    // several vectors could perhaps be solved at once instead of one by one
+    // (accumulating the inverse directly in the matrix), avoiding the final copy.
+    for (size_type row = 0; row < dim; ++row) {
+      unit[row] = T(1);                       // unit now holds e_row
+      lu_solve_transposed(LU, pvector, solution, unit);
+      copy(solution, mat_row(AInv, row));
+      unit[row] = T(0);                       // back to the zero vector
+    }
+  }
+  ///@endcond  
+
+  /** Given an LU factored matrix, build the inverse of the matrix into AInv_
+      (which is written through the const reference). */
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector, const DenseMatrix& AInv_) {
+    typedef typename linalg_traits<DenseMatrix>::sub_orientation orient;
+    lu_inverse(LU, pvector, const_cast<DenseMatrix&>(AInv_),
+               typename principal_orientation_type<orient>::potype());
+  }
+
+  /** Given a dense matrix, replace it in place by its inverse and return the
+      determinant. The inverse is only built when lu_factor reports no zero pivot. */
+  template <typename DenseMatrix>
+  typename linalg_traits<DenseMatrix>::value_type
+  lu_inverse(const DenseMatrix& A_, bool doassert = true) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    DenseMatrix& target = const_cast<DenseMatrix&>(A_);
+    dense_matrix<T> factors(mat_nrows(target), mat_ncols(target));
+    gmm::copy(target, factors);
+    std::vector<int> pivots(mat_nrows(target));
+    const size_type info = lu_factor(factors, pivots);
+    if (doassert) GMM_ASSERT1(!info, "Non invertible matrix, pivot = "<<info);
+    if (info == 0) lu_inverse(factors, pivots, target);
+    return lu_det(factors, pivots);
+  }
+
+  /** Compute the matrix determinant from its LU factors and 1-based pivot vector:
+      product of the diagonal of LU, negated once per actual row exchange. */
+  template <typename DenseMatrixLU, typename Pvector>
+  typename linalg_traits<DenseMatrixLU>::value_type
+  lu_det(const DenseMatrixLU& LU, const Pvector &pvector) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    T det(1);
+    for (size_type d = 0; d < std::min(mat_nrows(LU), mat_ncols(LU)); ++d) det *= LU(d,d);
+    for (size_type p = 0; p < pvector.size(); ++p)
+      if (size_type(pvector[p]-1) != p) det = -det;   // entry p records an exchange
+    return det;
+  }
+
+  // Determinant of A: factor a private copy, then take the determinant of the factors.
+  template <typename DenseMatrix>
+  typename linalg_traits<DenseMatrix>::value_type lu_det(const DenseMatrix& A) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    dense_matrix<T> factors(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, factors);
+    std::vector<int> pivots(mat_nrows(A));
+    lu_factor(factors, pivots);     // the returned status code is not checked here
+    return lu_det(factors, pivots);
+  }
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_matrix_functions.h b/gmm/gmm_dense_matrix_functions.h
new file mode 100644
index 000000000..6005918a4
--- /dev/null
+++ b/gmm/gmm_dense_matrix_functions.h
@@ -0,0 +1,302 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2014-2017 Konstantinos Poulios
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_matrix_functions.h
+   @author  Konstantinos Poulios <poulios.konstantinos@gmail.com>
+   @date December 10, 2014.
+   @brief Common matrix functions for dense matrices.
+*/
+#ifndef GMM_DENSE_MATRIX_FUNCTIONS_H
+#define GMM_DENSE_MATRIX_FUNCTIONS_H
+
+
+namespace gmm {
+
+
+  /**
+     Matrix square root for upper triangular matrices (from GNU Octave).
+     A is overwritten; a zero-magnitude diagonal entry only raises a warning. */
+  template <typename T>
+  void sqrtm_utri_inplace(dense_matrix<T>& A)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+    bool singular = false;
+
+    // The following code is equivalent to this triple loop:
+    //
+    //   n = rows (A);
+    //   for j = 1:n
+    //     A(j,j) = sqrt (A(j,j));
+    //     for i = j-1:-1:1
+    //       A(i,j) /= (A(i,i) + A(j,j));
+    //       k = 1:i-1;
+    //       A(k,j) -= A(k,i) * A(i,j);
+    //     endfor
+    //   endfor
+
+    R tol = R(0); // default_tol(R()) * gmm::mat_maxnorm(A);
+
+    const size_type n = mat_nrows(A);
+    for (int j=0; j < int(n); j++) {
+      typename dense_matrix<T>::iterator colj = A.begin() + j*n; // column j
+      if (gmm::abs(colj[j]) > tol)
+        colj[j] = gmm::sqrt(colj[j]);
+      else
+        singular = true; // diagonal entry left as is, reported after the loop
+
+      for (int i=j-1; i >= 0; i--) { // walk up column j from the diagonal
+        typename dense_matrix<T>::const_iterator coli = A.begin() + i*n;
+        T colji = colj[i] = safe_divide(colj[i], (coli[i] + colj[j]));
+        for (int k = 0; k < i; k++) // update the entries above row i
+          colj[k] -= coli[k] * colji;
+      }
+    }
+
+    if (singular)
+      GMM_WARNING1("Matrix is singular, may not have a square root");
+  }
+
+
+  template <typename T> // complex square root through A = Q S Q^H (Schur)
+  void sqrtm(const dense_matrix<std::complex<T> >& A,
+             dense_matrix<std::complex<T> >& SQRTMA)
+  {
+    GMM_ASSERT1(gmm::mat_nrows(A) == gmm::mat_ncols(A),
+                "Matrix square root requires a square matrix");
+    gmm::resize(SQRTMA, gmm::mat_nrows(A), gmm::mat_ncols(A));
+    dense_matrix<std::complex<T> > S(A), Q(A), TMP(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(TMP, S, Q);
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use sqrtm matrix function.");
+    #endif
+    sqrtm_utri_inplace(S);  // S <- sqrt(S), S being upper triangular
+    gmm::mult(Q, S, TMP);
+    gmm::mult(TMP, gmm::conjugated(Q), SQRTMA); // Q unitary: Q^H, not Q^T
+  }
+
+  /** Square root of a real matrix, returned as a complex matrix. */
+  template <typename T>
+  void sqrtm(const dense_matrix<T>& A, dense_matrix<std::complex<T> >& SQRTMA)
+  {
+    dense_matrix<std::complex<T> > Acplx(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, gmm::real_part(Acplx));  // only the real parts are written
+    sqrtm(Acplx, SQRTMA);
+  }
+
+  /** Square root of a real matrix, keeping only the real part of the
+      result computed in complex arithmetic. */
+  template <typename T>
+  void sqrtm(const dense_matrix<T>& A, dense_matrix<T>& SQRTMA)
+  {
+    typedef dense_matrix<std::complex<T> > complex_matrix;
+    const size_type nr = gmm::mat_nrows(A), nc = gmm::mat_ncols(A);
+    complex_matrix Acplx(nr, nc);
+    gmm::copy(A, gmm::real_part(Acplx));
+    complex_matrix rootcplx(Acplx);
+    sqrtm(Acplx, rootcplx);
+    gmm::resize(SQRTMA, nr, nc);
+    // the imaginary part of the complex root is dropped here
+    gmm::copy(gmm::real_part(rootcplx), SQRTMA);
+  }
+
+
+  /**
+   Matrix logarithm for upper triangular matrices (from GNU/Octave).
+   S is overwritten: repeated square roots, quadrature, then rescaling. */
+  template <typename T>
+  void logm_utri_inplace(dense_matrix<T>& S)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = gmm::mat_nrows(S);
+    GMM_ASSERT1(n == gmm::mat_ncols(S),
+                "Matrix logarithm is not defined for non-square matrices");
+    for (size_type i=0; i+1 < n; ++i) // i+1 < n: no wrap-around when n == 0
+      if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+        GMM_ASSERT1(false, "An upper triangular matrix is expected");
+        break;
+      }
+    for (size_type i=0; i < n; ++i) // every diagonal entry, the last included
+      if (gmm::real(S(i,i)) <= -default_tol(R()) &&
+          gmm::abs(gmm::imag(S(i,i))) <= default_tol(R())) {
+        GMM_ASSERT1(false, "Principal matrix logarithm is not defined "
+                           "for matrices with negative eigenvalues");
+        break;
+      }
+
+    // Algorithm 11.9 in "Function of matrices", by N. Higham
+    R theta[] = { R(0),R(0),R(1.61e-2),R(5.38e-2),R(1.13e-1),R(1.86e-1),R(2.6429608311114350e-1) };
+
+    R scaling(1);
+    size_type p(0), m(6), opt_iters(100);
+    for (size_type k=0; k < opt_iters; ++k, scaling *= R(2)) {
+      dense_matrix<T> auxS(S);
+      for (size_type i = 0; i < n; ++i) auxS(i,i) -= R(1);
+      R tau = gmm::mat_norm1(auxS);
+      if (tau <= theta[6]) {
+        ++p;
+        size_type j1(6), j2(6);
+        for (size_type j=0; j < 6; ++j) if (tau <= theta[j]) { j1 = j; break; }
+        j2 = j1; // tau <= theta[j1] <= 2*theta[j1], hence j2 can never exceed j1
+        for (size_type j=0; j < j1; ++j)
+          if (tau <= 2*theta[j]) { j2 = j; break; }
+        if (j1 - j2 <= 1 || p == 2) { m = j1; break; }
+      }
+      sqrtm_utri_inplace(S);
+      if (k == opt_iters-1)
+        GMM_WARNING1 ("Maximum number of square roots exceeded; "
+                      "the calculated matrix logarithm may still be accurate");
+    }
+
+    for (size_type i = 0; i < n; ++i) S(i,i) -= R(1);
+
+    if (m > 0) {
+
+      std::vector<R> nodes, wts;
+      switch(m) {
+      case 0: {
+        R nodes_[] = { R(0.5) };
+        R wts_[] = { R(1) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 1: {
+        R nodes_[] = { R(0.211324865405187),R(0.788675134594813) };
+        R wts_[] = { R(0.5),R(0.5) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 2: {
+        R nodes_[] = { R(0.112701665379258),R(0.500000000000000),R(0.887298334620742) };
+        R wts_[] = { R(0.277777777777778),R(0.444444444444444),R(0.277777777777778) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 3: {
+        R nodes_[] = { R(0.0694318442029737),R(0.3300094782075718),R(0.6699905217924281),R(0.9305681557970263) };
+        R wts_[] = { R(0.173927422568727),R(0.326072577431273),R(0.326072577431273),R(0.173927422568727) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 4: {
+        R nodes_[] = { R(0.0469100770306681),R(0.2307653449471584),R(0.5000000000000000),R(0.7692346550528415),R(0.9530899229693319) };
+        R wts_[] = { R(0.118463442528095),R(0.239314335249683),R(0.284444444444444),R(0.239314335249683),R(0.118463442528094) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 5: {
+        R nodes_[] = { R(0.0337652428984240),R(0.1693953067668678),R(0.3806904069584015),R(0.6193095930415985),R(0.8306046932331322),R(0.9662347571015761) };
+        R wts_[] = { R(0.0856622461895853),R(0.1803807865240693),R(0.2339569672863452),R(0.2339569672863459),R(0.1803807865240693),R(0.0856622461895852) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 6: {
+        R nodes_[] = { R(0.0254460438286208),R(0.1292344072003028),R(0.2970774243113015),R(0.4999999999999999),R(0.7029225756886985),R(0.8707655927996973),R(0.9745539561713792) };
+        R wts_[] = { R(0.0647424830844348),R(0.1398526957446384),R(0.1909150252525594),R(0.2089795918367343),R(0.1909150252525595),R(0.1398526957446383),R(0.0647424830844349) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      }
+
+      dense_matrix<T> auxS1(S), auxS2(S);
+      std::vector<T> auxvec(n);
+      gmm::clear(S);
+      for (size_type j=0; j <= m; ++j) {
+        gmm::copy(gmm::scaled(auxS1, nodes[j]), auxS2);
+        gmm::add(gmm::identity_matrix(), auxS2);
+        // S += wts[j] * auxS1 * inv(auxS2), accumulated one row at a time
+        for (size_type i=0; i < n; ++i) {
+          gmm::copy(gmm::mat_row(auxS1, i), auxvec);
+          gmm::lower_tri_solve(gmm::transposed(auxS2), auxvec, false);
+          gmm::add(gmm::scaled(auxvec, wts[j]), gmm::mat_row(S, i));
+        }
+      }
+    }
+    gmm::scale(S, scaling); // scaling == 2^(number of square roots taken)
+  }
+
+  /**
+   Matrix logarithm (from GNU/Octave): LOGMA = Q * log(S) * Q^H, A = Q S Q^H
+  */
+  template <typename T>
+  void logm(const dense_matrix<T>& A, dense_matrix<T>& LOGMA)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type n = gmm::mat_nrows(A);
+    GMM_ASSERT1(n == gmm::mat_ncols(A),
+                "Matrix logarithm is not defined for non-square matrices");
+    dense_matrix<T> S(A), Q(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(A, S, Q); // A = Q * S * Q^H (Q^H is Q^T for real T)
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use logm matrix function.");
+    #endif
+
+    bool convert_to_complex(false); // set when a real S has a subdiagonal entry
+    if (!is_complex(T()))
+      for (size_type i=0; i+1 < n; ++i) // i+1 < n: safe for an empty matrix
+        if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+          convert_to_complex = true;
+          break;
+        }
+
+    gmm::resize(LOGMA, n, n);
+    if (convert_to_complex) {
+      dense_matrix<std::complex<R> > cS(n,n), cQ(n,n), auxmat(n,n);
+      gmm::copy(gmm::real_part(S), gmm::real_part(cS));
+      gmm::copy(gmm::real_part(Q), gmm::real_part(cQ));
+      block2x2_reduction(cS, cQ, default_tol(R())*R(3));
+      for (size_type j=0; j+1 < n; ++j)
+        for (size_type i=j+1; i < n; ++i)
+          cS(i,j) = T(0);
+      logm_utri_inplace(cS);
+      gmm::mult(cQ, cS, auxmat);
+      gmm::mult(auxmat, gmm::conjugated(cQ), cS); // cQ is complex here: Q^H
+      // Remove small complex values which may have entered calculation
+      gmm::copy(gmm::real_part(cS), LOGMA);
+//      GMM_ASSERT1(gmm::mat_norm1(gmm::imag_part(cS)) < n*default_tol(T()),
+//                  "Internal error, imag part should be zero");
+    } else {
+      dense_matrix<T> auxmat(n,n);
+      logm_utri_inplace(S);
+      gmm::mult(Q, S, auxmat);
+      gmm::mult(auxmat, gmm::conjugated(Q), LOGMA); // Q^H, also for complex T
+    }
+
+  }
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_qr.h b/gmm/gmm_dense_qr.h
new file mode 100644
index 000000000..9de7dbeb8
--- /dev/null
+++ b/gmm/gmm_dense_qr.h
@@ -0,0 +1,789 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_qr.h
+   @author  Caroline Lecalvez, Caroline.Lecalvez@gmm.insa-tlse.fr, Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 12, 2003.
+   @brief Dense QR factorization.
+*/
+#ifndef GMM_DENSE_QR_H
+#define GMM_DENSE_QR_H
+
+#include "gmm_dense_Householder.h"
+
+namespace gmm {
+
+
+  /**
+     In-place Householder QR factorization (real or complex matrices):
+     R ends up in the upper triangle, the reflectors below the diagonal. */
+  template <typename MAT1>
+  void qr_factor(const MAT1 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    MAT1 &A = const_cast<MAT1 &>(A_);
+
+    const size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+
+    std::vector<value_type> W(m), V(m);
+
+    for (size_type j = 0; j < n; ++j) {
+      const size_type len = m-j;      // length of the current reflector
+      V.resize(len); W.resize(n-j);
+      for (size_type k = 0; k < len; ++k) V[k] = A(j+k, j);
+      house_vector(V);
+
+      row_house_update(sub_matrix(A, sub_interval(j, len),
+                                  sub_interval(j, n-j)), V, W);
+      for (size_type k = 1; k < len; ++k) A(j+k, j) = V[k];
+    }
+  }
+
+
+  // Right-multiplies A by the Householder reflectors stored below the
+  // diagonal of QR (as produced by qr_factor(QR), whose upper triangle is R).
+  // A <- AQ
+  template <typename MAT1, typename MAT2>
+  void apply_house_right(const MAT1 &QR, const MAT2 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    MAT2 &A = const_cast<MAT2 &>(A_);
+    const size_type m = mat_nrows(QR), n = mat_ncols(QR), nrA = mat_nrows(A);
+    GMM_ASSERT2(m == mat_ncols(A), "dimensions mismatch");
+    if (m == 0) return;
+    std::vector<T> V(m), W(nrA);
+    V[0] = T(1);  // leading entry of each reflector is not stored in QR
+    for (size_type j = 0; j < n; ++j) {
+      V.resize(m-j);
+      for (size_type k = 1; k < m-j; ++k) V[k] = QR(j+k, j);
+      const sub_interval allrows(0, nrA), cols(j, m-j);
+      col_house_update(sub_matrix(A, allrows, cols), V, W);
+    }
+  }
+
+  // Left-multiplies A by the Householder reflectors stored below the
+  // diagonal of QR (as produced by qr_factor(QR), whose upper triangle is R).
+  // A <- Q*A
+  template <typename MAT1, typename MAT2>
+  void apply_house_left(const MAT1 &QR, const MAT2 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    MAT2 &A = const_cast<MAT2 &>(A_);
+    const size_type m = mat_nrows(QR), n = mat_ncols(QR), ncA = mat_ncols(A);
+    GMM_ASSERT2(m == mat_nrows(A), "dimensions mismatch");
+    if (m == 0) return;
+    std::vector<T> V(m), W(ncA);
+    V[0] = T(1);  // leading entry of each reflector is not stored in QR
+    for (size_type j = 0; j < n; ++j) {
+      V.resize(m-j);
+      for (size_type k = 1; k < m-j; ++k) V[k] = QR(j+k, j);
+      const sub_interval rows(j, m-j), allcols(0, ncA);
+      row_house_update(sub_matrix(A, rows, allcols), V, W);
+    }
+  }
+
+  /** Compute the QR factorization, where Q is assembled. */
+  template <typename MAT1, typename MAT2, typename MAT3>
+    void qr_factor(const MAT1 &A, const MAT2 &QQ, const MAT3 &RR) {
+    MAT2 &Q = const_cast<MAT2 &>(QQ); MAT3 &R = const_cast<MAT3 &>(RR);
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+    gmm::copy(A, Q); // Q first serves as the working copy of A
+
+    std::vector<value_type> W(m);
+    dense_matrix<value_type> VV(m, n); // column j keeps the j-th reflector
+
+    for (size_type j = 0; j < n; ++j) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+
+      for (size_type i = j; i < m; ++i) VV(i,j) = Q(i, j);
+      house_vector(sub_vector(mat_col(VV,j), SUBI));
+
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+
+    gmm::copy(sub_matrix(Q, sub_interval(0, n), sub_interval(0, n)), R);
+    gmm::copy(identity_matrix(), Q); // restart from the identity to build Q
+
+    for (size_type j = n-1; j != size_type(-1); --j) { // j = n-1 down to 0
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+  }
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, TV) { // generic tags
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i); // largest threshold seen so far
+      }
+      if ((i < n-1) && gmm::abs(A(i+1,i)) >= tol_i) { // 2x2 diagonal block
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det; // discriminant of the block
+        if (delta < -tol_cplx) {
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                      << std::complex<TA>(tr/TA(2), gmm::sqrt(-delta)/TA(2)));
+          V[i] = V[i+1] = tr / TA(2); // only the real part is stored
+        }
+        else {
+          delta = std::max(TA(0), delta); // clamp slightly negative values
+          V[i  ] = TA(tr + gmm::sqrt(delta))/ TA(2);
+          V[i+1] = TA(tr -  gmm::sqrt(delta))/ TA(2);
+        }
+        ++i; // the block used two diagonal entries
+      }
+      else
+        V[i] = TV(A(i,i));
+    }
+  }
+
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, std::complex<TV>) {
+    size_type n = mat_nrows(A); // overload selected for a complex-valued V
+    tol *= Ttol(2);
+    for (size_type i = 0; i < n; ++i)
+      if ((i == n-1) ||
+          gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol)
+        V[i] = std::complex<TV>(A(i,i)); // 1x1 block: diagonal entry
+      else { // 2x2 diagonal block: roots of its characteristic polynomial
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det;
+        if (delta < TA(0)) { // conjugate pair
+          V[i] = std::complex<TV>(tr / TA(2), gmm::sqrt(-delta) / TA(2));
+          V[i+1] = std::complex<TV>(tr / TA(2), -gmm::sqrt(-delta)/ TA(2));
+        }
+        else {
+          V[i  ] = TA(tr + gmm::sqrt(delta)) / TA(2);
+          V[i+1] = TA(tr -  gmm::sqrt(delta)) / TA(2);
+        }
+        ++i; // the block used two diagonal entries
+      }
+  }
+
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, std::complex<TA>, TV) {
+    typedef std::complex<TA> T; // complex-valued A, V filled with real parts
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i); // largest threshold seen so far
+      }
+      if ((i == n-1) || gmm::abs(A(i+1,i)) < tol_i) { // 1x1 block
+        if (gmm::abs(std::imag(A(i,i))) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                      << T(A(i,i)) << " : "  << gmm::abs(std::imag(A(i,i)))
+                      / gmm::abs(std::real(A(i,i))) << " : " << tol_cplx);
+        V[i] = std::real(A(i,i)); // imaginary part is discarded
+      }
+      else { // 2x2 diagonal block
+        T tr = A(i,i) + A(i+1, i+1);
+        T det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        T delta = tr*tr - TA(4) * det;
+        T a1 = (tr + gmm::sqrt(delta)) / TA(2);
+        T a2 = (tr - gmm::sqrt(delta)) / TA(2);
+        if (gmm::abs(std::imag(a1)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a1);
+        if (gmm::abs(std::imag(a2)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a2);
+        // both roots are stored through their real part only
+        V[i] = std::real(a1); V[i+1] = std::real(a2);
+        ++i;
+      }
+    }
+  }
+
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol,
+                   std::complex<TA>, std::complex<TV>) { // complex A and V
+    size_type n = mat_nrows(A);
+    tol *= Ttol(2);
+    for (size_type i = 0; i < n; ++i)
+      if ((i == n-1) ||
+          gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol)
+        V[i] = std::complex<TV>(A(i,i)); // 1x1 block: diagonal entry
+      else { // 2x2 diagonal block: roots of its characteristic polynomial
+        std::complex<TA> tr = A(i,i) + A(i+1, i+1);
+        std::complex<TA> det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        std::complex<TA> delta = tr*tr - TA(4) * det;
+        V[i] = (tr + gmm::sqrt(delta)) / TA(2);
+        V[i+1] = (tr - gmm::sqrt(delta)) / TA(2);
+        ++i; // the block used two diagonal entries
+      }
+  }
+
+  ///@endcond
+  /** Fill V with the eigenvalues read from the diagonal blocks of A,
+      dispatching on the value types of A and V. */
+  template <typename MAT, typename Ttol, typename VECT> inline
+  void extract_eig(const MAT &A, const VECT &V, Ttol tol) {
+    typedef typename linalg_traits<MAT>::value_type TA;
+    typedef typename linalg_traits<VECT>::value_type TV;
+    VECT &eigval = const_cast<VECT&>(V);
+    extract_eig(A, eigval, tol, TA(), TV());
+  }
+
+  /* ********************************************************************* */
+  /*    Stop criterion for QR algorithms                                   */
+  /* ********************************************************************* */
+
+  template <typename MAT, typename Ttol> // updates p and q; q is read on entry
+  void qr_stop_criterion(MAT &A, size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    size_type n = mat_nrows(A);
+    if (n <= 2) { q = n; p = 0; }
+    else {
+      for (size_type i = 1; i < n-q; ++i) // zero negligible subdiagonal entries
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+      // grow q while the trailing part splits off through a zero subdiagonal
+      while ((q < n-1 && A(n-1-q, n-2-q) == T(0)) ||
+             (q < n-2 && A(n-2-q, n-3-q) == T(0))) ++q;
+      if (q >= n-2) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p; // move p up to a zero subdiagonal
+    }
+  }
+
+  template <typename MAT, typename Ttol> inline
+  void symmetric_qr_stop_criterion(const MAT &AA, size_type &p, size_type &q,
+                                Ttol tol) { // updates p and q, may modify AA
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    MAT& A = const_cast<MAT&>(AA);
+    size_type n = mat_nrows(A);
+    if (n <= 1) { q = n; p = 0; }
+    else {
+      for (size_type i = 1; i < n-q; ++i) // zero negligible subdiagonal entries
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+      // grow q while the last active subdiagonal entry is exactly zero
+      while (q < n-1 && A(n-1-q, n-2-q) == T(0)) ++q;
+      if (q >= n-1) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p; // move p up to a zero subdiagonal
+    }
+  }
+
+  template <typename VECT1, typename VECT2, typename Ttol> inline
+  void symmetric_qr_stop_criterion(const VECT1 &diag, const VECT2 &sdiag_,
+                                   size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits<VECT2>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    VECT2 &sdiag = const_cast<VECT2 &>(sdiag_); // sdiag_ is modified in place
+    size_type n = vect_size(diag);
+    if (n <= 1) { q = n; p = 0; return; }
+    for (size_type i = 1; i < n-q; ++i) // zero negligible sdiag entries
+      if (gmm::abs(sdiag[i-1]) < (gmm::abs(diag[i])+ gmm::abs(diag[i-1]))*tol
+          || gmm::abs(sdiag[i-1]) < rmin)
+        sdiag[i-1] = T(0);
+    while (q < n-1 && sdiag[n-2-q] == T(0)) ++q; // grow q over zero entries
+    if (q >= n-1) q = n;
+    p = n-q; if (p) --p; if (p) --p;
+    while (p > 0 && sdiag[p-1] != T(0)) --p; // move p up to a zero entry
+  }
+
+  /* ********************************************************************* */
+  /*    2x2 blocks reduction for Schur vectors                             */
+  /* ********************************************************************* */
+
+  template <typename MATH, typename MATQ, typename Ttol>
+  void block2x2_reduction(MATH &H, MATQ &Q, Ttol tol) { // H and Q are updated
+    typedef typename linalg_traits<MATH>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+    if (n < 2) return;
+    sub_interval SUBQ(0, nq), SUBL(0, 2);
+    std::vector<T> v(2), w(std::max(n, nq)); v[0] = T(1);
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(H(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n-1; ++i) {
+      tol_i = (gmm::abs(H(i,i))+gmm::abs(H(i+1,i+1)))*tol;
+      tol_cplx = std::max(tol_cplx, tol_i);
+
+      if (gmm::abs(H(i+1,i)) > tol_i) { // 2x2 block detected
+        T tr = (H(i+1, i+1) - H(i,i)) / T(2);
+        T delta = tr*tr + H(i,i+1)*H(i+1, i);
+        // blocks of a real H with delta < 0 are left untouched
+        if (is_complex(T()) || gmm::real(delta) >= R(0)) {
+          sub_interval SUBI(i, 2);
+          T theta = (tr - gmm::sqrt(delta)) / H(i+1,i);
+          R a = gmm::abs(theta);
+          v[1] = (a == R(0)) ? T(-1)
+            : gmm::conj(theta) * (R(1) - gmm::sqrt(a*a + R(1)) / a);
+          row_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+        }
+        ++i; // skip the second row of the block
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Basic qr algorithm.                                                */
+  /* ********************************************************************* */
+
+  #define tol_type_for_qr typename number_traits<typename \
+                          linalg_traits<MAT1>::value_type>::magnitude_type
+  #define default_tol_for_qr \
+    (gmm::default_tol(tol_type_for_qr()) *  tol_type_for_qr(3))
+
+  // QR method for real or complex square matrices based on QR factorisation.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // Very slow method. Use implicit_qr_method instead.
+  template <typename MAT1, typename VECT, typename MAT2>
+    void rudimentary_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                                  const MAT2 &eigvect_,
+                                  tol_type_for_qr tol = default_tol_for_qr,
+                                  bool compvect = true) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+
+    size_type n = mat_nrows(A), p, q = 0, ite = 0;
+    dense_matrix<value_type> Q(n, n), R(n,n), A1(n,n);
+    gmm::copy(A, A1); // A itself is never modified
+
+    Hessenberg_reduction(A1, eigvect, compvect);
+    qr_stop_criterion(A1, p, q, tol);
+
+    while (q < n) { // one unshifted step: A1 = Q R, then A1 <- R Q
+      qr_factor(A1, Q, R);
+      gmm::mult(R, Q, A1);
+      if (compvect) { gmm::mult(eigvect, Q, R); gmm::copy(R, eigvect); }
+
+      qr_stop_criterion(A1, p, q, tol);
+      ++ite;
+      GMM_ASSERT1(ite < n*1000, "QR algorithm failed");
+    }
+    if (compvect) block2x2_reduction(A1, eigvect, tol); // rotate the vectors too
+    extract_eig(A1, eigval, tol);
+  }
+
+  template <typename MAT1, typename VECT>  // eigenvalues only, no vectors
+    void rudimentary_qr_algorithm(const MAT1 &a, VECT &eigval,
+                                  tol_type_for_qr tol = default_tol_for_qr) {
+    dense_matrix<typename linalg_traits<MAT1>::value_type> no_vectors(0,0);
+    rudimentary_qr_algorithm(a, eigval, no_vectors, tol, false);
+  }
+
+  /* ********************************************************************* */
+  /*    Francis QR step.                                                   */
+  /* ********************************************************************* */
+
+  template <typename MAT1, typename MAT2>
+    void Francis_qr_step(const MAT1& HH, const MAT2 &QQ, bool compute_Q) {
+    MAT1& H = const_cast<MAT1&>(HH); MAT2& Q = const_cast<MAT2&>(QQ);
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+    // NOTE(review): H(2, 1) and SUBK(n-3,3) below presume n >= 3 -- confirm
+    std::vector<value_type> v(3), w(std::max(n, nq));
+
+    value_type s = H(n-2, n-2) + H(n-1, n-1); // trace of the trailing 2x2 block
+    value_type t = H(n-2, n-2) * H(n-1, n-1) - H(n-2, n-1) * H(n-1, n-2);
+    value_type x = H(0, 0) * H(0, 0) + H(0,1) * H(1, 0) - s * H(0,0) + t;
+    value_type y = H(1, 0) * (H(0,0) + H(1,1) - s);
+    value_type z = H(1, 0) * H(2, 1);
+
+    sub_interval SUBQ(0, nq);
+
+    for (size_type k = 0; k < n - 2; ++k) { // 3-entry reflectors
+      v[0] = x; v[1] = y; v[2] = z;
+      house_vector(v);
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK),  v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI),  v, sub_vector(w, SUBJ));
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI),  v, sub_vector(w, SUBQ));
+
+      x = H(k+1, k); y = H(k+2, k); // entries defining the next reflector
+      if (k < n-3) z = H(k+3, k);
+    }
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2); // final reflector has two entries
+    v[0] = x; v[1] = y;
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Wilkinson Double shift QR step (from Lapack).                      */
+  /* ********************************************************************* */
+
+  template <typename MAT1, typename MAT2, typename Ttol>
+  void Wilkinson_double_shift_qr_step(const MAT1& HH, const MAT2 &QQ,
+                                      Ttol tol, bool exc, bool compute_Q) {
+    MAT1& H = const_cast<MAT1&>(HH); MAT2& Q = const_cast<MAT2&>(QQ);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    // NOTE(review): H(n-2, n-3) and SUBK(n-3,3) presume n >= 3 -- confirm
+    size_type n = mat_nrows(H), nq = mat_nrows(Q), m;
+    std::vector<T> v(3), w(std::max(n, nq));
+    const R dat1(0.75), dat2(-0.4375);
+    T h33, h44, h43h34, v1(0), v2(0), v3(0);
+
+    if (exc) {                    /* Exceptional shift.                    */
+      R s = gmm::abs(H(n-1, n-2)) + gmm::abs(H(n-2, n-3));
+      h33 = h44 = dat1 * s;
+      h43h34 = dat2*s*s;
+    }
+    else {                        /* Wilkinson double shift.               */
+      h44 = H(n-1,n-1); h33 = H(n-2, n-2);
+      h43h34 = H(n-1, n-2) * H(n-2, n-1);
+    }
+
+    /* Look for two consecutive small subdiagonal elements.                */
+    /* Determine the effect of starting the double-shift QR iteration at   */
+    /* row m, and see if this would make H(m-1, m-2) negligible.           */
+    for (m = n-2; m != 0; --m) {
+      T h11  = H(m-1, m-1), h22  = H(m, m);
+      T h21  = H(m, m-1),   h12  = H(m-1, m);
+      T h44s = h44 - h11,   h33s = h33 - h11;
+      v1 = (h33s*h44s-h43h34) / h21 + h12;
+      v2 = h22 - h11 - h33s - h44s;
+      v3 = H(m+1, m);
+      R s = gmm::abs(v1) + gmm::abs(v2) + gmm::abs(v3);
+      v1 /= s; v2 /= s; v3 /= s; // normalize by the sum of magnitudes
+      if (m == 1) break;
+      T h00 = H(m-2, m-2);
+      T h10 = H(m-1, m-2);
+      R tst1 = gmm::abs(v1)*(gmm::abs(h00)+gmm::abs(h11)+gmm::abs(h22));
+      if (gmm::abs(h10)*(gmm::abs(v2)+gmm::abs(v3)) <= tol * tst1) break;
+    }
+
+    /* Double shift QR step.                                               */
+    sub_interval SUBQ(0, nq);
+    for (size_type k = (m == 0) ? 0 : m-1; k < n-2; ++k) {
+      v[0] = v1; v[1] = v2; v[2] = v3;
+      house_vector(v);
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK),  v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI),  v, sub_vector(w, SUBJ));
+      if (k > m-1) { H(k+1, k-1) = T(0); if (k < n-3) H(k+2, k-1) = T(0); }
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI),  v, sub_vector(w, SUBQ));
+
+      v1 = H(k+1, k); v2 = H(k+2, k); // entries defining the next reflector
+      if (k < n-3) v3 = H(k+3, k);
+    }
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2); v[0] = v1; v[1] = v2; // final reflector has two entries
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm.                                             */
+  /* ********************************************************************* */
+
+  // QR method for real or complex square matrices based on an
+  // implicit QR factorisation. eigval has to be a complex vector
+  // if A has complex eigenvalues. Complexity about 10n^3, 25n^3 if
+  // eigenvectors are computed
+  // Eigenvalue computation by implicit double-shift QR iteration.
+  // A is copied into a dense work matrix, reduced by Hessenberg_reduction
+  // and then swept with Wilkinson_double_shift_qr_step until
+  // qr_stop_criterion reports q == n.
+  //   eigval_  : receives the eigenvalues (written through const_cast).
+  //   Q_       : updated by the reduction and the sweeps when compvect is
+  //              true (written through const_cast).
+  //   tol      : tolerance forwarded to the stop criterion and to the step.
+  //   compvect : whether the transformations are applied to Q_.
+  // GMM_ASSERT1 fails once n*100 sweeps have been performed.
+  template <typename MAT1, typename VECT, typename MAT2>
+    void implicit_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                               const MAT2 &Q_,
+                               tol_type_for_qr tol = default_tol_for_qr,
+                               bool compvect = true) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &Q = const_cast<MAT2 &>(Q_);
+
+    const size_type n = mat_nrows(A);
+    size_type p = 0, q = 0;
+    size_type sweeps = 0;     // total number of QR steps performed
+    size_type stalled = 0;    // QR steps counted since q last changed
+    sub_interval SUBK(0,0);   // columns of Q handed to the QR step
+
+    dense_matrix<value_type> H(n,n);
+    gmm::copy(A, H);
+    Hessenberg_reduction(H, Q, compvect);
+    qr_stop_criterion(H, p, q, tol);
+
+    while (q < n) {
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(Q));
+      if (compvect) SUBK = SUBI;
+
+      // The exceptional shift is requested when the counter reaches 10
+      // or 20 without q having changed.
+      const bool exceptional = (stalled == 10) || (stalled == 20);
+
+      // Disabled alternative step:
+      //   Francis_qr_step(sub_matrix(H, SUBI),
+      //                   sub_matrix(Q, SUBJ, SUBK), compvect);
+      Wilkinson_double_shift_qr_step(sub_matrix(H, SUBI),
+                                     sub_matrix(Q, SUBJ, SUBK),
+                                     tol, exceptional, compvect);
+
+      const size_type q_before = q;
+      qr_stop_criterion(H, p, q, tol*2);
+      stalled = (q != q_before) ? 1 : stalled + 1;
+      ++sweeps;
+      GMM_ASSERT1(sweeps < n*100, "QR algorithm failed");
+    }
+
+    if (compvect) block2x2_reduction(H, Q, tol);
+    extract_eig(H, eigval, tol);
+  }
+
+
+  // Eigenvalues-only variant: forwards to the five-argument overload with
+  // compvect == false and a 1x1 placeholder in place of Q.
+  template <typename MAT1, typename VECT>
+    void implicit_qr_algorithm(const MAT1 &a, VECT &eigval,
+                               tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    dense_matrix<value_type> placeholder_q(1,1);
+    implicit_qr_algorithm(a, eigval, placeholder_q, tol, false);
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit symmetric QR step with Wilkinson Shift.                   */
+  /* ********************************************************************* */
+
+  // One implicit QR step with a shift computed from the trailing 2x2 block,
+  // applied in place to the matrix M; when compute_z is true the same
+  // rotations are applied to the columns of Z through col_rot.
+  //   MM, ZZ : passed as const references but modified via const_cast.
+  // NOTE(review): reads M(n-2, n-2) and M(1, 0), so presumably n >= 2 is
+  // expected -- confirm against the caller.
+  template <typename MAT1, typename MAT2>
+    void symmetric_Wilkinson_qr_step(const MAT1& MM, const MAT2 &ZZ,
+                                     bool compute_z) {
+    MAT1& M = const_cast<MAT1&>(MM); MAT2& Z = const_cast<MAT2&>(ZZ);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type n = mat_nrows(M);
+
+    // Clean up the three central diagonals: keep only the real part of each
+    // diagonal entry and replace each sub/super-diagonal pair by the
+    // average a and its conjugate.
+    for (size_type i = 0; i < n; ++i) {
+      M(i, i) = T(gmm::real(M(i, i)));
+      if (i > 0) {
+        T a = (M(i, i-1) + gmm::conj(M(i-1, i)))/R(2);
+        M(i, i-1) = a; M(i-1, i) = gmm::conj(a);
+      }
+    }
+
+    // Shift mu built from the last two diagonal entries and the last
+    // subdiagonal entry.
+    R d = gmm::real(M(n-2, n-2) - M(n-1, n-1)) / R(2);
+    R e = gmm::abs_sqr(M(n-1, n-2));
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    // nu == 0 would divide by zero below: zero the subdiagonal entry and
+    // stop instead.
+    if (nu == R(0)) { M(n-1, n-2) = T(0); return; }
+    R mu = gmm::real(M(n-1, n-1)) - e / nu;
+    T x = M(0,0) - T(mu), z = M(1, 0), c, s;
+
+    for (size_type k = 1; k < n; ++k) {
+      // Rotation (c, s) computed from the pair (x, z).
+      Givens_rotation(x, z, c, s);
+
+      // Left rotation of rows k-1 and k, on columns k-2 .. k+1 where they
+      // exist.
+      if (k > 1) Apply_Givens_rotation_left(M(k-1,k-2), M(k,k-2), c, s);
+      Apply_Givens_rotation_left(M(k-1,k-1), M(k,k-1), c, s);
+      Apply_Givens_rotation_left(M(k-1,k  ), M(k,k  ), c, s);
+      if (k < n-1) Apply_Givens_rotation_left(M(k-1,k+1), M(k,k+1), c, s);
+      // Right rotation of columns k-1 and k, on rows k-2 .. k+1 where they
+      // exist.
+      if (k > 1) Apply_Givens_rotation_right(M(k-2,k-1), M(k-2,k), c, s);
+      Apply_Givens_rotation_right(M(k-1,k-1), M(k-1,k), c, s);
+      Apply_Givens_rotation_right(M(k  ,k-1), M(k,k)  , c, s);
+      if (k < n-1) Apply_Givens_rotation_right(M(k+1,k-1), M(k+1,k), c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+      // The next rotation is computed from column k-1, rows k and k+1.
+      if (k < n-1) { x = M(k, k-1); z = M(k+1, k-1); }
+    }
+
+  }
+
+  // Same QR step as the matrix overload, but working on two vectors:
+  // diag_ (diagonal entries) and sdiag_ (subdiagonal entries, sdiag[i]
+  // pairing with diag[i] and diag[i+1]). When compute_z is true the
+  // rotations are applied to the columns of Z through col_rot.
+  //   diag_, sdiag_, ZZ : passed as const references but modified via
+  //                       const_cast.
+  // NOTE(review): reads diag[n-2], sdiag[0] and sdiag[1], so presumably
+  // n >= 2 and sdiag has at least 2 entries -- confirm against the caller.
+  template <typename VECT1, typename VECT2, typename MAT>
+  void symmetric_Wilkinson_qr_step(const VECT1& diag_, const VECT2& sdiag_,
+                                   const MAT &ZZ, bool compute_z) {
+    VECT1& diag = const_cast<VECT1&>(diag_);
+    VECT2& sdiag = const_cast<VECT2&>(sdiag_);
+    MAT& Z = const_cast<MAT&>(ZZ);
+    typedef typename linalg_traits<VECT2>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    // Shift mu built from the last two diagonal entries and the last
+    // subdiagonal entry.
+    size_type n = vect_size(diag);
+    R d = (diag[n-2] - diag[n-1]) / R(2);
+    R e = gmm::abs_sqr(sdiag[n-2]);
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    // nu == 0 would divide by zero below: zero the subdiagonal entry and
+    // stop instead.
+    if (nu == R(0)) { sdiag[n-2] = T(0); return; }
+    R mu = diag[n-1] - e / nu;
+    T x = diag[0] - T(mu), z = sdiag[0], c, s;
+
+    // Local window a_rc standing for entry (k-2+r, k-2+c) of the matrix at
+    // step k; it is initialised for k == 1 from diag[0..1] and sdiag[0..1].
+    T a01(0), a02(0);
+    T a10(0), a11(diag[0]), a12(gmm::conj(sdiag[0])), a13(0);
+    T a20(0), a21(sdiag[0]), a22(diag[1]), a23(gmm::conj(sdiag[1]));
+    T a31(0), a32(sdiag[1]);
+
+    for (size_type k = 1; k < n; ++k) {
+      Givens_rotation(x, z, c, s);
+
+      // Left rotation on window rows 1 and 2.
+      if (k > 1) Apply_Givens_rotation_left(a10, a20, c, s);
+      Apply_Givens_rotation_left(a11, a21, c, s);
+      Apply_Givens_rotation_left(a12, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_left(a13, a23, c, s);
+
+      // Right rotation on window columns 1 and 2.
+      if (k > 1) Apply_Givens_rotation_right(a01, a02, c, s);
+      Apply_Givens_rotation_right(a11, a12, c, s);
+      Apply_Givens_rotation_right(a21, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_right(a31, a32, c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+
+      // Write the window back: real parts on the diagonal, and the average
+      // of each off-diagonal pair on the subdiagonal.
+      diag[k-1] = gmm::real(a11);
+      diag[k] = gmm::real(a22);
+      if (k > 1) sdiag[k-2] = (gmm::conj(a01) + a10) / R(2);
+      sdiag[k-1] = (gmm::conj(a12) + a21) / R(2);
+
+      // Pair from which the next rotation is computed.
+      x = sdiag[k-1]; z = (gmm::conj(a13) + a31) / R(2);
+
+      // Slide the window one step down the diagonal.
+      a01 = a12; a02 = a13;
+      a10 = a21; a11 = a22; a12 = a23; a13 = T(0);
+      a20 = a31; a21 = a32; a31 = T(0);
+
+      if (k < n-1) {
+        // Store the current subdiagonal value, then load the entries that
+        // enter the window from diag[k+1] and sdiag[k+1].
+        sdiag[k] = (gmm::conj(a23) + a32) / R(2);
+        a22 = T(diag[k+1]); a32 = sdiag[k+1]; a23 = gmm::conj(a32);
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm for symmetric or hermitian matrices.         */
+  /* ********************************************************************* */
+
+  // implicit QR method for real square symmetric matrices or complex
+  // hermitian matrices.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // complexity about 4n^3/3, 9n^3 if eigenvectors are computed
+  // Older variant of the symmetric/hermitian QR algorithm that iterates on
+  // the full dense matrix Tri rather than on diagonal/subdiagonal vectors.
+  //   eigval_  : receives the eigenvalues (written through const_cast).
+  //   eigvect_ : set to the identity and then updated by the
+  //              tridiagonalization and the QR steps when compvect is true.
+  //   tol      : tolerance forwarded to the stop criterion and extract_eig.
+  // GMM_ASSERT1 fails once n*100 sweeps have been performed.
+  template <typename MAT1, typename VECT, typename MAT2>
+  void symmetric_qr_algorithm_old(const MAT1 &A, const VECT &eigval_,
+                              const MAT2 &eigvect_,
+                              tol_type_for_qr tol = default_tol_for_qr,
+                              bool compvect = true) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+
+    const size_type n = mat_nrows(A);
+    size_type p, q = 0, sweeps = 0;
+
+    dense_matrix<T> Tri(n, n);
+    gmm::copy(A, Tri);
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+    symmetric_qr_stop_criterion(Tri, p, q, tol);
+
+    while (q < n) {
+      // Active block of Tri, and the matching columns of eigvect (an empty
+      // range when eigenvectors are not requested).
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect));
+      sub_interval SUBK = compvect ? SUBI : sub_interval(0,0);
+
+      symmetric_Wilkinson_qr_step(sub_matrix(Tri, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      symmetric_qr_stop_criterion(Tri, p, q, tol*R(2));
+      ++sweeps;
+      GMM_ASSERT1(sweeps < n*100, "QR algorithm failed. Probably, your matrix"
+                  " is not real symmetric or complex hermitian");
+    }
+
+    extract_eig(Tri, eigval, tol);
+  }
+
+  // QR algorithm for symmetric/hermitian matrices working on the diagonal
+  // and subdiagonal extracted after Householder_tridiagonalization.
+  //   eigval_  : receives the eigenvalues (written through const_cast).
+  //   eigvect_ : set to the identity and then updated by the
+  //              tridiagonalization and the QR steps when compvect is true.
+  //   tol      : tolerance forwarded to the stop criterion.
+  // n == 0 returns immediately; n == 1 stores real(A(0,0)) and returns.
+  // GMM_ASSERT1 fails once n*100 sweeps have been performed.
+  template <typename MAT1, typename VECT, typename MAT2>
+  void symmetric_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                              const MAT2 &eigvect_,
+                              tol_type_for_qr tol = default_tol_for_qr,
+                              bool compvect = true) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+
+    const size_type n = mat_nrows(A);
+    size_type p, q = 0, sweeps = 0;
+
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+
+    // Trivial sizes are answered without any reduction.
+    if (n == 0) return;
+    if (n == 1) {
+      eigval[0]=gmm::real(A(0,0));
+      return;
+    }
+
+    dense_matrix<T> Tri(n, n);
+    gmm::copy(A, Tri);
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+
+    // Keep only the real diagonal and the subdiagonal; the last entry of
+    // sdiag is never assigned and keeps its value-initialised content.
+    std::vector<R> diag(n);
+    std::vector<T> sdiag(n);
+    for (size_type i = 0; i < n; ++i) diag[i] = gmm::real(Tri(i, i));
+    for (size_type i = 0; i+1 < n; ++i) sdiag[i] = Tri(i+1, i);
+
+    symmetric_qr_stop_criterion(diag, sdiag, p, q, tol);
+
+    while (q < n) {
+      // Active range of diag/sdiag, and the matching columns of eigvect
+      // (an empty range when eigenvectors are not requested).
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect));
+      sub_interval SUBK = compvect ? SUBI : sub_interval(0,0);
+
+      symmetric_Wilkinson_qr_step(sub_vector(diag, SUBI),
+                                  sub_vector(sdiag, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3));
+      ++sweeps;
+      GMM_ASSERT1(sweeps < n*100, "QR algorithm failed.");
+    }
+
+    gmm::copy(diag, eigval);
+  }
+
+
+  // Eigenvalues-only variant: forwards to the five-argument overload with
+  // compvect == false and an empty 0x0 matrix in place of the eigenvectors.
+  template <typename MAT1, typename VECT>
+    void symmetric_qr_algorithm(const MAT1 &a, VECT &eigval,
+                                tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    dense_matrix<value_type> no_eigvect(0,0);
+    symmetric_qr_algorithm(a, eigval, no_eigvect, tol, false);
+  }
+
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_sylvester.h b/gmm/gmm_dense_sylvester.h
new file mode 100644
index 000000000..3b184ccbf
--- /dev/null
+++ b/gmm/gmm_dense_sylvester.h
@@ -0,0 +1,174 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_dense_sylvester.h
+    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+    @date June 5, 2003.
+    @brief Sylvester equation solver.
+*/
+#ifndef GMM_DENSE_SYLVESTER_H
+#define GMM_DENSE_SYLVESTER_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*   Kronecker system matrix.                                            */
+  /* ********************************************************************* */
+  // Kronecker product of m1 (m x n) by m2 (l x k) into m3_ (m*l x n*k):
+  // block (i, j) of m3_, of size l x k, receives m1(i,j) * m2.
+  //   m3_  : destination, passed as const reference but modified via
+  //          const_cast; its size is checked by GMM_ASSERT2.
+  //   init : true overwrites each block (gmm::copy), false accumulates
+  //          into the existing content (gmm::add).
+  template <typename MAT1, typename MAT2, typename MAT3>
+  void kron(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3_,
+            bool init = true) {
+    MAT3 &m3 = const_cast<MAT3 &>(m3_);
+    size_type m = mat_nrows(m1), n = mat_ncols(m1);
+    size_type l = mat_nrows(m2), k = mat_ncols(m2);
+
+    GMM_ASSERT2(mat_nrows(m3) == m*l && mat_ncols(m3) == n*k,
+                "dimensions mismatch");
+
+    for (size_type i = 0; i < m; ++i)
+      // Columns of m1 run up to n, not m: iterating to m skipped blocks
+      // (n > m) or read outside m1 and m3 (n < m) for a non-square m1.
+      for (size_type j = 0; j < n; ++j)
+        if (init)
+          gmm::copy(gmm::scaled(m2, m1(i,j)),
+                    gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                    sub_interval(k*j, k)));
+        else
+          gmm::add(gmm::scaled(m2, m1(i,j)),
+                    gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                    sub_interval(k*j, k)));
+  }
+	
+
+  /* ********************************************************************* */
+  /*   Copy a matrix into a vector.                                        */
+  /* ********************************************************************* */
+
+  // Column-major case: copies column i of A (m x n) into v[i*m .. i*m+m-1],
+  // i.e. stacks the columns of A one after the other in v.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(mat_col(A, i), sub_vector(v, sub_interval(i*m, m)));
+  }
+
+  // row_and_col orientation: handled by the column-major implementation.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_and_col)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  // col_and_row orientation: handled by the column-major implementation.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_and_row)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  // Row-major case: copies row i of A (m x n) into the n entries of v that
+  // start at index i and are m apart, which produces the same
+  // column-stacked layout as the column-major overload.
+  // Fixes: the return type was missing, and the row count was read from an
+  // undeclared name `mat` instead of A.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(mat_row(A, i), sub_vector(v, sub_slice(i, n, m)));
+  }
+
+  // Entry point: stacks the columns of A into v_, dispatching on the
+  // sub_orientation of MAT. v_ is passed as const reference but modified
+  // via const_cast.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT> inline
+  void colmatrix_to_vector(const MAT &A, const VECT &v_) {
+    VECT &v = const_cast<VECT &>(v_);
+    colmatrix_to_vector(A, v, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+
+  /* ********************************************************************* */
+  /*   Copy a vector into a matrix.                                        */
+  /* ********************************************************************* */
+
+  // Column-major case: copies v[i*m .. i*m+m-1] into column i of A (m x n),
+  // i.e. reads v as the columns of A stacked one after the other.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(sub_vector(v, sub_interval(i*m, m)), mat_col(A, i));
+  }
+
+  // row_and_col orientation: handled by the column-major implementation.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_and_col)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  // col_and_row orientation: handled by the column-major implementation.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_and_row)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  // Row-major case: fills row i of A (m x n) from the n entries of v that
+  // start at index i and are m apart, i.e. reads the same column-stacked
+  // layout as the column-major overload.
+  // Fixes: the return type was missing, and the row count was read from an
+  // undeclared name `mat` instead of A.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(sub_vector(v, sub_slice(i, n, m)), mat_row(A, i));
+  }
+
+  // Entry point: fills A_ from the column-stacked vector v, dispatching on
+  // the sub_orientation of MAT. A_ is passed as const reference but
+  // modified via const_cast.
+  // The declaration was missing its return type; it returns nothing.
+  template <typename MAT, typename VECT> inline
+  void vector_to_colmatrix(const VECT &v, const MAT &A_) {
+    MAT &A = const_cast<MAT &>(A_);
+    vector_to_colmatrix(v, A, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+  /* ********************************************************************* */
+  /*   Solve sylvester equation.                                           */
+  /* ********************************************************************* */
+
+  // Very expensive solver (it assembles and LU-solves a dense
+  // (m*l) x (m*l) system); to be replaced ...
+  // Solves m1 * X + X * m2 = m3 for X and stores X in m4_.
+  // The equation is rewritten on the column-stacked unknown as
+  //   (I_l (x) m1 + transposed(m2) (x) I_m) vec(X) = vec(m3)
+  // and the dense (m*l) x (m*l) system is solved with lu_solve.
+  //   m1  : m x m matrix,  m2 : l x l matrix,  m3 : m x l right-hand side.
+  //   m4_ : m x l result, passed as const reference but modified via
+  //         const_cast.
+  // Sizes are checked by GMM_ASSERT2.
+  // Fixes: the scalar type was taken from an undeclared name `Mat` (now
+  // MAT1), and the result reference was declared with type MAT3 although
+  // it is cast to MAT4.
+  template <typename MAT1, typename MAT2, typename MAT3, typename MAT4 >
+  void sylvester(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3,
+                 const MAT4 &m4_) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+
+    MAT4 &m4 = const_cast<MAT4 &>(m4_);
+    size_type m = mat_nrows(m1), n = mat_ncols(m1);
+    size_type l = mat_nrows(m2), k = mat_ncols(m2);
+
+    GMM_ASSERT2(m == n && l == k && m == mat_nrows(m3) &&
+                l == mat_ncols(m3) && m == mat_nrows(m4) && l == mat_ncols(m4),
+                "dimensions mismatch");
+
+    gmm::dense_matrix<T> akronb(m*l, m*l);
+    gmm::dense_matrix<T> idm(m, m), idl(l,l);
+    gmm::copy(identity_matrix(), idm);
+    gmm::copy(identity_matrix(), idl);
+    std::vector<T> x(m*l), c(m*l);
+
+    // First term overwrites akronb, second term is accumulated into it.
+    kron(idl, m1, akronb);
+    kron(gmm::transposed(m2), idm, akronb, false);
+
+    colmatrix_to_vector(m3, c);
+    lu_solve(akronb, c, x);
+    vector_to_colmatrix(x, m4);
+
+  }
+}
+
+#endif
+
diff --git a/gmm/gmm_domain_decomp.h b/gmm/gmm_domain_decomp.h
new file mode 100644
index 000000000..89c1841cf
--- /dev/null
+++ b/gmm/gmm_domain_decomp.h
@@ -0,0 +1,165 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_domain_decomp.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date May 21, 2004.
+    @brief Domain decomposition.
+*/
+#ifndef GMM_DOMAIN_DECOMP_H__
+#define GMM_DOMAIN_DECOMP_H__
+
+#include "gmm_kernel.h"
+#include <map>
+
+
+namespace gmm {
+
+  /** This function separates into small boxes of size msize with a ratio
+   * of overlap (in [0,1[) a set of points. The result is given into a
+   * vector of sparse matrices vB.
+   */
+  // Parameters:
+  //   pts     : the points; every point is indexed with operator[] on
+  //             0 .. pts[0].size()-1.
+  //   msize   : edge length of a box.
+  //   overlap : overlap ratio, in units of msize.
+  //   vB      : output, one matrix per non-empty sub-domain; matrix i has
+  //             one row per point and one column per point of the
+  //             sub-domain, with a single 1 in each column.
+  // vB is emptied when pts is empty or the points have dimension 0.
+  //
+  // Fix with respect to the previous version: in the "delete too small
+  // domains" pass the index of each neighbouring box was computed from ns
+  // (the current box) instead of nu (the neighbour), so a small domain was
+  // only ever compared with itself and never merged. In addition nu[l] was
+  // left at ns[l]+2 after visiting dimension l. Both are corrected below.
+  template <typename Matrix, typename Point>
+  void rudimentary_regular_decomposition(std::vector<Point> pts,
+                                         double msize,
+                                         double overlap,
+                                         std::vector<Matrix> &vB) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef abstract_null_type void_type;
+    typedef std::map<size_type, void_type> map_type;
+
+    size_type nbpts = pts.size();
+    if (!nbpts || pts[0].size() == 0) { vB.resize(0); return; }
+    int dim = int(pts[0].size());
+
+    // computation of the global box and the number of sub-domains
+    Point pmin = pts[0], pmax = pts[0];
+    for (size_type i = 1; i < nbpts; ++i)
+      for (int k = 0; k < dim; ++k) {
+        pmin[k] = std::min(pmin[k], pts[i][k]);
+        pmax[k] = std::max(pmax[k], pts[i][k]);
+      }
+
+    // nbsub[k] : number of boxes along dimension k.
+    // mult[k]  : stride of dimension k in the flattened box index.
+    std::vector<size_type> nbsub(dim), mult(dim);
+    std::vector<int> pts1(dim), pts2(dim);
+    size_type nbtotsub = 1;
+    for (int k = 0; k < dim; ++k) {
+      nbsub[k] = size_type((pmax[k] - pmin[k]) / msize)+1;
+      mult[k] = nbtotsub; nbtotsub *= nbsub[k];
+    }
+
+    // subs[b] holds, as map keys, the indices of the points of box b.
+    std::vector<map_type> subs(nbtotsub);
+    // points ventilation
+    std::vector<size_type> ns(dim), na(dim), nu(dim);
+    for (size_type i = 0; i < nbpts; ++i) {
+      for (int k = 0; k < dim; ++k) {
+        double a = (pts[i][k] - pmin[k]) / msize;
+        // First candidate box along k; wraps around when a < 1, in which
+        // case the ns[k] >= nbsub[k] test below rejects it.
+        ns[k] = size_type(a) - 1; na[k] = 0;
+        // Largest and smallest box coordinates accepted for this point.
+        pts1[k] = int(a + overlap); pts2[k] = int(ceil(a-1.0-overlap));
+      }
+      // Visit the 3^dim candidate boxes; na counts 0..2 in each dimension
+      // and sum returns to 0 once every combination has been seen.
+      size_type sum = 0;
+      do {
+        bool ok = 1;
+        for (int k = 0; k < dim; ++k)
+          if ((ns[k] >= nbsub[k]) || (pts1[k] < int(ns[k]))
+              || (pts2[k] > int(ns[k]))) { ok = false; break; }
+        if (ok) {
+          size_type ind = ns[0];
+          for (int k=1; k < dim; ++k) ind += ns[k]*mult[k];
+          subs[ind][i] = void_type();
+        }
+        for (int k = 0; k < dim; ++k) {
+          if (na[k] < 2) { na[k]++; ns[k]++; ++sum; break; }
+          na[k] = 0; ns[k] -= 2; sum -= 2;
+        }
+      } while (sum);
+    }
+    // delete too small domains.
+    size_type nbmaxinsub = 0;
+    for (size_type i = 0; i < nbtotsub; ++i)
+      nbmaxinsub = std::max(nbmaxinsub, subs[i].size());
+
+    // ns now tracks the coordinates of box i while i runs over all boxes.
+    std::fill(ns.begin(), ns.end(), size_type(0));
+    for (size_type i = 0; i < nbtotsub; ++i) {
+      if (subs[i].size() > 0 && subs[i].size() < nbmaxinsub / 10) {
+
+        for (int k = 0; k < dim; ++k) nu[k] = ns[k];
+        size_type nbmax = 0, imax = 0;
+
+        // Look at the two neighbours of box i along each dimension and
+        // remember the most populated one.
+        for (int l = 0; l < dim; ++l) {
+          for (int m = 0; m < 2; ++m) {
+            // ns[l] - 1 wraps around for ns[l] == 0 and is then rejected
+            // by the bound test, like ns[l] + 1 past the last box.
+            nu[l] = (m == 0) ? ns[l] - 1 : ns[l] + 1;
+            if (nu[l] < nbsub[l]) {
+              size_type ind = nu[0];
+              for (int k=1; k < dim; ++k) ind += nu[k]*mult[k];
+              if (subs[ind].size() > nbmax)
+                { nbmax = subs[ind].size(); imax = ind; }
+            }
+          }
+          nu[l] = ns[l];
+        }
+
+        // Move the points of box i into that neighbour when it is larger.
+        if (nbmax > subs[i].size()) {
+          for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it)
+            subs[imax][it->first] = void_type();
+          subs[i].clear();
+        }
+      }
+      // Advance ns to the coordinates of box i+1.
+      for (int k = 0; k < dim; ++k)
+        { ns[k]++; if (ns[k] < nbsub[k]) break; ns[k] = 0; }
+    }
+
+    // delete empty domains.
+    size_type effnb = 0;
+    for (size_type i = 0; i < nbtotsub; ++i) {
+      if (subs[i].size() > 0)
+        { if (i != effnb) std::swap(subs[i], subs[effnb]); ++effnb; }
+    }
+
+    // build matrices
+    subs.resize(effnb);
+    vB.resize(effnb);
+    for (size_type i = 0; i < effnb; ++i) {
+      clear(vB[i]); resize(vB[i], nbpts, subs[i].size());
+      size_type j = 0;
+      for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it, ++j)
+        vB[i](it->first, j) = value_type(1);
+    }
+  }
+  
+
+}
+
+
+#endif
diff --git a/gmm/gmm_except.h b/gmm/gmm_except.h
new file mode 100644
index 000000000..30b813a26
--- /dev/null
+++ b/gmm/gmm_except.h
@@ -0,0 +1,328 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_except.h 
+    @author Yves Renard <Yves.Renard@insa-lyon.fr>
+    @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+    @date September 01, 2002.
+    @brief Definition of basic exceptions.
+*/
+
+#ifndef GMM_EXCEPT_H__
+#define GMM_EXCEPT_H__
+
+#include "gmm_std.h"
+
+//provides external implementation of gmm_exception and logging.
+#ifndef EXTERNAL_EXCEPT_
+
+namespace gmm {
+
+/* *********************************************************************** */
+/*	GetFEM++ generic errors.                     			   */
+/* *********************************************************************** */
+
+  // Exception type thrown by the GMM_THROW_ / GMM_ASSERT macros; it only
+  // forwards its message to std::logic_error.
+  class gmm_error : public std::logic_error {
+  public:
+    gmm_error(const std::string &message) : std::logic_error(message) {}
+  };
+
+// Function name inserted in error messages: __PRETTY_FUNCTION__ when
+// GETFEM_HAVE_PRETTY_FUNCTION is defined, an empty string otherwise.
+#ifdef GETFEM_HAVE_PRETTY_FUNCTION
+#  define GMM_PRETTY_FUNCTION __PRETTY_FUNCTION__
+#else 
+#  define GMM_PRETTY_FUNCTION ""
+#endif
+
+  // Errors : GMM_THROW should not be used on its own.
+  //          GMM_ASSERT1 : Non-maskable errors. Typically for input/output and
+  //               when the test does not significantly reduce performance.
+  //          GMM_ASSERT2 : All tests which are potentially performance
+  //               consuming. Not hidden by default. Hidden when NDEBUG is
+  //               defined.
+  //          GMM_ASSERT3 : For internal checks. Hidden by default. Active
+  //               only when DEBUG_MODE is defined.
+  // __EXCEPTIONS is defined by gcc, _CPPUNWIND is defined by visual c++
+#if defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+  // Exception-enabled build: formats
+  // "Error in <file>, line <line> <func>: \n<errormsg>" and throws it as a
+  // gmm::gmm_error.
+  inline void short_error_throw(const char *file, int line, const char *func,
+                                const char *errormsg) {
+    std::stringstream report;
+    report << "Error in " << file << ", line " << line << " " << func;
+    report << ": \n" << errormsg << std::ends;
+    throw gmm::gmm_error(report.str());
+  }
+// Exception-enabled build: builds a message with file, line and
+// GMM_PRETTY_FUNCTION followed by errormsg (anything streamable with <<),
+// then throws an object of the given type constructed from that message.
+# define GMM_THROW_(type, errormsg) {					\
+    std::stringstream msg__;						\
+    msg__ << "Error in " << __FILE__ << ", line "                       \
+	  << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n"		\
+	  << errormsg << std::ends;				       	\
+    throw (type)(msg__.str());						\
+  }
+#else
+// Termination used by the build without exception support: ::abort()
+// everywhere except under _MSC_VER, where an always-failing assert followed
+// by a throw is used instead.
+#ifndef _MSC_VER
+# define abort_no_return() ::abort()
+#else
+// apparently ::abort() on windows is not declared with __declspec(noreturn) so the compiler spits a lot of warnings when abort is used.
+# define abort_no_return() { assert("GMM ABORT"==0); throw "GMM ABORT"; }
+#endif
+
+  // Build without exception support: prints
+  // "Error in <file>, line <line> <func>: \n<errormsg>" on std::cerr and
+  // then terminates through abort_no_return().
+  inline void short_error_throw(const char *file, int line, const char *func,
+                                const char *errormsg) {
+    std::stringstream report;
+    report << "Error in " << file << ", line " << line << " " << func;
+    report << ": \n" << errormsg << std::ends;
+    std::cerr << report.str() << std::endl;
+    abort_no_return();
+  }
+
+// Build without exception support: prints the same message (file, line,
+// GMM_PRETTY_FUNCTION, errormsg) on std::cerr and terminates through
+// abort_no_return(); the type argument is not used in this variant.
+# define GMM_THROW_(type, errormsg) {					\
+    std::stringstream msg__;						\
+    msg__ << "Error in " << __FILE__ << ", line "	       		\
+	  << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n"		\
+	  << errormsg;					        	\
+    std::cerr << msg__.str() << std::endl;                              \
+    abort_no_return();							\
+  }
+#endif
+
+// Always-active check: raises a gmm::gmm_error through GMM_THROW_ when
+// test evaluates to false.
+# define GMM_ASSERT1(test, errormsg)		        		\
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+
+  // GMM_THROW(a, b) expands to GMM_THROW_(a, b) followed by a call to the
+  // empty function gmm::GMM_THROW().
+  // NOTE(review): the purpose of the empty function is not visible here --
+  // confirm before removing it.
+  inline void GMM_THROW() {}
+#define GMM_THROW(a, b) { GMM_THROW_(a,b); gmm::GMM_THROW(); }
+
+// GMM_ASSERT2 / GMM_ASSERT3 depending on the build configuration:
+//   NDEBUG defined                      : both expand to an empty block.
+//   neither NDEBUG nor GMM_FULL_NDEBUG  : both behave like GMM_ASSERT1.
+//   GMM_FULL_NDEBUG only                : GMM_ASSERT2 behaves like
+//                                         GMM_ASSERT1, GMM_ASSERT3 expands
+//                                         to nothing.
+// NOTE(review): the summary comment further up describes GMM_ASSERT3 as
+// active only when DEBUG_MODE is defined, which is not what these
+// conditions test -- confirm which one is intended.
+#if defined(NDEBUG)
+# define GMM_ASSERT2(test, errormsg) {}
+# define GMM_ASSERT3(test, errormsg) {}
+#elif !defined(GMM_FULL_NDEBUG)
+# define GMM_ASSERT2(test, errormsg)				        \
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+# define GMM_ASSERT3(test, errormsg)				        \
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+#else
+# define GMM_ASSERT2(test, errormsg)          				\
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+# define GMM_ASSERT3(test, errormsg)
+#endif
+
+/* *********************************************************************** */
+/*	GetFEM++ warnings.                         			   */
+/* *********************************************************************** */
+
+  // This allows to dynamically hide warnings
+  // Holder of the run-time warning level (initially 3).
+  // level(l) stores l and returns it; level() or level(-2) only returns
+  // the stored value.
+  struct warning_level {
+    static int level(int l = -2) {
+      static int level_ = 3;
+      if (l != -2) level_ = l;
+      return level_;
+    }
+  };
+
+  // Stores the warning level, replacing negative values by 0.
+  inline void set_warning_level(int l) {
+    const int clamped = std::max(0,l);
+    warning_level::level(clamped);
+  }
+  // Returns the stored warning level without modifying it.
+  inline int  get_warning_level(void) {
+    const int query_only = -2;
+    return warning_level::level(query_only);
+  }
+
+  // This allows not to compile some Warnings
+// Compile-time warning level, 4 unless defined beforehand; GMM_WARNINGn is
+// compiled in only when GMM_WARNING_LEVEL is at least n.
+#ifndef GMM_WARNING_LEVEL
+# define GMM_WARNING_LEVEL 4
+#endif
+
+  // Warning levels : 0 always printed
+  //                  1 very important : specify a possible error in the code.
+  //                  2 important : specify a default of optimization for inst.
+  //                  3 remark
+  //                  4 ignored by default.
+
+// Prints "Level <level_> Warning in <file>, line <line>: <thestr>" on
+// std::cerr; thestr may be anything streamable with <<.
+#define GMM_WARNING_MSG(level_, thestr)  {			             \
+      std::stringstream msg__;                                               \
+      msg__ << "Level " << level_ << " Warning in " << __FILE__ << ", line " \
+            << __LINE__ << ": " << thestr;		                     \
+       std::cerr << msg__.str() << std::endl;                                \
+    }
+
+  // GMM_WARNINGn: compiled in if GMM_WARNING_LEVEL >= n, printed if run-time level >= n.
+#define GMM_WARNING0(thestr) GMM_WARNING_MSG(0, thestr)
+#if GMM_WARNING_LEVEL >= 1
+# define GMM_WARNING1(thestr)                                           \
+  { if (gmm::warning_level::level() >= 1) GMM_WARNING_MSG(1, thestr) }
+#else
+# define GMM_WARNING1(thestr) {}
+#endif
+
+#if GMM_WARNING_LEVEL >= 2
+# define GMM_WARNING2(thestr)                                           \
+  { if (gmm::warning_level::level() >= 2) GMM_WARNING_MSG(2, thestr) }
+#else
+# define GMM_WARNING2(thestr) {}
+#endif
+
+#if GMM_WARNING_LEVEL >= 3
+# define GMM_WARNING3(thestr)                                           \
+  { if (gmm::warning_level::level() >= 3) GMM_WARNING_MSG(3, thestr) }
+#else
+# define GMM_WARNING3(thestr) {}
+#endif
+
+#if GMM_WARNING_LEVEL >= 4
+# define GMM_WARNING4(thestr)                                           \
+  { if (gmm::warning_level::level() >= 4) GMM_WARNING_MSG(4, thestr) }
+#else
+# define GMM_WARNING4(thestr) {}
+#endif
+
+/* *********************************************************************** */
+/*	GetFEM++ traces.                         			   */
+/* *********************************************************************** */
+
+  // This allows to dynamically hide traces. The threshold starts at 3;
+  // level(-2) (the default) only queries it, any other argument stores it first.
+  struct traces_level {
+    static int level(int l = -2)
+    { static int level_ = 3; if (l != -2) level_ = l; return level_; }
+  };
+  inline void set_traces_level(int l) { traces_level::level(l < 0 ? 0 : l); }
+
+  // Compile-time cap on traces: GMM_TRACEn is compiled out unless
+  // GMM_TRACES_LEVEL > n-1 (the default of 4 keeps all of them).
+#if !defined(GMM_TRACES_LEVEL)
+# define GMM_TRACES_LEVEL 4
+#endif
+  // Traces levels : 0 always printed
+  //                 1 Likely to occur once in a program.
+  //                 2 Likely to occur occasionally in a program (10).
+  //                 3 Likely to occur often (100).
+  //                 4 Likely to occur very often (>1000).
+
+#define GMM_TRACE_MSG_MPI     // for Parallelized version
+#define GMM_TRACE_MSG(level_, thestr)  {                               \
+    GMM_TRACE_MSG_MPI {                                                \
+      std::stringstream msg__;                                         \
+      msg__ << "Trace " << level_ << " in " << __FILE__;               \
+      msg__ << ", line " << __LINE__ << ": " << thestr;                \
+      std::cout << msg__.str() << std::endl;                           \
+    }                                                                  \
+  }
+
+  // GMM_TRACEn: compiled in if GMM_TRACES_LEVEL >= n, printed if run-time level >= n.
+#define GMM_TRACE0(thestr) GMM_TRACE_MSG(0, thestr)
+#if GMM_TRACES_LEVEL >= 1
+# define GMM_TRACE1(thestr)                                             \
+  { if (gmm::traces_level::level() >= 1) GMM_TRACE_MSG(1, thestr) }
+#else
+# define GMM_TRACE1(thestr) {}
+#endif
+
+#if GMM_TRACES_LEVEL >= 2
+# define GMM_TRACE2(thestr)                                             \
+  { if (gmm::traces_level::level() >= 2) GMM_TRACE_MSG(2, thestr) }
+#else
+# define GMM_TRACE2(thestr) {}
+#endif
+
+#if GMM_TRACES_LEVEL >= 3
+# define GMM_TRACE3(thestr)                                             \
+  { if (gmm::traces_level::level() >= 3) GMM_TRACE_MSG(3, thestr) }
+#else
+# define GMM_TRACE3(thestr) {}
+#endif
+
+#if GMM_TRACES_LEVEL >= 4
+# define GMM_TRACE4(thestr)                                             \
+  { if (gmm::traces_level::level() >= 4) GMM_TRACE_MSG(4, thestr) }
+#else
+# define GMM_TRACE4(thestr) {}
+#endif
+  
+  
+  /* ********************************************************************* */
+  /*    Definitions for compatibility with old versions.                   */
+  /*    GMM_STANDARD_CATCH_ERROR: catch chain, reports on cerr then exit(1) */
+  /* ********************************************************************* */
+#define GMM_STANDARD_CATCH_ERROR   catch(const std::logic_error &e)	\
+    { /* caught by const reference like the others: no copy, no slicing */ \
+      std::cerr << "============================================\n";	\
+      std::cerr << "|      An error has been detected !!!      |\n";	\
+      std::cerr << "============================================\n";	\
+      std::cerr << e.what() << std::endl << std::endl;			\
+      exit(1);								\
+    }									\
+  catch(const std::runtime_error &e)					\
+    {									\
+      std::cerr << "============================================\n";	\
+      std::cerr << "|      An error has been detected !!!      |\n";	\
+      std::cerr << "============================================\n";	\
+      std::cerr << e.what() << std::endl << std::endl;			\
+      exit(1);								\
+    }									\
+  catch(const std::bad_alloc &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad allocation has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_typeid &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad typeid     has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_exception &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad exception  has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_cast &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|    A bad cast  has been detected !!!     |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(...) {								\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  An unknown error has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }
+  //   catch(ios_base::failure) { 
+  //     std::cerr << "============================================\n";
+  //     std::cerr << "| A ios_base::failure has been detected !!!|\n";
+  //     std::cerr << "============================================\n";
+  //     exit(1);
+  //   } 
+
+#if defined(__GNUC__) && (__GNUC__ > 3)  /* compilers reporting __GNUC__ >= 4 */
+# define GMM_SET_EXCEPTION_DEBUG				\
+  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
+#else  /* otherwise the macro expands to nothing */
+# define GMM_SET_EXCEPTION_DEBUG
+#endif
+
+}
+#else
+#include <external_except.h>
+#endif /* EXTERNAL_EXCEPT_*/
+#endif /* GMM_EXCEPT_H__ */
diff --git a/gmm/gmm_inoutput.h b/gmm/gmm_inoutput.h
new file mode 100644
index 000000000..0e27b17cc
--- /dev/null
+++ b/gmm/gmm_inoutput.h
@@ -0,0 +1,1176 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Julien Pommier
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_inoutput.h
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>
+   @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+   @date July 8, 2003.
+   @brief Input/output on sparse matrices
+
+   Support Harwell-Boeing and Matrix-Market formats.
+*/
+#ifndef GMM_INOUTPUT_H
+#define GMM_INOUTPUT_H
+
+#include <stdio.h>
+#include "gmm_kernel.h"
+namespace gmm {
+
+  /*************************************************************************/
+  /*                                                                       */
+  /*  Functions to read and write Harwell Boeing format.                   */
+  /*                                                                       */
+  /*************************************************************************/
+
+  // Fri Aug 15 16:29:47 EDT 1997
+  // 
+  //                      Harwell-Boeing File I/O in C
+  //                               V. 1.0
+  // 
+  //          National Institute of Standards and Technology, MD.
+  //                            K.A. Remington
+  // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  //                                NOTICE
+  //
+  // Permission to use, copy, modify, and distribute this software and
+  // its documentation for any purpose and without fee is hereby granted
+  // provided that the above copyright notice appear in all copies and
+  // that both the copyright notice and this permission notice appear in
+  // supporting documentation.
+  //
+  // Neither the Author nor the Institution (National Institute of Standards
+  // and Technology) make any representations about the suitability of this 
+  // software for any purpose. This software is provided "as is" without 
+  // expressed or implied warranty.
+  // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+  // Report a fatal Harwell-Boeing error: fails GMM_ASSERT1 with message a.
+  inline void IOHBTerminate(const char *a) { GMM_ASSERT1(false, a);}
+  // Overload pair used as a type test: true only for a std::complex<double>.
+  inline bool is_complex_double__(double) { return false; }
+  inline bool is_complex_double__(std::complex<double>) { return true; }
+  // Parse "(nIw)" or "(Iw)": *perline = n (1 if omitted), *width = w; returns w.
+  inline int ParseIfmt(const char *fmt, int* perline, int* width) {
+    if (SECURE_NONCHAR_SSCANF(fmt, " (%dI%d)", perline, width) == 2)
+      return *width;
+    *perline = 1;   // no repeat count matched: retry with the bare "(Iw)" form
+    int s = SECURE_NONCHAR_SSCANF(fmt, " (I%d)", width);
+    GMM_ASSERT1(s == 1, "invalid HB I-format: " << fmt);
+    return *width;
+  }
+  // Parse a real format "(nXw.d)" or "(Xw.d)" with X one of P, E, D, F. Sets
+  // *perline = n (1 if absent), *width = w, *prec = d if present, *flag = X.
+  inline int ParseRfmt(const char *fmt, int* perline, int* width,
+		       int* prec, int* flag) {
+    char p;
+    *perline = *width = *flag = *prec = 0;
+#ifdef GMM_SECURE_CRT
+    int nread = sscanf_s(fmt, " (%d%c%d.%d)", perline, &p, sizeof(char),
+			 width, prec);
+#else
+    int nread = sscanf(fmt, " (%d%c%d.%d)", perline, &p, width, prec);
+#endif
+    if (nread < 3 || !strchr("PEDF", p)) {
+      *perline = 1;   // no repeat count matched: retry with the bare form
+#ifdef GMM_SECURE_CRT
+      int s = sscanf_s(fmt, " (%c%d.%d)", &p, sizeof(char), width, prec);
+#else
+      int s = sscanf(fmt, " (%c%d.%d)", &p, width, prec);
+#endif
+      GMM_ASSERT1(s>=2 && strchr("PEDF",p), "invalid HB REAL format: " << fmt);
+    }
+    *flag = p;
+    return *width;
+  }
+  /** matrix input/output for Harwell-Boeing format (owns the FILE it opens) */
+  struct HarwellBoeing_IO {
+    int nrows() const { return Nrow; }
+    int ncols() const { return Ncol; }
+    int nnz() const { return Nnzero; }
+    int is_complex() const { return Type[0] == 'C'; }
+    int is_symmetric() const { return Type[1] == 'S'; }
+    int is_hermitian() const { return Type[1] == 'H'; }
+    HarwellBoeing_IO() { clear(); }
+    HarwellBoeing_IO(const char *filename) { clear(); open(filename); }
+    ~HarwellBoeing_IO() { close(); }
+    /** open filename (closing any file already open) and read its header */
+    void open(const char *filename);
+    /** read the opened file */
+    template <typename T, int shift> void read(csc_matrix<T, shift>& A);
+    template <typename MAT> void read(MAT &M) IS_DEPRECATED;
+    template <typename T, int shift>
+    static void write(const char *filename, const csc_matrix<T, shift>& A);
+    template <typename T, int shift>
+    static void write(const char *filename, const csc_matrix<T, shift>& A,
+		      const std::vector<T> &rhs);
+    template <typename T, typename INDI, typename INDJ, int shift> 
+    static void write(const char *filename,
+		      const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A);
+    template <typename T, typename INDI, typename INDJ, int shift> 
+    static void write(const char *filename,
+		      const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A,
+		      const std::vector<T> &rhs);
+
+    /** static method for saving the matrix */
+    template <typename MAT> static void write(const char *filename,
+					      const MAT& A) IS_DEPRECATED;
+  private:
+    FILE *f;                      /* 0 while no file is open */
+    char Title[73], Key[9], Rhstype[4], Type[4];
+    int Nrow, Ncol, Nnzero, Nrhs;
+    char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21];
+    int Ptrcrd, Indcrd, Valcrd, Rhscrd; 
+    int lcount;                   /* lines consumed so far by getline() */
+
+    /* close() releases the file and resets the header; clear() only resets. */
+    void close() { if (f) fclose(f); clear(); }
+    void clear() { 
+      Nrow = Ncol = Nnzero = Nrhs = 0; f = 0; lcount = 0;
+      memset(Type, 0, sizeof Type); 
+      memset(Key, 0, sizeof Key); 
+      memset(Title, 0, sizeof Title); 
+    }
+    char *getline(char *buf) {    /* buf must have room for BUFSIZ chars */
+      char *p = fgets(buf, BUFSIZ, f); ++lcount;
+      int s = p ? SECURE_NONCHAR_SSCANF(buf,"%*s") : -1; /* no scan if unread */
+      GMM_ASSERT1(s >= 0 && p != 0,
+		  "blank line in HB file at line " << lcount);
+      return buf;
+    }
+
+    int substrtoi(const char *p, size_type len) {
+      char s[100]; len = std::min(len, sizeof s - 1);
+      SECURE_STRNCPY(s, 100, p, len); s[len] = 0; return atoi(s);
+    }
+    double substrtod(const char *p, size_type len, int Valflag) {
+      char s[100]; len = std::min(len, sizeof s - 1);
+      SECURE_STRNCPY(s, 100, p, len); s[len] = 0;
+      if ( Valflag != 'F' && !strchr(s,'E')) {
+	/* Exponent letter omitted ("1.5-03"): insert 'E' before exponent sign. */
+	int last = int(strlen(s)), pos = last - 1;
+	for (; pos > 0; --pos)    /* last sign that follows a digit or '.' */
+	  if ((s[pos] == '+' || s[pos] == '-') && (s[pos-1] == '.' ||
+	      (s[pos-1] >= '0' && s[pos-1] <= '9'))) break;
+	if (pos > 0 && last + 1 < int(sizeof s)) { /* found, and room to grow */
+	  memmove(s + pos + 1, s + pos, size_type(last - pos + 1));
+	  s[pos] = 'E';                            /* atof() only accepts e/E */
+	}
+      }
+      return atof(s);
+    }
+    template <typename IND_TYPE>   
+    int readHB_data(IND_TYPE colptr[], IND_TYPE rowind[], 
+		    double val[]) {
+      /***********************************************************************/
+      /*  This function reads the data part of the already opened file,      */
+      /*  interpreting it as a sparse matrix stored in the Harwell/Boeing    */
+      /*  standard format and filling compressed column storage vectors:     */
+      /*  colptr (Ncol+1 entries), rowind and val (Nnzero entries, twice     */
+      /*  as many doubles in val for a complex matrix).  Always returns 1.   */
+      /*    ----------                                                       */
+      /*    **CAVEAT**                                                       */
+      /*    ----------                                                       */
+      /*  Parsing real formats from Fortran is tricky, and this file reader  */
+      /*  does not claim to be foolproof.   It has been tested for cases     */
+      /*  when the real values are printed consistently and evenly spaced on */
+      /*  each line, with Fixed (F), and Exponential (E or D) formats.       */
+      /*                                                                     */
+      /*  **  If the input file does not adhere to the H/B format, the  **   */
+      /*  **             results will be unpredictable.                 **   */
+      /*                                                                     */
+      /***********************************************************************/
+      int i,ind,col,offset,count;
+      int Ptrperline, Ptrwidth, Indperline, Indwidth;
+      int Valperline, Valwidth, Valprec, Nentries;
+      int Valflag = 'D';           /* Indicates 'E','D', or 'F' float format */
+      char line[BUFSIZ];
+      gmm::standard_locale sl;
+
+
+      /*  Parse the array input formats from Line 3 of HB file  */
+      ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth);
+      ParseIfmt(Indfmt,&Indperline,&Indwidth);
+      if ( Type[0] != 'P' ) {          /* Skip if pattern only  */
+	ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag);
+      }
+    
+      /*  Read column pointer array:   */
+      offset = 0;         /* indices are stored exactly as written in the    */
+      /* file; read() rebases them afterwards            */
+    
+      for (count = 0, i=0;i<Ptrcrd;i++) {
+	getline(line);
+	for (col = 0, ind = 0;ind<Ptrperline;ind++) {
+	  if (count > Ncol) break;
+	  colptr[count] = substrtoi(line+col,Ptrwidth)-offset;
+	  count++; col += Ptrwidth;
+	}
+      }
+    
+      /*  Read row index array:  */    
+      for (count = 0, i=0;i<Indcrd;i++) {
+	getline(line);
+	for (col = 0, ind = 0;ind<Indperline;ind++) {
+	  if (count == Nnzero) break;
+	  rowind[count] = substrtoi(line+col,Indwidth)-offset;
+	  count++; col += Indwidth;
+	}
+      }
+    
+      /*  Read array of values:  */
+      if ( Type[0] != 'P' ) {          /* Skip if pattern only  */
+	if ( Type[0] == 'C' ) Nentries = 2*Nnzero;
+	else Nentries = Nnzero;
+      
+	count = 0;
+	for (i=0;i<Valcrd;i++) {
+	  getline(line);
+	  if (Valflag == 'D')  {
+            // const_cast Due to aCC excentricity
+	    char *p;
+	    while( (p = const_cast<char *>(strchr(line,'D')) )) *p = 'E';
+	  }
+	  for (col = 0, ind = 0;ind<Valperline;ind++) {
+	    if (count == Nentries) break;
+	    val[count] = substrtod(line+col, Valwidth, Valflag);
+	    count++; col += Valwidth;
+	  }
+	}
+      }
+      return 1;
+    }
+  };
+  
+  /** Open filename (closing any file already open) and parse the 4 or 5
+      Harwell-Boeing header lines into the member fields. */
+  inline void HarwellBoeing_IO::open(const char *filename) {
+    int Totcrd,Neltvl,Nrhsix;
+    char line[BUFSIZ];
+    close();
+    SECURE_FOPEN(&f, filename, "r");
+    GMM_ASSERT1(f, "could not open " << filename);
+    /* First line: title then key (%c needs an explicit width to read > 1 char) */
+#ifdef GMM_SECURE_CRT
+    sscanf_s(getline(line), "%72c%8s", Title, 73u, Key, 9u);
+#else
+    sscanf(getline(line), "%72c%8s", Title, Key);
+#endif
+    Key[8] = Title[72] = 0;
+    /* Second line: */
+    Totcrd = Ptrcrd = Indcrd = Valcrd = Rhscrd = 0;
+    SECURE_NONCHAR_SSCANF(getline(line), "%d%d%d%d%d", &Totcrd, &Ptrcrd,
+			  &Indcrd, &Valcrd, &Rhscrd);
+    /* Third line: */
+    Nrow = Ncol = Nnzero = Neltvl = 0;
+#ifdef GMM_SECURE_CRT
+    if (sscanf_s(getline(line), "%3c%d%d%d%d", Type, 4u, &Nrow, &Ncol, &Nnzero,
+		 &Neltvl) < 1)
+#else
+    if (sscanf(getline(line), "%3c%d%d%d%d", Type, &Nrow, &Ncol, &Nnzero,
+	       &Neltvl) < 1)
+#endif
+      IOHBTerminate("Invalid Type info, line 3 of Harwell-Boeing file.\n");
+    for (size_type i = 0; i < 3; ++i) Type[i] = char(toupper(Type[i]));
+    
+      /*  Fourth line (secure-CRT sizes are the full array sizes):  */
+#ifdef GMM_SECURE_CRT
+    if ( sscanf_s(getline(line), "%16c%16c%20c%20c",Ptrfmt, 17u,Indfmt, 17u,
+		  Valfmt, 21u,Rhsfmt, 21u) < 3)
+#else
+    if ( sscanf(getline(line), "%16c%16c%20c%20c",Ptrfmt,Indfmt,Valfmt,
+		Rhsfmt) < 3)
+#endif
+      IOHBTerminate("Invalid format info, line 4 of Harwell-Boeing file.\n"); 
+    Ptrfmt[16] = Indfmt[16] = Valfmt[20] = Rhsfmt[20] = 0;
+    
+    /*  (Optional) Fifth line: only the RHS type is mandatory, hence "< 1" */
+    if (Rhscrd != 0 ) { 
+      Nrhs = Nrhsix = 0;
+#ifdef GMM_SECURE_CRT
+      if ( sscanf_s(getline(line), "%3c%d%d", Rhstype, 4u, &Nrhs, &Nrhsix) < 1)
+#else
+      if ( sscanf(getline(line), "%3c%d%d", Rhstype, &Nrhs, &Nrhsix) < 1)
+#endif
+	IOHBTerminate("Invalid RHS type information, line 5 of"
+		      " Harwell-Boeing file.\n");
+    }
+  }
+  /* Fill A from the opened file (only valid for double and complex<double>
+     csc matrices); asserts that the file's value type matches T. */
+  template <typename T, int shift> void
+  HarwellBoeing_IO::read(csc_matrix<T, shift>& A) {
+    const bool want_complex = is_complex_double__(T());
+    GMM_ASSERT1(f, "no file opened!");
+    GMM_ASSERT1(Type[0] != 'P',
+		"Bad HB matrix format (pattern matrices not supported)");
+    GMM_ASSERT1(!(want_complex && Type[0] == 'R'),
+		"Bad HB matrix format (file contains a REAL matrix)");
+    GMM_ASSERT1(!(!want_complex && Type[0] == 'C'),
+		"Bad HB matrix format (file contains a COMPLEX matrix)");
+    const int nc = ncols(), nz = nnz();
+    A.nc = nc; A.nr = nrows();
+    A.jc.resize(nc+1);
+    A.ir.resize(nz);
+    A.pr.resize(nz);
+    readHB_data(&A.jc[0], &A.ir[0], (double*)&A.pr[0]);
+    // Rebase every index read from the file: add shift, subtract 1.
+    for (int j = 0; j <= nc; ++j) { A.jc[j] += shift; A.jc[j] -= 1; }
+    for (int k = 0; k < nz; ++k)  { A.ir[k] += shift; A.ir[k] -= 1; }
+  }
+  /* Generic overload: read into a temporary csc_matrix, then resize M and copy. */
+  template <typename MAT> void
+  HarwellBoeing_IO::read(MAT &M) {
+    csc_matrix<typename gmm::linalg_traits<MAT>::value_type> loaded;
+    read(loaded);
+    resize(M, mat_nrows(loaded), mat_ncols(loaded));
+    copy(loaded, M);
+  }
+  template <typename IND_TYPE> 
+  inline int writeHB_mat_double(const char* filename, int M, int N, int nz,
+				const IND_TYPE colptr[],
+				const IND_TYPE rowind[], 
+				const double val[], int Nrhs,
+				const double rhs[], const double guess[],
+				const double exact[], const char* Title,
+				const char* Key, const char* Type, 
+				const char* Ptrfmt, const char* Indfmt,
+				const char* Valfmt, const char* Rhsfmt,
+				const char* Rhstype, int shift) {
+    /************************************************************************/
+    /*  Writes the M x N matrix (nz entries, index base `shift`) and Nrhs   */
+    /*  right-hand sides to `filename` (stdout when NULL) in Harwell-Boeing */
+    /*  format.  rhs/guess/exact hold Nrhs consecutive vectors.  NULL       */
+    /*  formats default to "(8I10)" / "(4E21.13)".  Always returns 1.       */
+    /*                                                                      */
+    /*  For a description of the Harwell Boeing standard, see:              */
+    /*            Duff, et al.,  ACM TOMS Vol.15, No.1, March 1989          */
+    /************************************************************************/
+    FILE *out_file;
+    int i, entry, offset, j, acount, linemod;
+    int totcrd, ptrcrd, indcrd, valcrd, rhscrd;
+    int nvalentries, nrhsentries;
+    int Ptrperline, Ptrwidth, Indperline, Indwidth;
+    int Rhsperline, Rhswidth, Rhsprec, Rhsflag;
+    int Valperline, Valwidth, Valprec;
+    int Valflag;           /* Indicates 'E','D', or 'F' float format */
+    char pformat[16],iformat[16],vformat[19],rformat[19];
+    // pformat/iformat/vformat/rformat are the printf formats built below.
+    gmm::standard_locale sl;
+    
+    if ( Type[0] == 'C' )
+      { nvalentries = 2*nz; nrhsentries = 2*M; }
+    else
+      { nvalentries = nz; nrhsentries = M; }
+    
+    if ( filename != NULL ) {
+      SECURE_FOPEN(&out_file, filename, "w");
+      GMM_ASSERT1(out_file != NULL, "Error: Cannot open file: " << filename);
+    } else out_file = stdout;
+    
+    if ( Ptrfmt == NULL ) Ptrfmt = "(8I10)";
+    ParseIfmt(Ptrfmt, &Ptrperline, &Ptrwidth);
+    SECURE_SPRINTF1(pformat,sizeof(pformat),"%%%dd",Ptrwidth);
+    ptrcrd = (N+1)/Ptrperline;
+    if ( (N+1)%Ptrperline != 0) ptrcrd++;
+    
+    if ( Indfmt == NULL ) Indfmt =  Ptrfmt;
+    ParseIfmt(Indfmt, &Indperline, &Indwidth);
+    SECURE_SPRINTF1(iformat,sizeof(iformat), "%%%dd",Indwidth);
+    indcrd = nz/Indperline;
+    if ( nz%Indperline != 0) indcrd++;
+    
+    if ( Type[0] != 'P' ) {          /* Skip if pattern only  */
+      if ( Valfmt == NULL ) Valfmt = "(4E21.13)";
+      ParseRfmt(Valfmt, &Valperline, &Valwidth, &Valprec, &Valflag);
+      // Values are printed with %E unless the format letter is 'F'; the
+      // format string itself is echoed unchanged in the header.  The blank
+      // flag of "% w.pE" reserves the sign column for positive values.
+      //
+      if (Valflag == 'F')
+	SECURE_SPRINTF2(vformat, sizeof(vformat), "%% %d.%df", Valwidth,
+			Valprec);
+      else
+	SECURE_SPRINTF2(vformat, sizeof(vformat), "%% %d.%dE", Valwidth,
+			Valprec);
+      valcrd = nvalentries/Valperline;
+      if ( nvalentries%Valperline != 0) valcrd++;
+    } else valcrd = 0;
+    
+    if ( Nrhs > 0 ) {
+      if ( Rhsfmt == NULL ) Rhsfmt = Valfmt;
+      ParseRfmt(Rhsfmt,&Rhsperline,&Rhswidth,&Rhsprec, &Rhsflag);
+      if (Rhsflag == 'F')
+	SECURE_SPRINTF2(rformat,sizeof(rformat), "%% %d.%df",Rhswidth,Rhsprec);
+      else
+	SECURE_SPRINTF2(rformat,sizeof(rformat), "%% %d.%dE",Rhswidth,Rhsprec);
+      // Card count: the lines needed for one vector of nrhsentries values,
+      // doubled when guesses ('G') and doubled again when exact solutions
+      // ('X') are flagged in Rhstype, then multiplied by Nrhs.
+      // NOTE(review): with both flags set this gives 4x, not 3x -- confirm.
+      rhscrd = nrhsentries/Rhsperline; 
+      if ( nrhsentries%Rhsperline != 0) rhscrd++;
+      if ( Rhstype[1] == 'G' ) rhscrd+=rhscrd;
+      if ( Rhstype[2] == 'X' ) rhscrd+=rhscrd;
+      rhscrd*=Nrhs;
+    } else rhscrd = 0;
+    
+    totcrd = 4+ptrcrd+indcrd+valcrd+rhscrd;
+    
+    
+    /*  Print header information:  */
+    
+    fprintf(out_file,"%-72s%-8s\n%14d%14d%14d%14d%14d\n",Title, Key, totcrd,
+	    ptrcrd, indcrd, valcrd, rhscrd);
+    fprintf(out_file,"%3s%11s%14d%14d%14d%14d\n",Type,"          ", M, N, nz, 0);
+    fprintf(out_file,"%-16s%-16s%-20s", Ptrfmt, Indfmt, Valfmt);
+    if ( Nrhs != 0 ) {
+      /* Print Rhsfmt on fourth line and                              */
+      /*  optional fifth header line for auxiliary vector information:*/
+      fprintf(out_file,"%-20s\n%-14s%d\n",Rhsfmt,Rhstype,Nrhs);
+    }
+    else
+      fprintf(out_file,"\n");
+    
+    offset = 1 - shift;  /* colptr/rowind use index base `shift`; adding    */
+    /* offset turns every written entry into base 1   */
+    
+    /*  Print column pointers:   */
+    for (i = 0; i < N+1; i++) {
+      entry = colptr[i]+offset;
+      fprintf(out_file,pformat,entry);
+      if ( (i+1)%Ptrperline == 0 ) fprintf(out_file,"\n");
+    }
+    
+    if ( (N+1) % Ptrperline != 0 ) fprintf(out_file,"\n");
+    
+    /*  Print row indices:       */
+    for (i=0;i<nz;i++) {
+      entry = rowind[i]+offset;
+      fprintf(out_file,iformat,entry);
+      if ( (i+1)%Indperline == 0 ) fprintf(out_file,"\n");
+    }
+    
+    if ( nz % Indperline != 0 ) fprintf(out_file,"\n");
+    
+    /*  Print values:            */
+    
+    if ( Type[0] != 'P' ) {          /* Skip if pattern only  */
+      for (i=0;i<nvalentries;i++) {
+	fprintf(out_file,vformat,val[i]);
+	if ( (i+1)%Valperline == 0 ) fprintf(out_file,"\n");
+      }
+      
+      if ( nvalentries % Valperline != 0 ) fprintf(out_file,"\n");
+      
+      /*  Print right hand sides (vector j starts at j*nrhsentries):  */
+      acount = 1;
+      linemod=0;
+      if ( Nrhs > 0 ) {
+	for (j=0;j<Nrhs;j++) {
+	  for (i=0;i<nrhsentries;i++) {
+	    fprintf(out_file,rformat,rhs[j*nrhsentries + i] /* *Rhswidth */);
+	    if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n");
+	  }
+	  if ( acount%Rhsperline != linemod ) {
+	    fprintf(out_file,"\n");
+	    linemod = (acount-1)%Rhsperline;
+	  }
+	  if ( Rhstype[1] == 'G' ) {
+	    for (i=0;i<nrhsentries;i++) {
+	      fprintf(out_file,rformat,guess[j*nrhsentries + i] /* *Rhswidth */);
+	      if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n");
+	    }
+	    if ( acount%Rhsperline != linemod ) {
+	      fprintf(out_file,"\n");
+	      linemod = (acount-1)%Rhsperline;
+	    }
+	  }
+	  if ( Rhstype[2] == 'X' ) {
+	    for (i=0;i<nrhsentries;i++) {
+	      fprintf(out_file,rformat,exact[j*nrhsentries + i] /* *Rhswidth */);
+	      if ( acount++%Rhsperline == linemod ) fprintf(out_file,"\n");
+	    }
+	    if ( acount%Rhsperline != linemod ) {
+	      fprintf(out_file,"\n");
+	      linemod = (acount-1)%Rhsperline;
+	    }
+	  }
+	}
+      }
+    }
+    int s = (out_file == stdout) ? fflush(out_file) : fclose(out_file);
+    GMM_ASSERT1(s == 0, "Error closing file in writeHB_mat_double().");
+    return 1;
+  }
+  
+  /* Wrap A's value, row-index and column-pointer arrays in a ref and forward. */
+  template <typename T, int shift> void
+  HarwellBoeing_IO::write(const char *filename,
+			  const csc_matrix<T, shift>& A) {
+    typedef csc_matrix_ref<const T*, const unsigned*,
+			   const unsigned *, shift> ref_type;
+    write(filename, ref_type(&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc));
+  }
+  /* Wrap A's arrays in a csc_matrix_ref and forward it together with rhs.
+     NOTE(review): the ref yields `const T` but rhs yields `T` -- confirm. */
+  template <typename T, int shift> void
+  HarwellBoeing_IO::write(const char *filename,
+			  const csc_matrix<T, shift>& A,
+			  const std::vector<T> &rhs) {
+    typedef csc_matrix_ref<const T*, const unsigned*, const unsigned *, shift> ref_type;
+    write(filename, ref_type(&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc), rhs);
+  }
+  /* Write the matrix referenced by A.  The HB type string is "CUA"/"RUA" for
+     a square matrix and "CRA"/"RRA" otherwise (C: complex<double> values). */
+  template <typename T, typename INDI, typename INDJ, int shift> void
+  HarwellBoeing_IO::write(const char *filename,
+			  const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A) {
+    const bool square = (mat_nrows(A) == mat_ncols(A));
+    const char *t = is_complex_double__(T()) ? (square ? "CUA" : "CRA")
+					     : (square ? "RUA" : "RRA");
+    // jc[ncols] alone includes the index base; the difference is the count.
+    const int nz = int(A.jc[mat_ncols(A)] - A.jc[0]);
+    writeHB_mat_double(filename, int(mat_nrows(A)), int(mat_ncols(A)),
+		       nz, A.jc, A.ir, (const double *)A.pr,
+		       0, 0, 0, 0, "GETFEM++ CSC MATRIX", "CSCMAT",
+		       t, 0, 0, 0, 0, "F", shift);
+  }
+  /* Write the matrix referenced by A plus the right-hand side data in rhs;
+     vect_size(rhs) / mat_nrows(A) right-hand sides are announced. */
+  template <typename T, typename INDI, typename INDJ, int shift> void
+  HarwellBoeing_IO::write(const char *filename,
+			  const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A,
+			  const std::vector<T> &rhs) {
+    const bool square = (mat_nrows(A) == mat_ncols(A));
+    const char *t = is_complex_double__(T()) ? (square ? "CUA" : "CRA")
+					     : (square ? "RUA" : "RRA");
+    const int nz = int(A.jc[mat_ncols(A)] - A.jc[0]); // independent of the base
+    // A matrix without rows has no right-hand side (and avoids dividing by 0).
+    const int Nrhs = mat_nrows(A) ? int(gmm::vect_size(rhs) / mat_nrows(A)) : 0;
+    writeHB_mat_double(filename, int(mat_nrows(A)), int(mat_ncols(A)),
+		       nz, A.jc, A.ir, (const double *)A.pr,
+		       Nrhs, Nrhs ? (const double *)(&rhs[0]) : 0, 0, 0,
+		       "GETFEM++ CSC MATRIX", "CSCMAT",
+		       t, 0, 0, 0, 0, "F  ", shift);
+  }
+  
+  /* Deprecated generic overload: copy A into a csc_matrix, then write that. */
+  template <typename MAT> void
+  HarwellBoeing_IO::write(const char *filename, const MAT& A) {
+    typedef typename gmm::linalg_traits<MAT>::value_type value_type;
+    gmm::csc_matrix<value_type> csc_copy(gmm::mat_nrows(A), gmm::mat_ncols(A));
+    gmm::copy(A, csc_copy);
+    HarwellBoeing_IO::write(filename, csc_copy);
+  }
+  /** Save a "double" or "std::complex<double>" csc matrix into the
+      Harwell-Boeing file named filename. */
+  template <typename T, int shift> inline void
+  Harwell_Boeing_save(const std::string &filename,
+		      const csc_matrix<T, shift>& A) {
+    HarwellBoeing_IO::write(filename.c_str(), A);
+  }
+
+  /** Save the "double" or "std::complex<double>" csc matrix referenced by A
+      into the Harwell-Boeing file named filename. */
+  template <typename T, typename INDI, typename INDJ, int shift> inline void
+  Harwell_Boeing_save(const std::string &filename,
+		      const csc_matrix_ref<T, INDI, INDJ, shift>& A) {
+    HarwellBoeing_IO::write(filename.c_str(), A);
+  }
+
+  /** Save a "double" or "std::complex<double>" generic matrix into a
+      Harwell-Boeing file, going through a csc_matrix copy of A. */
+  template <typename MAT> inline void
+  Harwell_Boeing_save(const std::string &filename, const MAT& A) {
+    typedef typename gmm::linalg_traits<MAT>::value_type value_type;
+    gmm::csc_matrix<value_type> csc_copy(gmm::mat_nrows(A),
+					 gmm::mat_ncols(A));
+    gmm::copy(A, csc_copy);
+    HarwellBoeing_IO::write(filename.c_str(), csc_copy);
+  }
+
+  /** Save a generic matrix and right-hand side data through csc/vector copies. */
+  template <typename MAT, typename VECT> inline void
+  Harwell_Boeing_save(const std::string &filename, const MAT& A,
+		      const VECT &RHS) {
+    typedef typename gmm::linalg_traits<MAT>::value_type T;
+    std::vector<T> rhs_copy(gmm::vect_size(RHS));
+    gmm::copy(RHS, rhs_copy);
+    gmm::csc_matrix<T> csc_copy(gmm::mat_nrows(A), gmm::mat_ncols(A));
+    gmm::copy(A, csc_copy);
+    HarwellBoeing_IO::write(filename.c_str(), csc_copy, rhs_copy);
+  }
+  /** Load a "double" or "std::complex<double>" csc matrix from the
+      Harwell-Boeing file named filename. */
+  template <typename T, int shift> void
+  Harwell_Boeing_load(const std::string &filename, csc_matrix<T, shift>& A) {
+    HarwellBoeing_IO reader(filename.c_str());
+    reader.read(A);
+  }
+
+  /** Load a "double" or "std::complex<double>" generic matrix from a
+      Harwell-Boeing file, going through a temporary csc_matrix. */
+  template <typename MAT> void
+  Harwell_Boeing_load(const std::string &filename, MAT& A) {
+    typedef typename gmm::linalg_traits<MAT>::value_type value_type;
+    csc_matrix<value_type> loaded;
+    Harwell_Boeing_load(filename, loaded);
+    resize(A, mat_nrows(loaded), mat_ncols(loaded));
+    copy(loaded, A);
+  }
+
+  /*************************************************************************/
+  /*                                                                       */
+  /*  Functions to read and write MatrixMarket format.                     */
+  /*                                                                       */
+  /*************************************************************************/
+
+  /* 
+   *   Matrix Market I/O library for ANSI C
+   *
+   *   See http://math.nist.gov/MatrixMarket for details.
+   *
+   *
+   */
+
+#define MM_MAX_LINE_LENGTH 1025
+#define MatrixMarketBanner "%%MatrixMarket"
+#define MM_MAX_TOKEN_LENGTH 64
+  /* 4-character type code: [0] object, [1] layout, [2] data, [3] symmetry */
+  typedef char MM_typecode[4];
+
+  /******************* MM_typecode query functions *************************/
+  /* (each takes the MM_typecode array itself and tests one character) */
+#define mm_is_matrix(typecode)	        ((typecode)[0]=='M')
+  /* position [1]: sparse == coordinate ('C'), dense == array ('A') */
+#define mm_is_sparse(typecode)	        ((typecode)[1]=='C')
+#define mm_is_coordinate(typecode)      ((typecode)[1]=='C')
+#define mm_is_dense(typecode)	        ((typecode)[1]=='A')
+#define mm_is_array(typecode)	        ((typecode)[1]=='A')
+  /* position [2]: element data type */
+#define mm_is_complex(typecode)	        ((typecode)[2]=='C')
+#define mm_is_real(typecode)	        ((typecode)[2]=='R')
+#define mm_is_pattern(typecode)	        ((typecode)[2]=='P')
+#define mm_is_integer(typecode)         ((typecode)[2]=='I')
+  /* position [3]: symmetry / storage scheme */
+#define mm_is_symmetric(typecode)       ((typecode)[3]=='S')
+#define mm_is_general(typecode)	        ((typecode)[3]=='G')
+#define mm_is_skew(typecode)	        ((typecode)[3]=='K')
+#define mm_is_hermitian(typecode)       ((typecode)[3]=='H')
+  
+  /******************* MM_typecode modify functions ************************/
+  /* (each takes a POINTER to an MM_typecode: note the dereference) */
+#define mm_set_matrix(typecode)	        ((*typecode)[0]='M')
+#define mm_set_coordinate(typecode)	((*typecode)[1]='C')
+#define mm_set_array(typecode)	        ((*typecode)[1]='A')
+#define mm_set_dense(typecode)	        mm_set_array(typecode)
+#define mm_set_sparse(typecode)	        mm_set_coordinate(typecode)
+
+#define mm_set_complex(typecode)        ((*typecode)[2]='C')
+#define mm_set_real(typecode)	        ((*typecode)[2]='R')
+#define mm_set_pattern(typecode)        ((*typecode)[2]='P')
+#define mm_set_integer(typecode)        ((*typecode)[2]='I')
+
+
+#define mm_set_symmetric(typecode)      ((*typecode)[3]='S')
+#define mm_set_general(typecode)        ((*typecode)[3]='G')
+#define mm_set_skew(typecode)	        ((*typecode)[3]='K')
+#define mm_set_hermitian(typecode)      ((*typecode)[3]='H')
+  /* reset: positions [0]..[2] become blanks and [3] becomes 'G' */
+#define mm_clear_typecode(typecode)     ((*typecode)[0]=(*typecode)[1]= \
+			       	        (*typecode)[2]=' ',(*typecode)[3]='G')
+
+#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
+
+
+  /******************* Matrix Market error codes ***************************/
+  /* Error values for the mm_* functions, which return 0 on success. */
+
+#define MM_COULD_NOT_READ_FILE	11
+#define MM_PREMATURE_EOF		12
+#define MM_NOT_MTX				13
+#define MM_NO_HEADER			14
+#define MM_UNSUPPORTED_TYPE		15
+#define MM_LINE_TOO_LONG		16
+#define MM_COULD_NOT_WRITE_FILE	17
+
+
+  /******************** Matrix Market internal definitions *****************
+
+   MM_matrix_typecode: 4-character sequence
+
+	                object 	    sparse/   	data        storage 
+	                            dense     	type        scheme
+
+   string position:	 [0]        [1]		[2]         [3]
+
+   Matrix typecode:     M(atrix)    C(oord)	R(eal)      G(eneral)
+		                    A(rray)     C(omplex)   H(ermitian)
+	                                        P(attern)   S(ymmetric)
+                                                I(nteger)   K (skew)
+
+  ***********************************************************************/
+  /* keywords of the banner line, one group per typecode position */
+#define MM_MTX_STR	   "matrix"
+#define MM_ARRAY_STR	   "array"
+#define MM_DENSE_STR	   "array"
+#define MM_COORDINATE_STR  "coordinate" 
+#define MM_SPARSE_STR	   "coordinate"
+#define MM_COMPLEX_STR	   "complex"
+#define MM_REAL_STR	   "real"
+#define MM_INT_STR	   "integer"
+#define MM_GENERAL_STR     "general"
+#define MM_SYMM_STR	   "symmetric"
+#define MM_HERM_STR	   "hermitian"
+#define MM_SKEW_STR	   "skew-symmetric"
+#define MM_PATTERN_STR     "pattern"
+
+  /** Convert a Matrix Market typecode into its banner keywords, e.g.
+      "matrix coordinate real general".
+      @param matcode 4-character typecode: [0] object, [1] layout,
+                     [2] data type, [3] symmetry.
+      @return a string obtained from SECURE_STRDUP, which the caller releases
+              with free(), or NULL when a character of matcode is not
+              recognized. This includes a non-matrix object, for which
+              a null pointer used to be passed to the "%s" conversion. */
+  inline char  *mm_typecode_to_str(MM_typecode matcode) {
+    char buffer[MM_MAX_LINE_LENGTH];
+    const char *types[4] = {0,0,0,0};
+
+    /* check for MTX type: a non-matrix code would otherwise leave types[0]
+       null and hand it to the "%s" conversion below */
+    if (!mm_is_matrix(matcode))
+      return NULL;
+    types[0] = MM_MTX_STR;
+
+    /* check for CRD or ARR matrix */
+    if (mm_is_sparse(matcode))
+      types[1] = MM_SPARSE_STR;
+    else if (mm_is_dense(matcode))
+      types[1] = MM_DENSE_STR;
+    else
+      return NULL;
+
+    /* check for element data type */
+    if (mm_is_real(matcode))
+      types[2] = MM_REAL_STR;
+    else if (mm_is_complex(matcode))
+      types[2] = MM_COMPLEX_STR;
+    else if (mm_is_pattern(matcode))
+      types[2] = MM_PATTERN_STR;
+    else if (mm_is_integer(matcode))
+      types[2] = MM_INT_STR;
+    else
+      return NULL;
+
+    /* check for symmetry type */
+    if (mm_is_general(matcode))
+      types[3] = MM_GENERAL_STR;
+    else if (mm_is_symmetric(matcode))
+      types[3] = MM_SYMM_STR;
+    else if (mm_is_hermitian(matcode))
+      types[3] = MM_HERM_STR;
+    else if (mm_is_skew(matcode))
+      types[3] = MM_SKEW_STR;
+    else
+      return NULL;
+
+    SECURE_SPRINTF4(buffer, sizeof(buffer), "%s %s %s %s", types[0], types[1],
+		    types[2], types[3]);
+    return SECURE_STRDUP(buffer);
+
+  }
+  
+  /** Read and parse the banner (first) line of a Matrix Market file.
+      The line must hold five tokens: the "%%MatrixMarket" banner followed by
+      the object, layout, data-type and symmetry keywords, which are matched
+      case-insensitively. Only the first MM_MAX_LINE_LENGTH-1 characters of
+      the line are examined.
+      @param f       stream to read the line from.
+      @param matcode receives the typecode; it is cleared first and may be
+                     partially filled when an error is returned.
+      @return 0 on success, MM_PREMATURE_EOF if the line cannot be read or
+              lacks five tokens, MM_NO_HEADER if the banner token is wrong,
+              MM_UNSUPPORTED_TYPE if a keyword is not recognized. */
+  inline int mm_read_banner(FILE *f, MM_typecode *matcode) {
+    char line[MM_MAX_LINE_LENGTH];
+    char banner[MM_MAX_TOKEN_LENGTH];
+    char mtx[MM_MAX_TOKEN_LENGTH]; 
+    char crd[MM_MAX_TOKEN_LENGTH];
+    char data_type[MM_MAX_TOKEN_LENGTH];
+    char storage_scheme[MM_MAX_TOKEN_LENGTH];
+    gmm::standard_locale sl;
+    
+    mm_clear_typecode(matcode);  
+    
+    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) 
+      return MM_PREMATURE_EOF;
+
+    /* Each token buffer holds MM_MAX_TOKEN_LENGTH (64) bytes, so the plain
+       sscanf conversions are capped at 63 characters plus the terminator;
+       an unbounded "%s" would overflow them on an over-long token. */
+#ifdef GMM_SECURE_CRT
+    if (sscanf_s(line, "%s %s %s %s %s", banner, sizeof(banner),
+		 mtx, sizeof(mtx), crd, sizeof(crd), data_type,
+		 sizeof(data_type), storage_scheme,
+		 sizeof(storage_scheme)) != 5)
+#else
+	if (sscanf(line, "%63s %63s %63s %63s %63s", banner, mtx, crd,
+		   data_type, storage_scheme) != 5)
+#endif
+      return MM_PREMATURE_EOF;
+
+    /* convert the four keywords to lower case; the cast keeps tolower()
+       away from negative char values, for which it is undefined */
+    char *fields[4] = { mtx, crd, data_type, storage_scheme };
+    for (int k = 0; k < 4; ++k)
+      for (char *p = fields[k]; *p != '\0'; ++p)
+        *p = char(tolower(static_cast<unsigned char>(*p)));
+
+    /* check for banner */
+    if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
+      return MM_NO_HEADER;
+
+    /* first field should be "matrix" */
+    if (strcmp(mtx, MM_MTX_STR) != 0)
+      return  MM_UNSUPPORTED_TYPE;
+    mm_set_matrix(matcode);
+
+    /* second field describes whether this is a sparse matrix (in coordinate
+       storage) or a dense array */
+    if (strcmp(crd, MM_SPARSE_STR) == 0)
+      mm_set_sparse(matcode);
+    else if (strcmp(crd, MM_DENSE_STR) == 0)
+      mm_set_dense(matcode);
+    else
+      return MM_UNSUPPORTED_TYPE;
+
+    /* third field: element data type */
+    if (strcmp(data_type, MM_REAL_STR) == 0)
+      mm_set_real(matcode);
+    else if (strcmp(data_type, MM_COMPLEX_STR) == 0)
+      mm_set_complex(matcode);
+    else if (strcmp(data_type, MM_PATTERN_STR) == 0)
+      mm_set_pattern(matcode);
+    else if (strcmp(data_type, MM_INT_STR) == 0)
+      mm_set_integer(matcode);
+    else
+      return MM_UNSUPPORTED_TYPE;
+
+    /* fourth field: symmetry / storage scheme */
+    if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
+      mm_set_general(matcode);
+    else if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
+      mm_set_symmetric(matcode);
+    else if (strcmp(storage_scheme, MM_HERM_STR) == 0)
+      mm_set_hermitian(matcode);
+    else if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
+      mm_set_skew(matcode);
+    else
+      return MM_UNSUPPORTED_TYPE;
+
+    return 0;
+  }
+
+  /** Read the "M N nz" size line of a coordinate Matrix Market file,
+      skipping '%' comment lines. *M, *N and *nz are zeroed first. When the
+      first non-comment line lacks the three integers (e.g. it is blank),
+      they are read from the input that follows.
+      @return 0 on success, MM_PREMATURE_EOF at end of file or when the
+              three integers cannot be parsed. */
+  inline int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz ) {
+    char line[MM_MAX_LINE_LENGTH];
+    /* set return null parameter values, in case we exit with errors */
+    *M = *N = *nz = 0;
+    /* now continue scanning until you reach the end-of-comments */
+    do {
+      if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
+        return MM_PREMATURE_EOF;
+    } while (line[0] == '%');
+
+    /* line[] is either blank or has M,N, nz */
+    if (SECURE_NONCHAR_SSCANF(line, "%d %d %d", M, N, nz) == 3) return 0;
+    /* A short count means end of file or a token that is not an integer.
+       Retrying cannot help: fscanf leaves the offending character unread,
+       so the former retry loop never terminated on malformed input. */
+    if (SECURE_NONCHAR_FSCANF(f, "%d %d %d", M, N, nz) != 3)
+      return MM_PREMATURE_EOF;
+    return 0;
+  }
+
+
+  /** Read nz coordinate entries "row col value(s)" from f into II, J, val.
+      Complex entries store their two parts at val[2*k] and val[2*k+1];
+      pattern entries read the indices only and leave val untouched.
+      @return 0, MM_PREMATURE_EOF on a short read, or MM_UNSUPPORTED_TYPE. */
+  inline int mm_read_mtx_crd_data(FILE *f, int, int, int nz, int II[],
+				  int J[], double val[], MM_typecode matcode) {
+    if (mm_is_complex(matcode)) {
+      for (int k = 0; k < nz; ++k)
+        if (SECURE_NONCHAR_FSCANF(f, "%d %d %lg %lg", &II[k], &J[k],
+                                  &val[2*k], &val[2*k+1]) != 4)
+          return MM_PREMATURE_EOF;
+      return 0;
+    }
+    if (mm_is_real(matcode)) {
+      for (int k = 0; k < nz; ++k)
+        if (SECURE_NONCHAR_FSCANF(f, "%d %d %lg\n", &II[k], &J[k], &val[k])
+            != 3) return MM_PREMATURE_EOF;
+      return 0;
+    }
+    if (!mm_is_pattern(matcode)) return MM_UNSUPPORTED_TYPE;
+    for (int k = 0; k < nz; ++k)
+      if (SECURE_NONCHAR_FSCANF(f, "%d %d", &II[k], &J[k]) != 2)
+        return MM_PREMATURE_EOF;
+    return 0;
+  }
+
+  /** Write nz coordinate entries as a Matrix Market file.
+      fname is the output path; the name "stdout" selects standard output.
+      II/J hold the row/column index of each entry and are written as given.
+      val holds nz reals, or 2*nz interleaved parts when matcode is complex;
+      it is not read for pattern matrices.
+      @return 0, MM_COULD_NOT_WRITE_FILE if fname cannot be opened, or
+              MM_UNSUPPORTED_TYPE if matcode cannot be written. */
+  inline int mm_write_mtx_crd(const char *fname, int M, int N, int nz,
+			      int II[], int J[], const double val[],
+			      MM_typecode matcode) {
+    FILE *f;
+    if (strcmp(fname, "stdout") == 0) 
+      f = stdout;
+    else {
+      SECURE_FOPEN(&f, fname, "w");
+      if (f == NULL) return MM_COULD_NOT_WRITE_FILE;
+    }
+    /* A typecode with no textual form yields NULL, which must never be
+       handed to a "%s" conversion. */
+    char *str = mm_typecode_to_str(matcode);
+    if (str == NULL) {
+      if (f != stdout) fclose(f);
+      return MM_UNSUPPORTED_TYPE;
+    }
+    /* print banner followed by typecode, then matrix sizes and nonzeros */
+    fprintf(f, "%s %s\n", MatrixMarketBanner, str);
+    free(str);
+    fprintf(f, "%d %d %d\n", M, N, nz);
+    /* print values */
+    int status = 0;
+    if (mm_is_pattern(matcode))
+      for (int i = 0; i < nz; ++i) fprintf(f, "%d %d\n", II[i], J[i]);
+    else if (mm_is_real(matcode))
+      for (int i = 0; i < nz; ++i)
+        fprintf(f, "%d %d %20.16g\n", II[i], J[i], val[i]);
+    else if (mm_is_complex(matcode))
+      for (int i = 0; i < nz; ++i)
+        fprintf(f, "%d %d %20.16g %20.16g\n", II[i], J[i], val[2*i],
+                val[2*i+1]);
+    else
+      status = MM_UNSUPPORTED_TYPE;  /* banner and sizes are already written */
+    if (f != stdout) fclose(f);
+    return status;
+  }
+  
+
+  /** Matrix Market file reader/writer; an open FILE is closed on destruction. */
+  class MatrixMarket_IO {
+    FILE *f;
+    bool isComplex, isSymmetric, isHermitian;
+    int row, col, nz;
+    MM_typecode matcode;
+    void reset() { isComplex = isSymmetric = isHermitian = false;
+                   row = col = nz = 0; mm_clear_typecode(&matcode); }
+  public:
+    /* reset() gives nrows(), nnz(), ... defined values before any open() */
+    MatrixMarket_IO() : f(0) { reset(); }
+    MatrixMarket_IO(const char *filename) : f(0) { reset(); open(filename); }
+    ~MatrixMarket_IO() { if (f) fclose(f); f = 0; }
+    int nrows() const { return row; }
+    int ncols() const { return col; }
+    int nnz() const { return nz; }
+    int is_complex() const { return isComplex; }
+    int is_symmetric() const { return isSymmetric; }
+    int is_hermitian() const { return isHermitian; }
+    void open(const char *filename);  /* open filename and read its header */
+    /* read opened file */
+    template <typename Matrix> void read(Matrix &A);
+    /* write a matrix */
+    template <typename T, int shift> static void 
+    write(const char *filename, const csc_matrix<T, shift>& A);  
+    template <typename T, typename INDI, typename INDJ, int shift> static void 
+    write(const char *filename,
+	  const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A);  
+    template <typename MAT> static void 
+    write(const char *filename, const MAT& A);  
+  };
+
+  /** Load the matrix-market file `filename` into A (open, then read). */
+  template <typename Matrix> inline void
+  MatrixMarket_load(const char *filename, Matrix& A) {
+    MatrixMarket_IO mm; mm.open(filename);
+    mm.read(A);
+  }
+  /** Write the csc matrix A to the matrix-market file `filename`. */
+  template <typename T, int shift> void
+  MatrixMarket_save(const char *filename, const csc_matrix<T, shift>& A) {
+    MatrixMarket_IO mm; mm.write(filename, A);
+  }
+  /** Write the csc matrix reference A to the matrix-market file `filename`. */
+  template <typename T, typename INDI, typename INDJ, int shift> inline void
+  MatrixMarket_save(const char *filename,
+		    const csc_matrix_ref<T, INDI, INDJ, shift>& A) {
+    MatrixMarket_IO mm; mm.write(filename, A);
+  }
+
+
+  /** Open filename, then parse and validate its banner and its size line. */
+  inline void MatrixMarket_IO::open(const char *filename) {
+    gmm::standard_locale sl;
+    if (f) { fclose(f); }
+    SECURE_FOPEN(&f, filename, "r");
+    GMM_ASSERT1(f, "Sorry, cannot open file " << filename);
+    int s1 = mm_read_banner(f, &matcode);
+    GMM_ASSERT1(s1 == 0, "Sorry, cannnot find the matrix market banner in "
+		<< filename);
+    int s2 = mm_is_coordinate(matcode), s3 = mm_is_matrix(matcode);
+    GMM_ASSERT1(s2 > 0 && s3 > 0,
+		"file is not coordinate storage or is not a matrix");
+    int s4 = mm_is_pattern(matcode), s5 = mm_is_skew(matcode);
+    GMM_ASSERT1(s4 == 0,
+	       "the file does only contain the pattern of a sparse matrix");
+    GMM_ASSERT1(s5 == 0, "not currently supporting skew symmetric");
+    isSymmetric = mm_is_symmetric(matcode) || mm_is_hermitian(matcode); 
+    isHermitian = mm_is_hermitian(matcode); isComplex = mm_is_complex(matcode);
+    int s6 = mm_read_mtx_crd_size(f, &row, &col, &nz);
+    GMM_ASSERT1(s6 == 0, "cannot read the matrix sizes in " << filename);
+  }
+
+  /** Read the entries of the file opened by open() into A. A is replaced by
+      a cleared row x col matrix, then filled from the 1-based coordinate
+      entries; symmetric and hermitian files also get the mirrored entry.
+      GMM_ASSERT1 rejects a missing file, a value type of Matrix that does
+      not match the file, and entries that cannot all be read. */
+  template <typename Matrix> void MatrixMarket_IO::read(Matrix &A) {
+    gmm::standard_locale sl;
+    typedef typename linalg_traits<Matrix>::value_type T;
+    GMM_ASSERT1(f, "no file opened!");
+    GMM_ASSERT1(!is_complex_double__(T()) || isComplex,
+		"Bad MM matrix format (complex matrix expected)");
+    GMM_ASSERT1(is_complex_double__(T()) || !isComplex,
+		"Bad MM matrix format (real matrix expected)");
+    A = Matrix(row, col);
+    gmm::clear(A);
+    if (nz <= 0) return;  /* nothing stored; &II[0] below needs nz > 0 */
+
+    std::vector<int> II(nz), J(nz);
+    std::vector<typename Matrix::value_type> PR(nz);
+    int status = mm_read_mtx_crd_data(f, row, col, nz, &II[0], &J[0],
+				      (double*)&PR[0], matcode);
+    /* after a short read the unread II/J slots still hold 0, which the
+       loop below would turn into the out-of-range index A(-1, -1) */
+    GMM_ASSERT1(status == 0, "Bad MM matrix format (cannot read all entries)");
+
+    for (size_type i = 0; i < size_type(nz); ++i) {
+      A(II[i]-1, J[i]-1) = PR[i];
+      if (II[i] == J[i]) continue;  /* diagonal entries have no mirror */
+      if (mm_is_hermitian(matcode)) A(J[i]-1, II[i]-1) = gmm::conj(PR[i]);
+      if (mm_is_symmetric(matcode)) A(J[i]-1, II[i]-1) = PR[i];
+      if (mm_is_skew(matcode)) A(J[i]-1, II[i]-1) = -PR[i];
+    }
+  }
+  /** Write a csc_matrix by wrapping its pr/ir/jc arrays in a csc_matrix_ref. */
+  template <typename T, int shift> void 
+  MatrixMarket_IO::write(const char *filename, const csc_matrix<T, shift>& A) {
+    write(filename, csc_matrix_ref<const T*, const unsigned*,
+	  const unsigned*,shift>
+	  (&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc));
+  }
+
+  /** Write the compressed-column reference A in Matrix Market coordinate
+      form. The data-type character of the typecode is 'C' when
+      is_complex_double__(T()) holds and 'R' otherwise. */
+  template <typename T, typename INDI, typename INDJ, int shift> void 
+  MatrixMarket_IO::write(const char *filename, 
+			 const csc_matrix_ref<T*, INDI*, INDJ*, shift>& A) {
+    gmm::standard_locale sl;
+    MM_typecode t = {'M', 'C', 'R', 'G'};
+    if (is_complex_double__(T())) t[2] = 'C';
+
+    const size_type ncol = mat_ncols(A), nz = A.jc[ncol];
+    std::vector<int> II(nz), J(nz);
+    for (size_type j = 0; j < ncol; ++j)
+      for (size_type k = A.jc[j]; k < A.jc[j+1]; ++k) {
+        II[k] = A.ir[k] + 1 - shift;  /* row index as written to the file */
+        J[k] = int(j + 1);
+      }
+
+    mm_write_mtx_crd(filename, int(mat_nrows(A)), int(ncol),
+		     int(nz), &II[0], &J[0], (const double *)A.pr, t);
+  }
+
+  /** Write a generic matrix by copying it into a temporary csc matrix. */
+  template <typename MAT> void
+  MatrixMarket_IO::write(const char *filename, const MAT& A) {
+    typedef typename gmm::linalg_traits<MAT>::value_type value_t;
+    gmm::csc_matrix<value_t> csc_copy(gmm::mat_nrows(A), gmm::mat_ncols(A));
+    gmm::copy(A,csc_copy);
+    MatrixMarket_IO::write(filename, csc_copy);
+  }
+  /** Save V to file fname: raw element bytes if binary, else text lines. */
+  template<typename VEC> static void vecsave(std::string fname, const VEC& V,
+                                             bool binary=false) {
+    if (binary) {
+      std::ofstream out(fname.c_str(), std::ofstream::binary);
+      for (size_type k = 0; k < gmm::vect_size(V); ++k)
+        out.write(reinterpret_cast<const char*>(&V[k]), sizeof(V[k]));
+      return;
+    }
+    std::ofstream out(fname.c_str());  /* text: one element per line */
+    out.precision(16); out.imbue(std::locale("C"));
+    for (size_type k = 0; k < gmm::vect_size(V); ++k) out << V[k] << "\n";
+  }
+  /** Fill the existing elements of V_ from file fname; V_ is not resized
+      and its constness is cast away in order to write into it. */
+  template<typename VEC> static void vecload(std::string fname, const VEC& V_,
+                                             bool binary=false) {
+    VEC &V(const_cast<VEC&>(V_));
+    if (binary) {
+      std::ifstream in(fname.c_str(), std::ifstream::binary);
+      for (size_type k = 0; k < gmm::vect_size(V); ++k)
+        in.read(reinterpret_cast<char*>(&V[k]), sizeof(V[k]));
+      return;
+    }
+    std::ifstream in(fname.c_str()); in.imbue(std::locale("C"));
+    for (size_type k = 0; k < gmm::vect_size(V); ++k) in >> V[k];
+  }
+}
+
+
+#endif //  GMM_INOUTPUT_H
diff --git a/gmm/gmm_interface.h b/gmm/gmm_interface.h
new file mode 100644
index 000000000..a3c66cd1b
--- /dev/null
+++ b/gmm/gmm_interface.h
@@ -0,0 +1,1068 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+
+/**@file gmm_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief gmm interface for STL vectors.
+*/
+
+#ifndef GMM_INTERFACE_H__
+#define GMM_INTERFACE_H__
+
+#include "gmm_blas.h"
+#include "gmm_sub_index.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*                                                                       */
+  /* What is needed for a Vector type :                                    */
+  /*   Vector v(n) defines a vector with n components.                     */
+  /*   v[i] gives access to the ith component of v.                        */
+  /*   linalg_traits<Vector> should be filled with appropriate definitions */
+  /*                                                                       */
+  /*   for a dense vector : the minimum is two random iterators (begin and */
+  /*                        end) and a pointer to a valid origin.          */
+  /*   for a sparse vector : the minimum is two forward iterators, with    */
+  /*                         a method it.index() which gives the index of  */
+  /*                         a non zero element, an interface object       */
+  /*                         should describe the method to add new non     */
+  /*                         zero element, and  a pointer to a valid       */
+  /*                         origin.                                       */
+  /*                                                                       */
+  /* What is needed for a Matrix type :                                    */
+  /*   Matrix m(n, m) defines a matrix with n rows and m columns.          */
+  /*   m(i, j) gives access to the element at row i and column j.          */
+  /*   linalg_traits<Matrix> should be filled with appropriate definitions */
+  /*                                                                       */
+  /* What is needed for an iterator on dense vector                        */
+  /*    to be standard random access iterator                              */
+  /*                                                                       */
+  /* What is needed for an iterator on a sparse vector                     */
+  /*    to be a standard bidirectional iterator                            */
+  /*    elt should be sorted with increasing indices.                      */
+  /*    it.index() gives the index of the non-zero element.                */
+  /*                                                                       */
+  /* Remark : If original iterators are not convenient, they could be      */
+  /*   redefined and interfaced in linalg_traits<Vector> without changing  */
+  /*   the original Vector type.                                           */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /*		Simple references on vectors            		   */
+  /* ********************************************************************* */
+  /* Reference on a vector of type V, where PT is a pointer type to V. */
+  template <typename PT> struct simple_vector_ref {
+    typedef simple_vector_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_V;
+    typedef typename linalg_traits<this_type>::iterator iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    /* iterator range, origin and size captured from the referenced vector */
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type size_;
+    /* capture begin/end/origin/size of v; constness is cast away here */
+    simple_vector_ref(ref_V v) : begin_(vect_begin(const_cast<V&>(v))), 
+				 end_(vect_end(const_cast<V&>(v))), 
+				 origin(linalg_origin(const_cast<V&>(v))),
+				 size_(vect_size(v)) {}
+    /* copy the members of a reference built on the non-const pointer V* */
+    simple_vector_ref(const simple_vector_ref<CPT> &cr)
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin),size_(cr.size_) {}
+    /* default construction: no member is given an explicit value */
+    simple_vector_ref(void) {}
+    /* i-th component, looked up through linalg_traits<V>::access */
+    reference operator[](size_type i) const
+    { return linalg_traits<V>::access(origin, begin_, end_, i); }
+  };
+  /* set_to_begin on a simple_vector_ref: re-dispatch on PT and V's tag */
+  template <typename IT, typename ORG, typename PT> inline
+  void set_to_begin(IT &it, ORG o, simple_vector_ref<PT> *,linalg_modifiable) {
+    typedef typename linalg_traits<simple_vector_ref<PT> >::V_reference ref_t;
+    set_to_begin(it, o, PT(), ref_t());
+  }
+  /* same re-dispatch for a pointer to a const simple_vector_ref */
+  template <typename IT, typename ORG, typename PT> inline
+  void set_to_begin(IT &it, ORG o, const simple_vector_ref<PT> *,
+		    linalg_modifiable) {
+    typedef typename linalg_traits<simple_vector_ref<PT> >::V_reference ref_t;
+    set_to_begin(it, o, PT(), ref_t());
+  }
+  /* set_to_end on a simple_vector_ref: re-dispatch on PT and V's tag */
+  template <typename IT, typename ORG, typename PT> inline
+  void set_to_end(IT &it, ORG o, simple_vector_ref<PT> *, linalg_modifiable) {
+    typedef typename linalg_traits<simple_vector_ref<PT> >::V_reference ref_t;
+    set_to_end(it, o, PT(), ref_t());
+  }
+  /* same re-dispatch for a pointer to a const simple_vector_ref */
+  template <typename IT, typename ORG, typename PT> inline
+  void set_to_end(IT &it, ORG o, const simple_vector_ref<PT> *,
+		  linalg_modifiable) {
+    typedef typename linalg_traits<simple_vector_ref<PT> >::V_reference ref_t;
+    set_to_end(it, o, PT(), ref_t());
+  }
+
+  /* Traits of simple_vector_ref<PT>; V is the vector type PT points to. */
+  template <typename PT> struct linalg_traits<simple_vector_ref<PT> > {
+    typedef simple_vector_ref<PT> this_type;
+    typedef this_type *pthis_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef V *pV;
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, typename
+            linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+	    typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<V>::const_iterator const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size_; }
+    static inline iterator begin(this_type &v) {
+      iterator it = v.begin_;
+      set_to_begin(it, v.origin, pthis_type(), is_reference()); 
+      return it;
+    }
+    static inline const_iterator begin(const this_type &v) {
+      const_iterator it = v.begin_;
+      set_to_begin(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static inline iterator end(this_type &v) {
+      iterator it = v.end_;
+      set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static inline const_iterator end(const this_type &v) {
+      const_iterator it = v.end_;
+      set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type* o, const iterator &it, const iterator &ite)
+    { linalg_traits<V>::clear(o, it, ite); }
+    static void do_clear(this_type &v) { clear(v.origin, v.begin_, v.end_); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it, ite, i); }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it, ite, i); }
+  };
+  /* stream output of a simple_vector_ref, delegated to gmm::write */
+  template <typename PT>
+  std::ostream &operator << (std::ostream &o, const simple_vector_ref<PT>& v)
+  { gmm::write(o,v); return o; }
+  /* build a simple_vector_ref on a const std::vector */
+  template <typename T, typename alloc>
+  simple_vector_ref<const std::vector<T,alloc> *>
+    vref(const std::vector<T, alloc> &vv)
+  { return simple_vector_ref<const std::vector<T,alloc> *>(vv); }
+  
+
+  /* ********************************************************************* */
+  /*		                                         		   */
+  /*		Traits for S.T.L. object                     		   */
+  /*		                                         		   */
+  /* ********************************************************************* */
+  /* std::vector seen as a dense gmm vector that is its own origin */
+  template <typename T, typename alloc>
+  struct linalg_traits<std::vector<T, alloc> > {
+    typedef std::vector<T, alloc> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_vector linalg_type;
+    typedef T value_type;
+    typedef T& reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::const_iterator const_iterator;
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return &v; }
+    static const origin_type* origin(const this_type &v) { return &v; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite)
+    { std::fill(it, ite, value_type(0)); }
+    static void do_clear(this_type &v) { std::fill(v.begin(), v.end(), T(0)); }
+    static value_type access(const origin_type *, const const_iterator &it,
+			     const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it,
+			    const iterator &, size_type i)
+    { return it[i]; }
+    static void resize(this_type &v, size_type n) { v.resize(n); }
+  };
+
+  
+  /* nnz of a std::vector: every stored element is counted, so it is size() */
+  template <typename T>
+  inline size_type nnz(const std::vector<T>& l) { return l.size(); }
+
+  /* ********************************************************************* */
+  /*		                                         		   */
+  /*		Traits for ref objects                     		   */
+  /*		                                         		   */
+  /* ********************************************************************* */
+  /* gmm::tab_ref<IT> extended with a pointer to the origin of vector type V */
+  template <typename IT, typename V>
+  struct tab_ref_with_origin : public gmm::tab_ref<IT> {
+    typedef tab_ref_with_origin<IT, V> this_type;
+    // next line replaced by the 4 following lines in order to please aCC
+    //typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename std::iterator_traits<IT>::pointer PT;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+   
+    /* origin pointer stored next to the iterator range of the base class */
+    porigin_type origin;
+    /* default construction gives origin no explicit value */
+    tab_ref_with_origin(void) {}
+    template <class PT> tab_ref_with_origin(const IT &b, const IT &e, PT p)
+      : gmm::tab_ref<IT>(b,e), origin(porigin_type(p)) {}
+    tab_ref_with_origin(const IT &b, const IT &e, porigin_type p)
+      : gmm::tab_ref<IT>(b,e), origin(p) {}
+    /* range [begin + si.min, begin + si.max) of v; constness is cast away */
+    tab_ref_with_origin(const V &v, const sub_interval &si)
+      : gmm::tab_ref<IT>(vect_begin(const_cast<V&>(v))+si.min,
+			 vect_begin(const_cast<V&>(v))+si.max),
+        origin(linalg_origin(const_cast<V&>(v))) {}
+    tab_ref_with_origin(V &v, const sub_interval &si)
+      : gmm::tab_ref<IT>(vect_begin(const_cast<V&>(v))+si.min,
+			 vect_begin(const_cast<V&>(v))+si.max),
+        origin(linalg_origin(const_cast<V&>(v))) {}
+  };
+  /* dense-vector traits; iterator and const_iterator are the same type here */
+  template <typename IT, typename V>
+  struct linalg_traits<tab_ref_with_origin<IT, V> > {
+    typedef typename std::iterator_traits<IT>::pointer PT;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef tab_ref_with_origin<IT, V> this_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename std::iterator_traits<IT>::value_type value_type;
+    typedef typename std::iterator_traits<IT>::reference reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::iterator const_iterator;
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite)
+    { std::fill(it, ite, value_type(0)); }
+    static inline void do_clear(this_type &v)
+    { std::fill(v.begin(), v.end(), value_type(0)); }
+    static value_type access(const origin_type *, const const_iterator &it,
+			     const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it, 
+			    const iterator &, size_type i)
+    { return it[i]; }
+  };
+
+  template <typename IT, typename V> std::ostream &operator << // stream output, delegated to gmm::write
+  (std::ostream &o, const tab_ref_with_origin<IT, V>& m)
+  { gmm::write(o,m); return o; }
+
+
+  template <typename IT, typename V> // a gmm::tab_ref_reg_spaced<IT> that additionally stores an origin pointer
+  struct tab_ref_reg_spaced_with_origin : public gmm::tab_ref_reg_spaced<IT> {
+    typedef  tab_ref_reg_spaced_with_origin<IT, V> this_type;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    porigin_type origin; // read back by linalg_traits<this_type>::origin()
+    
+    tab_ref_reg_spaced_with_origin(void) {} // leaves origin uninitialised
+    tab_ref_reg_spaced_with_origin(const IT &b, size_type n, size_type s, // forwards (b, n, s) to the base, stores p
+				   const porigin_type p)
+      : gmm::tab_ref_reg_spaced<IT>(b,n,s), origin(p) {}
+    tab_ref_reg_spaced_with_origin(const V &v, const sub_slice &si) // view of v starting at offset si.min
+      : gmm::tab_ref_reg_spaced<IT>(vect_begin(const_cast<V&>(v)) + si.min, 
+				    si.N, (si.max - si.min)/si.N),
+      origin(linalg_origin(const_cast<V&>(v))) {}
+    tab_ref_reg_spaced_with_origin(V &v, const sub_slice &si) // same construction for a non-const v
+      : gmm::tab_ref_reg_spaced<IT>(vect_begin(const_cast<V&>(v)) + si.min,
+				    si.N, (si.max - si.min)/si.N),
+	origin(linalg_origin(const_cast<V&>(v))) {}
+  };
+
+  template <typename IT, typename V> // gmm traits: presents tab_ref_reg_spaced_with_origin as a dense vector
+  struct linalg_traits<tab_ref_reg_spaced_with_origin<IT, V> > {
+    typedef typename std::iterator_traits<IT>::pointer PT;
+    typedef tab_ref_reg_spaced_with_origin<IT, V> this_type;
+    typedef typename linalg_traits<V>::origin_type origin_type; // origin type is inherited from the underlying V
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename std::iterator_traits<IT>::value_type value_type;
+    typedef typename std::iterator_traits<IT>::reference reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::iterator const_iterator; // deliberately the same type as iterator
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return v.origin; } // the origin pointer stored in the view
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite) // writes value_type(0) over [it, ite)
+    { std::fill(it, ite, value_type(0)); }
+    static void do_clear(this_type &v) // writes value_type(0) over the whole view
+    { std::fill(v.begin(), v.end(), value_type(0)); }
+    static value_type access(const origin_type *, const const_iterator &it, // read element i, counted from it
+			     const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it, 
+			    const iterator &, size_type i)
+    { return it[i]; }
+  };
+  
+  template <typename IT, typename V> std::ostream &operator << // stream output, delegated to gmm::write
+  (std::ostream &o, const tab_ref_reg_spaced_with_origin<IT, V>& m)
+  { gmm::write(o,m); return o; }
+
+
+  template <typename IT, typename ITINDEX, typename V> // a gmm::tab_ref_index_ref that additionally stores an origin pointer
+  struct tab_ref_index_ref_with_origin 
+    : public gmm::tab_ref_index_ref<IT, ITINDEX> {
+    typedef tab_ref_index_ref_with_origin<IT, ITINDEX, V> this_type;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    porigin_type origin; // read back by linalg_traits<this_type>::origin()
+
+    tab_ref_index_ref_with_origin(void) {} // leaves origin uninitialised
+    tab_ref_index_ref_with_origin(const IT &b, const ITINDEX &bi, // forwards (b, bi, ei) to the base, stores p
+				  const ITINDEX &ei, porigin_type p)
+      : gmm::tab_ref_index_ref<IT, ITINDEX>(b, bi, ei), origin(p) {}
+
+    tab_ref_index_ref_with_origin(const V &v, const sub_index &si) // view of v through the index range [si.begin(), si.end())
+      : gmm::tab_ref_index_ref<IT, ITINDEX>(vect_begin(const_cast<V&>(v)),
+					    si.begin(), si.end()),
+      origin(linalg_origin(const_cast<V&>(v))) {}
+    tab_ref_index_ref_with_origin(V &v, const sub_index &si) // same construction for a non-const v
+      : gmm::tab_ref_index_ref<IT, ITINDEX>(vect_begin(const_cast<V&>(v)),
+					    si.begin(), si.end()),
+	origin(linalg_origin(const_cast<V&>(v))) {}
+  };
+
+  template <typename IT, typename ITINDEX, typename V> // gmm traits: presents tab_ref_index_ref_with_origin as a dense vector
+  struct linalg_traits<tab_ref_index_ref_with_origin<IT, ITINDEX, V> > {
+    typedef typename std::iterator_traits<IT>::pointer PT;
+    typedef tab_ref_index_ref_with_origin<IT, ITINDEX, V> this_type;
+    typedef typename linalg_traits<V>::origin_type origin_type; // origin type is inherited from the underlying V
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename std::iterator_traits<IT>::value_type value_type;
+    typedef typename std::iterator_traits<IT>::reference reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::iterator const_iterator; // deliberately the same type as iterator
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return v.origin; } // the origin pointer stored in the view
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite) // writes value_type(0) over [it, ite)
+    { std::fill(it, ite, value_type(0)); }
+    static void do_clear(this_type &v) // writes value_type(0) over the whole view
+    { std::fill(v.begin(), v.end(), value_type(0)); }
+    static value_type access(const origin_type *, const const_iterator &it, // read element i, counted from it
+			     const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it,
+			    const iterator &, size_type i)
+    { return it[i]; }
+  };
+
+  template <typename IT, typename ITINDEX, typename V> // stream output, delegated to gmm::write
+  std::ostream &operator <<
+  (std::ostream &o, const tab_ref_index_ref_with_origin<IT, ITINDEX, V>& m)
+  { gmm::write(o,m); return o; }
+
+
+  template<typename ITER, typename MIT, typename PT> // random-access iterator whose "value" is itself an iterator: it + i*N
+  struct dense_compressed_iterator {
+    typedef ITER value_type;
+    typedef ITER *pointer;
+    typedef ITER &reference;
+    typedef ptrdiff_t difference_type;
+    typedef std::random_access_iterator_tag iterator_category;
+    typedef size_t size_type;
+    typedef dense_compressed_iterator<ITER, MIT, PT> iterator;
+    typedef typename std::iterator_traits<PT>::value_type *MPT; // non-const pointer to PT's pointee
+
+    ITER it; // base position; never moved by the increment/decrement operators
+    size_type N, nrows, ncols, i; // N: stride multiplied by i on dereference; i: current index; nrows/ncols: carried for the traits
+    PT origin;
+    
+    iterator operator ++(int) { iterator tmp = *this; i++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; i--; return tmp; }
+    iterator &operator ++()   { ++i; return *this; }
+    iterator &operator --()   { --i; return *this; }
+    iterator &operator +=(difference_type ii) { i += ii; return *this; }
+    iterator &operator -=(difference_type ii) { i -= ii; return *this; }
+    iterator operator +(difference_type ii) const 
+    { iterator itt = *this; return (itt += ii); }
+    iterator operator -(difference_type ii) const
+    { iterator itt = *this; return (itt -= ii); }
+    difference_type operator -(const iterator &ii) const // base offset in units of N (0 when N == 0) plus index difference
+    { return (N ? (it - ii.it) / N : 0) + i - ii.i; }
+
+    ITER operator *() const { return it+i*N; } // start of the i-th slice
+    ITER operator [](int ii) const { return it + (i+ii) * N; }
+
+    bool operator ==(const iterator &ii) const // all comparisons are expressed through operator-
+    { return (*this - ii) == difference_type(0); }
+    bool operator !=(const iterator &ii) const { return !(ii == *this); }
+    bool operator < (const iterator &ii) const
+    { return (*this - ii) < difference_type(0); }
+
+    dense_compressed_iterator(void) {} // members are left uninitialised
+    dense_compressed_iterator(const dense_compressed_iterator<MIT,MIT,MPT> &ii) // conversion from the <MIT,MIT,MPT> instantiation
+      : it(ii.it), N(ii.N), nrows(ii.nrows), ncols(ii.ncols), i(ii.i),
+	origin(ii.origin)  {}
+    dense_compressed_iterator(const ITER &iter, size_type n, size_type r, // argument order: base, N, nrows, ncols, i, origin
+			      size_type c, size_type ii, PT o)
+      : it(iter), N(n), nrows(r), ncols(c), i(ii), origin(o) { }
+    
+  };
+
+  /* ******************************************************************** */
+  /*	    Read only reference on a compressed sparse vector             */
+  /* ******************************************************************** */
+
+  template <typename PT1, typename PT2, int shift = 0> // bidirectional iterator over parallel value (pr) and index (ir) arrays
+  struct cs_vector_ref_iterator {
+    PT1 pr; // current value position
+    PT2 ir; // current index position, moved in lockstep with pr
+
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef PT1 pointer;
+    typedef typename std::iterator_traits<PT1>::reference  reference;
+    typedef size_t        size_type;
+    typedef ptrdiff_t     difference_type;
+    typedef std::bidirectional_iterator_tag iterator_category;
+    typedef cs_vector_ref_iterator<PT1, PT2, shift> iterator;
+    
+    cs_vector_ref_iterator(void) {} // pr and ir are left uninitialised
+    cs_vector_ref_iterator(PT1 p1, PT2 p2) : pr(p1), ir(p2) {}
+
+    inline size_type index(void) const { return (*ir) - shift; } // stored index with the template's shift subtracted
+    iterator &operator ++() { ++pr; ++ir; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --() { --pr; --ir; return *this; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    
+    reference operator  *() const { return *pr; }
+    pointer   operator ->() const { return pr; }
+    
+    bool operator ==(const iterator &i) const { return (i.pr==pr);} // equality compares only pr
+    bool operator !=(const iterator &i) const { return (i.pr!=pr);}
+  };
+    
+  template <typename PT1, typename PT2, int shift = 0> struct cs_vector_ref { // read-only sparse vector over external value/index arrays
+    PT1 pr; // stored values
+    PT2 ir; // index of each stored value
+    size_type n, size_; // n: number of stored entries; size_: value reported by size()
+
+    typedef cs_vector_ref<PT1, PT2, shift> this_type;
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef typename linalg_traits<this_type>::const_iterator const_iterator;
+
+    cs_vector_ref(PT1 pt1, PT2 pt2, size_type nnz, size_type ns)
+      : pr(pt1), ir(pt2), n(nnz), size_(ns) {}
+    cs_vector_ref(void) {} // members are left uninitialised
+
+    size_type size(void) const { return size_; }
+    
+    const_iterator begin(void) const { return const_iterator(pr, ir); }
+    const_iterator end(void) const { return const_iterator(pr+n, ir+n); } // one past the n stored entries
+    
+    value_type operator[](size_type i) const // lookup by index, delegated to linalg_traits<this_type>::access
+    { return linalg_traits<this_type>::access(pr, begin(), end(),i); }
+  };
+
+  template <typename PT1, typename PT2, int shift> // gmm traits: presents cs_vector_ref as a read-only sparse vector
+  struct linalg_traits<cs_vector_ref<PT1, PT2, shift> > {
+    typedef cs_vector_ref<PT1, PT2, shift> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef value_type origin_type; // the origin is a pointer to the first stored value (see origin())
+    typedef typename std::iterator_traits<PT1>::value_type reference; // elements are returned by value
+    typedef cs_vector_ref_iterator<typename const_pointer<PT1>::pointer,
+	    typename const_pointer<PT2>::pointer, shift>  const_iterator;
+    typedef abstract_null_type iterator; // no mutable iterator exists for this read-only view
+    typedef abstract_sparse storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static const origin_type* origin(const this_type &v) { return v.pr; }
+    static value_type access(const origin_type *, const const_iterator &b, // value stored for index i, or value_type(0) if none
+			     const const_iterator &e, size_type i) {
+      if (b.ir == e.ir) return value_type(0); // no stored entries at all
+      PT2 p = std::lower_bound(b.ir, e.ir, i+shift); // binary search: first stored index >= i+shift
+      return (p != e.ir && *p == i+shift) ? b.pr[p-b.ir] : value_type(0); // bound check first: p may equal e.ir and must not be read then
+    }
+  };
+
+  template <typename PT1, typename PT2, int shift> // stream output, delegated to gmm::write
+  std::ostream &operator <<
+  (std::ostream &o, const cs_vector_ref<PT1, PT2, shift>& m)
+  { gmm::write(o,m); return o; }
+
+  template <typename PT1, typename PT2, int shift> // number of stored entries: returns the member n directly
+  inline size_type nnz(const cs_vector_ref<PT1, PT2, shift>& l) { return l.n; }
+
+  /* ******************************************************************** */
+  /*	    Read only reference on a compressed sparse column matrix      */
+  /* ******************************************************************** */
+
+  template <typename PT1, typename PT2, typename PT3, int shift = 0> // iterator over the slices of a compressed sparse matrix; only jc moves
+  struct sparse_compressed_iterator {
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef const value_type *pointer;
+    typedef const value_type &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag iterator_category;
+    typedef sparse_compressed_iterator<PT1, PT2, PT3, shift> iterator;
+
+    PT1 pr; // value array (never moved by this iterator)
+    PT2 ir; // index array (never moved by this iterator)
+    PT3 jc; // current position in the offset array; this is what ++/-- advance
+    size_type n; // passed as the vector size when the traits build a cs_vector_ref from this iterator
+    const value_type *origin;
+    
+    iterator operator ++(int) { iterator tmp = *this; jc++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; jc--; return tmp; }
+    iterator &operator ++()   { jc++; return *this; }
+    iterator &operator --()   { jc--; return *this; }
+    iterator &operator +=(difference_type i) { jc += i; return *this; }
+    iterator &operator -=(difference_type i) { jc -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { return jc - i.jc; }
+
+    reference operator *() const { return pr + *jc - shift; } // NOTE(review): pointer-valued expression returned as const value_type& - confirm intent
+    reference operator [](int ii) { return pr + *(jc+ii) - shift; } // NOTE(review): same mismatch; also not const-qualified
+
+    bool operator ==(const iterator &i) const { return (jc == i.jc); } // comparisons look at jc only
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (jc < i.jc); }
+
+    sparse_compressed_iterator(void) {} // members are left uninitialised
+    sparse_compressed_iterator(PT1 p1, PT2 p2, PT3 p3, size_type nn,
+			       const value_type *o)
+      : pr(p1), ir(p2), jc(p3), n(nn), origin(o) { }
+    
+  };
+
+  template <typename PT1, typename PT2, typename PT3, int shift = 0> // read-only view of externally owned compressed-sparse-column arrays
+  struct csc_matrix_ref {
+    PT1 pr; // values.
+    PT2 ir; // row indexes.
+    PT3 jc; // column repartition on pr and ir.
+    size_type nc, nr; // column count, row count
+    
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    csc_matrix_ref(PT1 pt1, PT2 pt2, PT3 pt3, size_type nrr, size_type ncc) // note: row count is passed before column count
+      : pr(pt1), ir(pt2), jc(pt3), nc(ncc), nr(nrr) {}
+    csc_matrix_ref(void) {} // members are left uninitialised
+    
+    size_type nrows(void) const { return nr; }
+    size_type ncols(void) const { return nc; }
+   
+    value_type operator()(size_type i, size_type j) const // element (i, j): entry i of column j
+      { return mat_col(*this, j)[i]; }
+  };
+
+  template <typename PT1, typename PT2, typename PT3, int shift> // gmm traits: csc_matrix_ref as a read-only, column-major sparse matrix
+  struct linalg_traits<csc_matrix_ref<PT1, PT2, PT3, shift> > {
+    typedef csc_matrix_ref<PT1, PT2, PT3, shift> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef typename std::iterator_traits<PT1>::value_type reference; // elements are returned by value
+    typedef value_type origin_type;
+    typedef abstract_sparse storage_type;
+    typedef abstract_null_type sub_row_type; // row access and mutable access are not provided (null types)
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type sub_col_type;
+    typedef cs_vector_ref<typename const_pointer<PT1>::pointer,
+            typename const_pointer<PT2>::pointer, shift> const_sub_col_type;
+    typedef sparse_compressed_iterator<typename const_pointer<PT1>::pointer,
+				       typename const_pointer<PT2>::pointer,
+				       typename const_pointer<PT3>::pointer,
+				       shift>  const_col_iterator;
+    typedef abstract_null_type col_iterator;
+    typedef col_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_col_iterator col_begin(const this_type &m) // iterator positioned at jc[0]; carries nr as the column length
+    { return const_col_iterator(m.pr, m.ir, m.jc, m.nr, m.pr); }
+    static const_col_iterator col_end(const this_type &m) // iterator positioned at jc[nc]
+    { return const_col_iterator(m.pr, m.ir, m.jc + m.nc, m.nr, m.pr); }
+    static const_sub_col_type col(const const_col_iterator &it) { // column = entries [*jc - shift, *(jc+1) - shift) of pr/ir
+      return const_sub_col_type(it.pr + *(it.jc) - shift,
+	     it.ir + *(it.jc) - shift, *(it.jc + 1) - *(it.jc), it.n);
+    }
+    static const origin_type* origin(const this_type &m) { return m.pr; }
+    static value_type access(const const_col_iterator &itcol, size_type j) // entry j of the column itcol refers to
+    { return col(itcol)[j]; }
+  };
+
+
+  template <typename PT1, typename PT2, typename PT3, int shift> // stream output, delegated to gmm::write
+  std::ostream &operator <<
+  (std::ostream &o, const csc_matrix_ref<PT1, PT2, PT3, shift>& m)
+  { gmm::write(o,m); return o; }
+
+  /* ******************************************************************** */
+  /*	   Read only reference on a compressed sparse row matrix          */
+  /* ******************************************************************** */
+
+  template <typename PT1, typename PT2, typename PT3, int shift = 0> // read-only view of externally owned compressed-sparse-row arrays
+  struct csr_matrix_ref {
+    PT1 pr; // values.
+    PT2 ir; // column indexes.
+    PT3 jc; // row repartition on pr and ir.
+    size_type nc, nr; // column count, row count
+    
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    csr_matrix_ref(PT1 pt1, PT2 pt2, PT3 pt3, size_type nrr, size_type ncc) // note: row count is passed before column count
+      : pr(pt1), ir(pt2), jc(pt3), nc(ncc), nr(nrr) {}
+    csr_matrix_ref(void) {} // members are left uninitialised
+    
+    size_type nrows(void) const { return nr; }
+    size_type ncols(void) const { return nc; }
+   
+    value_type operator()(size_type i, size_type j) const // element (i, j): entry j of row i
+      { return mat_row(*this, i)[j]; }
+  };
+  
+  template <typename PT1, typename PT2, typename PT3, int shift> // gmm traits: csr_matrix_ref as a read-only, row-major sparse matrix
+  struct linalg_traits<csr_matrix_ref<PT1, PT2, PT3, shift> > {
+    typedef csr_matrix_ref<PT1, PT2, PT3, shift> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename std::iterator_traits<PT1>::value_type value_type;
+    typedef typename std::iterator_traits<PT1>::value_type reference; // elements are returned by value
+    typedef value_type origin_type;
+    typedef abstract_sparse storage_type;
+    typedef abstract_null_type sub_col_type; // column access and mutable access are not provided (null types)
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type sub_row_type;
+    typedef cs_vector_ref<typename const_pointer<PT1>::pointer,
+			  typename const_pointer<PT2>::pointer, shift>
+            const_sub_row_type;
+    typedef sparse_compressed_iterator<typename const_pointer<PT1>::pointer,
+				       typename const_pointer<PT2>::pointer,
+				       typename const_pointer<PT3>::pointer,
+				       shift>  const_row_iterator;
+    typedef abstract_null_type row_iterator;
+    typedef row_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_row_iterator row_begin(const this_type &m) // iterator positioned at jc[0]; carries nc as the row length
+    { return const_row_iterator(m.pr, m.ir, m.jc, m.nc, m.pr); }
+    static const_row_iterator row_end(const this_type &m) // iterator positioned at jc[nr]
+    { return const_row_iterator(m.pr, m.ir, m.jc + m.nr, m.nc, m.pr); }
+    static const_sub_row_type row(const const_row_iterator &it) { // row = entries [*jc - shift, *(jc+1) - shift) of pr/ir
+      return const_sub_row_type(it.pr + *(it.jc) - shift,
+	     it.ir + *(it.jc) - shift, *(it.jc + 1) - *(it.jc), it.n);
+    }
+    static const origin_type* origin(const this_type &m) { return m.pr; }
+    static value_type access(const const_row_iterator &itrow, size_type j) // entry j of the row itrow refers to
+    { return row(itrow)[j]; }
+  };
+
+  template <typename PT1, typename PT2, typename PT3, int shift> // stream output, delegated to gmm::write
+  std::ostream &operator <<
+  (std::ostream &o, const csr_matrix_ref<PT1, PT2, PT3, shift>& m)
+  { gmm::write(o,m); return o; }
+
+  /* ********************************************************************* */
+  /*		                                         		   */
+  /*		Simple interface for C arrays                     	   */
+  /*		                                         		   */
+  /* ********************************************************************* */
+
+  template <class PT> struct array1D_reference { // non-owning vector view over the range [begin, end)
+
+    typedef typename std::iterator_traits<PT>::value_type value_type;
+
+    PT begin, end; // data members (not functions): first element and one past the last
+    
+    const value_type &operator[](size_type i) const { return *(begin+i); } // unchecked element access
+    value_type &operator[](size_type i) { return *(begin+i); }
+
+    array1D_reference(PT begin_, size_type s) : begin(begin_), end(begin_+s) {} // view of s elements starting at begin_
+  };
+
+  template <typename PT> // gmm traits: presents array1D_reference<PT> as a dense, non-resizable vector
+  struct linalg_traits<array1D_reference<PT> > {
+    typedef array1D_reference<PT> this_type;
+    typedef this_type origin_type; // the view object is its own origin
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename std::iterator_traits<PT>::value_type value_type;
+    typedef typename std::iterator_traits<PT>::reference reference;
+    typedef PT iterator;
+    typedef PT const_iterator; // same pointer type as iterator
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.end - v.begin; }
+    static iterator begin(this_type &v) { return v.begin; }
+    static const_iterator begin(const this_type &v) { return v.begin; }
+    static iterator end(this_type &v) { return v.end; }
+    static const_iterator end(const this_type &v) { return v.end; }
+    static origin_type* origin(this_type &v) { return &v; }
+    static const origin_type* origin(const this_type &v) { return &v; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite) // writes value_type(0) over [it, ite)
+    { std::fill(it, ite, value_type(0)); }
+    static void do_clear(this_type &v) // writes value_type(0) over the whole view
+    { std::fill(v.begin, v.end, value_type(0)); }
+    static value_type access(const origin_type *, const const_iterator &it, // read element i, counted from it
+			     const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it,
+			    const iterator &, size_type i)
+    { return it[i]; }
+    static void resize(this_type &, size_type ) // always fails the assertion: the view cannot be resized
+    { GMM_ASSERT1(false, "Not resizable vector"); }
+  };
+
+  template<typename PT> std::ostream &operator << // stream output, delegated to gmm::write
+  (std::ostream &o, const array1D_reference<PT>& v)
+  { gmm::write(o,v); return o; }
+  
+  template <class PT> struct array2D_col_reference { // non-owning matrix view; element (l, c) lives at begin_ + c*nbl + l
+
+    typedef typename std::iterator_traits<PT>::value_type T;
+    typedef typename std::iterator_traits<PT>::reference reference;
+    typedef typename const_reference<reference>::reference const_reference;
+    typedef PT iterator;
+    typedef typename const_pointer<PT>::pointer const_iterator;
+    
+    PT begin_; // first element of the viewed storage
+    size_type nbl, nbc; // number of rows (lines), number of columns
+
+    inline const_reference operator ()(size_type l, size_type c) const { // element (l, c), range-checked via GMM_ASSERT2
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(begin_ + c*nbl+l);
+    }
+    inline reference operator ()(size_type l, size_type c) {
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(begin_ + c*nbl+l);
+    }
+    
+    void resize(size_type, size_type); // declared here only; no definition appears in this part of the file
+    void reshape(size_type m, size_type n) { // change the shape; the element count must stay the same
+      GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch");
+      nbl = m; nbc = n;
+    }
+    
+    void fill(T a, T b = T(0)) {  // b everywhere, then a at offsets 0, nbl+1, 2*(nbl+1), ... (the diagonal)
+      std::fill(begin_, begin_+nbc*nbl, b);
+      iterator p = begin_, e = begin_+nbc*nbl;
+      while (p < e) { *p = a; p += nbl+1; }
+    }
+    inline size_type nrows(void) const { return nbl; }
+    inline size_type ncols(void) const { return nbc; }
+
+    iterator begin(void) { return begin_; }
+    const_iterator begin(void) const { return begin_; }
+    iterator end(void) { return begin_+nbl*nbc; }
+    const_iterator end(void) const { return begin_+nbl*nbc; }
+
+    array2D_col_reference(PT begin__, size_type nrows_, size_type ncols_)
+      : begin_(begin__), nbl(nrows_), nbc(ncols_) {}
+  };
+
+  template <typename PT> struct linalg_traits<array2D_col_reference<PT> > { // gmm traits: dense matrix with both row and column access
+    typedef array2D_col_reference<PT> this_type;
+    typedef this_type origin_type; // the view object is its own origin
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename std::iterator_traits<PT>::value_type value_type;
+    typedef typename std::iterator_traits<PT>::reference reference;
+    typedef abstract_dense storage_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::iterator,
+					   this_type> sub_row_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::const_iterator,
+					   this_type> const_sub_row_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> row_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_row_iterator;
+    typedef tab_ref_with_origin<typename this_type::iterator, 
+				this_type> sub_col_type;
+    typedef tab_ref_with_origin<typename this_type::const_iterator,
+				this_type> const_sub_col_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> col_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_col_iterator;
+    typedef col_and_row sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it) // row: starts at *it, passes (nrows, ncols) as the spaced-range arguments
+    { return const_sub_row_type(*it, it.nrows, it.ncols, it.origin); }
+    static const_sub_col_type col(const const_col_iterator &it) // column: the contiguous range [*it, *it + nrows)
+    { return const_sub_col_type(*it, *it + it.nrows, it.origin); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(*it, it.nrows, it.ncols, it.origin); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(*it, *it + it.nrows, it.origin); }
+    static row_iterator row_begin(this_type &m) // row iterators advance by 1 element per step
+    { return row_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); }
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m.begin(), 1, m.nrows(), m.ncols(), m.nrows(), &m); }
+    static const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); }
+    static const_row_iterator row_end(const this_type &m) {
+      return const_row_iterator(m.begin(), 1, m.nrows(),
+				m.ncols(), m.nrows(), &m);
+    }
+    static col_iterator col_begin(this_type &m) // column iterators advance by nrows elements per step
+    { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); }
+    static col_iterator col_end(this_type &m) {
+      return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(),
+			  m.ncols(), &m);
+    }
+    static const_col_iterator col_begin(const this_type &m) {
+      return const_col_iterator(m.begin(), m.nrows(), m.nrows(),
+				m.ncols(), 0, &m);
+    }
+    static const_col_iterator col_end(const this_type &m) {
+      return const_col_iterator(m.begin(), m.nrows(),m.nrows(),m.ncols(),
+				m.ncols(), &m);
+    }
+    static origin_type* origin(this_type &m) { return &m; }
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.fill(value_type(0)); } // fill(0) with the default b = 0 zeroes every element
+    static value_type access(const const_col_iterator &itcol, size_type j) // element j of the column itcol refers to
+    { return (*itcol)[j]; }
+    static reference access(const col_iterator &itcol, size_type j)
+    { return (*itcol)[j]; }
+    static void resize(this_type &v, size_type m, size_type n) // forwards to the view's resize member
+    { v.resize(m,n); }
+    static void reshape(this_type &v, size_type m, size_type n)
+    { v.reshape(m, n); }
+  };
+
+  template<typename PT> std::ostream &operator << // stream output, delegated to gmm::write
+    (std::ostream &o, const array2D_col_reference<PT>& m)
+  { gmm::write(o,m); return o; }
+
+
+
+  template <class PT> struct array2D_row_reference { // non-owning matrix view; element (l, c) lives at begin_ + l*nbc + c
+    
+    typedef typename std::iterator_traits<PT>::value_type T;
+    typedef typename std::iterator_traits<PT>::reference reference;
+    typedef typename const_reference<reference>::reference const_reference;
+    typedef PT iterator;
+    typedef typename const_pointer<PT>::pointer const_iterator;
+    
+    PT begin_; // first element of the viewed storage
+    size_type nbl, nbc; // number of rows (lines), number of columns
+
+    inline const_reference operator ()(size_type l, size_type c) const { // element (l, c), range-checked via GMM_ASSERT2
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(begin_ + l*nbc+c);
+    }
+    inline reference operator ()(size_type l, size_type c) {
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(begin_ + l*nbc+c);
+    }
+    
+    void resize(size_type, size_type); // declared here only; no definition appears in this part of the file
+    void reshape(size_type m, size_type n) { // change the shape; the element count must stay the same
+      GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch");
+      nbl = m; nbc = n;
+    }
+    
+    void fill(T a, T b = T(0)) {  // b everywhere, then a at offsets 0, nbc+1, 2*(nbc+1), ... (the diagonal)
+      std::fill(begin_, begin_+nbc*nbl, b);
+      iterator p = begin_, e = begin_+nbc*nbl;
+      while (p < e) { *p = a; p += nbc+1; }
+    }
+    inline size_type nrows(void) const { return nbl; }
+    inline size_type ncols(void) const { return nbc; }
+
+    iterator begin(void) { return begin_; }
+    const_iterator begin(void) const { return begin_; }
+    iterator end(void) { return begin_+nbl*nbc; }
+    const_iterator end(void) const { return begin_+nbl*nbc; }
+
+    array2D_row_reference(PT begin__, size_type nrows_, size_type ncols_)
+      : begin_(begin__), nbl(nrows_), nbc(ncols_) {}
+  };
+
+  template <typename PT> struct linalg_traits<array2D_row_reference<PT> > { // gmm traits: row-major dense matrix with row and column access
+    typedef array2D_row_reference<PT> this_type;
+    typedef this_type origin_type; // the view object is its own origin
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename std::iterator_traits<PT>::value_type value_type;
+    typedef typename std::iterator_traits<PT>::reference reference;
+    typedef abstract_dense storage_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::iterator,
+					   this_type> sub_col_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::const_iterator,
+					   this_type> const_sub_col_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> col_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_col_iterator;
+    typedef tab_ref_with_origin<typename this_type::iterator, 
+				this_type> sub_row_type;
+    typedef tab_ref_with_origin<typename this_type::const_iterator,
+				this_type> const_sub_row_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> row_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_row_iterator;
+    typedef col_and_row sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static const_sub_col_type col(const const_col_iterator &it) // column: starts at *it, stride ncols, nrows elements
+    { return const_sub_col_type(*it, it.ncols, it.nrows, it.origin); }
+    static const_sub_row_type row(const const_row_iterator &it) // row: the contiguous range [*it, *it + ncols)
+    { return const_sub_row_type(*it, *it + it.ncols, it.origin); }
+    static sub_col_type col(const col_iterator &it) // same four arguments as the const overload
+    { return sub_col_type(*it, it.ncols, it.nrows, it.origin); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(*it, *it + it.ncols, it.origin); }
+    static col_iterator col_begin(this_type &m) // column iterators advance by 1 element; they carry (nrows, ncols) of m
+    { return col_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); }
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m.begin(), 1, m.nrows(), m.ncols(), m.ncols(), &m); }
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); }
+    static const_col_iterator col_end(const this_type &m) {
+      return const_col_iterator(m.begin(), 1, m.nrows(),
+				m.ncols(), m.ncols(), &m);
+    }
+    static row_iterator row_begin(this_type &m) // row iterators advance by ncols elements; they carry (nrows, ncols) of m
+    { return row_iterator(m.begin(), m.ncols(), m.nrows(), m.ncols(), 0, &m); }
+    static row_iterator row_end(this_type &m) {
+      return row_iterator(m.begin(), m.ncols(), m.nrows(), m.ncols(),
+			  m.nrows(), &m);
+    }
+    static const_row_iterator row_begin(const this_type &m) {
+      return const_row_iterator(m.begin(), m.ncols(), m.nrows(), m.ncols(),
+				0, &m);
+    }
+    static const_row_iterator row_end(const this_type &m) {
+      return const_row_iterator(m.begin(), m.ncols(), m.nrows(), m.ncols(),
+				m.nrows(), &m);
+    }
+    static origin_type* origin(this_type &m) { return &m; }
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.fill(value_type(0)); } // fill(0) with the default b = 0 zeroes every element
+    static value_type access(const const_row_iterator &itrow, size_type j) // element j of the row itrow refers to
+    { return (*itrow)[j]; }
+    static reference access(const row_iterator &itrow, size_type j)
+    { return (*itrow)[j]; }
+    static void resize(this_type &v, size_type m, size_type n) // forwards to the view's resize member
+    { v.resize(m,n); }
+    static void reshape(this_type &v, size_type m, size_type n)
+    { v.reshape(m, n); }
+  };
+
+  template<typename PT> std::ostream &operator << // stream output, delegated to gmm::write
+    (std::ostream &o, const array2D_row_reference<PT>& m)
+  { gmm::write(o,m); return o; }
+
+
+
+
+
+
+}
+
+
+#endif //  GMM_INTERFACE_H__
diff --git a/gmm/gmm_interface_bgeot.h b/gmm/gmm_interface_bgeot.h
new file mode 100644
index 000000000..d1d0ae3ab
--- /dev/null
+++ b/gmm/gmm_interface_bgeot.h
@@ -0,0 +1,83 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_interface_bgeot.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief interface for bgeot::small_vector
+*/
+#ifndef GMM_INTERFACE_BGEOT_H__
+#define GMM_INTERFACE_BGEOT_H__
+
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Traits for bgeot objects.                                             */
+  /* bgeot::small_vector<T> is exposed to gmm as a dense abstract vector;  */
+  /* size/begin/end/resize forward to it, clear/access use its iterators.  */
+  /* ********************************************************************* */
+
+  template <typename T> struct linalg_traits<bgeot::small_vector<T> > {
+    typedef bgeot::small_vector<T> this_type;
+    typedef this_type origin_type;       // the vector is its own origin
+    typedef linalg_false is_reference;   // flagged as not being a reference
+    typedef abstract_vector linalg_type;
+    typedef T value_type;
+    typedef T& reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::const_iterator const_iterator;
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return &v; }
+    static const origin_type* origin(const this_type &v) { return &v; }
+    static void clear(origin_type* o, const iterator &it, const iterator &ite)
+    { std::fill(it, ite, value_type(0)); } // zero [it, ite); o is not used
+    static void do_clear(this_type &v)
+    { std::fill(v.begin(), v.end(), value_type(0)); } // zero every entry
+    static value_type access(const origin_type *, const const_iterator &it,
+			     const const_iterator &, size_type i)
+    { return it[i]; } // i-th element counted from it (read-only overload)
+    static reference access(origin_type *, const iterator &it,
+			    const iterator &, size_type i)
+    { return it[i]; } // i-th element counted from it (writable overload)
+    static void resize(this_type &v, size_type n) { v.resize(n); }
+  };
+
+}
+
+
+#endif //  GMM_INTERFACE_BGEOT_H__
diff --git a/gmm/gmm_iter.h b/gmm/gmm_iter.h
new file mode 100644
index 000000000..e82d270f4
--- /dev/null
+++ b/gmm/gmm_iter.h
@@ -0,0 +1,162 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_iter.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date February 10, 2003.
+   @brief Iteration object.
+*/
+
+#ifndef GMM_ITER_H__
+#define GMM_ITER_H__
+
+#include "gmm_kernel.h"
+#include <iomanip>
+
+namespace gmm {
+
+  /**  The iteration object tracks the residual of an iterative solver and
+       decides when to stop.  converged() means res <= rhsn * resmax;
+       diverged() means a NaN residual, nit >= maxiter, or a residual of at
+       least rhsn * diverged_res after more than 4 iterations.
+       The method finished() records a residual and checks both conditions.
+       The first() method is used to determine the first iteration of the loop.
+  */
+  class iteration {
+  protected :
+    double rhsn;       /* Right hand side norm.                            */
+    size_type maxiter; /* Max. number of iterations.                       */
+    int noise;         /* if noise > 0 iterations are printed.             */
+    double resmax;     /* maximum residual (relative to rhsn).             */
+    double resminreach, resadd; /* smallest residual seen / sum of res.    */
+    double diverged_res; /* Threshold beyond which the iterative method    */
+                       /* is considered to diverge.                        */
+    size_type nit;     /* iteration number.                                */
+    double res;        /* last computed residual.                          */
+    std::string name;  /* optional name of the method.                     */
+    bool written;      /* true once the current iteration was printed.     */
+    void (*callback)(const gmm::iteration&); /* invoked by finished().     */
+  public :
+
+    void init(void) {  /* resets the counters and also drops the callback. */
+      nit = 0; res = 0.0; written = false;
+      resminreach = 1E200; resadd = 0.0;
+      callback = 0;
+    }
+
+    iteration(double r = 1.0E-8, int noi = 0, size_type mit = size_type(-1),
+              double div_res = 1E200)
+      : rhsn(1.0), maxiter(mit), noise(noi), resmax(r), diverged_res(div_res)
+    { init(); }
+
+    void  operator ++(int) {  nit++; written = false; resadd += res; }
+    void  operator ++() { (*this)++; }
+
+    bool first(void) const { return nit == 0; }
+
+    /* get/set the "noisiness" (verbosity) of the solvers */
+    int get_noisy(void) const { return noise; }
+    void set_noisy(int n) { noise = n; }
+    void reduce_noisy(void) { if (noise > 0) noise--; }
+
+    double get_resmax(void) const { return resmax; }
+    void set_resmax(double r) { resmax = r; }
+
+    double get_res() const { return res; }
+    void enforce_converged(bool c = true)
+    { if (c) res = double(0); else res = rhsn * resmax + double(1); }
+
+    /* change the user-definable callback, called at the start of finished() */
+    void set_callback(void (*t)(const gmm::iteration&)) {
+      callback = t;
+    }
+
+    double get_diverged_residual(void) const { return diverged_res; }
+    void set_diverged_residual(double r) { diverged_res = r; }
+
+    size_type get_iteration(void) const { return nit; }
+    void set_iteration(size_type i) { nit = i; }
+
+    size_type get_maxiter(void) const { return maxiter; }
+    void set_maxiter(size_type i) { maxiter = i; }
+
+    double get_rhsnorm(void) const { return rhsn; }
+    void set_rhsnorm(double r) { rhsn = r; }
+    /* The argument-less tests only read state: const, usable in callbacks. */
+    bool converged(void) const {
+      return !isnan(res) && res <= rhsn * resmax;
+    }
+    bool converged(double nr) {
+      res = gmm::abs(nr);
+      resminreach = std::min(resminreach, res);
+      return converged();
+    }
+    template <typename VECT> bool converged(const VECT &v)
+    { return converged(gmm::vect_norm2(v)); }
+    bool diverged(void) const {
+      return isnan(res) || (nit>=maxiter)
+                        || (res>=rhsn*diverged_res && nit > 4);
+    }
+    bool diverged(double nr) {
+      res = gmm::abs(nr);
+      resminreach = std::min(resminreach, res);
+      return diverged();
+    }
+    /* The callback runs first, i.e. before nr is stored in res. */
+    bool finished(double nr) {
+      if (callback) callback(*this);
+      if (noise > 0 && !written) {
+        double a = (rhsn == 0) ? 1.0 : rhsn;
+        converged(nr);
+        cout << name << " iter " << std::setw(3) << nit << " residual "
+             << std::setw(12) << gmm::abs(nr) / a;
+//         if (nit % 100 == 0 && nit > 0) {
+//           cout << " (residual min " << resminreach / a << " mean val "
+//                << resadd / (100.0 * a) << " )";
+//           resadd = 0.0;
+//         }
+        cout <<  endl;
+        written = true;
+      }
+      return (converged(nr) || diverged(nr));
+    }
+    template <typename VECT> bool finished_vect(const VECT &v)
+    { return finished(double(gmm::vect_norm2(v))); }
+
+
+    void set_name(const std::string &n) { name = n; }
+    const std::string &get_name(void) const { return name; }
+
+  };
+
+}
+
+#endif /* GMM_ITER_H__ */
diff --git a/gmm/gmm_iter_solvers.h b/gmm/gmm_iter_solvers.h
new file mode 100644
index 000000000..cb34ef088
--- /dev/null
+++ b/gmm/gmm_iter_solvers.h
@@ -0,0 +1,111 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_iter_solvers.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Include standard gmm iterative solvers (cg, gmres, ...)
+*/
+#ifndef GMM_ITER_SOLVERS_H__
+#define GMM_ITER_SOLVERS_H__
+
+#include "gmm_iter.h"
+
+
+namespace gmm {
+
+  /** Mixed method to find a zero of a real function G, a priori between a
+   * and b. While G(a)*G(b) > 0, secant steps are applied (the midpoint is
+   * returned if |b - a| drops below tol). Once a sign change is bracketed,
+   * regula falsi and bisection steps alternate until |b - a| <= tol.
+   */
+  template <typename FUNC, typename T>
+  T find_root(const FUNC &G, T a = T(0), T b = T(1),
+              T tol = gmm::default_tol(T())) {
+    T c, Ga = G(a), Gb = G(b), Gc, d;
+    d = gmm::abs(b - a);
+#if 0
+    for (int i = 0; i < 4; i++) { /* secant iterations.                   */
+      if (d < tol) return (b + a) / 2.0;
+      c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c);
+      a = b; b = c; Ga = Gb; Gb = Gc;
+      d = gmm::abs(b - a);
+    }
+#endif
+    while (Ga * Gb > 0.0) { /* secant iterations.                         */
+      if (d < tol) return (b + a) / 2.0;
+      c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c);
+      a = b; b = c; Ga = Gb; Gb = Gc;
+      d = gmm::abs(b - a);
+    }
+    if (a > b) { Gc = Ga; Ga = Gb; Gb = Gc; } /* keep Ga = G(a), Gb = G(b) */
+    c = std::max(a, b); a = std::min(a, b); b = c; /* order so that a <= b */
+    while (d > tol) {
+      c = b - (b - a) * (Gb / (Gb - Ga)); /* regula falsi.     */
+      if (c > b) c = b;
+      if (c < a) c = a;
+      Gc = G(c);
+      if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; }
+      c = (b + a) / 2.0 ; Gc = G(c); /* Bisection.                        */
+      if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; }
+      d = gmm::abs(b - a); c = (b + a) / 2.0; if ((c == a) || (c == b)) d = 0.0;
+    }
+    return (b + a) / 2.0;
+  }
+  
+}
+
+#include "gmm_precond_diagonal.h"
+#include "gmm_precond_ildlt.h"
+#include "gmm_precond_ildltt.h"
+#include "gmm_precond_mr_approx_inverse.h"
+#include "gmm_precond_ilu.h"
+#include "gmm_precond_ilut.h"
+#include "gmm_precond_ilutp.h"
+
+
+
+#include "gmm_solver_cg.h"
+#include "gmm_solver_bicgstab.h"
+#include "gmm_solver_qmr.h"
+#include "gmm_solver_constrained_cg.h"
+#include "gmm_solver_Schwarz_additive.h"
+#include "gmm_modified_gram_schmidt.h"
+#include "gmm_tri_solve.h"
+#include "gmm_solver_gmres.h"
+#include "gmm_solver_bfgs.h"
+#include "gmm_least_squares_cg.h"
+
+// #include "gmm_solver_idgmres.h"
+
+
+
+#endif //  GMM_ITER_SOLVERS_H__
diff --git a/gmm/gmm_kernel.h b/gmm/gmm_kernel.h
new file mode 100644
index 000000000..ebd217610
--- /dev/null
+++ b/gmm/gmm_kernel.h
@@ -0,0 +1,55 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_kernel.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 15, 2003.
+   @brief Include the base gmm files.
+ */
+
+#ifndef GMM_KERNEL_H__
+#define GMM_KERNEL_H__
+
+#include "gmm_def.h"
+#include "gmm_blas.h"
+#include "gmm_real_part.h"
+#include "gmm_interface.h"
+#include "gmm_sub_vector.h"
+#include "gmm_sub_matrix.h"
+#include "gmm_vector_to_matrix.h"
+#include "gmm_vector.h"
+#include "gmm_matrix.h"
+#include "gmm_tri_solve.h"
+#include "gmm_blas_interface.h"
+#include "gmm_lapack_interface.h"
+
+
+#endif //  GMM_KERNEL_H__
diff --git a/gmm/gmm_lapack_interface.h b/gmm/gmm_lapack_interface.h
new file mode 100644
index 000000000..7888aea05
--- /dev/null
+++ b/gmm/gmm_lapack_interface.h
@@ -0,0 +1,470 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_lapack_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 7, 2003.
+   @brief gmm interface for LAPACK
+*/
+
+#ifndef GMM_LAPACK_INTERFACE_H
+#define GMM_LAPACK_INTERFACE_H
+
+#include "gmm_blas_interface.h"
+#include "gmm_dense_lu.h"
+#include "gmm_dense_qr.h"
+
+
+#if defined(GMM_USES_LAPACK)
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Operations interfaced for T = float, double, std::complex<float>      */
+  /*    or std::complex<double> :                                          */
+  /*                                                                       */
+  /* lu_factor(dense_matrix<T>, std::vector<int>)                          */
+  /* lu_solve(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
+  /* lu_solve(dense_matrix<T>, std::vector<int>, std::vector<T>,           */
+  /*          std::vector<T>)                                              */
+  /* lu_solve_transposed(dense_matrix<T>, std::vector<int>, std::vector<T>,*/
+  /*          std::vector<T>)                                              */
+  /* lu_inverse(dense_matrix<T>)                                           */
+  /* lu_inverse(dense_matrix<T>, std::vector<int>, dense_matrix<T>)        */
+  /*                                                                       */
+  /* qr_factor(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)          */
+  /*                                                                       */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>)                */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>,                */
+  /*                       dense_matrix<T>)                                */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >) */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >, */
+  /*                       dense_matrix<T>)                                */
+  /*                                                                       */
+  /* geev_interface_right                                                  */
+  /* geev_interface_left                                                   */
+  /*                                                                       */
+  /* schur(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)              */
+  /*                                                                       */
+  /* svd(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>, std::vector<T>)*/
+  /* svd(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>,                */
+  /*     std::vector<std::complex<T> >)                                    */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /* LAPACK functions used, declared variadic: calls are not type-checked. */
+  /* ********************************************************************* */
+
+  extern "C" {
+    void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...);
+    void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...);
+    void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...);
+    void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...);
+    void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...);
+    void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...);
+    void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...);
+    void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...);
+    void sgeesx_(...); void dgeesx_(...); void cgeesx_(...); void zgeesx_(...);
+    void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...);
+  }
+
+  /* ********************************************************************* */
+  /* LU decomposition: lu_factor(A, ipvt) calls ?getrf, returns its info.  */
+  /* ********************************************************************* */
+
+# define getrf_interface(lapack_name, base_type) inline                    \
+  size_type lu_factor(dense_matrix<base_type > &A, std::vector<int> &ipvt){\
+    GMMLAPACK_TRACE("getrf_interface");                                    \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A)), lda(m), info(0);     \
+    if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info);       \
+    return size_type(info); /* stays 0 if A is empty (no call made) */     \
+  }
+
+  getrf_interface(sgetrf_, BLAS_S)
+  getrf_interface(dgetrf_, BLAS_D)
+  getrf_interface(cgetrf_, BLAS_C)
+  getrf_interface(zgetrf_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* LU solve: copies b into x, then calls ?getrs on x (trans 'N' or 'T'). */
+  /* ********************************************************************* */
+
+# define getrs_interface(f_name, trans1, lapack_name, base_type) inline    \
+  void f_name(const dense_matrix<base_type > &A,                           \
+              const std::vector<int> &ipvt, std::vector<base_type > &x,    \
+              const std::vector<base_type > &b) {                          \
+    GMMLAPACK_TRACE("getrs_interface");                                    \
+    int n = int(mat_nrows(A)), info, nrhs(1);                              \
+    gmm::copy(b, x); trans1; /* trans1 declares the char t used below */   \
+    if (n)                                                                 \
+      lapack_name(&t, &n, &nrhs, &(A(0,0)),&n,&ipvt[0], &x[0], &n, &info); \
+  }
+  
+# define getrs_trans_n const char t = 'N'
+# define getrs_trans_t const char t = 'T'
+
+  getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
+  getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
+  getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
+  getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* LU inverse: lu_inverse(LU, ipvt, A) copies LU into A, then ?getri     */
+  /* inverts A in place; the work array is sized by an lwork = -1 query.   */
+  /* ********************************************************************* */
+
+# define getri_interface(lapack_name, base_type) inline                    \
+  void lu_inverse(const dense_matrix<base_type > &LU,                      \
+       std::vector<int> &ipvt, const dense_matrix<base_type > &A_) {       \
+    GMMLAPACK_TRACE("getri_interface");                                    \
+    dense_matrix<base_type> &A = const_cast<dense_matrix<base_type > &>(A_); \
+    int n = int(mat_nrows(A)), info, lwork(-1); base_type work1;           \
+    if (!n) return;                                                        \
+    std::copy(LU.begin(), LU.end(), A.begin());                            \
+    lapack_name(&n, &A(0,0), &n, &ipvt[0], &work1, &lwork, &info);         \
+    lwork = int(gmm::real(work1)); std::vector<base_type > work(lwork);    \
+    lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info);       \
+  }
+
+  getri_interface(sgetri_, BLAS_S)
+  getri_interface(dgetri_, BLAS_D)
+  getri_interface(cgetri_, BLAS_C)
+  getri_interface(zgetri_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* QR factorization: qr_factor(A) calls ?geqrf twice (query, then work). */
+  /* ********************************************************************* */
+
+# define geqrf_interface(lapack_name1, base_type) inline                   \
+  void qr_factor(dense_matrix<base_type > &A){                             \
+    GMMLAPACK_TRACE("geqrf_interface");                                    \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1);     \
+    base_type work1; /* receives the workspace size from the query */      \
+    if (m && n) {                                                          \
+      std::vector<base_type > tau(n); /* local: dropped on return */       \
+      lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1  , &lwork, &info); \
+      lwork = int(gmm::real(work1)); /* size given by the query */         \
+      std::vector<base_type > work(lwork);                                 \
+      lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \
+      GMM_ASSERT1(!info, "QR factorization failed");                       \
+    }                                                                      \
+  }
+    
+  geqrf_interface(sgeqrf_, BLAS_S)
+  geqrf_interface(dgeqrf_, BLAS_D)
+    // For complex values, Householder vectors are not the same as in
+    // gmm::lu_factor. Impossible to interface for the moment.
+    //  geqrf_interface(cgeqrf_, BLAS_C)
+    //  geqrf_interface(zgeqrf_, BLAS_Z)
+
+# define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline    \
+  void qr_factor(const dense_matrix<base_type > &A,                        \
+       dense_matrix<base_type > &Q, dense_matrix<base_type > &R) {         \
+    GMMLAPACK_TRACE("geqrf_interface2");                                   \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1);     \
+    base_type work1;                                                       \
+    if (m && n) {                                                          \
+      std::copy(A.begin(), A.end(), Q.begin()); /* factor a copy in Q */   \
+      std::vector<base_type > tau(n);                                      \
+      lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1  , &lwork, &info); \
+      lwork = int(gmm::real(work1));                                       \
+      std::vector<base_type > work(lwork);                                 \
+      lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \
+      GMM_ASSERT1(!info, "QR factorization failed");                       \
+      base_type *p = &R(0,0), *q = &Q(0,0);                                \
+      for (int j = 0; j < n; ++j, q += m-n) /* q skips m-n entries */      \
+        for (int i = 0; i < n; ++i, ++p, ++q)                              \
+          *p = (j < i) ? base_type(0) : *q; /* R gets 0 where j < i */     \
+      lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \
+    }                                                                      \
+    else gmm::clear(Q); /* m == 0 or n == 0 */                             \
+  }
+
+  geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S)
+  geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
+  geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
+  geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
+  
+  /* ********************************************************************* */
+  /* QR algorithm for eigenvalues search: implicit_qr_algorithm via ?gees. */
+  /* ********************************************************************* */
+
+# define gees_interface(lapack_name, base_type)                            \
+  template <typename VECT> inline void implicit_qr_algorithm(              \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q,                                      \
+         double tol=gmm::default_tol(base_type()), bool compvect = true) { \
+    GMMLAPACK_TRACE("gees_interface");                                     \
+    typedef bool (*L_fp)(...);  L_fp p = 0; /* p: null function ptr */     \
+    int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;     \
+    if (!n) return; /* nothing to do for 0 rows */                         \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H); /* H: copy of A */   \
+    char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
+    std::vector<double> rwork(n), eigv1(n), eigv2(n);                      \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
+                &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
+    lwork = int(gmm::real(work1)); /* size given by the query */           \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
+                &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    extract_eig(H, const_cast<VECT &>(eigval_), tol);                      \
+  }
+
+# define gees_interface2(lapack_name, base_type) /* used with cgees_, zgees_ */ \
+  template <typename VECT> inline void implicit_qr_algorithm(              \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q,                                      \
+         double tol=gmm::default_tol(base_type()), bool compvect = true) { \
+    GMMLAPACK_TRACE("gees_interface2");                                    \
+    typedef bool (*L_fp)(...);  L_fp p = 0;  /* null callback argument */  \
+    int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;     \
+    if (!n) return;                 /* empty matrix: nothing to compute */ \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H); /* A is left intact */ \
+    char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
+    std::vector<double> rwork(n), eigvv(n*2); /* rwork fills two arg slots */ \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
+                &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \
+    lwork = int(gmm::real(work1)); /* size reported by the lwork=-1 call */ \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
+                &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    extract_eig(H, const_cast<VECT &>(eigval_), tol); /* eigval_ is output */ \
+  }
+
+  gees_interface(sgees_, BLAS_S)    /* two-array eigenvalue variant (S, D) */
+  gees_interface(dgees_, BLAS_D)
+  gees_interface2(cgees_, BLAS_C)   /* single-array eigenvalue variant (C, Z) */
+  gees_interface2(zgees_, BLAS_Z)
+
+
+# define jobv_right char jobvl = 'N', jobvr = 'V'; /* pasted as jobv_ ## side */
+# define jobv_left char jobvl = 'V', jobvr = 'N';  /* by the geev macros below */
+
+# define geev_interface(lapack_name, base_type, side) /* sgeev_, dgeev_ */ \
+  template <typename VECT> inline void geev_interface_ ## side(             \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q) {                                    \
+    GMMLAPACK_TRACE("geev_interface");                                     \
+    int n = int(mat_nrows(A)), info, lwork(-1); base_type work1;           \
+    if (!n) return;                 /* empty matrix: nothing to compute */ \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H); /* A is left intact */ \
+    jobv_ ## side      /* jobv_left or jobv_right: declares jobvl, jobvr */ \
+    std::vector<base_type > eigvr(n), eigvi(n);  /* real / imaginary parts */ \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
+                &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info);          \
+    lwork = int(gmm::real(work1)); /* size reported by the lwork=-1 call */ \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
+                &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info);        \
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_)));         \
+    gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_)));         \
+  }
+
+# define geev_interface2(lapack_name, base_type, side) /* cgeev_, zgeev_ */ \
+  template <typename VECT> inline void geev_interface_ ## side(            \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q) {                                    \
+    GMMLAPACK_TRACE("geev_interface");                                     \
+    int n = int(mat_nrows(A)), info, lwork(-1); base_type work1;           \
+    if (!n) return;                 /* empty matrix: nothing to compute */ \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H); /* A is left intact */ \
+    jobv_ ## side      /* jobv_left or jobv_right: declares jobvl, jobvr */ \
+    std::vector<base_type::value_type> rwork(2*n);                         \
+    std::vector<base_type> eigv(n);          /* copied to eigval_ at the end */ \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
+                &Q(0,0), &n, &work1, &lwork, &rwork[0], &info);            \
+    lwork = int(gmm::real(work1)); /* size reported by the lwork=-1 call */ \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
+                &Q(0,0), &n, &work[0], &lwork,  &rwork[0],  &info);        \
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    gmm::copy(eigv, const_cast<VECT &>(eigval_));                          \
+  }
+
+  geev_interface(sgeev_, BLAS_S, right)    /* defines geev_interface_right */
+  geev_interface(dgeev_, BLAS_D, right)
+  geev_interface2(cgeev_, BLAS_C, right)
+  geev_interface2(zgeev_, BLAS_Z, right)
+
+  geev_interface(sgeev_, BLAS_S, left)     /* defines geev_interface_left */
+  geev_interface(dgeev_, BLAS_D, left)
+  geev_interface2(cgeev_, BLAS_C, left)
+  geev_interface2(zgeev_, BLAS_Z, left) 
+    
+
+  /* ********************************************************************* */
+  /* SCHUR algorithm:                                                      */
+  /*  A = Q*S*(Q^T), with Q orthogonal and S upper quasi-triangular        */
+  /* ********************************************************************* */
+
+# define geesx_interface(lapack_name, base_type) inline /* sgeesx_, dgeesx_ */ \
+  void schur(dense_matrix<base_type> &A,                                \
+             dense_matrix<base_type> &S,                                \
+             dense_matrix<base_type> &Q) {                              \
+    GMMLAPACK_TRACE("geesx_interface");                                 \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A));                   \
+    GMM_ASSERT1(m == n, "Schur decomposition requires square matrix");  \
+    char jobvs = 'V', sort = 'N', sense = 'N';                          \
+    bool select = false;                                                \
+    int lwork = 8*n, sdim = 0, liwork = 1; /* fixed sizes, no size query */ \
+    std::vector<base_type> work(lwork), wr(n), wi(n); /* wr, wi stay local */ \
+    std::vector<int> iwork(liwork);                                     \
+    std::vector<int> bwork(1);                                          \
+    resize(S, n, n); copy(A, S);    /* lapack_name works in place on S */ \
+    resize(Q, n, n);                                                    \
+    base_type rconde(0), rcondv(0);                                     \
+    int info = -1;                                                      \
+    lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n,        \
+                &sdim, &wr[0], &wi[0], &Q(0,0), &n, &rconde, &rcondv,   \
+                &work[0], &lwork, &iwork[0], &liwork, &bwork[0], &info);\
+    GMM_ASSERT1(!info, "SCHUR algorithm failed");                       \
+  }
+
+# define geesx_interface2(lapack_name, base_type) inline /* cgeesx_, zgeesx_ */ \
+  void schur(dense_matrix<base_type> &A,                                \
+             dense_matrix<base_type> &S,                                \
+             dense_matrix<base_type> &Q) {                              \
+    GMMLAPACK_TRACE("geesx_interface");                                 \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A));                   \
+    GMM_ASSERT1(m == n, "Schur decomposition requires square matrix");  \
+    char jobvs = 'V', sort = 'N', sense = 'N';                          \
+    bool select = false;                                                \
+    int lwork = 8*n, sdim = 0;        /* fixed size, no size query */    \
+    std::vector<base_type::value_type> rwork(lwork);                    \
+    std::vector<base_type> work(lwork), w(n);      /* w stays local */   \
+    std::vector<int> bwork(1);                                          \
+    resize(S, n, n); copy(A, S);    /* lapack_name works in place on S */ \
+    resize(Q, n, n);                                                    \
+    base_type rconde(0), rcondv(0);                                     \
+    int info = -1;                                                      \
+    lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n,        \
+                &sdim, &w[0], &Q(0,0), &n, &rconde, &rcondv,            \
+                &work[0], &lwork, &rwork[0], &bwork[0], &info);         \
+    GMM_ASSERT1(!info, "SCHUR algorithm failed");                       \
+  }
+
+  geesx_interface(sgeesx_, BLAS_S)    /* schur() with wr/wi arrays (S, D) */
+  geesx_interface(dgeesx_, BLAS_D)
+  geesx_interface2(cgeesx_, BLAS_C)   /* schur() with one w array (C, Z) */
+  geesx_interface2(zgeesx_, BLAS_Z)
+
+  /* Const-input front end: the decomposition runs on a private copy of A_. */
+  template <typename MAT>
+  void schur(const MAT &A_, MAT &S, MAT &Q) {
+    MAT scratch(A_); schur(scratch, S, Q);
+  }
+
+
+  /* ********************************************************************* */
+  /* Interface to SVD. Does not correspond to a Gmm++ functionality.       */
+  /* Author : Sebastian Nowozin <sebastian.nowozin@tuebingen.mpg.de>       */
+  /* ********************************************************************* */
+    
+# define gesvd_interface(lapack_name, base_type) inline                 \
+  void svd(dense_matrix<base_type> &X,   /* in/out for lapack_name */   \
+           dense_matrix<base_type> &U,           /* resized to m x m */ \
+           dense_matrix<base_type> &Vtransposed, /* resized to n x n */ \
+           std::vector<base_type> &sigma) {   /* resized to min(m,n) */ \
+    GMMLAPACK_TRACE("gesvd_interface");                                 \
+    int m = int(mat_nrows(X)), n = int(mat_ncols(X));                   \
+    int mn_min = m < n ? m : n, mn_max = m < n ? n : m;                 \
+    sigma.resize(mn_min); resize(U, m, m); resize(Vtransposed, n, n);   \
+    if (!mn_min) return;   /* empty X: X(0,0) and sigma[0] do not exist */ \
+    /* lwork must reach 3*min+max; 15*min (>= 5*min) stays as the floor */ \
+    int lwork = 3 * mn_min + mn_max;                                    \
+    if (lwork < 15 * mn_min) lwork = 15 * mn_min;                       \
+    std::vector<base_type> work(lwork); char job = 'A'; int info = -1;  \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work[0], &lwork, &info);    \
+    GMM_ASSERT1(!info, "SVD algorithm failed");                         \
+  }
+
+# define cgesvd_interface(lapack_name, base_type, base_type2) inline    \
+  void svd(dense_matrix<base_type> &X,   /* in/out for lapack_name */   \
+           dense_matrix<base_type> &U,           /* resized to m x m */ \
+           dense_matrix<base_type> &Vtransposed, /* resized to n x n */ \
+           std::vector<base_type2> &sigma) {  /* resized to min(m,n) */ \
+    GMMLAPACK_TRACE("gesvd_interface");                                 \
+    int m = int(mat_nrows(X)), n = int(mat_ncols(X));                   \
+    int mn_min = m < n ? m : n, mn_max = m < n ? n : m;                 \
+    sigma.resize(mn_min); resize(U, m, m); resize(Vtransposed, n, n);   \
+    if (!mn_min) return;   /* empty X: X(0,0) and sigma[0] do not exist */ \
+    /* lwork must reach 2*min+max; 15*min is kept as the usual floor   */ \
+    int lwork = 2 * mn_min + mn_max;                                    \
+    if (lwork < 15 * mn_min) lwork = 15 * mn_min;                       \
+    std::vector<base_type> work(lwork); char job = 'A'; int info = -1;  \
+    std::vector<base_type2> rwork(5 * mn_min);                          \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work[0], &lwork,            \
+                &rwork[0], &info);                                      \
+    GMM_ASSERT1(!info, "SVD algorithm failed");                         \
+  }
+  
+  gesvd_interface(sgesvd_, BLAS_S)           /* sigma has the matrix type */
+  gesvd_interface(dgesvd_, BLAS_D)
+  cgesvd_interface(cgesvd_, BLAS_C, BLAS_S)  /* 3rd arg: type of sigma, rwork */
+  cgesvd_interface(zgesvd_, BLAS_Z, BLAS_D)
+
+  /* Const-input front end: the factorization runs on a private copy of X_. */
+  template <typename MAT, typename VEC>
+  void svd(const MAT &X_, MAT &U, MAT &Vtransposed, VEC &sigma) {
+    MAT scratch(X_); svd(scratch, U, Vtransposed, sigma);
+  }
+    
+
+
+
+}
+
+#else
+
+namespace gmm {
+
+  /* Stand-in compiled when GMM_USES_LAPACK is off: every call fails the   */
+  /* GMM_ASSERT1 below, none of the arguments is read or written.          */
+  template <typename MAT> void schur(const MAT &A_, MAT &S, MAT &Q) {
+    GMM_ASSERT1(false,
+                "Use of function schur(A,S,Q) requires GetFEM++ "
+                "to be built with Lapack");
+  }
+}// namespace gmm
+
+#endif // GMM_USES_LAPACK
+
+#endif // GMM_LAPACK_INTERFACE_H
diff --git a/gmm/gmm_least_squares_cg.h b/gmm/gmm_least_squares_cg.h
new file mode 100644
index 000000000..71e446658
--- /dev/null
+++ b/gmm/gmm_least_squares_cg.h
@@ -0,0 +1,96 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard, Benjamin Schleimer
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_least_squares_cg.h
+   @author Benjamin Schleimer <bensch128  (at) yahoo (dot) com>
+   @date January 23, 2007.
+   @brief Conjugate gradient least squares algorithm. 
+   Algorithm taken from http://www.stat.washington.edu/wxs/Stat538-w05/Notes/conjugate-gradients.pdf page 6
+*/
+#ifndef GMM_LEAST_SQUARES_CG_H__
+#define GMM_LEAST_SQUARES_CG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_conjugated.h"
+
+namespace gmm {
+
+  /* Conjugate-gradient least squares iteration on C x ~ y.  x is the start */
+  /* value on entry and the iterate on exit; iter drives the stopping test. */
+  template <typename Matrix, typename Vector1, typename Vector2>
+  void least_squares_cg(const Matrix& C, Vector1& x, const Vector2& y,
+                        iteration &iter) {
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T grad_sq, prev_grad_sq(0), step;
+    temp_vector dir(vect_size(x)), c_dir(vect_size(y)), grad(vect_size(x));
+    temp_vector resid(vect_size(y));
+
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(y, y))));
+    if (iter.get_rhsnorm() == 0.0) {  /* zero right-hand side: x is cleared */
+      clear(x);
+      return;
+    }
+
+    mult(C, scaled(x, T(-1)), y, resid);         /* resid = y - C x          */
+    mult(conjugated(C), resid, grad);            /* grad = conjugated(C) * resid */
+    grad_sq = vect_hp(grad, grad);
+    copy(grad, dir);
+
+    for (; !iter.finished_vect(grad); ++iter) {
+
+      if (!iter.first()) {
+        grad_sq = vect_hp(grad, grad);
+        add(grad, scaled(dir, grad_sq / prev_grad_sq), dir);
+      }
+
+      mult(C, dir, c_dir);
+      step = grad_sq / vect_hp(c_dir, c_dir);
+      add(scaled(dir, step), x);
+      add(scaled(c_dir, -step), resid);
+      // NOTE: how do we minimize the impact to the transpose?
+      mult(conjugated(C), resid, grad);
+      prev_grad_sq = grad_sq;
+
+    }
+  }
+
+  /* const-x overload (all template params deducible); casts and forwards. */
+  template <typename Matrix, typename Vector1, typename Vector2> inline
+  void least_squares_cg(const Matrix& C, const Vector1& x, const Vector2& y,
+                        iteration &iter)
+  { least_squares_cg(C, linalg_const_cast(x), y, iter); }
+}
+
+
+#endif //  GMM_LEAST_SQUARES_CG_H__
diff --git a/gmm/gmm_matrix.h b/gmm/gmm_matrix.h
new file mode 100644
index 000000000..23fb9d267
--- /dev/null
+++ b/gmm/gmm_matrix.h
@@ -0,0 +1,1199 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_matrix.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+    @brief Declaration of some matrix types (gmm::dense_matrix,
+    gmm::row_matrix, gmm::col_matrix, gmm::csc_matrix, etc.)
+*/
+
+#ifndef GMM_MATRIX_H__
+#define GMM_MATRIX_H__
+
+#include "gmm_vector.h"
+#include "gmm_sub_vector.h"
+#include "gmm_sub_matrix.h"
+#include "gmm_transposed.h"
+
+namespace gmm
+{
+
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Identity matrix                         		  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+  struct identity_matrix {      /* empty tag type: holds no data members */
+    template <class MAT> void build_with(const MAT &) {}  /* does nothing */
+  };
+
+  /* v1 += identity: adds one to each entry of the main diagonal of v1.    */
+  template <typename M> inline void add(const identity_matrix&, M &v1) {
+    typedef typename linalg_traits<M>::value_type T;
+    const size_type nd = std::min(gmm::mat_nrows(v1), gmm::mat_ncols(v1));
+    for (size_type k = 0; k != nd; ++k) v1(k,k) += T(1);
+  }
+  /* Same operation for a matrix received through a const reference.       */
+  template <typename M> inline
+  void add(const identity_matrix &II, const M &v1) { add(II, linalg_const_cast(v1)); }
+
+  template <typename V1, typename V2> inline
+  void mult(const identity_matrix&, const V1 &v1, V2 &v2)
+  { copy(v1, v2); }                    /* identity * v1 is v1: plain copy */
+  template <typename V1, typename V2> inline
+  void mult(const identity_matrix&, const V1 &v1, const V2 &v2) 
+  { copy(v1, v2); }                    /* same, v2 taken by const ref */
+  template <typename V1, typename V2, typename V3> inline
+  void mult(const identity_matrix&, const V1 &v1, const V2 &v2, V3 &v3)
+  { add(v1, v2, v3); }                 /* identity * v1 + v2 goes to v3 */
+  template <typename V1, typename V2, typename V3> inline
+  void mult(const identity_matrix&, const V1 &v1, const V2 &v2, const V3 &v3)
+  { add(v1, v2, v3); }                 /* same, v3 taken by const ref */
+  template <typename V1, typename V2> inline
+  void left_mult(const identity_matrix&, const V1 &v1, V2 &v2)
+  { copy(v1, v2); }                    /* reduces to copying v1 into v2 */
+  template <typename V1, typename V2> inline
+  void left_mult(const identity_matrix&, const V1 &v1, const V2 &v2) 
+  { copy(v1, v2); }                    /* same, v2 taken by const ref */
+  template <typename V1, typename V2> inline
+  void right_mult(const identity_matrix&, const V1 &v1, V2 &v2)
+  { copy(v1, v2); }                    /* reduces to copying v1 into v2 */
+  template <typename V1, typename V2> inline
+  void right_mult(const identity_matrix&, const V1 &v1, const V2 &v2) 
+  { copy(v1, v2); }                    /* same, v2 taken by const ref */
+  template <typename V1, typename V2> inline
+  void transposed_left_mult(const identity_matrix&, const V1 &v1, V2 &v2)
+  { copy(v1, v2); }                    /* reduces to copying v1 into v2 */
+  template <typename V1, typename V2> inline
+  void transposed_left_mult(const identity_matrix&, const V1 &v1,const V2 &v2) 
+  { copy(v1, v2); }                    /* same, v2 taken by const ref */
+  template <typename V1, typename V2> inline
+  void transposed_right_mult(const identity_matrix&, const V1 &v1, V2 &v2)
+  { copy(v1, v2); }                    /* reduces to copying v1 into v2 */
+  template <typename V1, typename V2> inline
+  void transposed_right_mult(const identity_matrix&,const V1 &v1,const V2 &v2) 
+  { copy(v1, v2); }                    /* same, v2 taken by const ref */
+  template <typename M> void copy_ident(const identity_matrix&, M &m) {
+    size_type i = 0, n = std::min(mat_nrows(m), mat_ncols(m));
+    clear(m);                  /* cleared first, then the diagonal is set */
+    for (; i < n; ++i) m(i,i) = typename linalg_traits<M>::value_type(1);
+  }
+  template <typename M> inline void copy(const identity_matrix&, M &m)
+  { copy_ident(identity_matrix(), m); }      /* delegates to copy_ident */
+  template <typename M> inline void copy(const identity_matrix &, const M &m)
+  { copy_ident(identity_matrix(), linalg_const_cast(m)); } /* const-ref m */
+  template <typename V1, typename V2> inline
+  typename linalg_traits<V1>::value_type
+  vect_sp(const identity_matrix &, const V1 &v1, const V2 &v2)
+  { return vect_sp(v1, v2); }      /* forwards to the two-argument vect_sp */
+  template <typename V1, typename V2> inline
+  typename linalg_traits<V1>::value_type
+  vect_hp(const identity_matrix &, const V1 &v1, const V2 &v2)
+  { return vect_hp(v1, v2); }      /* forwards to the two-argument vect_hp */
+  template<typename M> inline bool is_identity(const M&) { return false; } /* any other type */
+  inline bool is_identity(const identity_matrix&) { return true; } /* the tag type itself */
+
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Row matrix                                   		  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+  template<typename V> class row_matrix {  /* matrix kept as a vector of rows V */
+  protected :
+    std::vector<V> li; /* array of rows.                                  */
+    size_type nc;      /* column count, returned by ncols()               */
+    
+  public :
+    
+    typedef typename linalg_traits<V>::reference reference;
+    typedef typename linalg_traits<V>::value_type value_type;
+    /* r rows, each one copy-constructed from V(c); default: no rows, nc 0 */
+    row_matrix(size_type r, size_type c) : li(r, V(c)), nc(c) {}
+    row_matrix(void) : nc(0) {}
+    reference operator ()(size_type l, size_type c) 
+    { return li[l][c]; }                   /* entry (l, c): row l, then index c */
+    value_type operator ()(size_type l, size_type c) const
+    { return li[l][c]; }                   /* const access returns by value */
+
+    void clear_mat();
+    void resize(size_type m, size_type n);
+
+    typename std::vector<V>::iterator begin(void)   /* iteration is over rows */
+    { return li.begin(); }
+    typename std::vector<V>::iterator end(void)  
+    { return li.end(); }
+    typename std::vector<V>::const_iterator begin(void) const
+    { return li.begin(); }
+    typename std::vector<V>::const_iterator end(void) const
+    { return li.end(); }
+    
+    /* row(i) and operator[](i) both return row i; the index is not checked */
+    V& row(size_type i) { return li[i]; }
+    const V& row(size_type i) const { return li[i]; }
+    V& operator[](size_type i) { return li[i]; }
+    const V& operator[](size_type i) const { return li[i]; }
+    
+    inline size_type nrows(void) const { return li.size(); }
+    inline size_type ncols(void) const { return nc;        }
+
+    void swap(row_matrix<V> &m) { std::swap(li, m.li); std::swap(nc, m.nc); }
+    void swap_row(size_type i, size_type j) { std::swap(li[i], li[j]); }
+  };
+
+  /* m rows of length n; surviving rows are re-sized only if n != nc.      */
+  template<typename V> void row_matrix<V>::resize(size_type m, size_type n) {
+    const size_type kept = std::min(nrows(), m);
+    li.resize(m);
+    for (size_type r = kept; r < m; ++r) gmm::resize(li[r], n);
+    if (n == nc) return;
+    for (size_type r = 0; r < kept; ++r) gmm::resize(li[r], n);
+    nc = n;
+  }
+
+
+  template<typename V> void row_matrix<V>::clear_mat()  /* clear() on each row */
+  { for (size_type i=0; i < nrows(); ++i) clear(li[i]); }
+
+  template <typename V> struct linalg_traits<row_matrix<V> > { /* traits of row_matrix */
+    typedef row_matrix<V> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type; /* taken from V */
+    typedef typename linalg_traits<V>::reference reference;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef V & sub_row_type;                 /* a row is a reference to a V */
+    typedef const V & const_sub_row_type;
+    typedef typename std::vector<V>::iterator row_iterator;
+    typedef typename std::vector<V>::const_iterator const_row_iterator;
+    typedef abstract_null_type sub_col_type;  /* column types are null types */
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef row_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static row_iterator row_begin(this_type &m) { return m.begin(); }
+    static row_iterator row_end(this_type &m) { return m.end(); }
+    static const_row_iterator row_begin(const this_type &m)
+    { return m.begin(); }
+    static const_row_iterator row_end(const this_type &m)
+    { return m.end(); }
+    static const_sub_row_type row(const const_row_iterator &it)
+    { return const_sub_row_type(*it); }
+    static sub_row_type row(const row_iterator &it) 
+    { return sub_row_type(*it); }
+    static origin_type* origin(this_type &m) { return &m; } /* the matrix itself */
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.clear_mat(); }
+    static value_type access(const const_row_iterator &itrow, size_type j)
+    { return (*itrow)[j]; }                   /* entry j of the row at itrow */
+    static reference access(const row_iterator &itrow, size_type j)
+    { return (*itrow)[j]; }
+    static void resize(this_type &v, size_type m, size_type n)
+    { v.resize(m, n); }
+    static void reshape(this_type &, size_type, size_type)
+    { GMM_ASSERT1(false, "Sorry, to be done"); } /* not implemented */
+  };
+
+  template<typename V> std::ostream &operator <<   /* streams m via gmm::write */
+    (std::ostream &o, const row_matrix<V>& m) { gmm::write(o,m); return o; }
+
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Column matrix                                		  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+  template<typename V> class col_matrix {  /* matrix kept as a vector of columns V */
+  protected :
+    std::vector<V> li; /* array of columns.                               */
+    size_type nr;      /* row count, returned by nrows()                  */
+    
+  public :
+    
+    typedef typename linalg_traits<V>::reference reference;
+    typedef typename linalg_traits<V>::value_type value_type;
+    /* c columns, each one copy-constructed from V(r); default: none, nr 0 */
+    col_matrix(size_type r, size_type c) : li(c, V(r)), nr(r) { }
+    col_matrix(void) : nr(0) {}
+    reference operator ()(size_type l, size_type c)
+    { return li[c][l]; }                /* entry (l, c): column c, then index l */
+    value_type operator ()(size_type l, size_type c) const
+    { return li[c][l]; }                /* const access returns by value */
+
+    void clear_mat();
+    void resize(size_type, size_type);
+
+    V& col(size_type i) { return li[i]; }   /* same column as operator[](i) */
+    const V& col(size_type i) const { return li[i]; }
+    V& operator[](size_type i) { return li[i]; }
+    const V& operator[](size_type i) const { return li[i]; }
+
+    typename std::vector<V>::iterator begin(void) /* iteration is over columns */
+    { return li.begin(); }
+    typename std::vector<V>::iterator end(void)  
+    { return li.end(); }
+    typename std::vector<V>::const_iterator begin(void) const
+    { return li.begin(); }
+    typename std::vector<V>::const_iterator end(void) const
+    { return li.end(); }
+    
+    inline size_type ncols(void) const { return li.size(); }
+    inline size_type nrows(void) const { return nr; }
+
+    void swap(col_matrix<V> &m) { std::swap(li, m.li); std::swap(nr, m.nr); }
+    void swap_col(size_type i, size_type j) { std::swap(li[i], li[j]); }
+  };
+
+  /* n columns of length m; surviving columns are re-sized only if m != nr. */
+  template<typename V> void col_matrix<V>::resize(size_type m, size_type n) {
+    const size_type kept = std::min(ncols(), n);
+    li.resize(n);
+    for (size_type c = kept; c < n; ++c) gmm::resize(li[c], m);
+    if (m == nr) return;
+    for (size_type c = 0; c < kept; ++c) gmm::resize(li[c], m);
+    nr = m;
+  }
+
+  template<typename V> void col_matrix<V>::clear_mat()  /* clear() on each column */
+  { for (size_type i=0; i < ncols(); ++i)  clear(li[i]); }
+
+  template <typename V> struct linalg_traits<col_matrix<V> > { /* traits of col_matrix */
+    typedef col_matrix<V> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type; /* taken from V */
+    typedef typename linalg_traits<V>::reference reference;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef V &sub_col_type;                  /* a column is a reference to a V */
+    typedef const V &const_sub_col_type;
+    typedef typename std::vector<V>::iterator col_iterator;
+    typedef typename std::vector<V>::const_iterator const_col_iterator;
+    typedef abstract_null_type sub_row_type;  /* row types are null types */
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef col_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static col_iterator col_begin(this_type &m) { return m.begin(); }
+    static col_iterator col_end(this_type &m) { return m.end(); }
+    static const_col_iterator col_begin(const this_type &m)
+    { return m.begin(); }
+    static const_col_iterator col_end(const this_type &m)
+    { return m.end(); }
+    static const_sub_col_type col(const const_col_iterator &it)
+    { return *it; }
+    static sub_col_type col(const col_iterator &it) 
+    { return *it; }
+    static origin_type* origin(this_type &m) { return &m; } /* the matrix itself */
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.clear_mat(); }
+    static value_type access(const const_col_iterator &itcol, size_type j)
+    { return (*itcol)[j]; }                   /* entry j of the column at itcol */
+    static reference access(const col_iterator &itcol, size_type j)
+    { return (*itcol)[j]; }
+    static void resize(this_type &v, size_type m, size_type n)
+    { v.resize(m,n); }
+    static void reshape(this_type &, size_type, size_type)
+    { GMM_ASSERT1(false, "Sorry, to be done"); } /* not implemented */
+  };
+
+  template<typename V> std::ostream &operator <<   /* streams m via gmm::write */
+    (std::ostream &o, const col_matrix<V>& m) { gmm::write(o,m); return o; }
+
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Dense matrix                                		  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+  template<typename T> class dense_matrix : public std::vector<T> {
+  public:
+    typedef typename std::vector<T>::size_type size_type;
+    typedef typename std::vector<T>::iterator iterator;
+    typedef typename std::vector<T>::const_iterator const_iterator;
+    typedef typename std::vector<T>::reference reference;
+    typedef typename std::vector<T>::const_reference const_reference;
+    
+  protected:
+    size_type nbc, nbl;   /* column count (ncols()) and row count (nrows()) */
+    
+  public:
+    /* Entry (l, c) sits at offset c*nbl + l of the underlying std::vector. */
+    inline const_reference operator ()(size_type l, size_type c) const {
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(this->begin() + c*nbl+l);
+    }
+    inline reference operator ()(size_type l, size_type c) {
+      GMM_ASSERT2(l < nbl && c < nbc, "out of range");
+      return *(this->begin() + c*nbl+l);
+    }
+
+    std::vector<T> &as_vector(void) { return *this; } /* view as the base vector */
+    const std::vector<T> &as_vector(void) const { return *this; }
+
+    void resize(size_type, size_type);
+    void base_resize(size_type, size_type);
+    void reshape(size_type, size_type);
+    
+    void fill(T a, T b = T(0));
+    inline size_type nrows(void) const { return nbl; }
+    inline size_type ncols(void) const { return nbc; }
+    void swap(dense_matrix<T> &m)      /* swaps the storage and both dimensions */
+    { std::vector<T>::swap(m); std::swap(nbc, m.nbc); std::swap(nbl, m.nbl); }
+    /* l rows by c columns of value-initialized entries; default: 0 by 0 */
+    dense_matrix(size_type l, size_type c)
+      : std::vector<T>(c*l), nbc(c), nbl(l)  {}
+    dense_matrix(void) { nbl = nbc = 0; }
+  };
+
+  /* Reinterpret the stored entries as an m x n matrix; the entry count must stay the same. */
+  template<typename T> void dense_matrix<T>::reshape(size_type m,size_type n) {
+    GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch"); nbl = m; nbc = n;
+  }
+
+  /* Resize the base vector to m*n entries and record the new shape; entries are not re-laid out. */
+  template<typename T> void dense_matrix<T>::base_resize(size_type m, size_type n)
+  { std::vector<T>::resize(m*n); nbl = m; nbc = n; }
+  
+  template<typename T> void dense_matrix<T>::resize(size_type m, size_type n) { // m rows, n columns
+    if (n*m > nbc*nbl) std::vector<T>::resize(n*m); // grow first so every target offset exists
+    if (m < nbl) { // fewer rows: pack the kept columns towards the front, lowest column first
+      for (size_type i = 1; i < std::min(nbc, n); ++i)
+	std::copy(this->begin()+i*nbl, this->begin()+(i*nbl+m),
+		  this->begin()+i*m);
+      for (size_type i = std::min(nbc, n); i < n; ++i) // columns that did not exist before
+	std::fill(this->begin()+(i*m), this->begin()+(i+1)*m, T(0));
+      }
+    else if (m > nbl) { /* do nothing when the nb of rows does not change */
+      for (size_type i = std::min(nbc, n); i > 1; --i) // destination starts inside the source range,
+	std::copy_backward(this->begin()+(i-1)*nbl, this->begin()+i*nbl, // so copy from the last entry down
+			   this->begin()+((i-1)*m+nbl));
+      for (size_type i = 0; i < std::min(nbc, n); ++i) // zero the rows appended to each kept column
+	std::fill(this->begin()+(i*m+nbl), this->begin()+(i+1)*m, T(0));
+    }
+    if (n*m < nbc*nbl) std::vector<T>::resize(n*m); // shrink only after the data has been moved
+    nbl = m; nbc = n;
+  }
+  
+  template<typename T> void dense_matrix<T>::fill(T a, T b) { // a on the diagonal, b everywhere else
+    std::fill(this->begin(), this->end(), b);
+    if (!(a != b)) return; // the diagonal already holds the requested value
+    for (size_type k = 0, diag = std::min(nbl, nbc); k < diag; ++k) (*this)(k,k) = a;
+  }
+
+  template <typename T> struct linalg_traits<dense_matrix<T> > { // linalg_traits specialization for dense_matrix
+    typedef dense_matrix<T> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef T value_type;
+    typedef T& reference;
+    typedef abstract_dense storage_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::iterator,
+					   this_type> sub_row_type;
+    typedef tab_ref_reg_spaced_with_origin<typename this_type::const_iterator,
+					   this_type> const_sub_row_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> row_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_row_iterator;
+    typedef tab_ref_with_origin<typename this_type::iterator, 
+				this_type> sub_col_type;
+    typedef tab_ref_with_origin<typename this_type::const_iterator,
+				this_type> const_sub_col_type;
+    typedef dense_compressed_iterator<typename this_type::iterator,
+				      typename this_type::iterator,
+				      this_type *> col_iterator;
+    typedef dense_compressed_iterator<typename this_type::const_iterator,
+				      typename this_type::iterator,
+				      const this_type *> const_col_iterator;
+    typedef col_and_row sub_orientation; // both row and column access are provided below
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it)
+    { return const_sub_row_type(*it, it.nrows, it.ncols, it.origin); }
+    static const_sub_col_type col(const const_col_iterator &it)
+    { return const_sub_col_type(*it, *it + it.nrows, it.origin); } // range [*it, *it + nrows)
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(*it, it.nrows, it.ncols, it.origin); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(*it, *it + it.nrows, it.origin); }
+    // Row iterators pass 1 (0 for an empty matrix) as second argument, column iterators pass nrows();
+    // presumably the step between consecutive rows/columns -- confirm in dense_compressed_iterator.
+    static row_iterator row_begin(this_type &m)
+    { return row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), 0, &m); }
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), m.nrows(), &m); }
+    static const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), 0, &m); }
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m.begin(),  m.size() ? 1 : 0, m.nrows(), m.ncols(), m.nrows(), &m); }
+    static col_iterator col_begin(this_type &m)
+    { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); }
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), m.ncols(), &m); }
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); }
+    static const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m.begin(),m.nrows(),m.nrows(),m.ncols(),m.ncols(), &m); }
+    static origin_type* origin(this_type &m) { return &m; }
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.fill(value_type(0)); } // fill(0, T(0)): every entry becomes zero
+    static value_type access(const const_col_iterator &itcol, size_type j)
+    { return (*itcol)[j]; }
+    static reference access(const col_iterator &itcol, size_type j)
+    { return (*itcol)[j]; }
+    static void resize(this_type &v, size_type m, size_type n)
+    { v.resize(m,n); }
+    static void reshape(this_type &v, size_type m, size_type n)
+    { v.reshape(m, n); }
+  };
+
+  template<typename T> /* stream insertion for dense_matrix: delegates to gmm::write */
+  std::ostream &operator <<(std::ostream &o, const dense_matrix<T>& m) { gmm::write(o, m); return o; }
+
+
+  /* ******************************************************************** */
+  /*                                                                      */
+  /*	        Read only compressed sparse column matrix                 */
+  /*                                                                      */
+  /* ******************************************************************** */
+
+  template <typename T, int shift = 0>
+  struct csc_matrix { // compressed sparse column storage; 'shift' is added to every stored index/offset
+    typedef unsigned int IND_TYPE;
+
+    std::vector<T> pr;        // values.
+    std::vector<IND_TYPE> ir; // row indices (shifted).
+    std::vector<IND_TYPE> jc; // column repartition on pr and ir (nc+1 shifted offsets).
+    size_type nc, nr;         // number of columns, number of rows.
+
+    typedef T value_type;
+    typedef T& access_type;
+
+    template <typename Matrix> void init_with_good_format(const Matrix &B); // B must give column access
+    template <typename Matrix> void init_with(const Matrix &A); // generic case: goes through a temporary
+    void init_with(const col_matrix<gmm::rsvector<T> > &B)
+    { init_with_good_format(B); }
+    void init_with(const col_matrix<wsvector<T> > &B)
+    { init_with_good_format(B); }
+    template <typename PT1, typename PT2, typename PT3, int cshift>
+    void init_with(const csc_matrix_ref<PT1,PT2,PT3,cshift>& B)
+    { init_with_good_format(B); }
+    template <typename U, int cshift>    
+    void init_with(const csc_matrix<U, cshift>& B)
+    { init_with_good_format(B); }
+
+    void init_with_identity(size_type n);
+
+    csc_matrix(void) :  nc(0), nr(0) {} // pr, ir and jc are left empty
+    csc_matrix(size_type nnr, size_type nnc);
+
+    size_type nrows(void) const { return nr; }
+    size_type ncols(void) const { return nc; }
+    void swap(csc_matrix<T, shift> &m) { 
+      std::swap(pr, m.pr); 
+      std::swap(ir, m.ir); std::swap(jc, m.jc); 
+      std::swap(nc, m.nc); std::swap(nr, m.nr);
+    }
+    value_type operator()(size_type i, size_type j) const // entry (i, j), read through column j
+    { return mat_col(*this, j)[i]; }
+  };
+
+  template <typename T, int shift> template<typename Matrix>
+  void csc_matrix<T, shift>::init_with_good_format(const Matrix &B) {
+    typedef typename linalg_traits<Matrix>::const_sub_col_type col_type;
+    typedef typename linalg_traits<typename org_type<col_type>::t>::const_iterator
+      entry_iterator;
+    nc = mat_ncols(B); nr = mat_nrows(B);
+    // First pass: jc[j] becomes the shifted offset of column j inside pr/ir.
+    jc.resize(nc+1);
+    jc[0] = shift;
+    for (size_type j = 0; j < nc; ++j)
+      jc[j+1] = IND_TYPE(jc[j] + nnz(mat_const_col(B, j)));
+    pr.resize(jc[nc]); ir.resize(jc[nc]);
+    // Second pass: copy the stored values together with their shifted row indices.
+    for (size_type j = 0; j < nc; ++j) {
+      col_type col = mat_const_col(B, j);
+      entry_iterator it = vect_const_begin(col), ite = vect_const_end(col);
+      for (size_type pos = jc[j]-shift; it != ite; ++it, ++pos) {
+        pr[pos] = *it; ir[pos] = IND_TYPE(it.index() + shift);
+      }
+    }
+  }
+  
+  template <typename T, int shift> template <typename Matrix>
+  void csc_matrix<T, shift>::init_with(const Matrix &A) {
+    // Copy A into a column-stored sparse temporary, then compress that temporary.
+    col_matrix<wsvector<T> > by_cols(mat_nrows(A), mat_ncols(A));
+    copy(A, by_cols); init_with_good_format(by_cols);
+  }
+  
+  template <typename T, int shift>
+  void csc_matrix<T, shift>::init_with_identity(size_type n) {
+    // Square n x n matrix holding a single T(1) per column, on the diagonal.
+    nc = nr = n; pr.resize(nc); ir.resize(nc); jc.resize(nc+1);
+    for (size_type j = 0; j <= nc; ++j) { // the final pass only writes the end offset
+      jc[j] = shift + j;
+      if (j < nc) { ir[j] = jc[j]; pr[j] = T(1); }
+    }
+  }
+  
+  template <typename T, int shift>
+  csc_matrix<T, shift>::csc_matrix(size_type nnr, size_type nnc) : nc(nnc), nr(nnr) {
+    // No stored entry: all nc+1 column offsets equal the base shift.
+    pr.resize(1); ir.resize(1);
+    jc.assign(nc+1, IND_TYPE(shift));
+  }
+
+  template <typename T, int shift>
+  struct linalg_traits<csc_matrix<T, shift> > { // read-only traits: only const column access is defined
+    typedef csc_matrix<T, shift> this_type;
+    typedef typename this_type::IND_TYPE IND_TYPE;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef T value_type;
+    typedef T origin_type;
+    typedef T reference;
+    typedef abstract_sparse storage_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type sub_col_type;
+    typedef cs_vector_ref<const T *, const IND_TYPE *, shift>
+    const_sub_col_type;
+    typedef sparse_compressed_iterator<const T *, const IND_TYPE *,
+				       const IND_TYPE *, shift>
+    const_col_iterator;
+    typedef abstract_null_type col_iterator;
+    typedef col_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    // NOTE(review): pr[0]/ir[0]/jc[0] are indexed below although the default constructor leaves the vectors empty -- confirm callers.
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(&m.pr[0],&m.ir[0],&m.jc[0], m.nr, &m.pr[0]); }
+    static const_col_iterator col_end(const this_type &m) {
+      return const_col_iterator(&m.pr[0],&m.ir[0],&m.jc[0]+m.nc,
+				m.nr,&m.pr[0]);
+    }
+    static const_sub_col_type col(const const_col_iterator &it) { // slice [jc[j]-shift, jc[j+1]-shift) of pr and ir
+      return const_sub_col_type(it.pr + *(it.jc) - shift,
+				it.ir + *(it.jc) - shift,
+				*(it.jc + 1) - *(it.jc), it.n);
+    }
+    static const origin_type* origin(const this_type &m) { return &m.pr[0]; }
+    static void do_clear(this_type &m) { m.do_clear(); } // NOTE(review): csc_matrix declares no do_clear() member -- confirm
+    static value_type access(const const_col_iterator &itcol, size_type j)
+    { return col(itcol)[j]; }
+  };
+
+  // Stream insertion for csc_matrix: delegates to gmm::write.
+  template <typename T, int shift>
+  std::ostream &operator <<(std::ostream &o, const csc_matrix<T, shift>& m)
+  { gmm::write(o, m); return o; }
+  
+  template <typename T, int shift> inline
+  void copy(const identity_matrix &, csc_matrix<T, shift>& M) // order taken from M's current row count
+  { const size_type order = mat_nrows(M); M.init_with_identity(order); }
+
+  template <typename Matrix, typename T, int shift> inline
+  void copy(const Matrix &A, csc_matrix<T, shift>& M) // M is rebuilt from A by init_with
+  { M.init_with(A); }
+
+  /* ******************************************************************** */
+  /*                                                                      */
+  /*	        Read only compressed sparse row matrix                    */
+  /*                                                                      */
+  /* ******************************************************************** */
+
+  template <typename T, int shift = 0>
+  struct csr_matrix { // compressed sparse row storage; 'shift' is added to every stored index/offset
+
+    typedef unsigned int IND_TYPE;
+
+    std::vector<T> pr;        // values.
+    std::vector<IND_TYPE> ir; // col indices.
+    std::vector<IND_TYPE> jc; // row repartition on pr and ir.
+    size_type nc, nr;         // number of columns, number of rows.
+
+    typedef T value_type;
+    typedef T& access_type;
+
+
+    template <typename Matrix> void init_with_good_format(const Matrix &B); // B must give row access
+    void init_with(const row_matrix<wsvector<T> > &B)
+    { init_with_good_format(B); }
+    void init_with(const row_matrix<rsvector<T> > &B)
+    { init_with_good_format(B); }
+    template <typename PT1, typename PT2, typename PT3, int cshift>
+    void init_with(const csr_matrix_ref<PT1,PT2,PT3,cshift>& B)
+    { init_with_good_format(B); }
+    template <typename U, int cshift>
+    void init_with(const csr_matrix<U, cshift>& B)
+    { init_with_good_format(B); }
+
+    template <typename Matrix> void init_with(const Matrix &A); // generic case: goes through a temporary
+    void init_with_identity(size_type n);
+
+    csr_matrix(void) : nc(0), nr(0) {} // pr, ir and jc are left empty
+    csr_matrix(size_type nnr, size_type nnc);
+
+    size_type nrows(void) const { return nr; }
+    size_type ncols(void) const { return nc; }
+    void swap(csr_matrix<T, shift> &m) { 
+      std::swap(pr, m.pr); 
+      std::swap(ir,m.ir); std::swap(jc, m.jc); 
+      std::swap(nc, m.nc); std::swap(nr,m.nr);
+    }
+   
+    value_type operator()(size_type i, size_type j) const // entry (i, j), read through row i
+    { return mat_row(*this, i)[j]; }
+  };
+  
+  template <typename T, int shift> template <typename Matrix>
+  void csr_matrix<T, shift>::init_with_good_format(const Matrix &B) {
+    typedef typename linalg_traits<Matrix>::const_sub_row_type row_type;
+    typedef typename linalg_traits<typename org_type<row_type>::t>::const_iterator
+      entry_iterator;
+    nc = mat_ncols(B); nr = mat_nrows(B);
+    // First pass: jc[j] becomes the shifted offset of row j inside pr/ir.
+    jc.resize(nr+1);
+    jc[0] = shift;
+    for (size_type j = 0; j < nr; ++j)
+      jc[j+1] = IND_TYPE(jc[j] + nnz(mat_const_row(B, j)));
+    pr.resize(jc[nr]); ir.resize(jc[nr]);
+    // Second pass: copy the stored values together with their shifted column indices.
+    for (size_type j = 0; j < nr; ++j) {
+      row_type row = mat_const_row(B, j);
+      entry_iterator it = vect_const_begin(row), ite = vect_const_end(row);
+      for (size_type pos = jc[j]-shift; it != ite; ++it, ++pos) {
+        pr[pos] = *it; ir[pos] = IND_TYPE(it.index()+shift);
+      }
+    }
+  }
+
+  template <typename T, int shift> template <typename Matrix>
+  void csr_matrix<T, shift>::init_with(const Matrix &A) {
+    // Copy A into a row-stored sparse temporary, then compress that temporary.
+    row_matrix<wsvector<T> > by_rows(mat_nrows(A), mat_ncols(A));
+    copy(A, by_rows); init_with_good_format(by_rows);
+  }
+
+  template <typename T, int shift>
+  void csr_matrix<T, shift>::init_with_identity(size_type n) {
+    // Square n x n matrix holding a single T(1) per row, on the diagonal.
+    nc = nr = n; pr.resize(nr); ir.resize(nr); jc.resize(nr+1);
+    for (size_type j = 0; j <= nr; ++j) { // the final pass only writes the end offset
+      jc[j] = shift + j;
+      if (j < nr) { ir[j] = jc[j]; pr[j] = T(1); }
+    }
+  }
+
+  template <typename T, int shift>
+  csr_matrix<T, shift>::csr_matrix(size_type nnr, size_type nnc)
+    : nc(nnc), nr(nnr) {
+    // No stored entry: all nr+1 row offsets equal the base shift.
+    pr.resize(1); ir.resize(1);
+    jc.assign(nr+1, IND_TYPE(shift));
+  }
+
+
+  template <typename T, int shift>
+  struct linalg_traits<csr_matrix<T, shift> > { // read-only traits: only const row access is defined
+    typedef csr_matrix<T, shift> this_type;
+    typedef typename this_type::IND_TYPE IND_TYPE;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef T value_type;
+    typedef T origin_type;
+    typedef T reference;
+    typedef abstract_sparse storage_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type sub_row_type;
+    typedef cs_vector_ref<const T *, const IND_TYPE *, shift>
+    const_sub_row_type;
+    typedef sparse_compressed_iterator<const T *, const IND_TYPE *,
+				       const IND_TYPE *, shift>
+    const_row_iterator;
+    typedef abstract_null_type row_iterator;
+    typedef row_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_row_iterator row_begin(const this_type &m) // NOTE(review): indexes pr[0]/ir[0]/jc[0]; empty after default construction -- confirm
+    { return const_row_iterator(&m.pr[0], &m.ir[0], &m.jc[0], m.nc, &m.pr[0]); }
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(&m.pr[0], &m.ir[0], &m.jc[0] + m.nr, m.nc, &m.pr[0]); }
+    static const_sub_row_type row(const const_row_iterator &it) { // slice [jc[i]-shift, jc[i+1]-shift) of pr and ir
+      return const_sub_row_type(it.pr + *(it.jc) - shift,
+				it.ir + *(it.jc) - shift,
+				*(it.jc + 1) - *(it.jc), it.n);
+    }
+    static const origin_type* origin(const this_type &m) { return &m.pr[0]; }
+    static void do_clear(this_type &m) { m.do_clear(); } // NOTE(review): csr_matrix declares no do_clear() member -- confirm
+    static value_type access(const const_row_iterator &itrow, size_type j)
+    { return row(itrow)[j]; }
+  };
+
+  // Stream insertion for csr_matrix: delegates to gmm::write.
+  template <typename T, int shift>
+  std::ostream &operator <<(std::ostream &o, const csr_matrix<T, shift>& m)
+  { gmm::write(o, m); return o; }
+  
+  template <typename T, int shift> inline
+  void copy(const identity_matrix &, csr_matrix<T, shift>& M) // order taken from M's current row count
+  { const size_type order = mat_nrows(M); M.init_with_identity(order); }
+
+  template <typename Matrix, typename T, int shift> inline
+  void copy(const Matrix &A, csr_matrix<T, shift>& M) // M is rebuilt from A by init_with
+  { M.init_with(A); }
+
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Block matrix                                		  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+  template <typename MAT> class block_matrix { // matrix assembled from a grid of MAT blocks
+  protected :
+    std::vector<MAT> blocks;  // block (i,j) is stored at index j*nrowblocks_+i
+    size_type nrowblocks_;
+    size_type ncolblocks_;
+    std::vector<sub_interval> introw, intcol; // global row / column range covered by each block
+
+  public :
+    typedef typename linalg_traits<MAT>::value_type value_type;
+    typedef typename linalg_traits<MAT>::reference reference;
+
+    size_type nrows(void) const { return introw[nrowblocks_-1].max; } // needs at least one row block
+    size_type ncols(void) const { return intcol[ncolblocks_-1].max; } // needs at least one column block
+    size_type nrowblocks(void) const { return nrowblocks_; }
+    size_type ncolblocks(void) const { return ncolblocks_; }
+    const sub_interval &subrowinterval(size_type i) const { return introw[i]; }
+    const sub_interval &subcolinterval(size_type i) const { return intcol[i]; }
+    const MAT &block(size_type i, size_type j) const // i: row block, j: column block
+    { return blocks[j*nrowblocks_+i]; } // stride is the row-block count, so each (i,j) has its own slot
+    MAT &block(size_type i, size_type j)
+    { return blocks[j*nrowblocks_+i]; }
+    void do_clear(void);
+    // component access: the row block is looked up in introw, the column block in intcol
+    value_type operator() (size_type i, size_type j) const {
+      size_type k, l;
+      for (k = 0; k < nrowblocks_; ++k)
+	if (i >= introw[k].min && i <  introw[k].max) break;
+      for (l = 0; l < ncolblocks_; ++l)
+	if (j >= intcol[l].min && j <  intcol[l].max) break;
+      return (block(k, l))(i - introw[k].min, j - intcol[l].min);
+    }
+    reference operator() (size_type i, size_type j) {
+      size_type k, l;
+      for (k = 0; k < nrowblocks_; ++k)
+	if (i >= introw[k].min && i <  introw[k].max) break;
+      for (l = 0; l < ncolblocks_; ++l)
+	if (j >= intcol[l].min && j <  intcol[l].max) break;
+      return (block(k, l))(i - introw[k].min, j - intcol[l].min);
+    }
+
+    template <typename CONT> void resize(const CONT &c1, const CONT &c2);
+    template <typename CONT> block_matrix(const CONT &c1, const CONT &c2)
+    { resize(c1, c2); }
+    block_matrix(void) : nrowblocks_(0), ncolblocks_(0) {} // empty grid instead of indeterminate counts
+
+  };
+
+  template <typename MAT> struct linalg_traits<block_matrix<MAT> > { // partial traits: no row/column views yet
+    typedef block_matrix<MAT> this_type;
+    typedef linalg_false is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef this_type origin_type;
+    typedef typename linalg_traits<MAT>::value_type value_type; // value, reference and storage kinds
+    typedef typename linalg_traits<MAT>::reference reference;   // are inherited from the block type
+    typedef typename linalg_traits<MAT>::storage_type storage_type;
+    typedef abstract_null_type sub_row_type;       // to be done ...
+    typedef abstract_null_type const_sub_row_type; // to be done ...
+    typedef abstract_null_type row_iterator;       // to be done ...
+    typedef abstract_null_type const_row_iterator; // to be done ...
+    typedef abstract_null_type sub_col_type;       // to be done ...
+    typedef abstract_null_type const_sub_col_type; // to be done ...
+    typedef abstract_null_type col_iterator;       // to be done ...
+    typedef abstract_null_type const_col_iterator; // to be done ...
+    typedef abstract_null_type sub_orientation;    // to be done ...
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static origin_type* origin(this_type &m) { return &m; }
+    static const origin_type* origin(const this_type &m) { return &m; }
+    static void do_clear(this_type &m) { m.do_clear(); }
+    // access to be done ...    
+    static void resize(this_type &, size_type , size_type) // not implemented: always fails the assertion
+    { GMM_ASSERT1(false, "Sorry, to be done"); }
+    static void reshape(this_type &, size_type , size_type) // not implemented: always fails the assertion
+    { GMM_ASSERT1(false, "Sorry, to be done"); }
+  };
+
+  template <typename MAT> void block_matrix<MAT>::do_clear(void) { // clear every block in turn
+    for (size_type j = 0; j < ncolblocks_; ++j)
+      for (size_type i = 0; i < nrowblocks_; ++i)
+	clear(block(i,j));
+  }
+
+  template <typename MAT> template <typename CONT>
+  void block_matrix<MAT>::resize(const CONT &c1, const CONT &c2) {
+    nrowblocks_ = c1.size(); ncolblocks_ = c2.size(); // c1[i] / c2[j]: the two sizes given to block (i,j)
+    blocks.resize(nrowblocks_ * ncolblocks_);
+    intcol.resize(ncolblocks_); introw.resize(nrowblocks_);
+    size_type col_start = 0; // running start of the current column interval
+    for (size_type j = 0; j < ncolblocks_; col_start += c2[j], ++j) {
+      intcol[j] = sub_interval(col_start, c2[j]);
+      for (size_type i = 0, row_start = 0; i < nrowblocks_; row_start += c1[i], ++i) {
+        if (j == 0) introw[i] = sub_interval(row_start, c1[i]); // row intervals: first column pass only
+        block(i, j) = MAT(c1[i], c2[j]);
+      }
+    }
+  }
+
+  template <typename M1, typename M2>
+  void copy(const block_matrix<M1> &m1, M2 &m2) { // each block goes to the matching sub-matrix of m2
+    const size_type nbcol = m1.ncolblocks(), nbrow = m1.nrowblocks();
+    for (size_type j = 0; j < nbcol; ++j)
+      for (size_type i = 0; i < nbrow; ++i)
+        copy(m1.block(i,j), sub_matrix(m2, m1.subrowinterval(i), m1.subcolinterval(j)));
+  }
+
+  template <typename M1, typename M2> // const destination: cast it and reuse the overload taking M2&
+  void copy(const block_matrix<M1> &m1, const M2 &m2)
+  { copy(m1, linalg_const_cast(m2)); }
+  
+
+  template <typename MAT, typename V1, typename V2>
+  void mult(const block_matrix<MAT> &m, const V1 &v1, V2 &v2) { // v2 is cleared, then built block by block
+    clear(v2);
+    typename sub_vector_type<V2 *, sub_interval>::vector_type sv;
+    for (size_type i = 0; i < m.nrowblocks() ; ++i)
+      for (size_type j = 0; j < m.ncolblocks() ; ++j) {
+	sv = sub_vector(v2, m.subrowinterval(i)); // part of v2 matching row block i
+	mult(m.block(i,j),
+	     sub_vector(v1, m.subcolinterval(j)), sv, sv); // sv is both the added term and the result
+      }
+  }
+
+  template <typename MAT, typename V1, typename V2, typename V3>
+  void mult(const block_matrix<MAT> &m, const V1 &v1, const V2 &v2, V3 &v3) { // v2 enters once per row block
+    typename sub_vector_type<V3 *, sub_interval>::vector_type sv;
+    for (size_type i = 0; i < m.nrowblocks() ; ++i)
+      for (size_type j = 0; j < m.ncolblocks() ; ++j) {
+	sv = sub_vector(v3, m.subrowinterval(i)); // part of v3 matching row block i
+	if (j == 0) // first column block: take the matching part of v2 as the added term
+	  mult(m.block(i,j),
+	       sub_vector(v1, m.subcolinterval(j)),
+	       sub_vector(v2, m.subrowinterval(i)), sv);
+	else // later column blocks: sv is both the added term and the result
+	  mult(m.block(i,j),
+	       sub_vector(v1, m.subcolinterval(j)), sv, sv);
+      }
+    
+  }
+
+  template <typename MAT, typename V1, typename V2> // const result: cast it and reuse the V2& overload
+  void mult(const block_matrix<MAT> &m, const V1 &v1, const V2 &v2)
+  { mult(m, v1, linalg_const_cast(v2)); }
+
+  template <typename MAT, typename V1, typename V2, typename V3>
+  void mult(const block_matrix<MAT> &m, const V1 &v1, const V2 &v2, 
+	    const V3 &v3) // const result: cast it and forward to the overload taking V3&
+  { mult(m, v1, v2, linalg_const_cast(v3)); }
+
+}
+  /* ******************************************************************** */
+  /*		                                            		  */
+  /*		Distributed matrices                                	  */
+  /*		                                            		  */
+  /* ******************************************************************** */
+
+#ifdef GMM_USES_MPI
+# include <mpi.h>
+
+namespace gmm {
+
+  
+  
+  template <typename T> inline MPI_Datatype mpi_type(T) // fallback for types without an overload below
+  { GMM_ASSERT1(false, "Sorry unsupported type"); return MPI_FLOAT; }
+  inline MPI_Datatype mpi_type(double) { return MPI_DOUBLE; } // the argument only selects the overload
+  inline MPI_Datatype mpi_type(float) { return MPI_FLOAT; }
+  inline MPI_Datatype mpi_type(long double) { return MPI_LONG_DOUBLE; }
+#ifndef LAM_MPI
+  inline MPI_Datatype mpi_type(std::complex<float>) { return MPI_COMPLEX; } // complex overloads are skipped under LAM_MPI
+  inline MPI_Datatype mpi_type(std::complex<double>) { return MPI_DOUBLE_COMPLEX; }
+#endif
+  inline MPI_Datatype mpi_type(int) { return MPI_INT; }
+  inline MPI_Datatype mpi_type(unsigned int) { return MPI_UNSIGNED; }
+  inline MPI_Datatype mpi_type(long) { return MPI_LONG; }
+  inline MPI_Datatype mpi_type(unsigned long) { return MPI_UNSIGNED_LONG; }
+
+  /* Thin wrapper owning a single matrix M, also reachable through local_matrix(). */
+  template <typename MAT> struct mpi_distributed_matrix {
+    MAT M;
+    mpi_distributed_matrix() {}
+    mpi_distributed_matrix(size_type n, size_type m) : M(n, m) {}
+
+    MAT &local_matrix(void) { return M; }
+    const MAT &local_matrix(void) const { return M; }
+  };
+  
+  // eff_matrix: a plain matrix is returned unchanged, a wrapper yields the matrix it holds.
+  template <typename MAT> inline MAT &eff_matrix(MAT &m) { return m; }
+  template <typename MAT> inline const MAT &eff_matrix(const MAT &m) { return m; }
+  template <typename MAT> inline
+  MAT &eff_matrix(mpi_distributed_matrix<MAT> &m) { return m.local_matrix(); }
+  template <typename MAT> inline
+  const MAT &eff_matrix(const mpi_distributed_matrix<MAT> &m) { return m.local_matrix(); }
+  
+
+  template <typename MAT1, typename MAT2> // wrapper to wrapper: copy between the held matrices
+  inline void copy(const mpi_distributed_matrix<MAT1> &m1,
+		   mpi_distributed_matrix<MAT2> &m2)
+  { copy(eff_matrix(m1), eff_matrix(m2)); }
+  template <typename MAT1, typename MAT2> // same with a const destination wrapper
+  inline void copy(const mpi_distributed_matrix<MAT1> &m1,
+		   const mpi_distributed_matrix<MAT2> &m2)
+  { copy(m1.M, m2.M); }
+  
+  template <typename MAT1, typename MAT2> // wrapper to any other matrix: only m1.M is copied
+  inline void copy(const mpi_distributed_matrix<MAT1> &m1, MAT2 &m2)
+  { copy(m1.M, m2); }
+  template <typename MAT1, typename MAT2>
+  inline void copy(const mpi_distributed_matrix<MAT1> &m1, const MAT2 &m2)
+  { copy(m1.M, m2); }
+  
+
+  template <typename MATSP, typename V1, typename V2> inline
+  typename strongest_value_type3<V1,V2,MATSP>::value_type
+  vect_sp(const mpi_distributed_matrix<MATSP> &ps, const V1 &v1, const V2 &v2) {
+    typedef typename strongest_value_type3<V1,V2,MATSP>::value_type T;
+    T local_sp = vect_sp(ps.M, v1, v2); // product computed with the wrapped matrix
+    T summed_sp;                        // receives the MPI_SUM reduction over MPI_COMM_WORLD
+    MPI_Allreduce(&local_sp, &summed_sp, 1, mpi_type(T()), MPI_SUM, MPI_COMM_WORLD);
+    return summed_sp;
+  }
+
+  template <typename MAT, typename V1, typename V2> // v2 += MPI_SUM reduction of (m.M * v1)
+  inline void mult_add(const mpi_distributed_matrix<MAT> &m, const V1 &v1,
+		       V2 &v2) {
+    typedef typename linalg_traits<V2>::value_type T;
+    std::vector<T> v3(vect_size(v2)), v4(vect_size(v2)); // v3: local product, v4: reduced result
+    static double tmult_tot = 0.0;  // overwritten, not accumulated, on every call
+    static double tmult_tot2 = 0.0; // same for the reduction time
+    double t_ref = MPI_Wtime();
+    gmm::mult(m.M, v1, v3);
+    if (is_sparse(v2)) GMM_WARNING2("Using a plain temporary, here.");
+    double t_ref2 = MPI_Wtime();
+    MPI_Allreduce(&(v3[0]), &(v4[0]),gmm::vect_size(v2), mpi_type(T()), // NOTE(review): indexes v3[0]/v4[0] even when v2 is empty
+		  MPI_SUM,MPI_COMM_WORLD);
+    tmult_tot2 = MPI_Wtime()-t_ref2;
+    cout << "reduce mult mpi = " << tmult_tot2 << endl; // timing is printed on every call
+    gmm::add(v4, v2);
+    tmult_tot = MPI_Wtime()-t_ref;
+    cout << "tmult mpi = " << tmult_tot << endl;
+  }
+
+  template <typename MAT, typename V1, typename V2> // const result: cast it and reuse the V2& overload
+  void mult_add(const mpi_distributed_matrix<MAT> &m, const V1 &v1, const V2 &v2_) {
+    V2 &v2 = const_cast<V2 &>(v2_); mult_add(m, v1, v2);
+  }
+
+  // Result cleared, then the product is added (const result reference).
+  template <typename MAT, typename V1, typename V2>
+  inline void mult(const mpi_distributed_matrix<MAT> &m, const V1 &v1, const V2 &v2_)
+  { V2 &out = const_cast<V2 &>(v2_); clear(out); mult_add(m, v1, out); }
+
+  // Result cleared, then the product is added.
+  template <typename MAT, typename V1, typename V2>
+  inline void mult(const mpi_distributed_matrix<MAT> &m, const V1 &v1, V2 &v2)
+  { clear(v2); mult_add(m, v1, v2); }
+
+  // Result set to v2, then the product is added (const result reference).
+  template <typename MAT, typename V1, typename V2, typename V3>
+  inline void mult(const mpi_distributed_matrix<MAT> &m, const V1 &v1, const V2 &v2, const V3 &v3_)
+  { V3 &out = const_cast<V3 &>(v3_); gmm::copy(v2, out); mult_add(m, v1, out); }
+
+  // Result set to v2, then the product is added.
+  template <typename MAT, typename V1, typename V2, typename V3>
+  inline void mult(const mpi_distributed_matrix<MAT> &m, const V1 &v1, const V2 &v2, V3 &v3)
+  { gmm::copy(v2, v3); mult_add(m, v1, v3); }
+  
+
+  template <typename MAT> inline // row count of the wrapped matrix
+  size_type mat_nrows(const mpi_distributed_matrix<MAT> &M) 
+  { return mat_nrows(M.M); }
+  template <typename MAT> inline // column count of the wrapped matrix
+  size_type mat_ncols(const mpi_distributed_matrix<MAT> &M) 
+  { return mat_ncols(M.M); }
+  template <typename MAT> inline // both helpers act on the wrapped matrix
+  void resize(mpi_distributed_matrix<MAT> &M, size_type m, size_type n)
+  { resize(M.local_matrix(), m, n); }
+  template <typename MAT> inline
+  void clear(mpi_distributed_matrix<MAT> &M) { clear(M.local_matrix()); }
+  
+
+  // Used to compute the reduced system: products involving a wrapper act on the matrix it holds.
+  template <typename MAT1, typename MAT2> inline
+  void mult(const MAT1 &M1, const mpi_distributed_matrix<MAT2> &M2,
+            mpi_distributed_matrix<MAT2> &M3)
+  { mult(M1, M2.local_matrix(), M3.local_matrix()); }
+  template <typename MAT1, typename MAT2> inline
+  void mult(const mpi_distributed_matrix<MAT2> &M2,
+            const MAT1 &M1, mpi_distributed_matrix<MAT2> &M3)
+  { mult(M2.local_matrix(), M1, M3.local_matrix()); }
+  template <typename MAT1, typename MAT2, typename MAT3> inline
+  void mult(const MAT1 &M1, const mpi_distributed_matrix<MAT2> &M2,
+            MAT3 &M3)
+  { mult(M1, M2.local_matrix(), M3); }
+  template <typename MAT1, typename MAT2, typename MAT3> inline
+  void mult(const MAT1 &M1, const mpi_distributed_matrix<MAT2> &M2,
+            const MAT3 &M3)
+  { mult(M1, M2.local_matrix(), M3); }
+
+  template <typename M, typename SUBI1, typename SUBI2> // no sub-matrix type for a const wrapper pointer
+  struct sub_matrix_type<const mpi_distributed_matrix<M> *, SUBI1, SUBI2>
+  { typedef abstract_null_type matrix_type; };
+
+  template <typename M, typename SUBI1, typename SUBI2> // nor for a non-const wrapper pointer
+  struct sub_matrix_type<mpi_distributed_matrix<M> *, SUBI1, SUBI2>
+  { typedef abstract_null_type matrix_type; };
+
+  template <typename M, typename SUBI1, typename SUBI2>  inline // sub-matrix of the wrapped matrix m.M
+  typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI2>
+  ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type,
+   M *>::return_type
+   sub_matrix(mpi_distributed_matrix<M> &m, const SUBI1 &si1, const SUBI2 &si2)
+  { return sub_matrix(m.M, si1, si2); }
+
+  template <typename MAT, typename SUBI1, typename SUBI2>  inline // const wrapper variant
+  typename select_return<typename sub_matrix_type<const MAT *, SUBI1, SUBI2>
+  ::matrix_type, typename sub_matrix_type<MAT *, SUBI1, SUBI2>::matrix_type,
+			 const MAT *>::return_type
+  sub_matrix(const mpi_distributed_matrix<MAT> &m, const SUBI1 &si1,
+	     const SUBI2 &si2)
+  { return sub_matrix(m.M, si1, si2);  }
+
+  template <typename M, typename SUBI1>  inline // single index set used for both rows and columns
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    M *>::return_type
+  sub_matrix(mpi_distributed_matrix<M> &m, const SUBI1 &si1) 
+  { return sub_matrix(m.M, si1, si1); }
+
+  template <typename M, typename SUBI1>  inline // const wrapper, single index set
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    const M *>::return_type
+  sub_matrix(const mpi_distributed_matrix<M> &m, const SUBI1 &si1)
+  { return sub_matrix(m.M, si1, si1); }
+
+
+  template <typename L> struct transposed_return<const mpi_distributed_matrix<L> *> // no transposed type for the wrapper itself
+  { typedef abstract_null_type return_type; };
+  template <typename L> struct transposed_return<mpi_distributed_matrix<L> *> 
+  { typedef abstract_null_type return_type; };
+  
+  template <typename L> inline typename transposed_return<const L *>::return_type
+  transposed(const mpi_distributed_matrix<L> &l) // transposed view of the wrapped matrix l.M
+  { return transposed(l.M); }
+
+  template <typename L> inline typename transposed_return<L *>::return_type
+  transposed(mpi_distributed_matrix<L> &l) // non-const variant
+  { return transposed(l.M); }
+
+
+  template <typename MAT>
+  struct linalg_traits<mpi_distributed_matrix<MAT> > { // no row/column views; sizes come from the wrapped matrix
+    typedef mpi_distributed_matrix<MAT> this_type;
+    typedef MAT origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<MAT>::value_type value_type;
+    typedef typename linalg_traits<MAT>::reference reference;
+    typedef typename linalg_traits<MAT>::storage_type storage_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type sub_orientation;
+    typedef abstract_null_type index_sorted;
+    static size_type nrows(const this_type &m) { return mat_nrows(m.M); } // a bare nrows(m.M) would name this member
+    static size_type ncols(const this_type &m) { return mat_ncols(m.M); } // same for ncols
+    static void do_clear(this_type &m) { clear(m.M); }
+  };
+
+}
+
+
+#endif // GMM_USES_MPI
+
+namespace std {
+  // row_matrix: exchange through the member swap
+  template <typename V>
+  void swap(gmm::row_matrix<V> &m1, gmm::row_matrix<V> &m2) { m1.swap(m2); }
+  // col_matrix: exchange through the member swap
+  template <typename V>
+  void swap(gmm::col_matrix<V> &m1, gmm::col_matrix<V> &m2) { m1.swap(m2); }
+  // dense_matrix: exchange through the member swap
+  template <typename T>
+  void swap(gmm::dense_matrix<T> &m1, gmm::dense_matrix<T> &m2) { m1.swap(m2); }
+  // csc_matrix: exchange through the member swap
+  template <typename T, int shift>
+  void swap(gmm::csc_matrix<T,shift> &m1, gmm::csc_matrix<T,shift> &m2) { m1.swap(m2); }
+  // csr_matrix: exchange through the member swap
+  template <typename T, int shift>
+  void swap(gmm::csr_matrix<T,shift> &m1, gmm::csr_matrix<T,shift> &m2) { m1.swap(m2); }
+}
+
+
+
+
+#endif /* GMM_MATRIX_H__ */
diff --git a/gmm/gmm_modified_gram_schmidt.h b/gmm/gmm_modified_gram_schmidt.h
new file mode 100644
index 000000000..34d54ae3f
--- /dev/null
+++ b/gmm/gmm_modified_gram_schmidt.h
@@ -0,0 +1,127 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_modified_gram_schmidt.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>, Lie-Quan Lee     <llee@osl.iu.edu>
+   @date October 13, 2002.
+   @brief Modified Gram-Schmidt orthogonalization
+*/
+
+#ifndef GMM_MODIFIED_GRAM_SCHMIDT_H
+#define GMM_MODIFIED_GRAM_SCHMIDT_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+
+  /** Dense column store handed to orthogonalize() and combine() below. */
+  template <typename T> class modified_gram_schmidt {
+  protected:
+    typedef dense_matrix<T> MAT;
+    MAT M; // s rows, restart+1 columns; operator[](i) returns column i
+  public:
+    /** Allocates an s x (restart+1) dense matrix. */
+    modified_gram_schmidt(int restart, size_t s) : M(s, restart+1) {}
+
+    /** Read-only access to column i. */
+    typename linalg_traits<MAT>::const_sub_col_type operator[](size_t i) const
+    { return mat_const_col(M, i); }
+
+    /** Writable access to column i. */
+    typename linalg_traits<MAT>::sub_col_type operator[](size_t i)
+    { return mat_col(M, i); }
+
+    size_type nrows() const { return M.nrows(); }
+    size_type ncols() const { return M.ncols(); }
+    MAT &mat() { return M; }
+    const MAT &mat() const { return M; }
+  };
+
+  /** For k = 0..i: stores vect_hp(V[i+1], V[k]) in Hi_[k] and subtracts Hi_[k] * V[k] from V[i+1]. */
+  template <typename T, typename VecHi> inline
+  void orthogonalize(modified_gram_schmidt<T>& V, const VecHi& Hi_, size_t i) {
+    VecHi& coeffs = const_cast<VecHi&>(Hi_); // Hi_ is written to despite its const
+    for (size_t col = 0; col <= i; ++col) {
+      coeffs[col] = gmm::vect_hp(V[i+1], V[col]);
+      gmm::add(gmm::scaled(V[col], -coeffs[col]), V[i+1]);
+    }
+  }
+
+  /** Runs orthogonalize(), then computes one correction vector from columns 0..i,
+      applies it to V[i+1] and adds it to the first i+1 entries of Hi_. */
+  template <typename T, typename VecHi>
+  void orthogonalize_with_refinment(modified_gram_schmidt<T>& V,
+				    const VecHi& Hi_, size_t i) {
+    VecHi& coeffs = const_cast<VecHi&>(Hi_);
+    orthogonalize(V, Hi_, i);
+    sub_interval rows(0, V.nrows()), cols(0, i+1);
+    std::vector<T> delta(i+1);
+    gmm::mult(conjugated(sub_matrix(V.mat(), rows, cols)), V[i+1], delta);
+    gmm::mult(sub_matrix(V.mat(), rows, cols), scaled(delta, T(-1)),
+	      V[i+1],V[i+1]);
+    gmm::add(delta, sub_vector(coeffs, cols));
+  }
+  /** Adds s[j] * V[j] to x for every j in [0, i). */
+  template <typename T, typename VecS, typename VecX>
+  void combine(modified_gram_schmidt<T>& V, const VecS& s, VecX& x, size_t i)
+  { for (size_t c = 0; c != i; ++c) gmm::add(gmm::scaled(V[c], s[c]), x); }
+}
+
+#endif
diff --git a/gmm/gmm_opt.h b/gmm/gmm_opt.h
new file mode 100644
index 000000000..e73af4153
--- /dev/null
+++ b/gmm/gmm_opt.h
@@ -0,0 +1,128 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_opt.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date July 9, 2003.
+   @brief Optimization for some small cases (inversion of 2x2 matrices etc.)
+*/
+#ifndef GMM_OPT_H__
+#define GMM_OPT_H__
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*    Optimized determinant and inverse for small matrices (2x2 and 3x3) */
+  /*    with dense_matrix<T>.                                              */
+  /* ********************************************************************* */
+
+  /** Determinant of a dense matrix. Orders 1 and 2 use closed forms, an empty
+      matrix gives T(1), anything larger is LU-factored on a copy of A. */
+  template <typename T>  T lu_det(const dense_matrix<T> &A) {
+    const size_type order = mat_nrows(A);
+    if (order == 0) return T(1);
+
+    const T *a = &(A(0,0));
+    if (order == 1) return a[0];
+    if (order == 2) return a[0] * a[3] - a[1] * a[2];
+
+    // The closed form for order 3 is kept disabled:
+    // not stable for nearly singular matrices
+    //   a[0] * (a[4] * a[8] - a[5] * a[7])
+    // - a[1] * (a[3] * a[8] - a[5] * a[6])
+    // + a[2] * (a[3] * a[7] - a[4] * a[6])
+
+    // General case: factor a copy and take the determinant of the factors.
+    dense_matrix<T> lu(mat_nrows(A), mat_ncols(A));
+    std::vector<size_type> pivots(mat_nrows(A));
+    gmm::copy(A, lu);
+    lu_factor(lu, pivots);
+    return lu_det(lu, pivots);
+  }
+
+  /** Inverts A_ in place (the const qualifier is cast away) and returns its determinant.
+      When doassert is false a singular matrix raises no assertion and is left uninverted. */
+  template <typename T> T lu_inverse(const dense_matrix<T> &A_, bool doassert = true) {
+    dense_matrix<T>& A = const_cast<dense_matrix<T> &>(A_);
+    size_type N = mat_nrows(A);
+    T det(1);
+    if (N) {
+      T *p = &(A(0,0));
+      if (N <= 2) {
+	switch (N) {
+	  case 1 : {
+	    det = *p;
+	    if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix");
+            if (det == T(0)) break;
+	    *p = T(1) / det; 
+	  } break;
+	  case 2 : {
+	    det = (*p) * (*(p+3)) - (*(p+1)) * (*(p+2));
+	    if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix");
+            if (det == T(0)) break;
+	    std::swap(*p, *(p+3));
+	    *p++ /= det; *p++ /= -det; *p++ /= -det; *p++ /= det; 
+	  } break;
+// 	  case 3 : { // not stable for nearly singular matrices
+// 	    T a, b, c, d, e, f, g, h, i;
+// 	    a =   (*(p+4)) * (*(p+8)) - (*(p+5)) * (*(p+7));
+// 	    b = - (*(p+1)) * (*(p+8)) + (*(p+2)) * (*(p+7));
+// 	    c =   (*(p+1)) * (*(p+5)) - (*(p+2)) * (*(p+4));
+// 	    d = - (*(p+3)) * (*(p+8)) + (*(p+5)) * (*(p+6));
+// 	    e =   (*(p+0)) * (*(p+8)) - (*(p+2)) * (*(p+6));
+// 	    f = - (*(p+0)) * (*(p+5)) + (*(p+2)) * (*(p+3));
+// 	    g =   (*(p+3)) * (*(p+7)) - (*(p+4)) * (*(p+6));
+// 	    h = - (*(p+0)) * (*(p+7)) + (*(p+1)) * (*(p+6));
+// 	    i =   (*(p+0)) * (*(p+4)) - (*(p+1)) * (*(p+3));
+// 	    det = (*p) * a + (*(p+1)) * d + (*(p+2)) * g;
+// 	    GMM_ASSERT1(det!=T(0), "non invertible matrix");
+// 	    *p++ = a / det; *p++ = b / det; *p++ = c / det; 
+// 	    *p++ = d / det; *p++ = e / det; *p++ = f / det; 
+// 	    *p++ = g / det; *p++ = h / det; *p++ = i / det; 
+// 	  } break;
+	}
+      } else { // larger matrices: work on an LU-factored copy
+	dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
+	std::vector<int> ipvt(mat_nrows(A));
+	gmm::copy(A, B);
+	size_type info = lu_factor(B, ipvt);
+	if (doassert) GMM_ASSERT1(!info, "non invertible matrix"); // honour doassert like the small cases
+	if (!info) lu_inverse(B, ipvt, A); // a failed factorization leaves A as it was
+	return lu_det(B, ipvt);
+      }
+    }
+    return det;
+  }
+
+  
+}
+
+#endif //  GMM_OPT_H__
diff --git a/gmm/gmm_precond.h b/gmm/gmm_precond.h
new file mode 100644
index 000000000..fca4f35d4
--- /dev/null
+++ b/gmm/gmm_precond.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+#ifndef GMM_PRECOND_H
+#define GMM_PRECOND_H
+
+#include "gmm_kernel.h"
+
+/** @file gmm_precond.h
+    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+    @date March 29, 2004.
+    @brief gmm preconditioners.
+ */
+
+/* Preconditioner concept :                                                */
+/*                                                                         */
+/* A is the matrix, P the preconditioner such that PA is well conditioned. */
+/* PRECOND preconditioner type.                                            */
+/* mult(P, v, w) :  w <- P v                                               */
+/* transposed_mult(P, v, w)       : w <- transposed(P) v                   */
+/* left_mult(P, v, w)             : see qmr solver                         */
+/* right_mult(P, v, w)            : see qmr solver                         */
+/* transposed_left_mult(P, v, w)  : see qmr solver                         */
+/* transposed_right_mult(P, v, w) : see qmr solver                         */
+/*                                                                         */
+/* PRECOND P() : empty preconditioner.                                     */
+/* PRECOND P(A, ...) : preconditioner for the matrix A, with optional      */
+/*                     parameters                                          */
+/* PRECOND(...)  : empty preconditioner with parameters set.               */
+/* P.build_with(A) : build a preconditioner for A.                         */
+/*                                                                         */
+/* *********************************************************************** */
+
+
+
+
+#endif 
+
diff --git a/gmm/gmm_precond_diagonal.h b/gmm/gmm_precond_diagonal.h
new file mode 100644
index 000000000..19d46095b
--- /dev/null
+++ b/gmm/gmm_precond_diagonal.h
@@ -0,0 +1,132 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_diagonal.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Diagonal matrix preconditioner.
+*/
+
+#ifndef GMM_PRECOND_DIAGONAL_H
+#define GMM_PRECOND_DIAGONAL_H
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Diagonal preconditioner: keeps 1 / |M(i,i)| for each row i of the matrix. */
+  template<typename Matrix> struct diagonal_precond {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    std::vector<magnitude_type> diag; // reciprocal magnitudes of the diagonal
+    /** Fills diag from M; a zero diagonal entry is replaced by 1 and a warning is issued. */
+    void build_with(const Matrix &M) {
+      diag.resize(mat_nrows(M));
+      for (size_type i = 0; i < mat_nrows(M); ++i) {
+	magnitude_type x = gmm::abs(M(i, i));
+	if (x == magnitude_type(0)) {
+	  x = magnitude_type(1);
+	  GMM_WARNING2("The matrix has a zero on its diagonal");
+	}
+	diag[i] = magnitude_type(1) / x;
+      }
+    }
+    /** Size of this object plus the diag payload; diag holds magnitude_type, not value_type. */
+    size_type memsize() const { return sizeof(*this) + diag.size() * sizeof(magnitude_type); }
+    diagonal_precond(const Matrix &M) { build_with(M); }
+    diagonal_precond(void) {}
+  };
+  /** Sparse storage: multiplies each stored entry of v2 by P.diag at that entry's index. */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P, V2 &v2, abstract_sparse){
+    typedef typename linalg_traits<V2>::iterator iter_t;
+    for (iter_t cur = vect_begin(v2), last = vect_end(v2); cur != last; ++cur)
+      *cur *= P.diag[cur.index()];
+  }
+  /** Skyline storage: delegates to the abstract_sparse overload. */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P,V2 &v2, abstract_skyline)
+    { mult_diag_p(P, v2, abstract_sparse()); }
+  /** Dense storage: multiplies v2[k] by P.diag[k] for every k below P.diag.size(). */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P, V2 &v2, abstract_dense){
+    for (size_type k = 0, n = P.diag.size(); k < n; ++k) v2[k] *= P.diag[k];
+  }
+  /** v2 <- P v1: checks that v2 matches P.diag in size, copies v1 into v2, then scales v2 via mult_diag_p. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    GMM_ASSERT2(P.diag.size() == vect_size(v2),"dimensions mismatch");
+    copy(v1, v2);
+    mult_diag_p(P, v2, typename linalg_traits<V2>::storage_type()); // overload picked by v2's storage tag
+  }
+  /** Same result as mult(): the call is forwarded unchanged. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const diagonal_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    mult(P, v1, v2);
+  }
+  
+  // # define DIAG_LEFT_MULT_SQRT
+  /** Copies v1 into v2 and scales it by P.diag, or by sqrt(P.diag) when DIAG_LEFT_MULT_SQRT is defined. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch");
+    copy(v1, v2);
+#   ifdef DIAG_LEFT_MULT_SQRT
+    for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= gmm::sqrt(P.diag[i]);
+#   else
+    for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= P.diag[i];
+#   endif
+  }
+  /** Forwards to left_mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const diagonal_precond<Matrix>& P,
+			    const V1 &v1, V2 &v2)
+    { left_mult(P, v1, v2); }
+  /** Copies v1 into v2; the sqrt(P.diag) scaling is compiled in only when DIAG_LEFT_MULT_SQRT is defined. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    // typedef typename linalg_traits<Matrix>::value_type T;
+    GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch");
+    copy(v1, v2);
+#   ifdef DIAG_LEFT_MULT_SQRT    
+    for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= gmm::sqrt(P.diag[i]);
+#   endif
+  }
+  /** Forwards to right_mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const diagonal_precond<Matrix>& P,
+			    const V1 &v1, V2 &v2)
+    { right_mult(P, v1, v2); }
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_ildlt.h b/gmm/gmm_precond_ildlt.h
new file mode 100644
index 000000000..22484df73
--- /dev/null
+++ b/gmm/gmm_precond_ildlt.h
@@ -0,0 +1,241 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of cholesky.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+#ifndef GMM_PRECOND_ILDLT_H
+#define GMM_PRECOND_ILDLT_H
+
+/**@file gmm_precond_ildlt.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author Yves Renard <yves.renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Incomplete Level 0 ILDLT Preconditioner.
+*/
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Incomplete Level 0 LDLT Preconditioner.
+      
+  For use with symmetric real or hermitian complex sparse matrices.
+
+  Notes: The idea under a concrete Preconditioner such as Incomplete
+  Cholesky is to create a Preconditioner object to use in iterative
+  methods.
+
+
+  Y. Renard : Transformed in LDLT for stability reason.
+  
+  U=LT is stored in csr format. D is stored on the diagonal of U.
+  */
+  template <typename Matrix>
+  class ildlt_precond {
+    // Holds the data filled in by build_with(): the view U and the Tri_* arrays below.
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    typedef csr_matrix_ref<value_type *, size_type *, size_type *, 0> tm_type;
+    // View bound by do_ildlt() to the Tri_val/Tri_ind/Tri_ptr storage of this object.
+    tm_type U;
+
+  protected :
+    std::vector<value_type> Tri_val; // entry values; D(i) reads Tri_val[Tri_ptr[i]]
+    std::vector<size_type> Tri_ind, Tri_ptr; // column indices and per-row start offsets
+ 
+    template<typename M> void do_ildlt(const M& A, row_major);
+    void do_ildlt(const Matrix& A, col_major);
+
+  public:
+
+    size_type nrows(void) const { return mat_nrows(U); }
+    size_type ncols(void) const { return mat_ncols(U); }
+    value_type &D(size_type i) { return Tri_val[Tri_ptr[i]]; }
+    const value_type &D(size_type i) const { return Tri_val[Tri_ptr[i]]; }
+    ildlt_precond(void) {}
+    void build_with(const Matrix& A) {
+      Tri_ptr.resize(mat_nrows(A)+1); // one start offset per row plus a final end offset
+      do_ildlt(A, typename principal_orientation_type<typename
+		  linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    ildlt_precond(const Matrix& A)  { build_with(A); }
+    size_type memsize() const { 
+      return sizeof(*this) + 
+	Tri_val.size() * sizeof(value_type) + 
+	(Tri_ind.size()+Tri_ptr.size()) * sizeof(size_type); 
+    }
+  };
+
+  /** Builds the incomplete LDL^T data from a row-wise view of A: the upper triangle of A
+      (diagonal entry first in each row) is copied into Tri_*, updated in place, then bound to U. */
+  template <typename Matrix> template<typename M>
+  void ildlt_precond<Matrix>::do_ildlt(const M& A, row_major) {
+    typedef typename linalg_traits<Matrix>::storage_type store_type;
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type Tri_loc = 0, n = mat_nrows(A), d, g, h, i, j, k;
+    if (n == 0) return;
+    T z, zz;
+    Tri_ptr[0] = 0;
+    R prec = default_tol(R());
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+    // Two passes over A: count == 0 only counts entries, count == 1 stores them.
+    for (int count = 0; count < 2; ++count) {
+      if (count) { Tri_val.resize(Tri_loc); Tri_ind.resize(Tri_loc); }
+      for (Tri_loc = 0, i = 0; i < n; ++i) {
+	typedef typename linalg_traits<M>::const_sub_row_type row_type;
+	row_type row = mat_const_row(A, i);
+        typename linalg_traits<typename org_type<row_type>::t>::const_iterator
+	  it = vect_const_begin(row), ite = vect_const_end(row);
+
+	if (count) { Tri_val[Tri_loc] = T(0); Tri_ind[Tri_loc] = i; }
+	++Tri_loc; // diagonal element
+
+	for (k = 0; it != ite; ++it, ++k) {
+	  j = index_of_it(it, k, store_type());
+	  if (i == j) {
+	    if (count) Tri_val[Tri_loc-1] = *it; 
+	  }
+	  else if (j > i) {
+	    if (count) { Tri_val[Tri_loc] = *it; Tri_ind[Tri_loc]=j; }
+	    ++Tri_loc;
+	  }
+	}
+	Tri_ptr[i+1] = Tri_loc;
+      }
+    }
+    
+    if (A(0,0) == T(0)) {
+      Tri_val[Tri_ptr[0]] = T(1);
+      GMM_WARNING2("pivot 0 is too small");
+    }
+    // Row by row: normalize row k by its pivot, then update the rows it references.
+    for (k = 0; k < n; k++) {
+      d = Tri_ptr[k];
+      z = T(gmm::real(Tri_val[d])); Tri_val[d] = z;
+      if (gmm::abs(z) <= max_pivot) { // warn first: z is replaced by 1 right after
+	GMM_WARNING2("pivot " << k << " is too small [" << gmm::abs(z) << "]");
+	Tri_val[d] = z = T(1);
+      }
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(z) * prec, R(1)));
+      
+      for (i = d + 1; i < Tri_ptr[k+1]; ++i) Tri_val[i] /= z;
+      for (i = d + 1; i < Tri_ptr[k+1]; ++i) {
+	zz = gmm::conj(Tri_val[i] * z);
+	h = Tri_ind[i];
+	g = i;
+	
+	for (j = Tri_ptr[h] ; j < Tri_ptr[h+1]; ++j)
+	  for ( ; g < Tri_ptr[k+1] && Tri_ind[g] <= Tri_ind[j]; ++g)
+	    if (Tri_ind[g] == Tri_ind[j])
+	      Tri_val[j] -= zz * Tri_val[g];
+      }
+    }
+    U = tm_type(&(Tri_val[0]), &(Tri_ind[0]), &(Tri_ptr[0]), n, mat_ncols(A));
+  }
+  /** Column-major input: runs the row-major routine on gmm::conjugated(A). */
+  template <typename Matrix>
+  void ildlt_precond<Matrix>::do_ildlt(const Matrix& A, col_major)
+  { do_ildlt(gmm::conjugated(A), row_major()); }
+  /** Copies v1 into v2, then: lower solve with conjugated(P.U), division by the D entries, upper solve with P.U. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    for (size_type row = 0; row != mat_nrows(P.U); ++row) v2[row] /= P.D(row);
+    gmm::upper_tri_solve(P.U, v2, true);
+  }
+  /** Forwards to mult(); there is no separate transposed implementation. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ildlt_precond<Matrix>& P,const V1 &v1,V2 &v2)
+  { mult(P, v1, v2); }
+  /** Copies v1 into v2, then lower solve with conjugated(P.U) followed by division by the D entries. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    for (size_type row = 0; row != mat_nrows(P.U); ++row) v2[row] /= P.D(row);
+  }
+  /** Copies v1 into v2, then upper solve with P.U. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2)
+  { copy(v1, v2); gmm::upper_tri_solve(P.U, v2, true);  }
+  /** Copies v1 into v2, then upper solve with P.U followed by division by the D entries. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ildlt_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2);
+    gmm::upper_tri_solve(P.U, v2, true);
+    for (size_type row = 0; row != mat_nrows(P.U); ++row) v2[row] /= P.D(row);
+  }
+  /** Copies v1 into v2, then lower solve with conjugated(P.U). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ildlt_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2)
+  { copy(v1, v2); gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); }
+
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_ildltt.h b/gmm/gmm_precond_ildltt.h
new file mode 100644
index 000000000..380106a40
--- /dev/null
+++ b/gmm/gmm_precond_ildltt.h
@@ -0,0 +1,174 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_ildltt.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 30, 2003.
+   @brief incomplete LDL^t (cholesky) preconditioner with fill-in and threshold.
+*/
+
+#ifndef GMM_PRECOND_ILDLTT_H
+#define GMM_PRECOND_ILDLTT_H
+
+// Store U = L^T and the inverse of D in indiag. On each row, the fill-in is
+// the number of non-zero elements on that row of the original matrix plus K,
+// except if the matrix is dense. In this case the fill-in is K on each row.
+
+#include "gmm_precond_ilut.h"
+
+namespace gmm {
+  /** Incomplete LDL^t (Cholesky-like) preconditioner with fill-in limit K
+      and relative drop threshold eps. Factor is held in U and indiag. */
+  template <typename Matrix>
+  class ildltt_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    
+    typedef rsvector<value_type> svector; // sparse row type used for U and for the working row
+
+    row_matrix<svector> U; // strictly upper part of the factor (only columns > row are written by do_ildltt)
+    std::vector<magnitude_type> indiag; // indiag[i] = 1 / real(pivot i), i.e. the inverse diagonal
+
+  protected:
+    size_type K; // at most K entries are stored per row of U
+    double eps; // entries below eps * (2-norm of the current row) are dropped
+
+    template<typename M> void do_ildltt(const M&, row_major); // actual factorization, row-wise access
+    void do_ildltt(const Matrix&, col_major); // forwards the conjugated matrix to the row-major version
+
+  public:
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) { // (re)build the factorization from A
+      if (k_ >= 0) K = k_; // a negative k_ keeps the current K
+      if (eps_ >= double(0)) eps = eps_; // a negative eps_ keeps the current eps
+      gmm::resize(U, mat_nrows(A), mat_ncols(A));
+      indiag.resize(std::min(mat_nrows(A), mat_ncols(A)));
+      do_ildltt(A, typename principal_orientation_type<typename // dispatch on the storage orientation of Matrix
+		linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    ildltt_precond(const Matrix& A, int k_, double eps_) // build immediately from A with the given K and eps
+      : U(mat_nrows(A),mat_ncols(A)), K(k_), eps(eps_) { build_with(A); }
+    ildltt_precond(void) { K=10; eps = 1E-7; } // defaults: K = 10, eps = 1e-7; call build_with() before use
+    ildltt_precond(size_type k_, double eps_) :  K(k_), eps(eps_) {} // parameters only; call build_with() before use
+    size_type memsize() const { // size estimate in bytes: object + stored values of U + indiag
+      return sizeof(*this) + nnz(U)*sizeof(value_type) + indiag.size() * sizeof(magnitude_type);
+    }    
+  };
+
+  template<typename Matrix> template<typename M> // Row-wise incomplete LDL^t factorization of A into U and indiag.
+  void ildltt_precond<Matrix>::do_ildltt(const M& A,row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A);
+    if (n == 0) return; // nothing to factor (also avoids reading A(0,0) below)
+    svector w(n); // working copy of the current row
+    T tmp;
+    R prec = default_tol(R()), max_pivot = gmm::abs(A(0,0)) * prec; // running threshold below which a pivot counts as too small
+
+    gmm::clear(U);
+    for (size_type i = 0; i < n; ++i) {
+      gmm::copy(mat_const_row(A, i), w);
+      double norm_row = gmm::vect_norm2(w); // reference magnitude for the relative drop test
+
+      for (size_type krow = 0, k; krow < w.nb_stored(); ++krow) { // eliminate entries whose column k is < i
+	typename svector::iterator wk = w.begin() + krow; // iterator is recomputed each pass because w is modified in the loop
+	if ((k = wk->c) >= i) break; // reached the diagonal / upper part: elimination done
+ 	if (gmm::is_complex(wk->e)) {
+ 	  tmp = gmm::conj(U(k, i))/indiag[k]; // not completely satisfactory ..
+ 	  gmm::add(scaled(mat_row(U, k), -tmp), w);
+ 	}
+ 	else {
+	  tmp = wk->e;
+	  if (gmm::abs(tmp) < eps * norm_row) { w.sup(k); --krow; } // small entry: sup(k) presumably removes it, so revisit the same position
+	  else { wk->e += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); } // w -= tmp * (row k of U)
+	}
+      }
+      tmp = w[i]; // pivot candidate for row i
+
+      if (gmm::abs(gmm::real(tmp)) <= max_pivot) // pivot too small: warn and substitute 1
+	{ GMM_WARNING2("pivot " << i << " is too small"); tmp = T(1); }
+
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1))); // threshold only grows, and each contribution is capped at 1
+      indiag[i] = R(1) / gmm::real(tmp); // store the inverse of the (real part of the) pivot
+      gmm::clean(w, eps * norm_row); // presumably removes entries below the drop threshold -- confirm in gmm
+      gmm::scale(w, T(indiag[i])); // normalise the row by the pivot
+      std::sort(w.begin(), w.end(), elt_rsvector_value_less_<T>()); // order entries by decreasing magnitude
+      typename svector::const_iterator wit = w.begin(), wite = w.end();
+      for (size_type nnu = 0; wit != wite; ++wit)  // copy to be optimized ...
+	if (wit->c > i) { if (nnu < K) { U(i, wit->c) = wit->e; ++nnu; } } // keep the K largest strictly-upper entries
+    }
+  }
+
+  template<typename Matrix> // Column-major overload of the factorization entry point.
+  void ildltt_precond<Matrix>::do_ildltt(const Matrix& A, col_major)
+  { do_ildltt(gmm::conjugated(A), row_major()); } // forwards the conjugated view of A to the row-major routine
+
+  template <typename Matrix, typename V1, typename V2> inline // Applies the ILDLTT preconditioner P to v1; result in v2.
+  void mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2); // v2 <- v1; every step below works in place on v2
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); // lower solve with conjugated(U); `true` presumably = unit diagonal, confirm in gmm_tri_solve.h
+    for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; // diagonal step: indiag holds inverse pivots, hence multiply
+    gmm::upper_tri_solve(P.U, v2, true); // upper solve with U
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed application of the ILDLTT preconditioner.
+  void transposed_mult(const ildltt_precond<Matrix>& P,const V1 &v1, V2 &v2)
+  { mult(P, v1, v2); } // simply forwards to mult(): same sequence of solves
+
+  template <typename Matrix, typename V1, typename V2> inline // Left half of mult(): lower solve followed by the diagonal step.
+  void left_mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); // lower solve with conjugated(U)
+    for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; // multiply by the stored inverse pivots
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Right half of mult(): upper solve only.
+  void right_mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2)
+  { copy(v1, v2); gmm::upper_tri_solve(P.U, v2, true); } // v2 <- v1, then in-place upper solve with U
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of left_mult().
+  void transposed_left_mult(const ildltt_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    gmm::upper_tri_solve(P.U, v2, true); // upper solve with U (left_mult uses the lower solve here)
+    for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; // multiply by the stored inverse pivots
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of right_mult().
+  void transposed_right_mult(const ildltt_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2)
+  { copy(v1, v2); gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); } // v2 <- v1, then in-place lower solve with conjugated(U)
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_ilu.h b/gmm/gmm_precond_ilu.h
new file mode 100644
index 000000000..9256b86a2
--- /dev/null
+++ b/gmm/gmm_precond_ilu.h
@@ -0,0 +1,280 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of ilu.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_precond_ilu.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author Yves Renard <yves.renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Incomplete LU without fill-in Preconditioner.
+*/
+
+#ifndef GMM_PRECOND_ILU_H
+#define GMM_PRECOND_ILU_H
+
+//
+// Notes: The idea behind a concrete Preconditioner such
+//        as Incomplete LU is to create a Preconditioner
+//        object that can be used in iterative methods.
+//
+
+#include "gmm_precond.h"
+
+namespace gmm {
+  /** Incomplete LU without fill-in Preconditioner (factors kept in CSR form). */
+  template <typename Matrix>
+  class ilu_precond {
+    // NOTE(review): U and L are non-owning views on the vectors below; copies presumably alias the source -- confirm.
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef csr_matrix_ref<value_type *, size_type *, size_type *, 0> tm_type;
+
+    tm_type U, L;   // CSR views of the factors, set at the end of do_ilu
+    bool invert;    // true when the factors were computed from transposed(A) (column-major input)
+  protected :
+    std::vector<value_type> L_val, U_val;               // CSR values of L and U
+    std::vector<size_type> L_ind, U_ind, L_ptr, U_ptr;  // CSR column indices and row pointers
+ 
+    template<typename M> void do_ilu(const M& A, row_major);  // actual factorization, row-wise access
+    void do_ilu(const Matrix& A, col_major);                  // factors transposed(A) and sets invert
+
+  public:
+    
+    size_type nrows(void) const { return mat_nrows(L); }
+    size_type ncols(void) const { return mat_ncols(U); }
+    
+    void build_with(const Matrix& A) {  // (re)build the factorization from A
+      invert = false;
+      L_ptr.resize(mat_nrows(A)+1);     // one row pointer per row plus the end sentinel
+      U_ptr.resize(mat_nrows(A)+1);
+      do_ilu(A, typename principal_orientation_type<typename  // dispatch on the storage orientation
+	      linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    ilu_precond(const Matrix& A) { build_with(A); }  // build_with() sets invert
+    ilu_precond(void) : invert(false) {}  // never leave invert indeterminate; call build_with() before use
+    size_type memsize() const {  // size estimate in bytes: object + all CSR arrays
+      return sizeof(*this) + 
+	(L_val.size()+U_val.size()) * sizeof(value_type) + 
+	(L_ind.size()+L_ptr.size()) * sizeof(size_type) +
+	(U_ind.size()+U_ptr.size()) * sizeof(size_type); 
+    }
+  };
+
+  template <typename Matrix> template <typename M>  // Row-wise ILU(0) of A into the CSR arrays L_* and U_*.
+  void ilu_precond<Matrix>::do_ilu(const M& A, row_major) {
+    typedef typename linalg_traits<Matrix>::storage_type store_type;
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type L_loc = 0, U_loc = 0, n = mat_nrows(A), i, j, k;
+    if (n == 0) return;  // nothing to factor (also avoids reading A(0,0) below)
+    L_ptr[0] = 0; U_ptr[0] = 0;
+    R prec = default_tol(R());
+    R max_pivot = gmm::abs(A(0,0)) * prec;  // running threshold below which a pivot counts as too small
+
+    // Pass 0 only counts the entries of L and U; pass 1 allocates and fills them.
+    for (int count = 0; count < 2; ++count) {
+      if (count) { 
+	L_val.resize(L_loc); L_ind.resize(L_loc);
+	U_val.resize(U_loc); U_ind.resize(U_loc);
+      }
+      L_loc = U_loc = 0;
+      for (i = 0; i < n; ++i) {
+	typedef typename linalg_traits<M>::const_sub_row_type row_type;
+	row_type row = mat_const_row(A, i);
+	typename linalg_traits<typename org_type<row_type>::t>::const_iterator
+	  it = vect_const_begin(row), ite = vect_const_end(row);
+	
+	if (count) { U_val[U_loc] = T(0); U_ind[U_loc] = i; }
+	++U_loc; // diagonal element: always stored first in each row of U
+	
+	for (k = 0; it != ite && k < 1000; ++it, ++k) {
+	  // if a plain (dense) row is present, only its first 1000 elements
+	  // are retained. ---> a sort should be done.
+	  j = index_of_it(it, k, store_type());
+	  if (j < i) {  // strictly lower part goes to L
+	    if (count) { L_val[L_loc] = *it; L_ind[L_loc] = j; }
+	    L_loc++;
+	  }
+	  else if (i == j) {  // diagonal overwrites the slot reserved above
+	    if (count) U_val[U_loc-1] = *it;
+	  }
+	  else {  // strictly upper part goes to U
+	    if (count) { U_val[U_loc] = *it; U_ind[U_loc] = j; }
+	    U_loc++;
+	  }
+	}
+        L_ptr[i+1] = L_loc; U_ptr[i+1] = U_loc;
+      }
+    }
+    
+    if (A(0,0) == T(0)) {  // first pivot is exactly zero: substitute 1
+      U_val[U_ptr[0]] = T(1);
+      GMM_WARNING2("pivot 0 is too small");
+    }
+
+    size_type qn, pn, rn;
+    for (i = 1; i < n; i++) {
+
+      pn = U_ptr[i];  // position of the diagonal entry of row i
+      if (gmm::abs(U_val[pn]) <= max_pivot) {
+	U_val[pn] = T(1);
+	GMM_WARNING2("pivot " << i << " is too small");
+      }
+      max_pivot = std::max(max_pivot,
+			   std::min(gmm::abs(U_val[pn]) * prec, R(1)));
+
+      for (j = L_ptr[i]; j < L_ptr[i+1]; j++) {  // eliminate with every earlier row present in L(i,:)
+	pn = U_ptr[L_ind[j]];
+	
+	T multiplier = (L_val[j] /= U_val[pn]);  // L(i,k) = A(i,k) / U(k,k)
+	
+	qn = j + 1;
+	rn = U_ptr[i];
+	
+	for (pn++; pn < U_ptr[L_ind[j]+1] && U_ind[pn] < i; pn++) {  // update the remaining part of L(i,:)
+	  while (qn < L_ptr[i+1] && L_ind[qn] < U_ind[pn])
+	    qn++;
+	  if (qn < L_ptr[i+1] && U_ind[pn] == L_ind[qn])  // only existing entries are updated (no fill-in)
+	    L_val[qn] -= multiplier * U_val[pn];
+	}
+	for (; pn < U_ptr[L_ind[j]+1]; pn++) {  // update U(i,:), diagonal included
+	  while (rn < U_ptr[i+1] && U_ind[rn] < U_ind[pn])
+	    rn++;
+	  if (rn < U_ptr[i+1] && U_ind[pn] == U_ind[rn])
+	    U_val[rn] -= multiplier * U_val[pn];
+	}
+      }
+    }
+
+    // data() instead of &v[0]: L_val/L_ind are empty when A has no strictly-lower entry, and v[0] would be UB.
+    L = tm_type(L_val.data(), L_ind.data(), L_ptr.data(), n, mat_ncols(A));
+    U = tm_type(U_val.data(), U_ind.data(), U_ptr.data(), n, mat_ncols(A));
+  }
+  
+  template <typename Matrix> // Column-major overload of the factorization entry point.
+  void ilu_precond<Matrix>::do_ilu(const Matrix& A, col_major) {
+    do_ilu(gmm::transposed(A), row_major()); // factor the transposed view row-wise
+    invert = true; // tells the mult functions that L and U describe transposed(A)
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Applies the ILU preconditioner P to v1; result in v2.
+  void mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2); // v2 <- v1; the solves below work in place on v2
+    if (P.invert) { // factors were computed from transposed(A): use their transposes
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // `false`/`true` presumably = non-unit/unit diagonal, confirm in gmm_tri_solve.h
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+    else { // factors were computed from A itself
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed application: same as mult() with the two branches swapped.
+  void transposed_mult(const ilu_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    gmm::copy(v1, v2); // v2 <- v1; the solves below work in place on v2
+    if (P.invert) { // factors describe transposed(A): use them directly
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+    else { // factors describe A: use their transposes
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Left half of mult(): the lower triangular solve only.
+  void left_mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // factors describe transposed(A)
+    else gmm::lower_tri_solve(P.L, v2, true); // factors describe A
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Right half of mult(): the upper triangular solve only.
+  void right_mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); // factors describe transposed(A)
+    else gmm::upper_tri_solve(P.U, v2, false); // factors describe A
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of left_mult(): an upper solve.
+  void transposed_left_mult(const ilu_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::upper_tri_solve(P.U, v2, false); // factors describe transposed(A): use U directly
+    else gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); // factors describe A: use transposed(L)
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of right_mult(): a lower solve.
+  void transposed_right_mult(const ilu_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::lower_tri_solve(P.L, v2, true); // factors describe transposed(A): use L directly
+    else gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // factors describe A: use transposed(U)
+  }
+
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_ilut.h b/gmm/gmm_precond_ilut.h
new file mode 100644
index 000000000..0860324f0
--- /dev/null
+++ b/gmm/gmm_precond_ilut.h
@@ -0,0 +1,263 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of ilut.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+#ifndef GMM_PRECOND_ILUT_H
+#define GMM_PRECOND_ILUT_H
+
+/**@file gmm_precond_ilut.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>, Lie-Quan Lee <llee@osl.iu.edu>
+   @date June 5, 2003.
+   @brief ILUT:  Incomplete LU with threshold and K fill-in Preconditioner.
+*/
+
+/*
+  Performance comparison of SSOR, ILU and ILUT on the sherman 5 matrix of the
+  Harwell-Boeing collection, on a Sun Ultra 30 UPA/PCI (UltraSPARC-II 296MHz)
+  Preconditioner & Factorization time  &  Number of Iterations \\ \hline
+  SSOR        &   0.010577  & 41 \\
+  ILU         &   0.019336  & 32 \\
+  ILUT with 0 fill-in and threshold of 1.0e-6 & 0.343612 &  23 \\
+  ILUT with 5 fill-in and threshold of 1.0e-6 & 0.343612 &  18 \\ \hline
+*/
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  template<typename T> struct elt_rsvector_value_less_ { // Comparator on sparse-vector entries: larger magnitude sorts first.
+    inline bool operator()(const elt_rsvector_<T>& a, 
+			   const elt_rsvector_<T>& b) const
+    { return (gmm::abs(a.e) > gmm::abs(b.e)); } // compares |value| only (member e), giving a descending order under std::sort
+  };
+
+  /** Incomplete LU with threshold and K fill-in Preconditioner.
+
+  The algorithm of ILUT(A, 0, 1.0e-6) is slower than ILU(A). If no
+  fill-in is allowed, you can use ILU instead of ILUT.
+
+  Notes: The idea behind a concrete Preconditioner such as ilut is to
+  create a Preconditioner object to use in iterative methods.
+  */
+  template <typename Matrix>
+  class ilut_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef wsvector<value_type> _wsvector;   // working row type used during the factorization
+    typedef rsvector<value_type> _rsvector;   // storage row type of the factors
+    typedef row_matrix<_rsvector> LU_Matrix;
+
+    bool invert;     // true when the factors were computed from transposed(A) (column-major input)
+    LU_Matrix L, U;  // L: entries left of the diagonal; U: diagonal and entries right of it
+
+  protected:
+    size_type K;     // at most K entries kept per row of L, and K plus the diagonal per row of U
+    double eps;      // entries below eps * (2-norm of the current row) are dropped
+
+    template<typename M> void do_ilut(const M&, row_major);  // actual factorization, row-wise access
+    void do_ilut(const Matrix&, col_major);                  // factors transposed(A) and sets invert
+
+  public:
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) {  // (re)build the factorization from A
+      if (k_ >= 0) K = k_;                // a negative k_ keeps the current K
+      if (eps_ >= double(0)) eps = eps_;  // a negative eps_ keeps the current eps
+      invert = false;
+      gmm::resize(L, mat_nrows(A), mat_ncols(A));
+      gmm::resize(U, mat_nrows(A), mat_ncols(A));
+      do_ilut(A, typename principal_orientation_type<typename  // dispatch on the storage orientation
+	      linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    ilut_precond(const Matrix& A, int k_, double eps_)  // build immediately; build_with() sets invert
+      : L(mat_nrows(A), mat_ncols(A)), U(mat_nrows(A), mat_ncols(A)),
+	K(k_), eps(eps_) { build_with(A); }
+    ilut_precond(size_type k_, double eps_) : invert(false), K(k_), eps(eps_) {}  // invert must never stay indeterminate
+    ilut_precond(void) : invert(false) { K = 10; eps = 1E-7; }  // defaults: K = 10, eps = 1e-7
+    size_type memsize() const {  // size estimate in bytes: object + stored values of L and U
+      return sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type);
+    }
+  };
+
+  template<typename Matrix> template<typename M> // Row-wise ILUT factorization of A into L and U.
+  void ilut_precond<Matrix>::do_ilut(const M& A, row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    
+    size_type n = mat_nrows(A);
+    if (n == 0) return; // nothing to factor (also avoids reading A(0,0) below)
+    std::vector<T> indiag(n); // indiag[i] = 1 / pivot of row i
+    _wsvector w(mat_ncols(A)); // working copy of the current row
+    _rsvector ww(mat_ncols(A)), wL(mat_ncols(A)), wU(mat_ncols(A)); // ww: magnitude-sorted copy; wL/wU: entries selected for L/U
+    T tmp;
+    gmm::clear(U); gmm::clear(L);
+    R prec = default_tol(R()); 
+    R max_pivot = gmm::abs(A(0,0)) * prec; // running threshold below which a pivot counts as too small
+
+    for (size_type i = 0; i < n; ++i) {
+      gmm::copy(mat_const_row(A, i), w);
+      double norm_row = gmm::vect_norm2(w); // reference magnitude for the relative drop test
+
+      typename _wsvector::iterator wkold = w.end(); // last entry already handled (w.end() = none yet)
+      for (typename _wsvector::iterator wk = w.begin(); // eliminate entries whose column is < i
+	   wk != w.end() && wk->first < i; ) {
+	size_type k = wk->first;
+	tmp = (wk->second) * indiag[k]; // multiplier for row k
+	if (gmm::abs(tmp) < eps * norm_row) w.erase(k); // small multiplier: drop the entry
+	else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); } // w -= tmp * (row k of U)
+	if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; } // w was modified: resume just after the last handled entry
+	if (wk != w.end() && wk->first == k) // entry k still present: mark it handled and step past it
+	  { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; }
+      }
+      tmp = w[i]; // pivot candidate for row i
+
+      if (gmm::abs(tmp) <= max_pivot) { // pivot too small: warn and substitute 1
+	GMM_WARNING2("pivot " << i << " too small. try with ilutp ?");
+	w[i] = tmp = T(1);
+      }
+
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1))); // threshold only grows, and each contribution is capped at 1
+      indiag[i] = T(1) / tmp;
+      gmm::clean(w, eps * norm_row); // presumably removes entries below the drop threshold -- confirm in gmm
+      gmm::copy(w, ww);
+      std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_<T>()); // order entries by decreasing magnitude
+      typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end();
+
+      size_type nnl = 0, nnu = 0;    
+      wL.base_resize(K); wU.base_resize(K+1); // room for K entries in L and K plus the diagonal in U
+      typename _rsvector::iterator witL = wL.begin(), witU = wU.begin();
+      for (; wit != wite; ++wit) 
+	if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } } // left of the diagonal: keep the K largest
+	else { if (nnu < K  || wit->c == i) { *witU++ = *wit; ++nnu; } } // diagonal is always kept
+      wL.base_resize(nnl); wU.base_resize(nnu); // shrink to the number of entries actually kept
+      std::sort(wL.begin(), wL.end()); // default ordering of the entries, presumably by column index -- confirm
+      std::sort(wU.begin(), wU.end());
+      gmm::copy(wL, L.row(i));
+      gmm::copy(wU, U.row(i));
+    }
+
+  }
+
+  template<typename Matrix> // Column-major overload of the factorization entry point.
+  void ilut_precond<Matrix>::do_ilut(const Matrix& A, col_major) {
+    do_ilut(gmm::transposed(A), row_major()); // factor the transposed view row-wise
+    invert = true; // tells the mult functions that L and U describe transposed(A)
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Applies the ILUT preconditioner P to v1; result in v2.
+  void mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2); // v2 <- v1; the solves below work in place on v2
+    if (P.invert) { // factors were computed from transposed(A): use their transposes
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // `false`/`true` presumably = non-unit/unit diagonal, confirm in gmm_tri_solve.h
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+    else { // factors were computed from A itself
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed application: same as mult() with the two branches swapped.
+  void transposed_mult(const ilut_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    gmm::copy(v1, v2); // v2 <- v1; the solves below work in place on v2
+    if (P.invert) { // factors describe transposed(A): use them directly
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+    else { // factors describe A: use their transposes
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Left half of mult(): the lower triangular solve only.
+  void left_mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // factors describe transposed(A)
+    else gmm::lower_tri_solve(P.L, v2, true); // factors describe A
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Right half of mult(): the upper triangular solve only.
+  void right_mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); // factors describe transposed(A)
+    else gmm::upper_tri_solve(P.U, v2, false); // factors describe A
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of left_mult(): an upper solve.
+  void transposed_left_mult(const ilut_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::upper_tri_solve(P.U, v2, false); // factors describe transposed(A): use U directly
+    else gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); // factors describe A: use transposed(L)
+  }
+
+  template <typename Matrix, typename V1, typename V2> inline // Transposed counterpart of right_mult(): a lower solve.
+  void transposed_right_mult(const ilut_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    copy(v1, v2); // v2 <- v1; the solve below works in place
+    if (P.invert) gmm::lower_tri_solve(P.L, v2, true); // factors describe transposed(A): use L directly
+    else gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); // factors describe A: use transposed(U)
+  }
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_ilutp.h b/gmm/gmm_precond_ilutp.h
new file mode 100644
index 000000000..d867d6053
--- /dev/null
+++ b/gmm/gmm_precond_ilutp.h
@@ -0,0 +1,284 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_ilutp.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 14, 2004.
+   @brief ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and
+   column pivoting.
+
+   
+*/
+#ifndef GMM_PRECOND_ILUTP_H
+#define GMM_PRECOND_ILUTP_H
+
+#include "gmm_precond_ilut.h"
+
+namespace gmm {
+
+  /**
+     ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and
+     column pivoting.
+   
+     See Yousef Saad, Iterative Methods for
+     sparse linear systems, PWS Publishing Company, section 10.4.4
+
+      TODO : store the permutation by cycles to avoid the temporary vector
+  */
+  template <typename Matrix>
+  class ilutp_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef wsvector<value_type> _wsvector;
+    typedef rsvector<value_type> _rsvector;
+    typedef row_matrix<_rsvector> LU_Matrix;
+    typedef col_matrix<_wsvector> CLU_Matrix;
+    // Public state read by the free mult() / left_mult() / ... functions.
+    bool invert;     // true once the factors were built from transposed(A)
+    LU_Matrix L, U;  // incomplete factors filled by do_ilutp()
+    gmm::unsorted_sub_index indperm;     // pivoting permutation
+    gmm::unsorted_sub_index indperminv;  // inverse of indperm
+    mutable std::vector<value_type> temporary;  // scratch vector for the solves
+
+  protected:
+    size_type K;  // at most K entries kept per row of L, K+1 per row of U
+    double eps;   // drop threshold, relative to the 2-norm of the current row
+    // Factorization kernels, selected by the storage orientation of Matrix.
+    template<typename M> void do_ilutp(const M&, row_major);
+    void do_ilutp(const Matrix&, col_major);
+
+  public:
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) {
+      if (k_ >= 0) K = k_;                // a negative k_ keeps the current K
+      if (eps_ >= double(0)) eps = eps_;  // a negative eps_ keeps the current eps
+      invert = false;  // the col_major kernel sets it back to true
+      gmm::resize(L, mat_nrows(A), mat_ncols(A));
+      gmm::resize(U, mat_nrows(A), mat_ncols(A));
+      do_ilutp(A, typename principal_orientation_type<typename
+              linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    // Every constructor initializes `invert`, so it is never read indeterminate.
+    ilutp_precond(const Matrix& A, size_type k_, double eps_)
+      : invert(false), L(mat_nrows(A), mat_ncols(A)),
+        U(mat_nrows(A), mat_ncols(A)), K(k_), eps(eps_) { build_with(A); }
+    ilutp_precond(int k_, double eps_) : invert(false), K(k_), eps(eps_) {}
+    ilutp_precond(void) : invert(false), K(10), eps(1E-7) {}
+    size_type memsize() const {  // object size plus the stored entries of L and U
+      return sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type); }
+  };
+
+  // Row-major ILUTP kernel: fills L, U and the pivoting permutations row by row.
+  template<typename Matrix> template<typename M> // M may be a transposed view of Matrix
+  void ilutp_precond<Matrix>::do_ilutp(const M& A, row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A);
+    CLU_Matrix CU(n,n);  // column-wise record of the entries written to U
+    if (n == 0) return;
+    std::vector<T> indiag(n);  // indiag[i] = 1 / pivot chosen for row i
+    temporary.resize(n);
+    std::vector<size_type> ipvt(n), ipvtinv(n);
+    for (size_type i = 0; i < n; ++i) ipvt[i] = ipvtinv[i] = i;
+    indperm = unsorted_sub_index(ipvt);
+    indperminv = unsorted_sub_index(ipvtinv);
+    _wsvector w(mat_ncols(A));  // working copy of the current row
+    _rsvector ww(mat_ncols(A));  // copy of w that gets sorted for pivot selection
+    // tmp holds the current multiplier, then the pivot value of the row.
+    T tmp = T(0);
+    gmm::clear(L); gmm::clear(U);
+    R prec = default_tol(R()); // tolerance used to bound acceptable pivots
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+
+    for (size_type i = 0; i < n; ++i) {
+
+      copy(sub_vector(mat_const_row(A, i), indperm), w);
+      double norm_row = gmm::vect_norm2(mat_const_row(A, i)); // scales the drop threshold
+
+      typename _wsvector::iterator wkold = w.end();  // trails wk; w.end() means "none yet"
+      for (typename _wsvector::iterator wk = w.begin();
+	   wk != w.end() && wk->first < i; )  {
+	size_type k = wk->first;
+	tmp = (wk->second) * indiag[k];
+	if (gmm::abs(tmp) < eps * norm_row) w.erase(k); // drop a small multiplier
+	else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); }
+	if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; }
+	if (wk != w.end() && wk->first == k)
+	  { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; }
+      }
+      // Drop what is still below the threshold, then sort a copy to choose the pivot.
+      gmm::clean(w, eps * norm_row);
+      gmm::copy(w, ww);
+
+      std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_<T>());
+      typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end();
+      size_type ip = size_type(-1);
+      // The first entry of the sorted row with column >= i is the pivot candidate.
+      for (; wit != wite; ++wit)
+	if (wit->c >= i) { ip = wit->c; tmp = wit->e; break; }
+      if (ip == size_type(-1) || gmm::abs(tmp) <= max_pivot)
+	{ GMM_WARNING2("pivot " << i << " too small"); ip=i; ww[i]=tmp=T(1); }
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1)));
+      indiag[i] = T(1) / tmp;
+      wit = ww.begin();
+      // NOTE(review): wite predates the possible ww[i] write above - confirm it stays valid.
+      size_type nnl = 0, nnu = 0;
+      L[i].base_resize(K); U[i].base_resize(K+1);
+      typename _rsvector::iterator witL = L[i].begin(), witU = U[i].begin();
+      for (; wit != wite; ++wit) {
+	if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } }
+	else if (nnu < K || wit->c == i)
+	  { CU(i, wit->c) = wit->e; *witU++ = *wit; ++nnu; }
+      }
+      L[i].base_resize(nnl); U[i].base_resize(nnu);
+      std::sort(L[i].begin(), L[i].end());
+      std::sort(U[i].begin(), U[i].end());
+      // Column pivoting: exchange indices i and ip in every row of U touching either column.
+      if (ip != i) {
+	typename _wsvector::const_iterator iti = CU.col(i).begin();
+	typename _wsvector::const_iterator itie = CU.col(i).end();
+	typename _wsvector::const_iterator itp = CU.col(ip).begin();
+	typename _wsvector::const_iterator itpe = CU.col(ip).end();
+	// Walk both columns of CU in step so that each affected row is swapped once.
+	while (iti != itie && itp != itpe) {
+	  if (iti->first < itp->first)
+	    { U.row(iti->first).swap_indices(i, ip); ++iti; }
+	  else if (iti->first > itp->first)
+	    { U.row(itp->first).swap_indices(i,ip);++itp; }
+	  else
+	    { U.row(iti->first).swap_indices(i, ip); ++iti; ++itp; }
+	}
+	// Leftover rows of whichever column was not exhausted.
+	for( ; iti != itie; ++iti) U.row(iti->first).swap_indices(i, ip);
+	for( ; itp != itpe; ++itp) U.row(itp->first).swap_indices(i, ip);
+
+	CU.swap_col(i, ip);
+	// Keep indperm / indperminv and their index arrays in step with the swap.
+	indperm.swap(i, ip);
+	indperminv.swap(ipvt[i], ipvt[ip]);
+	std::swap(ipvtinv[ipvt[i]], ipvtinv[ipvt[ip]]);
+	std::swap(ipvt[i], ipvt[ip]);
+      }
+    }
+  }
+  // Column-major entry point: factor transposed(A) by rows, then flag it in `invert`.
+  template<typename Matrix>
+  void ilutp_precond<Matrix>::do_ilutp(const Matrix& A, col_major) {
+    this->do_ilutp(gmm::transposed(A), row_major());
+    this->invert = true;
+  }
+  // Applies the ILUTP preconditioner P to v1 and stores the result in v2.
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (P.invert) {
+      gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    // Otherwise solve in the scratch vector and copy out through indperminv.
+    gmm::copy(v1, P.temporary);
+    gmm::lower_tri_solve(P.L, P.temporary, true);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+  // Transposed application of the ILUTP preconditioner P: v1 in, v2 out.
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ilutp_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    if (!P.invert) {
+      gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    // P.invert set: solve in the scratch vector and copy out through indperminv.
+    gmm::copy(v1, P.temporary);
+    gmm::lower_tri_solve(P.L, P.temporary, true);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+  // Lower-triangular solve of the ILUTP application only.
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (!P.invert) {
+      copy(v1, v2);
+      gmm::lower_tri_solve(P.L, v2, true);
+      return;
+    }
+    // P.invert set: read v1 through indperm, then solve with transposed(U).
+    gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+  }
+  // Upper-triangular solve of the ILUTP application only.
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (P.invert) {
+      copy(v1, v2);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    // Otherwise solve with U in the scratch vector and copy out through indperminv.
+    copy(v1, P.temporary);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+  // Upper-triangular solve of the transposed ILUTP application only.
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ilutp_precond<Matrix>& P, const V1 &v1,
+                            V2 &v2) {
+    if (!P.invert) {
+      copy(v1, v2);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    // P.invert set: solve with U in the scratch vector, copy out through indperminv.
+    copy(v1, P.temporary);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+  // Lower-triangular solve of the transposed ILUTP application only.
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ilutp_precond<Matrix>& P, const V1 &v1,
+                             V2 &v2) {
+    if (P.invert) {
+      copy(v1, v2);
+      gmm::lower_tri_solve(P.L, v2, true);
+      return;
+    }
+    // Otherwise read v1 through indperm, then solve with transposed(U).
+    gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+  }
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_precond_mr_approx_inverse.h b/gmm/gmm_precond_mr_approx_inverse.h
new file mode 100644
index 000000000..7504f48fb
--- /dev/null
+++ b/gmm/gmm_precond_mr_approx_inverse.h
@@ -0,0 +1,149 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+
+// This file is a modified version of approximate_inverse.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_precond_mr_approx_inverse.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Approximate inverse via MR iteration.
+*/
+
+#ifndef GMM_PRECOND_MR_APPROX_INVERSE_H
+#define GMM_PRECOND_MR_APPROX_INVERSE_H
+
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Approximate inverse via MR iteration (see P301 of Saad book).
+   */
+  template <typename Matrix>
+  struct mr_approx_inverse_precond {
+    // Scalar, magnitude and storage-orientation types derived from Matrix.
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    typedef typename principal_orientation_type<typename
+      linalg_traits<Matrix>::sub_orientation>::potype sub_orientation;
+    typedef wsvector<value_type> VVector;
+    typedef col_matrix<VVector> MMatrix;
+
+    MMatrix M;                 // approximate inverse, stored by columns
+    size_type nb_it;           // iterations performed per column by build_with()
+    magnitude_type threshold;  // relative drop tolerance used by build_with()
+    // Computes M from A with the current nb_it / threshold.
+    void build_with(const Matrix& A);
+    mr_approx_inverse_precond(const Matrix& A, size_type nb_it_,
+                              magnitude_type threshold_)
+      : M(mat_nrows(A), mat_ncols(A)), nb_it(nb_it_), threshold(threshold_)
+    { build_with(A); }
+    mr_approx_inverse_precond(void)
+      : nb_it(5), threshold(magnitude_type(1E-7)) {}
+    mr_approx_inverse_precond(size_type nb_it_, magnitude_type threshold_)
+      : nb_it(nb_it_), threshold(threshold_) {}
+    const MMatrix &approx_inverse(void) const { return M; }
+  };
+  // Preconditioner application: v2 = P.M * v1, P.M being the stored approximate inverse.
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const mr_approx_inverse_precond<Matrix>& P, const V1 &v1, V2 &v2)
+  { mult(P.M, v1, v2); }
+  // Transposed application: multiplies v1 by gmm::conjugated(P.M) into v2.
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const mr_approx_inverse_precond<Matrix>& P,
+		       const V1 &v1,V2 &v2)
+  { mult(gmm::conjugated(P.M), v1, v2); }
+  // Builds M one column at a time, running nb_it correction steps per column.
+  template <typename Matrix>
+  void mr_approx_inverse_precond<Matrix>::build_with(const Matrix& A) {
+    gmm::resize(M, mat_nrows(A), mat_ncols(A));
+    typedef value_type T;
+    typedef magnitude_type R;
+    VVector m(mat_ncols(A)),r(mat_ncols(A)),ei(mat_ncols(A)),Ar(mat_ncols(A));
+    R norm_sqr = mat_euclidean_norm_sqr(A);  // guarded: an all-zero A gave 0/0 here
+    T alpha = (norm_sqr > R(0)) ? T(mat_trace(A) / norm_sqr) : T(1);
+    if (alpha == T(0)) alpha = T(1);
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      gmm::clear(m); gmm::clear(ei);
+      m[i] = alpha;  // initial guess for column i: alpha * e_i
+      ei[i] = T(1);
+      // Each step: r = e_i - A m, then m += (<r,Ar> / <Ar,Ar>) r, small entries dropped.
+      for (size_type j = 0; j < nb_it; ++j) {
+        gmm::mult(A, gmm::scaled(m, T(-1)), r);
+        gmm::add(ei, r);
+        gmm::mult(A, r, Ar);
+        T nAr = vect_sp(Ar,Ar);  // computed once and reused as the divisor below
+        if (gmm::abs(nAr) > R(0)) {
+          gmm::add(gmm::scaled(r, gmm::safe_divide(vect_sp(r, Ar), nAr)), m);
+          gmm::clean(m, threshold * gmm::vect_norm2(m));
+        } else gmm::clear(m);
+      }
+      if (gmm::vect_norm2(m) == R(0)) m[i] = alpha;  // never store an empty column
+      gmm::copy(m, M.col(i));
+    }
+  }
+}
+
+#endif 
+
diff --git a/gmm/gmm_range_basis.h b/gmm/gmm_range_basis.h
new file mode 100644
index 000000000..05a71a0c8
--- /dev/null
+++ b/gmm/gmm_range_basis.h
@@ -0,0 +1,499 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2009-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_range_basis.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date March 10, 2009.
+   @brief Extract a basis of the range of a (large sparse) matrix from the
+          columns of this matrix.
+*/
+#ifndef GMM_RANGE_BASIS_H
+#define GMM_RANGE_BASIS_H
+#include "gmm_dense_qr.h"
+#include "gmm_dense_lu.h"
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include <set>
+#include <list>
+
+
+namespace gmm {
+
+  // QR iteration on diag / sdiag (both taken by value); the final diag is copied to eigval.
+  template <typename T, typename VECT, typename MAT1>
+  void tridiag_qr_algorithm
+  (std::vector<typename number_traits<T>::magnitude_type> diag,
+   std::vector<T> sdiag, const VECT &eigval_, const MAT1 &eigvect_,
+   bool compvect, tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename number_traits<T>::magnitude_type R;
+    // The outputs arrive as const references; strip the const to write to them.
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT1 &eigvect = const_cast<MAT1 &>(eigvect_);
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+
+    const size_type dim = diag.size();
+    size_type q = 0, p, sweeps = 0;
+    if (dim == 0) return;
+    if (dim == 1) { eigval[0] = gmm::real(diag[0]); return; }
+    symmetric_qr_stop_criterion(diag, sdiag, p, q, tol);
+    while (q < dim) {
+      // The active block starts at p and holds dim-p-q entries.
+      const size_type len = dim - p - q;
+      sub_interval rows(p, len), all_cols(0, mat_ncols(eigvect));
+      sub_interval vec_cols = compvect ? sub_interval(p, len)
+                                       : sub_interval(0,0);
+      symmetric_Wilkinson_qr_step(sub_vector(diag, rows),
+                                  sub_vector(sdiag, rows),
+                                  sub_matrix(eigvect, all_cols, vec_cols),
+                                  compvect);
+      symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3));
+      ++sweeps;
+      GMM_ASSERT1(sweeps < dim*100, "QR algorithm failed.");
+    }
+    gmm::copy(diag, eigval);
+  }
+
+  // Range basis with a restarted Lanczos method: erases entries of `columns` one at a time.
+  template <typename Mat>
+  void range_basis_eff_Lanczos(const Mat &BB, std::set<size_type> &columns,
+                       double EPS=1E-12) {
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = columns.size(), k;
+    col_matrix< rsvector<T> > B(mat_nrows(BB), mat_ncols(BB));
+    // B receives the selected columns of BB, each scaled to unit 2-norm.
+    k = 0;
+    for (TAB::iterator it = columns.begin(); it!=columns.end(); ++it, ++k){
+      gmm::copy(scaled(mat_col(BB, *it), T(1)/vect_norm2(mat_col(BB, *it))),
+                mat_col(B, *it));
+    }
+    std::vector<T> w(mat_nrows(B));
+    size_type restart = 120;  // Lanczos steps per restart cycle
+    std::vector<T> sdiag(restart);
+    std::vector<R> eigval(restart), diag(restart);
+    dense_matrix<T> eigvect(restart, restart);
+
+    R rho = R(-1), rho2;  // rho < 0 means "not estimated yet"
+    while (nc_r) {
+
+      std::vector<T> v(nc_r), v0(nc_r), wl(nc_r);
+      dense_matrix<T> lv(nc_r, restart);  // column i keeps the i-th Lanczos vector
+
+      if (rho < R(0)) { // Estimate of the spectral radius of B^* B
+        gmm::fill_random(v);
+        for (size_type i = 0; i < 100; ++i) {  // 100 iterations of v <- B^* B v
+          gmm::scale(v, T(1)/vect_norm2(v));
+          gmm::copy(v, v0);
+          k = 0; gmm::clear(w);
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            add(scaled(mat_col(B, *it), v[k]), w);
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            v[k] = vect_hp(w, mat_col(B, *it));
+          rho = gmm::abs(vect_hp(v, v0) / vect_hp(v0, v0));
+        }
+        rho *= R(2);  // the value used below is twice the estimate
+      }
+
+      // Computing vectors of the null space of B^* B with restarted Lanczos
+      rho2 = 0;
+      gmm::fill_random(v);
+      size_type iter = 0;
+      for(;;++iter) {
+        R rho_old = rho2;
+        R beta = R(0), alpha;
+        gmm::scale(v, T(1)/vect_norm2(v));
+        size_type eff_restart = restart;
+    if (sdiag.size() != restart) {
+      sdiag.resize(restart); eigval.resize(restart); diag.resize(restart); gmm::resize(eigvect, restart, restart);
+      gmm::resize(lv, nc_r, restart);
+    }
+        // Each step forms wl = rho*v - B^* B v - beta*v0 over the kept columns.
+        for (size_type i = 0; i < restart; ++i) { // Lanczos iterations
+          gmm::copy(v, mat_col(lv, i));
+          gmm::clear(w);
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            add(scaled(mat_col(B, *it), v[k]), w);
+
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            wl[k] = v[k]*rho - vect_hp(w, mat_col(B, *it)) - beta*v0[k];
+          alpha = gmm::real(vect_hp(wl, v));
+          diag[i] = alpha;
+          gmm::add(gmm::scaled(v, -alpha), wl);
+          sdiag[i] = beta = vect_norm2(wl);
+          gmm::copy(v, v0);
+      if (beta < EPS) { eff_restart = i+1; break; }  // stop early, keep i+1 steps
+      gmm::copy(gmm::scaled(wl, T(1) / beta), v);
+    }
+    if (eff_restart != restart) {
+      sdiag.resize(eff_restart); eigval.resize(eff_restart); diag.resize(eff_restart);
+      gmm::resize(eigvect, eff_restart, eff_restart); gmm::resize(lv, nc_r, eff_restart);
+    }
+        tridiag_qr_algorithm(diag, sdiag, eigval, eigvect, true);
+        // Take the eigenvalue of largest magnitude; its vector, mapped through lv, restarts v.
+        size_type num = size_type(-1);
+        rho2 = R(0);
+        for (size_type j = 0; j < eff_restart; ++j)
+          { R nvp=gmm::abs(eigval[j]); if (nvp > rho2) { rho2=nvp; num=j; }}
+
+        GMM_ASSERT1(num != size_type(-1), "Internal error");
+
+        gmm::mult(lv, mat_col(eigvect, num), v);
+
+        if (gmm::abs(rho2-rho_old) < rho_old*R(EPS)) break;
+        // if (gmm::abs(rho-rho2) < rho*R(gmm::sqrt(EPS))) break;
+        if (gmm::abs(rho-rho2) < rho*R(EPS)*R(100)) break;
+      }
+      // If rho2 is close enough to rho, erase the column with the largest |v| entry; else stop.
+      if (gmm::abs(rho-rho2) < rho*R(EPS*10.)) {
+        size_type j_max = size_type(-1), j = 0;
+        R val_max = R(0);
+        for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++j)
+          if (gmm::abs(v[j]) > val_max)
+            { val_max = gmm::abs(v[j]); j_max = *it; }
+        columns.erase(j_max); nc_r = columns.size();
+      }
+      else break;
+    }
+  }
+
+  // Range basis with LU decomposition. Not stable from a numerical viewpoint.
+  // Complex version not verified
+  template <typename Mat>
+  void range_basis_eff_lu(const Mat &B, std::set<size_type> &columns,
+                          std::vector<bool> &c_ortho, double EPS) {
+    // Erases from `columns` the non-c_ortho entries whose factored diagonal is <= EPS*emax.
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = 0, nc_o = 0, nc = mat_ncols(B), nr = mat_nrows(B), i, j;
+    // nc_r counts the selected columns not flagged in c_ortho, nc_o the flagged ones.
+    for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it)
+      if (!(c_ortho[*it])) ++nc_r; else nc_o++;
+
+    if (nc_r > 0) {
+
+      gmm::row_matrix< gmm::rsvector<T> > Hr(nc, nc_r), Ho(nc, nc_o);
+      gmm::row_matrix< gmm::rsvector<T> > BBr(nr, nc_r), BBo(nr, nc_o);
+
+      i = j = 0;
+      for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it)
+        if (!(c_ortho[*it]))
+          { Hr(*it, i) = T(1)/ vect_norminf(mat_col(B, *it)); ++i; }
+        else
+          { Ho(*it, j) = T(1)/ vect_norm2(mat_col(B, *it)); ++j; }
+      // BBr / BBo: the two column groups of B after the scaling stored in Hr / Ho.
+      gmm::mult(B, Hr, BBr);
+      gmm::mult(B, Ho, BBo);
+      gmm::dense_matrix<T> M(nc_r, nc_r), BBB(nc_r, nc_o), MM(nc_r, nc_r);
+      gmm::mult(gmm::conjugated(BBr), BBr, M);
+      gmm::mult(gmm::conjugated(BBr), BBo, BBB);
+      gmm::mult(BBB, gmm::conjugated(BBB), MM);
+      gmm::add(gmm::scaled(MM, T(-1)), M);  // M = BBr^* BBr - BBB BBB^*
+
+      std::vector<int> ipvt(nc_r);
+      gmm::lu_factor(M, ipvt);
+
+      R emax = R(0);  // largest |M(i,i)| after the factorization
+      for (i = 0; i < nc_r; ++i) emax = std::max(emax, gmm::abs(M(i,i)));
+
+      i = 0;
+      std::set<size_type> c = columns;  // iterate a copy: `columns` is erased from below
+      for (TAB::iterator it = c.begin(); it != c.end(); ++it)
+        if (!(c_ortho[*it])) {
+          if (gmm::abs(M(i,i)) <= R(EPS)*emax) columns.erase(*it);
+          ++i;
+        }
+    }
+  }
+
+
+  // Range basis with Gram-Schmidt orthogonalization (sparse version)
+  // The sparse version is better when the sparsity is high and less efficient
+  // than the dense version for high degree elements (P3, P4 ...)
+  // Complex version not verified
+  template <typename Mat>
+  void range_basis_eff_Gram_Schmidt_sparse(const Mat &BB,
+                                           std::set<size_type> &columns,
+                                           std::vector<bool> &c_ortho,
+                                           double EPS) {
+
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc = mat_ncols(BB), nr = mat_nrows(BB);
+    std::set<size_type> c = columns, rc = columns;  // rc: columns still to be processed
+    // B: working copy of the selected columns, each scaled to unit 2-norm.
+    gmm::col_matrix< rsvector<T> > B(nr, nc);
+    for (std::set<size_type>::iterator it = columns.begin();
+         it != columns.end(); ++it) {
+      gmm::copy(mat_col(BB, *it), mat_col(B, *it));
+      gmm::scale(mat_col(B, *it), T(1)/vect_norm2(mat_col(B, *it)));
+    }
+    // Remove from every non-c_ortho column its component along each c_ortho column.
+    for (std::set<size_type>::iterator it = c.begin(); it != c.end(); ++it)
+      if (c_ortho[*it]) {
+        for (std::set<size_type>::iterator it2 = rc.begin();
+             it2 != rc.end(); ++it2)
+          if (!(c_ortho[*it2])) {
+            T r = -vect_hp(mat_col(B, *it2), mat_col(B, *it));
+            if (r != T(0)) add(scaled(mat_col(B, *it), r), mat_col(B, *it2));
+          }
+        rc.erase(*it);
+      }
+    // Repeatedly pick the remaining column of largest norm; columns below EPS are erased.
+    while (rc.size()) {
+      R nmax = R(0); size_type cmax = size_type(-1);
+      for (std::set<size_type>::iterator it=rc.begin(); it != rc.end();) {
+        TAB::iterator itnext = it; ++itnext;  // saved because *it may be erased below
+        R n = vect_norm2(mat_col(B, *it));
+        if (nmax < n) { nmax = n; cmax = *it; }
+        if (n < R(EPS)) { columns.erase(*it); rc.erase(*it); }
+        it = itnext;
+      }
+
+      if (nmax < R(EPS)) break;
+      // Normalize the chosen column and remove its component from the remaining ones.
+      gmm::scale(mat_col(B, cmax), T(1)/vect_norm2(mat_col(B, cmax)));
+      rc.erase(cmax);
+      for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it) {
+        T r = -vect_hp(mat_col(B, *it), mat_col(B, cmax));
+        if (r != T(0)) add(scaled(mat_col(B, cmax), r), mat_col(B, *it));
+      }
+    }
+    for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it)
+      columns.erase(*it);
+  }
+
+
+  // Range basis with Gram-Schmidt orthogonalization (dense version)
+  template <typename Mat>
+  void range_basis_eff_Gram_Schmidt_dense(const Mat &B,
+                                          std::set<size_type> &columns,
+                                          std::vector<bool> &c_ortho,
+                                          double EPS) {
+
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = columns.size(), nc = mat_ncols(B), nr = mat_nrows(B), i;
+    std::set<size_type> rc;  // local indices (0..nc_r-1) still to be processed
+
+    row_matrix< gmm::rsvector<T> > H(nc, nc_r), BB(nr, nc_r);
+    std::vector<T> v(nc_r);
+    std::vector<size_type> ind(nc_r);  // local index -> column id, set for rc members
+    // H(c, i) = 1 / ||B(:,c)||_2 for the i-th selected column c.
+    i = 0;
+    for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i)
+      H(*it, i) = T(1) / vect_norm2(mat_col(B, *it));
+    // BB = B*H holds the selected columns after scaling; M = BB^* BB.
+    mult(B, H, BB);
+    dense_matrix<T> M(nc_r, nc_r);
+    mult(gmm::conjugated(BB), BB, M);
+    // c_ortho columns get the rank-one update right away; the others are queued in rc.
+    i = 0;
+    for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i)
+      if (c_ortho[*it]) {
+        gmm::copy(mat_row(M, i), v);
+        rank_one_update(M, scaled(v, T(-1)), v);
+        M(i, i) = T(1);
+      }
+      else { rc.insert(i); ind[i] = *it; }
+
+    while (rc.size() > 0) {
+
+      // Next pivot
+      R nmax = R(0); size_type imax = size_type(-1);
+      for (TAB::iterator it = rc.begin(); it != rc.end();) {
+        TAB::iterator itnext = it; ++itnext;  // saved because *it may be erased below
+        R a = gmm::abs(M(*it, *it));
+        if (a > nmax) { nmax = a; imax = *it; }
+        if (a < R(EPS)) { columns.erase(ind[*it]); rc.erase(*it); }
+        it = itnext;
+      }
+
+      if (nmax < R(EPS)) break;
+
+      // Normalization
+      gmm::scale(mat_row(M, imax), T(1) / sqrt(nmax));
+      gmm::scale(mat_col(M, imax), T(1) / sqrt(nmax));
+
+      // orthogonalization
+      copy(mat_row(M, imax), v);
+      rank_one_update(M, scaled(v, T(-1)), v);
+      M(imax, imax) = T(1);
+
+      rc.erase(imax);
+    }
+    for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it)
+      columns.erase(ind[*it]);
+  }
+  // Counts the entries of l whose magnitude is at least eps.
+  template <typename L> size_type nnz_eps(const L& l, double eps) {
+    size_type count(0);
+    typename linalg_traits<L>::const_iterator cur = vect_const_begin(l);
+    typename linalg_traits<L>::const_iterator last = vect_const_end(l);
+    while (cur != last) { if (gmm::abs(*cur) >= eps) ++count; ++cur; }
+    return count;
+  }
+  // Books in b the indices of the entries of l with magnitude >= eps, but only
+  // if none of them is booked already; returns whether the booking happened.
+  template <typename L>
+  bool reserve__rb(const L& l, std::vector<bool> &b, double eps) {
+    typedef typename linalg_traits<L>::const_iterator iter;
+    iter first = vect_const_begin(l), last = vect_const_end(l);
+    // Pass 1: refuse if a significant entry sits on an already booked index.
+    for (iter cur = first; cur != last; ++cur)
+      if (gmm::abs(*cur) >= eps && b[cur.index()]) return false;
+    // Pass 2: book every significant index.
+    for (iter cur = first; cur != last; ++cur)
+      if (gmm::abs(*cur) >= eps) b[cur.index()] = true;
+    return true;
+  }
+
+  /** Column-major worker of range_basis(B, columns, EPS): prunes `columns`
+      by block Gram-Schmidt passes of growing size, then one Lanczos or dense
+      pass. Unless skip_init is set, `columns` is first rebuilt from B. */
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                       double EPS, col_major, bool skip_init=false) {
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef std::set<size_type>::iterator set_it;
+
+    size_type nc = mat_ncols(B), nr = mat_nrows(B);
+    std::vector<R> norms(nc);
+    std::vector<bool> c_ortho(nc), booked(nr);
+    // nnzs[k]: columns with k significant entries; a full column has k == nr.
+    std::vector< std::set<size_type> > nnzs(nr + 1);
+
+    if (!skip_init) {
+      R norm_max = R(0);
+      for (size_type i = 0; i < nc; ++i) {
+        norms[i] = vect_norminf(mat_col(B, i));
+        norm_max = std::max(norm_max, norms[i]);
+      }
+
+      // Keep the columns that are not negligible next to the largest one.
+      columns.clear();
+      for (size_type i = 0; i < nc; ++i)
+        if (norms[i] > norm_max*R(EPS)) {
+          columns.insert(i);
+          nnzs[nnz_eps(mat_col(B, i), R(EPS) * norms[i])].insert(i);
+        }
+
+      // Sparsest first, flag columns whose significant rows are still free.
+      for (size_type i = 1; i < nr; ++i)
+        for (set_it it = nnzs[i].begin(); it != nnzs[i].end(); ++it)
+          if (reserve__rb(mat_col(B, *it), booked, R(EPS) * norms[*it]))
+            c_ortho[*it] = true;
+    }
+
+    size_type sizesm[7] = {125, 200, 350, 550, 800, 1100, 1500}, actsize;
+    for (int k = 0; k < 7; ++k) {
+      size_type nc_r = columns.size();
+      std::set<size_type> c1, cres;
+      actsize = sizesm[k];
+      for (set_it it = columns.begin(); it != columns.end(); ++it) {
+        c1.insert(*it);
+        if (c1.size() >= actsize) {
+          range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS);
+          cres.insert(c1.begin(), c1.end());
+          c1.clear();
+        }
+      }
+      if (c1.size() > 1)
+        range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS);
+      cres.insert(c1.begin(), c1.end());
+      columns = cres;
+      if (nc_r <= actsize) return;       // one block covered every column
+      if (columns.size() == nc_r) break; // this pass removed nothing
+      if (sizesm[k] >= 350 && columns.size() > (nc_r*19)/20) break;
+    }
+    if (columns.size() > std::max(size_type(10), actsize))
+      range_basis_eff_Lanczos(B, columns, EPS);
+    else
+      range_basis_eff_Gram_Schmidt_dense(B, columns, c_ortho, EPS);
+  }
+
+
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                   double EPS, row_major) {
+    typedef rsvector<typename linalg_traits<Mat>::value_type> col_type;
+    gmm::col_matrix<col_type> B_cols(mat_nrows(B), mat_ncols(B));
+    GMM_WARNING3("A copy of a row matrix is done into a column matrix "
+                 "for range basis algorithm.");
+    gmm::copy(B, B_cols);  // the work is done on this column-matrix copy
+    range_basis(B_cols, columns, EPS);
+  }
+
+  /** Range Basis :
+    Extract a basis of the range of a (large sparse) matrix selecting some
+    column vectors of this matrix. This is in particular useful to select
+    an independent set of linear constraints.
+
+    The algorithm is optimized for two cases :
+       - when the (non trivial) kernel is small. An iterative algorithm
+         based on Lanczos method is applied
+       - when the (non trivial) kernel is large and most of the dependencies
+         can be detected locally. A block Gram-Schmidt is applied first then
+         a restarted Lanczos method when the remaining kernel is greatly
+         smaller.
+    The restarted Lanczos method could be improved or replaced by a block
+    Lanczos method, a block Wiedemann method (in order to be parallelized for
+    instance) or simply could compute more than one vector of the null
+    space at each iteration.
+    The LU decomposition has been tested for local elimination but gives bad
+    results : the algorithm is unstable and does not give the right
+    number of vectors at the end of the process. Moreover, the number of final
+    vectors depends greatly on the number of vectors in a block of the local
+    analysis.
+  */
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                   double EPS=1E-12) {
+    typedef typename linalg_traits<Mat>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient;
+    range_basis(B, columns, EPS, orient());  // row_major/col_major overload
+  }
+
+}
+
+#endif
diff --git a/gmm/gmm_real_part.h b/gmm/gmm_real_part.h
new file mode 100644
index 000000000..c4e61d815
--- /dev/null
+++ b/gmm/gmm_real_part.h
@@ -0,0 +1,605 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_real_part.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 18, 2003.
+   @brief extract the real/imaginary part of vectors/matrices 
+*/
+#ifndef GMM_REAL_PART_H
+#define GMM_REAL_PART_H
+
+#include "gmm_def.h"
+#include "gmm_vector.h"
+
+namespace gmm {
+
+  struct linalg_real_part {}; // tag type: selects the real part
+  struct linalg_imag_part {}; // tag type: selects the imaginary part
+  template <typename R, typename PART> struct which_part {}; // (R, PART) tag
+  
+  template <typename C>  // real component of z, chosen by the tag argument
+  typename number_traits<C>::magnitude_type real_or_imag_part(C z, linalg_real_part) { return gmm::real(z); }
+  template <typename C>  // imaginary component of z, chosen by the tag argument
+  typename number_traits<C>::magnitude_type real_or_imag_part(C z, linalg_imag_part) { return gmm::imag(z); }
+  template <typename T, typename C, typename OP> C complex_from(T x, C y, OP op, linalg_real_part)
+  { T re = op(std::real(y), x); return std::complex<T>(re, std::imag(y)); } // imaginary part kept
+  template <typename T, typename C, typename OP> C complex_from(T x, C y, OP op, linalg_imag_part)
+  { T im = op(std::imag(y), x); return std::complex<T>(std::real(y), im); } // real part kept
+  
+  template<typename T> struct project2nd { // binary functor returning its second argument
+    T operator()(T , T second) const { return second; }
+  };
+  
+  /** Proxy for one component (chosen by PART) of the complex element that r
+      designates; reads and writes go through a std::complex<T> copy of r. */
+  template<typename T, typename R, typename PART> class ref_elt_vector<T, which_part<R, PART> > {
+    R r;
+    public :
+
+    operator T() const { return real_or_imag_part(std::complex<T>(r), PART()); }
+    ref_elt_vector(R r_) : r(r_) {}
+    inline ref_elt_vector &operator =(T v)
+    { r = complex_from(v, std::complex<T>(r), gmm::project2nd<T>(), PART()); return *this; }
+    inline bool operator ==(T v) const { return (T(*this) == v); } // selected part only
+    inline bool operator !=(T v) const { return (T(*this) != v); } // selected part only
+    inline ref_elt_vector &operator +=(T v)
+    { r = complex_from(v, std::complex<T>(r), std::plus<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator -=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::minus<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator /=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::divides<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator *=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::multiplies<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator =(const ref_elt_vector &re)
+      { *this = T(re); return *this; }
+    T operator +()    { return  T(*this);   } // necessary for unknown reason
+    T operator -()    { return -T(*this);   } // necessary for unknown reason
+    T operator +(T v) { return T(*this)+ v; } // necessary for unknown reason
+    T operator -(T v) { return T(*this)- v; } // necessary for unknown reason
+    T operator *(T v) { return T(*this)* v; } // necessary for unknown reason
+    T operator /(T v) { return T(*this)/ v; } // necessary for unknown reason
+  };
+
+  // Primary template, for `reference` types other than the ref_elt_vector
+  // part proxy specialized below: r() extracts the requested component of x
+  // and returns it by value, converted to W.
+  template<typename reference> struct ref_or_value_type {
+    template <typename T, typename W>
+    static W r(const T &x, linalg_real_part, W) { return gmm::real(x); }
+
+    template <typename T, typename W>
+    static W r(const T &x, linalg_imag_part, W) { return gmm::imag(x); }
+  };
+  
+  // Specialization for the ref_elt_vector part proxy: every r() overload
+  // returns its argument unchanged, by reference, whichever part tag is
+  // passed.
+  template<typename U, typename R, typename PART>
+  struct ref_or_value_type<ref_elt_vector<U, which_part<R, PART> > > {
+    // Read-only arguments.
+    template<typename T, typename W>
+    static const T &r(const T &x, linalg_real_part, W) { return x; }
+    template<typename T, typename W>
+    static const T &r(const T &x, linalg_imag_part, W) { return x; }
+
+    // Modifiable arguments.
+    template<typename T, typename W>
+    static T &r(T &x, linalg_real_part, W) { return x; }
+    template<typename T, typename W>
+    static T &r(T &x, linalg_imag_part, W) { return x; }
+  };
+
+  
+  /* ********************************************************************* */
+  /*	Reference to the real or imaginary part of (complex) vectors       */
+  /* ********************************************************************* */
+
+  template <typename IT, typename MIT, typename PART>
+  struct part_vector_iterator { // adaptor: wraps an IT, yields part proxies
+    typedef typename std::iterator_traits<IT>::value_type      vtype;
+    typedef typename gmm::number_traits<vtype>::magnitude_type value_type;
+    typedef value_type                                        *pointer;
+    typedef ref_elt_vector<value_type, which_part<typename std::iterator_traits<IT>::reference, PART> > reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+    iterator_category;
+    // The wrapped iterator over the underlying elements.
+    IT it;
+    // Default, wrapping and <MIT, MIT, PART>-converting constructors.
+    part_vector_iterator(void) {}
+    explicit part_vector_iterator(const IT &i) : it(i) {}
+    part_vector_iterator(const part_vector_iterator<MIT, MIT, PART> &i) : it(i.it) {}
+    
+    // Index and traversal operations are forwarded to the wrapped iterator.
+    size_type index(void) const { return it.index(); }
+    part_vector_iterator operator ++(int)
+    { part_vector_iterator tmp = *this; ++it; return tmp; }
+    part_vector_iterator operator --(int) 
+    { part_vector_iterator tmp = *this; --it; return tmp; }
+    part_vector_iterator &operator ++() { ++it; return *this; }
+    part_vector_iterator &operator --() { --it; return *this; }
+    part_vector_iterator &operator +=(difference_type i)
+      { it += i; return *this; }
+    part_vector_iterator &operator -=(difference_type i)
+      { it -= i; return *this; }
+    part_vector_iterator operator +(difference_type i) const
+      { part_vector_iterator itb = *this; return (itb += i); }
+    part_vector_iterator operator -(difference_type i) const
+      { part_vector_iterator itb = *this; return (itb -= i); }
+    difference_type operator -(const part_vector_iterator &i) const
+      { return difference_type(it - i.it); }
+    // Dereferencing wraps the underlying element in the part proxy type.
+    reference operator  *() const { return reference(*it); }
+    reference operator [](size_type ii) const { return reference(it[ii]); }
+    // Comparisons compare the wrapped iterators.
+    bool operator ==(const part_vector_iterator &i) const
+      { return (i.it == it); }
+    bool operator !=(const part_vector_iterator &i) const
+      { return (i.it != it); }
+    bool operator < (const part_vector_iterator &i) const
+      { return (it < i.it); }
+  };
+
+
+  template <typename PT, typename PART> struct part_vector {
+    typedef part_vector<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Iterator range, origin and size, all taken from the wrapped vector.
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type size_;
+
+    size_type size(void) const { return size_; }
+    // Element i, obtained through the wrapped vector type's access().
+    reference operator[](size_type i) const { 
+      return reference(ref_or_value_type<reference>::r(
+	     linalg_traits<V>::access(origin, begin_, end_, i),
+	     PART(), value_type()));
+    }
+    // Built from a vector (constness is cast away for const V), default-
+    part_vector(V &v)
+      : begin_(vect_begin(v)),  end_(vect_end(v)),
+	origin(linalg_origin(v)), size_(gmm::vect_size(v)) {}
+    part_vector(const V &v) 
+      : begin_(vect_begin(const_cast<V &>(v))),
+       end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), size_(gmm::vect_size(v)) {}
+    part_vector() {} // constructed, or copied from the part_vector<CPT, PART> form.
+    part_vector(const part_vector<CPT, PART> &cr)
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), size_(cr.size_) {} 
+  };
+
+  // Forwards to the set_to_begin overload for the wrapped iterator it.it.
+  template <typename IT, typename MIT, typename ORG, typename PT, typename PART>
+  inline void set_to_begin(part_vector_iterator<IT, MIT, PART> &it, ORG o,
+                           part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    typedef typename view_traits::pV underlying_ptr;
+    set_to_begin(it.it, o, underlying_ptr(), typename view_traits::V_reference());
+  }
+  // Same forwarding as above, selected for a pointer to a const part_vector.
+  template <typename IT, typename MIT, typename ORG, typename PT, typename PART>
+  inline void set_to_begin(part_vector_iterator<IT, MIT, PART> &it, ORG o,
+                           const part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    typedef typename view_traits::pV underlying_ptr;
+    set_to_begin(it.it, o, underlying_ptr(), typename view_traits::V_reference());
+  }
+  // Forwards to the set_to_end overload for the wrapped iterator it.it.
+  template <typename IT, typename MIT, typename ORG, typename PT, typename PART>
+  inline void set_to_end(part_vector_iterator<IT, MIT, PART> &it, ORG o,
+                         part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    typedef typename view_traits::pV underlying_ptr;
+    set_to_end(it.it, o, underlying_ptr(), typename view_traits::V_reference());
+  }
+  // Forwards to the set_to_end overload for the wrapped iterator it.it;
+  // this overload is selected for a pointer to a const part_vector.
+  template <typename IT, typename MIT, typename ORG, typename PT, typename PART>
+  inline void set_to_end(part_vector_iterator<IT, MIT, PART> &it, ORG o,
+                         const part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    typedef typename view_traits::pV underlying_ptr;
+    set_to_end(it.it, o, underlying_ptr(), typename view_traits::V_reference());
+  }
+
+  /** Writes a part_vector to the stream through gmm::write. */
+  template <typename PT, typename PART> std::ostream &
+  operator <<(std::ostream &o, const part_vector<PT, PART>& m) { gmm::write(o,m); return o; }
+
+
+  /* ********************************************************************* */
+  /*	Reference to the real or imaginary part of (complex) matrices      */
+  /* ********************************************************************* */
+
+
+  template <typename PT, typename PART> struct  part_row_ref {
+    // View on the PART component of a matrix, reached through its rows.
+    typedef part_row_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_row_iterator, typename linalg_traits<this_type>
+            ::row_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Row range, origin and dimensions, all taken from the wrapped matrix.
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    part_row_ref(ref_M m)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {}
+
+    part_row_ref(const part_row_ref<CPT, PART> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+    // Element (i, j): entry j of the row at begin_ + i.
+    reference operator()(size_type i, size_type j) const {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(begin_+i, j),
+					 PART(), value_type()));
+    }
+  };
+  
+  /** Writes a part_row_ref to the stream through gmm::write. */
+  template<typename PT, typename PART> std::ostream &
+  operator <<(std::ostream &o, const part_row_ref<PT, PART>& m) { gmm::write(o,m); return o; }
+
+  template <typename PT, typename PART> struct  part_col_ref {
+    // View on the PART component of a matrix, reached through its columns.
+    typedef part_col_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_col_iterator, typename linalg_traits<this_type>
+            ::col_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Column range, origin and dimensions, all taken from the wrapped matrix.
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    part_col_ref(ref_M m)
+      : begin_(mat_col_begin(m)), end_(mat_col_end(m)),
+	origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {}
+
+    part_col_ref(const part_col_ref<CPT, PART> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+    // Element (i, j): entry i of the column at begin_ + j.
+    reference operator()(size_type i, size_type j) const {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(begin_+j, i),
+					 PART(), value_type()));
+    }
+  };
+   
+
+  
+  /** Writes a part_col_ref to the stream through gmm::write. */
+  template<typename PT, typename PART> std::ostream &
+  operator <<(std::ostream &o, const part_col_ref<PT, PART>& m) { gmm::write(o,m); return o; }
+
+  
+
+
+
+
+template <typename TYPE, typename PART, typename PT>
+  struct part_return_ { // fallback for a TYPE that is not specialized below
+    typedef abstract_null_type return_type;
+  };
+  template <typename PT, typename PART>
+  struct part_return_<row_major, PART, PT> { // row_major: a part_row_ref
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_row_ref<const L *, PART>,
+		     part_row_ref< L *, PART>, PT>::return_type return_type;
+  };
+  template <typename PT, typename PART>
+  struct part_return_<col_major, PART, PT> { // col_major: a part_col_ref
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_col_ref<const L *, PART>,
+		     part_col_ref<L *, PART>, PT>::return_type return_type;
+  };
+
+  template <typename PT, typename PART, typename LT> struct part_return__{
+    typedef abstract_null_type return_type;
+  };
+  // Matrices: dispatch on the principal orientation of the matrix type.
+  template <typename PT, typename PART>
+  struct part_return__<PT, PART, abstract_matrix> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename part_return_<typename principal_orientation_type<
+      typename linalg_traits<L>::sub_orientation>::potype, PART,
+      PT>::return_type return_type;
+  };
+  // Vectors: one of the two part_vector forms, picked by select_return.
+  template <typename PT, typename PART>
+  struct part_return__<PT, PART, abstract_vector> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_vector<const L *, PART>,
+      part_vector<L *, PART>, PT>::return_type return_type;
+  };
+
+  template <typename PT, typename PART> struct part_return {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename part_return__<PT, PART, // dispatch on vector vs matrix
+      typename linalg_traits<L>::linalg_type>::return_type return_type;
+  };
+
+  template <typename L> inline
+  typename part_return<const L *, linalg_real_part>::return_type
+  real_part(const L &l) {  // real-part view built for a const argument
+    typedef typename part_return<const L *, linalg_real_part>::return_type view;
+    return view(linalg_cast(const_cast<L &>(l)));
+  }
+
+  template <typename L> inline
+  typename part_return<L *, linalg_real_part>::return_type real_part(L &l) {
+    typedef typename part_return<L *, linalg_real_part>::return_type view;
+    return view(linalg_cast(l));  // real-part view built for a non-const l
+  }
+
+  template <typename L> inline
+  typename part_return<const L *, linalg_imag_part>::return_type
+  imag_part(const L &l) {  // imaginary-part view built for a const argument
+    typedef typename part_return<const L *, linalg_imag_part>::return_type view;
+    return view(linalg_cast(const_cast<L &>(l)));
+  }
+
+  template <typename L> inline
+  typename part_return<L *, linalg_imag_part>::return_type imag_part(L &l) {
+    typedef typename part_return<L *, linalg_imag_part>::return_type view;
+    return view(linalg_cast(l));  // imaginary-part view built for a non-const l
+  }
+
+
+  template <typename PT, typename PART>
+  struct linalg_traits<part_vector<PT, PART> > { // traits of the vector view
+    typedef part_vector<PT, PART> this_type;
+    typedef this_type * pthis_type;
+    typedef PT pV;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    typedef typename select_ref<value_type, ref_elt_vector<value_type,
+		     which_part<typename linalg_traits<V>::reference,
+				PART> >, PT>::ref_type reference;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+	    typename linalg_traits<V>::iterator, PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    part_vector_iterator<pre_iterator, pre_iterator, PART>,
+	    PT>::ref_type iterator;
+    typedef part_vector_iterator<typename linalg_traits<V>::const_iterator,
+				 pre_iterator, PART> const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) {
+      iterator it; it.it = v.begin_;
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {
+      const_iterator it(v.begin_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      return it;
+    }
+    static iterator end(this_type &v) {
+      iterator it(v.end_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator end(const this_type &v) {
+      const_iterator it(v.end_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    // clear(): assigns value_type(0) to each part in [begin_, end_).
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_sparse) {
+      std::deque<size_type> ind; // the indices are gathered first ...
+      iterator it = begin_;
+      for (; it != end_; ++it) ind.push_front(it.index());
+      for (; !(ind.empty()); ind.pop_back()) // ... then written via access()
+	access(o, begin_, end_, ind.back()) = value_type(0);
+    }
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_skyline) {
+      clear(o, begin_, end_, abstract_sparse());
+    }
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_dense) {
+      for (iterator it = begin_; it != end_; ++it) *it = value_type(0);
+    }
+    // Picks one of the clear() overloads above from storage_type.
+   static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_) 
+    { clear(o, begin_, end_, storage_type()); }
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i) { 
+      return  real_or_imag_part(linalg_traits<V>::access(o, it.it, ite.it,i),
+				PART());
+    }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)
+    { return reference(linalg_traits<V>::access(o, it.it, ite.it,i)); }
+  };
+
+  template <typename PT, typename PART>
+  struct linalg_traits<part_row_ref<PT, PART> > { // traits of the row view
+    typedef part_row_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_col_type; // no column types on a row view
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_row_type>::t
+            pre_const_sub_row_type;
+    typedef typename org_type<typename linalg_traits<M>::sub_row_type>::t pre_sub_row_type;
+    typedef part_vector<const pre_const_sub_row_type *, PART>
+            const_sub_row_type;
+    typedef typename select_ref<abstract_null_type,
+	    part_vector<pre_sub_row_type *, PART>, PT>::ref_type sub_row_type;
+    typedef typename linalg_traits<M>::const_row_iterator const_row_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::row_iterator, PT>::ref_type row_iterator;
+    typedef typename select_ref<
+            typename linalg_traits<const_sub_row_type>::reference,
+	    typename linalg_traits<sub_row_type>::reference,
+				PT>::ref_type reference;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type ncols(const this_type &v) { return v.nc; }
+    static size_type nrows(const this_type &v) { return v.nr; }
+    static const_sub_row_type row(const const_row_iterator &it) // part view of a row
+    { return const_sub_row_type(linalg_traits<M>::row(it)); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(linalg_traits<M>::row(it)); }
+    static row_iterator row_begin(this_type &m) { return m.begin_; }
+    static row_iterator row_end(this_type &m) { return m.end_; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return m.begin_; }
+    static const_row_iterator row_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &v); // defined out of class, further down
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return real_or_imag_part(linalg_traits<M>::access(itrow, i), PART()); }
+    static reference access(const row_iterator &itrow, size_type i) {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(itrow, i),
+					 PART(), value_type()));
+    }
+  };
+
+  template <typename PT, typename PART>
+  struct linalg_traits<part_col_ref<PT, PART> > { // traits of the column view
+    typedef part_col_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_row_type; // no row types on a column view
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_col_type>::t
+            pre_const_sub_col_type;
+    typedef typename org_type<typename linalg_traits<M>::sub_col_type>::t pre_sub_col_type;
+    typedef part_vector<const pre_const_sub_col_type *, PART>
+            const_sub_col_type;
+    typedef typename select_ref<abstract_null_type,
+	    part_vector<pre_sub_col_type *, PART>, PT>::ref_type sub_col_type;
+    typedef typename linalg_traits<M>::const_col_iterator const_col_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::col_iterator, PT>::ref_type col_iterator;
+    typedef typename select_ref<
+            typename linalg_traits<const_sub_col_type>::reference,
+	    typename linalg_traits<sub_col_type>::reference,
+				PT>::ref_type reference;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type nrows(const this_type &v) { return v.nr; }
+    static size_type ncols(const this_type &v) { return v.nc; }
+    static const_sub_col_type col(const const_col_iterator &it) // part view of a column
+    { return const_sub_col_type(linalg_traits<M>::col(it)); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(linalg_traits<M>::col(it)); }
+    static col_iterator col_begin(this_type &m) { return m.begin_; }
+    static col_iterator col_end(this_type &m) { return m.end_; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return m.begin_; }
+    static const_col_iterator col_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &v); // defined out of class, further down
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return real_or_imag_part(linalg_traits<M>::access(itcol, i), PART()); }
+    static reference access(const col_iterator &itcol, size_type i) {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(itcol, i),
+					 PART(), value_type()));
+    }
+  };
+
+  template <typename PT, typename PART>
+  void linalg_traits<part_col_ref<PT, PART> >::do_clear(this_type &v) {
+    for (col_iterator c = mat_col_begin(v), ce = mat_col_end(v); c != ce; ++c)
+      clear(col(c));  // clears the part view of one column at a time
+  }
+  
+  template <typename PT, typename PART>
+  void linalg_traits<part_row_ref<PT, PART> >::do_clear(this_type &v) {
+    for (row_iterator rw = mat_row_begin(v), re = mat_row_end(v); rw != re; ++rw)
+      clear(row(rw));  // clears the part view of one row at a time
+  }
+}
+
+#endif //  GMM_REAL_PART_H
diff --git a/gmm/gmm_ref.h b/gmm/gmm_ref.h
new file mode 100644
index 000000000..67af37739
--- /dev/null
+++ b/gmm/gmm_ref.h
@@ -0,0 +1,526 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2000-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+
+#ifndef GMM_REF_H__
+#define GMM_REF_H__
+
+/** @file gmm_ref.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date August 26, 2000.
+ *  @brief Provide some simple pseudo-containers.
+ *  
+ *  WARNING : modifying the container invalidates the references.
+ */
+
+
+#include <iterator>
+#include "gmm_except.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Simple reference.                                                     */
+  /* ********************************************************************* */
+
+  template<typename ITER> class tab_ref {
+    // Non-owning view of the range [begin_, end_); only iterators are stored.
+    protected :
+
+      ITER begin_, end_;
+
+    public :
+
+      typedef typename std::iterator_traits<ITER>::value_type  value_type;
+      typedef typename std::iterator_traits<ITER>::pointer     pointer;
+      typedef typename std::iterator_traits<ITER>::pointer     const_pointer;
+      typedef typename std::iterator_traits<ITER>::reference   reference;
+      typedef typename std::iterator_traits<ITER>::reference   const_reference;
+      typedef typename std::iterator_traits<ITER>::difference_type
+                                                               difference_type;
+      typedef ITER                            iterator;
+      typedef ITER                            const_iterator;
+      typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+      typedef std::reverse_iterator<iterator> reverse_iterator;
+      typedef size_t size_type;
+      // size() subtracts the two iterators, so ITER must support operator -.
+      bool empty(void) const { return begin_ == end_; }
+      size_type size(void) const { return end_ - begin_; }
+
+      const iterator &begin(void) { return begin_; }
+      const const_iterator &begin(void) const { return begin_; }
+      const iterator &end(void) { return end_; }
+      const const_iterator &end(void) const { return end_; }
+      reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+      const_reverse_iterator rbegin(void) const
+      { return const_reverse_iterator(end()); }
+      reverse_iterator rend(void) { return reverse_iterator(begin()); }
+      const_reverse_iterator rend(void) const
+      { return const_reverse_iterator(begin()); }
+      // back() steps a copy of end_ back: end() is const, it cannot be decremented.
+      reference front(void) { return *begin(); }
+      const_reference front(void) const { return *begin(); }
+      reference back(void) { ITER last(end_); --last; return *last; }
+      const_reference back(void) const { ITER last(end_); --last; return *last; }
+      void pop_front(void) { ++begin_; }
+      // Unchecked element access.
+      const_reference operator [](size_type ii) const { return *(begin_ + ii);}
+      reference operator [](size_type ii) { return *(begin_ + ii); }
+      // The default constructor leaves both iterators default-initialized.
+      tab_ref(void) {}
+      tab_ref(const ITER &b, const ITER &e) : begin_(b), end_(e) {}
+  };
+
+
+  /* ********************************************************************* */
+  /* Reference with index.                                                 */
+  /* ********************************************************************* */
+
+//   template<typename ITER> struct tab_ref_index_iterator_
+//     : public dynamic_array<size_t>::const_iterator
+//   {
+//     typedef typename std::iterator_traits<ITER>::value_type  value_type;
+//     typedef typename std::iterator_traits<ITER>::pointer     pointer;
+//     typedef typename std::iterator_traits<ITER>::reference   reference;
+//     typedef typename std::iterator_traits<ITER>::difference_type  
+//     difference_type;
+//     typedef std::random_access_iterator_tag iterator_category;
+//     typedef size_t size_type;
+//     typedef dynamic_array<size_type>::const_iterator dnas_iterator_;
+//     typedef tab_ref_index_iterator_<ITER> iterator;
+    
+
+//     ITER piter;
+    
+//     iterator operator ++(int)
+//     { iterator tmp = *this; ++(*((dnas_iterator_ *)(this))); return tmp; }
+//     iterator operator --(int)
+//     { iterator tmp = *this; --(*((dnas_iterator_ *)(this))); return tmp; }
+//     iterator &operator ++()
+//     { ++(*((dnas_iterator_ *)(this))); return *this; }
+//     iterator &operator --()
+//     { --(*((dnas_iterator_ *)(this))); return *this; }
+//     iterator &operator +=(difference_type i)
+//     { (*((dnas_iterator_ *)(this))) += i; return *this; }
+//     iterator &operator -=(difference_type i)
+//     { (*((dnas_iterator_ *)(this))) -= i; return *this; }
+//     iterator operator +(difference_type i) const
+//     { iterator it = *this; return (it += i); }
+//     iterator operator -(difference_type i) const
+//     { iterator it = *this; return (it -= i); }
+//     difference_type operator -(const iterator &i) const
+//     { return *((dnas_iterator_ *)(this)) - *((dnas_iterator_ *)(&i)); }
+	
+//     reference operator *() const
+//     { return *(piter + *((*((dnas_iterator_ *)(this))))); }
+//     reference operator [](int ii)
+//     { return *(piter + *((*((dnas_iterator_ *)(this+ii))))); }
+    
+//     bool operator ==(const iterator &i) const
+//     { 
+//       return ((piter) == ((i.piter))
+//        && *((dnas_iterator_ *)(this)) == *((*((dnas_iterator_ *)(this)))));
+//     }
+//     bool operator !=(const iterator &i) const
+//     { return !(i == *this); }
+//     bool operator < (const iterator &i) const
+//     { 
+//       return ((piter) == ((i.piter))
+// 	 && *((dnas_iterator_ *)(this)) < *((*((dnas_iterator_ *)(this)))));
+//     }
+
+//     tab_ref_index_iterator_(void) {}
+//     tab_ref_index_iterator_(const ITER &iter, const dnas_iterator_ &dnas_iter)
+//       : dnas_iterator_(dnas_iter), piter(iter) {}
+//   };
+
+
+//   template<typename ITER> class tab_ref_index
+//   {
+//     public :
+
+//       typedef typename std::iterator_traits<ITER>::value_type value_type;
+//       typedef typename std::iterator_traits<ITER>::pointer    pointer;
+//       typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+//       typedef typename std::iterator_traits<ITER>::reference  reference;
+//       typedef typename std::iterator_traits<ITER>::reference  const_reference;
+//       typedef typename std::iterator_traits<ITER>::difference_type
+// 	                                                       difference_type;
+//       typedef size_t size_type; 
+//       typedef tab_ref_index_iterator_<ITER> iterator;
+//       typedef iterator                          const_iterator;
+//       typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+//       typedef std::reverse_iterator<iterator> reverse_iterator;
+    
+//     protected :
+
+//       ITER begin_;
+//       dynamic_array<size_type> index_;
+
+//     public :
+
+//       bool empty(void) const { return index_.empty(); }
+//       size_type size(void) const { return index_.size(); }
+
+
+//       iterator begin(void) { return iterator(begin_, index_.begin()); }
+//       const_iterator begin(void) const
+//       { return iterator(begin_, index_.begin()); }
+//       iterator end(void) { return iterator(begin_, index_.end()); }
+//       const_iterator end(void) const { return iterator(begin_, index_.end()); }
+//       reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+//       const_reverse_iterator rbegin(void) const
+//       { return const_reverse_iterator(end()); }
+//       reverse_iterator rend(void) { return reverse_iterator(begin()); }
+//       const_reverse_iterator rend(void) const
+//       { return const_reverse_iterator(begin()); }
+
+
+//       reference front(void) { return *(begin_ +index_[0]); }
+//       const_reference front(void) const { return *(begin_ +index_[0]); }
+//       reference back(void) { return *(--(end())); }
+//       const_reference back(void) const { return *(--(end())); }
+   
+//       tab_ref_index(void) {}
+//       tab_ref_index(const ITER &b, const dynamic_array<size_type> &ind)
+//       { begin_ = b; index_ = ind; }
+
+//     // to be changed in a const_reference ?
+//       value_type operator [](size_type ii) const
+//       { return *(begin_ + index_[ii]);}
+//       reference operator [](size_type ii) { return *(begin_ + index_[ii]); }
+
+//   };
+
+
+  /// iterator of gmm::tab_ref_index_ref<ITER,ITER_INDEX>, moving over the indexes
+  template<typename ITER, typename ITER_INDEX>
+    struct tab_ref_index_ref_iterator_
+    {
+      typedef typename std::iterator_traits<ITER>::value_type value_type;
+      typedef typename std::iterator_traits<ITER>::pointer    pointer;
+      typedef typename std::iterator_traits<ITER>::reference  reference;
+      typedef typename std::iterator_traits<ITER>::difference_type
+                                                              difference_type;
+      typedef std::random_access_iterator_tag iterator_category;
+      typedef tab_ref_index_ref_iterator_<ITER, ITER_INDEX> iterator;
+      typedef size_t size_type;
+      // piter: start of the data; iter_index: current position in the indexes.
+      ITER piter;
+      ITER_INDEX iter_index;
+      // Stepping and arithmetic only ever move iter_index.
+      iterator operator ++(int)
+      { iterator old = *this; ++iter_index; return old; }
+      iterator operator --(int)
+      { iterator old = *this; --iter_index; return old; }
+      iterator &operator ++() { ++iter_index; return *this; }
+      iterator &operator --() { --iter_index; return *this; }
+      iterator &operator +=(difference_type n)
+      { iter_index += n; return *this; }
+      iterator &operator -=(difference_type n)
+      { iter_index -= n; return *this; }
+      iterator operator +(difference_type n) const
+      { iterator moved = *this; return (moved += n); }
+      iterator operator -(difference_type n) const
+      { iterator moved = *this; return (moved -= n); }
+      difference_type operator -(const iterator &other) const
+      { return iter_index - other.iter_index; }
+      // Dereferencing reads the data element selected by the current index.
+      reference operator *() const
+      { return *(piter + *iter_index); }
+      reference operator [](size_type ii) const
+      { return *(piter + *(iter_index+ii)); }
+      // == and < are false whenever the two iterators have different data starts.
+      bool operator ==(const iterator &other) const
+      { return piter == other.piter && iter_index == other.iter_index; }
+      bool operator !=(const iterator &other) const { return !(other == *this); }
+      bool operator < (const iterator &other) const
+      { return piter == other.piter && iter_index < other.iter_index; }
+
+      tab_ref_index_ref_iterator_(void) {}
+      tab_ref_index_ref_iterator_(const ITER &iter,
+                                  const ITER_INDEX &dnas_iter)
+        : piter(iter), iter_index(dnas_iter) {}
+
+    };
+
+  /** Convenience helper building an indexed iterator from its two parts,
+      so callers need not spell out the (long) iterator type name. */
+  template<typename ITER, typename ITER_INDEX>
+  tab_ref_index_ref_iterator_<ITER,ITER_INDEX>
+  index_ref_iterator(ITER it, ITER_INDEX it_i) {
+    typedef tab_ref_index_ref_iterator_<ITER,ITER_INDEX> indexed_iterator;
+    indexed_iterator result(it, it_i);
+    return result;
+  }
+
+  /** Indexed array reference: given a container X (reached through begin_)
+      and a set of indexes I (the range [index_begin_, index_end_)), this
+      class provides a pseudo-container Y such that
+      @code Y[i] = X[I[i]] @endcode */
+  template<typename ITER, typename ITER_INDEX> class tab_ref_index_ref {
+  public :
+
+    typedef std::iterator_traits<ITER>            traits_type;
+    typedef typename traits_type::value_type      value_type;
+    typedef typename traits_type::pointer         pointer;
+    typedef typename traits_type::pointer         const_pointer;
+    typedef typename traits_type::reference       reference;
+    typedef typename traits_type::reference       const_reference;
+    typedef typename traits_type::difference_type difference_type;
+    typedef size_t                                size_type;
+    typedef tab_ref_index_ref_iterator_<ITER, ITER_INDEX>   iterator;
+    typedef iterator                              const_iterator;
+    typedef std::reverse_iterator<const_iterator>     const_reverse_iterator;
+    typedef std::reverse_iterator<iterator>           reverse_iterator;
+
+  protected :
+
+    ITER begin_;                          // start of the referenced data
+    ITER_INDEX index_begin_, index_end_;  // range of the selecting indexes
+
+  public :
+
+    bool empty(void) const { return index_begin_ == index_end_; }
+    size_type size(void) const { return index_end_ - index_begin_; }
+
+    iterator begin(void) { return iterator(begin_, index_begin_); }
+    const_iterator begin(void) const
+    { return const_iterator(begin_, index_begin_); }
+    iterator end(void) { return iterator(begin_, index_end_); }
+    const_iterator end(void) const { return const_iterator(begin_, index_end_); }
+    reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+    const_reverse_iterator rbegin(void) const
+    { return const_reverse_iterator(end()); }
+    reverse_iterator rend(void) { return reverse_iterator(begin()); }
+    const_reverse_iterator rend(void) const
+    { return const_reverse_iterator(begin()); }
+
+    reference front(void) { return *(begin_ + *index_begin_); }
+    const_reference front(void) const { return *(begin_ + *index_begin_); }
+    reference back(void) { iterator last(end()); return *--last; }
+    const_reference back(void) const { iterator last(end()); return *--last; }
+    void pop_front(void) { ++index_begin_; }
+
+    tab_ref_index_ref(void) {}
+    tab_ref_index_ref(const ITER &b, const ITER_INDEX &bi,
+                      const ITER_INDEX &ei)
+      : begin_(b), index_begin_(bi), index_end_(ei) {}
+
+    // Unchecked access to the ii-th selected element, i.e. X[I[ii]].
+    const_reference operator [](size_type ii) const
+    { return *(begin_ + index_begin_[ii]);}
+    reference operator [](size_type ii)
+    { return *(begin_ + index_begin_[ii]); }
+
+  };
+
+
+  /* ********************************************************************* */
+  /* Reference on regularly spaced elements.                               */
+  /* ********************************************************************* */
+
+  template<typename ITER> struct tab_ref_reg_spaced_iterator_ {
+    // Strided iterator: it designates the element at position it + i*N.
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+                                                            difference_type;
+    typedef typename std::iterator_traits<ITER>::iterator_category
+                                                            iterator_category;
+    typedef size_t size_type;
+    typedef tab_ref_reg_spaced_iterator_<ITER> iterator;
+    // it: base position, N: stride (in elements), i: current index.
+    ITER it;
+    size_type N, i;
+    iterator operator ++(int) { iterator tmp = *this; i++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; i--; return tmp; }
+    iterator &operator ++()   { i++; return *this; }
+    iterator &operator --()   { i--; return *this; }
+    iterator &operator +=(difference_type ii) { i+=ii; return *this; }
+    iterator &operator -=(difference_type ii) { i-=ii; return *this; }
+    iterator operator +(difference_type ii) const
+    { iterator itt = *this; return (itt += ii); }
+    iterator operator -(difference_type ii) const
+    { iterator itt = *this; return (itt -= ii); }
+    // Signed on purpose: N and i are unsigned, a negative it - ii.it must not wrap.
+    difference_type operator -(const iterator &ii) const
+    { return (N ? (it - ii.it) / difference_type(N) : difference_type(0))
+             + (difference_type(i) - difference_type(ii.i)); }
+    reference operator *() const { return *(it + i*N); }
+    reference operator [](size_type ii) const { return *(it + (i+ii)*N); }
+    // All comparisons are expressed through operator - above.
+    bool operator ==(const iterator &ii) const
+    { return (*this - ii) == difference_type(0); }
+    bool operator !=(const iterator &ii) const
+    { return  (*this - ii) != difference_type(0); }
+    bool operator < (const iterator &ii) const
+    { return (*this - ii) < difference_type(0); }
+
+    tab_ref_reg_spaced_iterator_(void) {}
+    tab_ref_reg_spaced_iterator_(const ITER &iter, size_type n, size_type ii)
+      : it(iter), N(n), i(ii) { }
+
+  };
+
+  /**
+      Convenience helper that builds a strided iterator positioned on `it`
+      (index 0 of the stride), without spelling out its (long) type name.
+  */
+  template<typename ITER> tab_ref_reg_spaced_iterator_<ITER>
+  reg_spaced_iterator(ITER it, size_t stride) {
+    return tab_ref_reg_spaced_iterator_<ITER>(it, stride, 0);
+  }
+
+  /**
+     Strided view of a container: element ii is found at begin_ + ii * N.
+  */
+  template<typename ITER> class tab_ref_reg_spaced {
+  public :
+
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::reference  const_reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+            difference_type;
+    typedef size_t size_type;
+    typedef tab_ref_reg_spaced_iterator_<ITER> iterator;
+    typedef iterator                          const_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+
+  protected :
+
+    ITER begin_;          // first element of the view
+    size_type N, size_;   // stride between elements, number of elements
+
+  public :
+
+    bool empty(void) const { return size_ == 0; }
+    size_type size(void) const { return size_; }
+
+    iterator begin(void) { return iterator(begin_, N, 0); }
+    const_iterator begin(void) const { return const_iterator(begin_, N, 0); }
+    iterator end(void) { return iterator(begin_, N, size_); }
+    const_iterator end(void) const { return const_iterator(begin_, N, size_); }
+    reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+    const_reverse_iterator rbegin(void) const
+    { return const_reverse_iterator(end()); }
+    reverse_iterator rend(void) { return reverse_iterator(begin()); }
+    const_reverse_iterator rend(void) const
+    { return const_reverse_iterator(begin()); }
+    // pop_front() drops the first element: it advances begin_ AND shrinks size_.
+    reference front(void) { return *begin_; }
+    const_reference front(void) const { return *begin_; }
+    reference back(void) { return (*this)[size_ - 1]; }
+    const_reference back(void) const { return (*this)[size_ - 1]; }
+    void pop_front(void) { begin_ += N; --size_; }
+
+    tab_ref_reg_spaced(void) {}
+    tab_ref_reg_spaced(const ITER &b, size_type n, size_type s)
+      : begin_(b), N(n), size_(s) {}
+
+    // Unchecked access to element ii of the view.
+    const_reference operator [](size_type ii) const
+    { return *(begin_ + ii * N);}
+    reference operator [](size_type ii) { return *(begin_ + ii * N); }
+
+  };
+
+  /// forward iterator over a tab_ref_with_selection: an ITER filtered by cond
+  template<typename ITER, typename COND> 
+  struct tab_ref_with_selection_iterator_ : public ITER {
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+                                                              difference_type;
+    typedef std::forward_iterator_tag iterator_category;
+    typedef tab_ref_with_selection_iterator_<ITER, COND> iterator;
+    const COND cond;
+    // forward() advances until cond(*this) holds; no end position is checked here.
+    void forward(void) { while (!(cond)(*this)) ITER::operator ++(); }
+    iterator &operator ++()
+    { ITER::operator ++(); forward(); return *this; }
+    iterator operator ++(int)
+    { iterator tmp = *this; ++(*this); return tmp; }
+    // NOTE(review): the default constructor default-initializes the const cond.
+    tab_ref_with_selection_iterator_(void) {}
+    tab_ref_with_selection_iterator_(const ITER &iter, const COND c)
+      : ITER(iter), cond(c) {}
+    
+  };
+
+  /**
+     Given a range [b, e) and a predicate cond, provide a pseudo-container
+     over the positions of the range that the iterator's forward() keeps.
+  */
+  template<typename ITER, typename COND> class tab_ref_with_selection {
+    
+  protected :
+    
+    ITER begin_, end_;
+    COND cond;
+    
+  public :
+    
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::reference  const_reference;
+    typedef size_t  size_type;
+    typedef tab_ref_with_selection_iterator_<ITER, COND> iterator;
+    typedef iterator   const_iterator;
+    // begin() starts at begin_ and calls forward(); end() wraps end_ unchanged.
+    iterator begin(void) const
+    { iterator it(begin_, cond); it.forward(); return it; }
+    iterator end(void) const { return iterator(end_, cond); }
+    bool empty(void) const { return begin_ == end_; }
+    // pop_front() steps past the current first position, then re-runs begin().
+    value_type front(void) const { return *begin(); }
+    void pop_front(void) { ++begin_; begin_ = begin(); }
+    
+    COND &condition(void) { return cond; }
+    const COND &condition(void) const { return cond; }
+    // The three-argument constructor stores begin() back into begin_.
+    tab_ref_with_selection(void) {}
+    tab_ref_with_selection(const ITER &b, const ITER &e, const COND &c)
+      : begin_(b), end_(e), cond(c) { begin_ = begin(); }
+    
+  };
+
+}
+
+#endif /* GMM_REF_H__  */
diff --git a/gmm/gmm_scaled.h b/gmm/gmm_scaled.h
new file mode 100644
index 000000000..485af32a1
--- /dev/null
+++ b/gmm/gmm_scaled.h
@@ -0,0 +1,434 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_scaled.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 10, 2002.
+   @brief get a scaled view of a vector/matrix.
+*/
+#ifndef GMM_SCALED_H__
+#define GMM_SCALED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		Scaled references on vectors            		   */
+  /* ********************************************************************* */
+
+  template <typename IT, typename S> struct scaled_const_iterator {
+    typedef typename strongest_numeric_type<typename std::iterator_traits<IT>::value_type,
+                                            S>::T value_type;
+    // The remaining iterator traits are forwarded from the wrapped iterator.
+    typedef typename std::iterator_traits<IT>::pointer         pointer;
+    typedef typename std::iterator_traits<IT>::reference       reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+    iterator_category;
+    // it: wrapped iterator; r: factor applied to every element read through it.
+    IT it;
+    S r;
+
+    scaled_const_iterator(void) {}
+    scaled_const_iterator(const IT &i, S x) : it(i), r(x) {}
+    // Position queries and moves are all delegated to the wrapped iterator.
+    size_type index(void) const { return it.index(); }
+    scaled_const_iterator operator ++(int)
+    { scaled_const_iterator old = *this; ++it; return old; }
+    scaled_const_iterator operator --(int)
+    { scaled_const_iterator old = *this; --it; return old; }
+    scaled_const_iterator &operator ++() { ++it; return *this; }
+    scaled_const_iterator &operator --() { --it; return *this; }
+    scaled_const_iterator &operator +=(difference_type i)
+    { it += i; return *this; }
+    scaled_const_iterator &operator -=(difference_type i)
+    { it -= i; return *this; }
+    scaled_const_iterator operator +(difference_type i) const
+    { scaled_const_iterator moved = *this; return (moved += i); }
+    scaled_const_iterator operator -(difference_type i) const
+    { scaled_const_iterator moved = *this; return (moved -= i); }
+    difference_type operator -(const scaled_const_iterator &i) const
+    { return difference_type(it - i.it); }
+    // Reads return the scaled value, computed on the fly.
+    value_type operator  *() const { return (*it) * value_type(r); }
+    value_type operator [](size_type ii) const { return it[ii] * r; }
+    // Comparisons look at the wrapped iterators only; r is ignored.
+    bool operator ==(const scaled_const_iterator &i) const
+    { return (i.it == it); }
+    bool operator !=(const scaled_const_iterator &i) const
+    { return (i.it != it); }
+    bool operator < (const scaled_const_iterator &i) const
+    { return (it < i.it); }
+  };
+
+  template <typename V, typename S> struct scaled_vector_const_ref {
+    typedef scaled_vector_const_ref<V,S> this_type;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<V>::const_iterator iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+    // Iterator range, origin and size taken from the vector, plus the factor r.
+    iterator begin_, end_;
+    const origin_type *origin;
+    size_type size_;
+    S r;
+    // Captures a const view of v together with the scale factor rr.
+    scaled_vector_const_ref(const V &v, S rr)
+      : begin_(vect_const_begin(v)), end_(vect_const_end(v)),
+	origin(linalg_origin(v)), size_(vect_size(v)), r(rr) {}
+    // Component i as returned by linalg_traits<V>::access, multiplied by r.
+    reference operator[](size_type i) const
+    { return value_type(r) * linalg_traits<V>::access(origin, begin_, end_, i); }
+  };
+
+
+   template<typename V, typename S> std::ostream &
+   operator <<(std::ostream &o, const scaled_vector_const_ref<V,S>& m)
+  { gmm::write(o, m); return o; } // formatting is delegated to gmm::write
+
+  /* ********************************************************************* */
+  /*		Scaled references on matrices            		   */
+  /* ********************************************************************* */
+
+  template <typename M, typename S> struct scaled_row_const_iterator {
+    typedef scaled_row_const_iterator<M,S> iterator;
+    typedef typename linalg_traits<M>::const_row_iterator ITER;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    // it: row iterator of the underlying matrix; r: scale factor carried along.
+    ITER it;
+    S r;
+
+    iterator operator ++(int) { iterator old = *this; it++; return old; }
+    iterator operator --(int) { iterator old = *this; it--; return old; }
+    iterator &operator ++()   { it++; return *this; }
+    iterator &operator --()   { it--; return *this; }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator moved = *this; return (moved += i); }
+    iterator operator -(difference_type i) const
+    { iterator moved = *this; return (moved -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+    // Dereferencing yields the underlying (unscaled) row iterator.
+    ITER operator *() const { return it; }
+    ITER operator [](int i) { return it + i; }
+    // Comparisons only involve the underlying row iterators.
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+
+    scaled_row_const_iterator(void) {}
+    scaled_row_const_iterator(const ITER &i, S rr)
+      : it(i), r(rr) { }
+
+  };
+
+  template <typename M, typename S> struct  scaled_row_matrix_const_ref {
+    // Read-only view of a matrix whose entries are multiplied by r on access.
+    typedef scaled_row_matrix_const_ref<M,S> this_type;
+    typedef typename linalg_traits<M>::const_row_iterator iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+    // Row range and origin of the matrix, the factor r, and its dimensions.
+    iterator begin_, end_;
+    const origin_type *origin;
+    S r;
+    size_type nr, nc;
+    // Captures the row range, origin and dimensions of m with the factor rr.
+    scaled_row_matrix_const_ref(const M &m, S rr)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), r(rr), nr(mat_nrows(m)), nc(mat_ncols(m)) {}
+    // Entry (i, j): r times entry j of the row at begin_ + i.
+    value_type operator()(size_type i, size_type j) const
+    { return r * linalg_traits<M>::access(begin_+i, j); }
+  };
+
+
+  template<typename M, typename S> std::ostream &
+  operator <<(std::ostream &o, const scaled_row_matrix_const_ref<M,S>& m)
+  { gmm::write(o, m); return o; } // formatting is delegated to gmm::write
+
+
+  template <typename M, typename S> struct scaled_col_const_iterator {
+    typedef scaled_col_const_iterator<M,S> iterator;
+    typedef typename linalg_traits<M>::const_col_iterator ITER;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    // it: column iterator of the underlying matrix; r: scale factor carried along.
+    ITER it;
+    S r;
+
+    iterator operator ++(int) { iterator old = *this; it++; return old; }
+    iterator operator --(int) { iterator old = *this; it--; return old; }
+    iterator &operator ++()   { it++; return *this; }
+    iterator &operator --()   { it--; return *this; }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator moved = *this; return (moved += i); }
+    iterator operator -(difference_type i) const
+    { iterator moved = *this; return (moved -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+    // Dereferencing yields the underlying (unscaled) column iterator.
+    ITER operator *() const { return it; }
+    ITER operator [](int i) { return it + i; }
+    // Comparisons only involve the underlying column iterators.
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+
+    scaled_col_const_iterator(void) {}
+    scaled_col_const_iterator(const ITER &i, S rr)
+      : it(i), r(rr) { }
+
+  };
+
+  template <typename M, typename S> struct scaled_col_matrix_const_ref {
+    // Read-only view of a matrix M, traversed by columns, with entries times r.
+    typedef scaled_col_matrix_const_ref<M,S> this_type;
+    typedef typename linalg_traits<M>::const_col_iterator iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // column range of the viewed matrix
+    const origin_type *origin;  // result of linalg_origin(m)
+    S r;                        // scaling factor
+    size_type nr, nc;           // row / column counts of the viewed matrix
+
+    scaled_col_matrix_const_ref(const M &src, S factor)
+      : begin_(mat_col_begin(src)), end_(mat_col_end(src)), origin(linalg_origin(src)),
+        r(factor), nr(mat_nrows(src)), nc(mat_ncols(src)) {}
+
+    // Entry (i, j): element i of column j, multiplied by r.
+    value_type operator()(size_type i, size_type j) const
+    { return r * linalg_traits<M>::access(begin_ + j, i); }
+  };
+
+
+
+  template<typename M, typename S>  // formatted output goes through gmm::write
+  std::ostream &operator <<(std::ostream &o, const scaled_col_matrix_const_ref<M,S>& m)
+  { gmm::write(o, m); return o; }
+
+
+  template <typename L, typename S, typename R> struct scaled_return__
+  { typedef abstract_null_type return_type; };  // fallback: no scaled view
+  template <typename L, typename S> struct scaled_return__<L, S, row_major> {
+    typedef scaled_row_matrix_const_ref<L,S> return_type;  // row-wise view
+  };
+  template <typename L, typename S> struct scaled_return__<L, S, col_major>
+  { typedef scaled_col_matrix_const_ref<L,S> return_type; };  // column-wise view
+  
+
+  // Return type of scaled(), selected by the linalg_type tag LT.
+  template <typename L, typename S, typename LT> struct scaled_return_
+  { typedef abstract_null_type return_type; };
+  template <typename L, typename S> struct scaled_return_<L, S, abstract_vector>
+  { typedef scaled_vector_const_ref<L,S> return_type; };
+  template <typename L, typename S> struct scaled_return_<L, S, abstract_matrix> {
+    typedef typename scaled_return__<L, S,  // matrices: by principal orientation
+      typename principal_orientation_type<
+        typename linalg_traits<L>::sub_orientation>::potype>::return_type return_type;
+  };
+
+  template <typename L, typename S> struct scaled_return {  // type returned by scaled(v, x)
+    typedef typename scaled_return_<
+      L, S, typename linalg_traits<L>::linalg_type>::return_type return_type;
+  };
+
+  /* Scaled read-only view of v by x; dispatches on the linalg_type tag of L. */
+  template <typename L, typename S> inline
+  typename scaled_return<L,S>::return_type scaled(const L &v, S x)
+  { return scaled(v, x, typename linalg_traits<L>::linalg_type()); }
+
+  /* Vector case: wrap v in a scaled_vector_const_ref built from (v, x). */
+  template <typename V, typename S> inline
+  typename scaled_return<V,S>::return_type scaled(const V &v, S x, abstract_vector)
+  { return scaled_vector_const_ref<V,S>(v, x); }
+
+  template <typename M, typename S> inline
+  typename scaled_return<M,S>::return_type scaled(const M &m, S x, abstract_matrix) {
+    // Forward to the overload selected by M's principal orientation tag.
+    typedef typename linalg_traits<M>::sub_orientation sub_orient;
+    return scaled(m, x, typename principal_orientation_type<sub_orient>::potype());
+  }
+
+  /* row_major tag: the view is a scaled_row_matrix_const_ref. */
+  template <typename M, typename S> inline
+  typename scaled_return<M,S>::return_type scaled(const M &m, S x, row_major) {
+    return scaled_row_matrix_const_ref<M,S>(m, x);
+  }
+
+  /* col_major tag: the view is a scaled_col_matrix_const_ref. */
+  template <typename M, typename S> inline
+  typename scaled_return<M,S>::return_type scaled(const M &m, S x, col_major) {
+    return scaled_col_matrix_const_ref<M,S>(m, x);
+  }
+
+
+  /* ******************************************************************** */
+  /*	matrix or vector scale                                	          */
+  /* ******************************************************************** */
+
+  /* Multiply l by a in place; the linalg_type tag of L selects the overload. */
+  template <typename L> inline void scale(L& l, typename linalg_traits<L>::value_type a)
+  { scale(l, a, typename linalg_traits<L>::linalg_type()); }
+
+  /* Const-reference overload: re-dispatches on linalg_const_cast(l). */
+  template <typename L> inline void scale(const L& l, typename linalg_traits<L>::value_type a)
+  { scale(linalg_const_cast(l), a); }
+
+  template <typename L> inline
+  void scale(L& l, typename linalg_traits<L>::value_type a, abstract_vector) {
+    typename linalg_traits<L>::iterator cur = vect_begin(l), last = vect_end(l);
+    while (cur != last) { *cur *= a; ++cur; }  // multiply each entry in place
+  }
+
+  template <typename L>
+  void scale(L& l, typename linalg_traits<L>::value_type a, abstract_matrix) {
+    typedef typename linalg_traits<L>::sub_orientation sub_orient;  // orientation tag of L
+    scale(l, a, typename principal_orientation_type<sub_orient>::potype());
+  }
+
+  template <typename L>
+  void scale(L& l, typename linalg_traits<L>::value_type a, row_major) {
+    typedef typename linalg_traits<L>::row_iterator row_iter;
+    for (row_iter cur = mat_row_begin(l), last = mat_row_end(l); cur != last; ++cur)
+      scale(linalg_traits<L>::row(cur), a);  // scale one row at a time
+  }
+
+  template <typename L>
+  void scale(L& l, typename linalg_traits<L>::value_type a, col_major) {
+    typedef typename linalg_traits<L>::col_iterator col_iter;
+    for (col_iter cur = mat_col_begin(l), last = mat_col_end(l); cur != last; ++cur)
+      scale(linalg_traits<L>::col(cur), a);  // scale one column at a time
+  }
+
+  template <typename V, typename S> struct linalg_traits<scaled_vector_const_ref<V,S> > {
+    // Traits for the read-only scaled vector view built on V.
+    typedef scaled_vector_const_ref<V,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename strongest_numeric_type<S, typename linalg_traits<V>::value_type>::T value_type;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef value_type reference;
+    typedef abstract_null_type iterator;
+    typedef scaled_const_iterator<typename linalg_traits<V>::const_iterator, S>
+      const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type size(const this_type &view) { return view.size_; }
+    static const_iterator begin(const this_type &view)
+    { return const_iterator(view.begin_, view.r); }
+    static const_iterator end(const this_type &view)
+    { return const_iterator(view.end_, view.r); }
+    static const origin_type* origin(const this_type &view) { return view.origin; }
+    static value_type access(const origin_type *o, const const_iterator &first,
+                             const const_iterator &last, size_type i)
+    { return first.r * (linalg_traits<V>::access(o, first.it, last.it, i)); }
+  };
+
+
+  template <typename M, typename S> struct linalg_traits<scaled_row_matrix_const_ref<M,S> > {
+    // Traits of the row-wise scaled matrix view: only row access is provided.
+    typedef scaled_row_matrix_const_ref<M,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename strongest_numeric_type<S, typename linalg_traits<M>::value_type>::T value_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_row_type>::t vector_type;
+    typedef scaled_vector_const_ref<vector_type,S> sub_row_type;
+    typedef scaled_vector_const_ref<vector_type,S> const_sub_row_type;
+    typedef scaled_row_const_iterator<M,S> row_iterator;
+    typedef scaled_row_const_iterator<M,S> const_row_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type col_iterator;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type nrows(const this_type &view) { return view.nr; }
+    static size_type ncols(const this_type &view) { return view.nc; }
+    // A row of the view is the matching row of M scaled by the iterator's factor.
+    static const_sub_row_type row(const const_row_iterator &rit)
+    { return scaled(linalg_traits<M>::row(rit.it), rit.r); }
+    static const_row_iterator row_begin(const this_type &view)
+    { return const_row_iterator(view.begin_, view.r); }
+    static const_row_iterator row_end(const this_type &view)
+    { return const_row_iterator(view.end_, view.r); }
+    static const origin_type* origin(const this_type &view) { return view.origin; }
+    static value_type access(const const_row_iterator &rit, size_type i)
+    { return rit.r * (linalg_traits<M>::access(rit.it, i)); }
+  };
+
+  template <typename M, typename S> struct linalg_traits<scaled_col_matrix_const_ref<M,S> > {
+    // Traits of the column-wise scaled matrix view: only column access is provided.
+    typedef scaled_col_matrix_const_ref<M,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename strongest_numeric_type<S, typename linalg_traits<M>::value_type>::T value_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_col_type>::t vector_type;
+    typedef abstract_null_type sub_col_type;
+    typedef scaled_vector_const_ref<vector_type,S> const_sub_col_type;
+    typedef abstract_null_type  col_iterator;
+    typedef scaled_col_const_iterator<M,S> const_col_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type row_iterator;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type ncols(const this_type &view) { return view.nc; }
+    static size_type nrows(const this_type &view) { return view.nr; }
+    // A column of the view is the matching column of M scaled by the iterator's factor.
+    static const_sub_col_type col(const const_col_iterator &cit)
+    { return scaled(linalg_traits<M>::col(cit.it), cit.r); }
+    static const_col_iterator col_begin(const this_type &view)
+    { return const_col_iterator(view.begin_, view.r); }
+    static const_col_iterator col_end(const this_type &view)
+    { return const_col_iterator(view.end_, view.r); }
+    static const origin_type* origin(const this_type &view) { return view.origin; }
+    static value_type access(const const_col_iterator &cit, size_type i)
+    { return cit.r * (linalg_traits<M>::access(cit.it, i)); }
+  };
+
+
+}
+
+#endif //  GMM_SCALED_H__
diff --git a/gmm/gmm_solver_Schwarz_additive.h b/gmm/gmm_solver_Schwarz_additive.h
new file mode 100644
index 000000000..7f8554b5a
--- /dev/null
+++ b/gmm/gmm_solver_Schwarz_additive.h
@@ -0,0 +1,805 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_Schwarz_additive.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @author  Michel Fournie <fournie@mip.ups-tlse.fr>
+   @date October 13, 2002.
+*/
+
+#ifndef GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
+#define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__ 
+
+#include "gmm_kernel.h"
+#include "gmm_superlu_interface.h"
+#include "gmm_solver_cg.h"
+#include "gmm_solver_gmres.h"
+#include "gmm_solver_bicgstab.h"
+#include "gmm_solver_qmr.h"
+
+namespace gmm {
+      
+  /* ******************************************************************** */
+  /*		Additive Schwarz interfaced local solvers                 */
+  /* ******************************************************************** */
+
+  struct using_cg {};        // tag: select the cg() based overloads
+  struct using_gmres {};     // tag: select the gmres() based overloads
+  struct using_bicgstab {};  // tag: select the bicgstab() based overloads
+  struct using_qmr {};       // tag: select the qmr() based overloads
+
+  template <typename P, typename local_solver, typename Matrix>
+  struct actual_precond {  // default: the given preconditioner type is used unchanged
+    typedef P APrecond;
+    static const APrecond &transform(const P &given) { return given; }
+  };
+
+  /* Local solve delegated to cg(). */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_cg, const Matrix1 &A, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) { cg(A, x, b, P, iter); }
+
+  /* Local solve delegated to gmres(); 100 is presumably the restart length (confirm). */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_gmres, const Matrix1 &A, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) { gmres(A, x, b, P, 100, iter); }
+  
+  /* Local solve delegated to bicgstab(). */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_bicgstab, const Matrix1 &A, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) { bicgstab(A, x, b, P, iter); }
+
+  /* Local solve delegated to qmr(). */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_qmr, const Matrix1 &A, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) { qmr(A, x, b, P, iter); }
+
+#if defined(GMM_USES_SUPERLU)
+  struct using_superlu {};  // tag: select the SuperLU based overloads
+  // With using_superlu the preconditioner slot holds a SuperLU_factor instead of P.
+  template <typename P, typename Matrix>
+  struct actual_precond<P, using_superlu, Matrix> {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef SuperLU_factor<value_type> APrecond;
+    // Any other preconditioner type is replaced by a default-constructed factor.
+    template <typename PR> static APrecond transform(const PR &) { return APrecond(); }
+    static const APrecond &transform(const APrecond &factor) { return factor; }
+  };
+
+  /* Local solve through P.solve(); the iteration count is then set to 1. */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_superlu, const Matrix1 &, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) { P.solve(x, b); iter.set_iteration(1); }
+#endif
+
+  /* ******************************************************************** */
+  /*		Additive Schwarz Linear system                            */
+  /* ******************************************************************** */
+
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename local_solver>
+  struct add_schwarz_mat{
+    // Operator used by the additive Schwarz solvers; its state is set by init().
+    typedef typename linalg_traits<Matrix1>::value_type value_type;
+    const Matrix1 *A;                // global matrix (pointer only, not owned)
+    const std::vector<Matrix2> *vB;  // one matrix per sub-domain (not owned)
+    std::vector<Matrix2> vAloc;      // local matrices built by init()
+    mutable iteration iter;          // iteration control handed to the local solves
+    double residual;
+    mutable size_type itebilan;      // local iteration counter updated by mult()
+    mutable std::vector<std::vector<value_type> > gi, fi;  // per-sub-domain work vectors
+    std::vector<typename actual_precond<Precond, local_solver,
+                                        Matrix1>::APrecond> precond1;
+    void init(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+              iteration iter_, const Precond &P, double residual_);
+
+    // Pointers and scalars get defined values so that a default-constructed
+    // object never carries indeterminate members before init() is called.
+    add_schwarz_mat(void) : A(0), vB(0), residual(0.0), itebilan(0) {}
+    add_schwarz_mat(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+                    iteration iter_, const Precond &P, double residual_)
+    { init(A_, vB_, iter_, P, residual_); }
+  };
+
+  template <typename Matrix1, typename Matrix2, typename Precond,
+	    typename local_solver>
+  void add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>::init(
+       const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+       iteration iter_, const Precond &P, double residual_) {
+    // Keep pointers to A_ and vB_ (no copy) plus the local-solve settings.
+    vB = &vB_; A = &A_; iter = iter_;
+    residual = residual_;
+    // One local matrix, one preconditioner and one fi/gi pair per matrix of vB.
+    size_type nb_sub = vB->size();
+    vAloc.resize(nb_sub);
+    gi.resize(nb_sub); fi.resize(nb_sub);
+    precond1.resize(nb_sub);
+    std::fill(precond1.begin(), precond1.end(),
+	      actual_precond<Precond, local_solver, Matrix1>::transform(P));
+    itebilan = 0;
+    // The loop below fills vAloc[i] and calls precond1[i].build_with(vAloc[i]).
+    if (iter.get_noisy()) cout << "Init pour sub dom ";
+#ifdef GMM_USES_MPI
+    int size,tranche,borne_sup,borne_inf,rank,tag1=11,tag2=12,tag3=13,sizepr = 0;
+    //    int tab[4];
+    double t_ref,t_final;
+    MPI_Status status;
+    t_ref=MPI_Wtime();
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    tranche=nb_sub/size;  // sub-domains per rank (integer division)
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;  // this rank handles indices [borne_inf, borne_sup)
+    // if (rank==size-1) borne_sup = nb_sub;
+    // NOTE(review): with the line above disabled, the last nb_sub % size sub-domains are never visited.
+    cout << "Nombre de sous domaines " << borne_sup - borne_inf << endl;
+
+    int sizeA = mat_nrows(*A);
+    gmm::csr_matrix<value_type> Acsr(sizeA, sizeA), Acsrtemp(sizeA, sizeA);
+    gmm::copy(gmm::eff_matrix(*A), Acsr);
+    int next = (rank + 1) % size;
+    int previous = (rank + size - 1) % size;
+    // Communication of local information on a ring pattern:
+    // each process receives Nproc-1 contributions.
+
+    for (int nproc = 0; nproc < size; ++nproc) {
+       for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) {
+// 	for (size_type i = 0; i < nb_sub/size; ++i) {
+// 	for (size_type i = 0; i < nb_sub; ++i) {
+	// size_type i=(rank+size*(j-1)+nb_sub)%nb_sub;
+
+	cout << "Sous domaines " << i << " : " << mat_ncols((*vB)[i]) << endl;
+#else
+	for (size_type i = 0; i < nb_sub; ++i) {
+#endif
+	  
+	  if (iter.get_noisy()) cout << i << " " << std::flush;
+	  Matrix2 Maux(mat_ncols((*vB)[i]), mat_nrows((*vB)[i]));  // receives vB[i]^T times the matrix
+	  
+#ifdef GMM_USES_MPI
+	  Matrix2 Maux2(mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	  if (nproc == 0) {
+	    gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	    gmm::clear(vAloc[i]);
+	  }
+	  gmm::mult(gmm::transposed((*vB)[i]), Acsr, Maux);
+	  gmm::mult(Maux, (*vB)[i], Maux2);
+	  gmm::add(Maux2, vAloc[i]);  // accumulate the contribution of this ring step
+#else
+	  gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	  gmm::mult(gmm::transposed((*vB)[i]), *A, Maux);
+	  gmm::mult(Maux, (*vB)[i], vAloc[i]);  // vAloc[i] = vB[i]^T * A * vB[i]
+#endif
+
+#ifdef GMM_USES_MPI
+	  if (nproc == size - 1 ) {  // last ring step: vAloc[i] has all contributions
+#endif
+	    precond1[i].build_with(vAloc[i]);
+	    gmm::resize(fi[i], mat_ncols((*vB)[i]));
+	    gmm::resize(gi[i], mat_ncols((*vB)[i]));
+#ifdef GMM_USES_MPI
+	  }
+#else
+	}
+#endif
+#ifdef GMM_USES_MPI
+     }
+      if (nproc != size - 1) {  // exchange the CSR arrays with the ring neighbours
+        MPI_Sendrecv(&(Acsr.jc[0]), sizeA+1, MPI_INT, next, tag2,
+                     &(Acsrtemp.jc[0]), sizeA+1, MPI_INT, previous, tag2,
+                     MPI_COMM_WORLD, &status);
+        if (Acsrtemp.jc[sizeA] > size_type(sizepr)) {  // grow the receive buffers when needed
+          sizepr = Acsrtemp.jc[sizeA];
+          gmm::resize(Acsrtemp.pr, sizepr);
+          gmm::resize(Acsrtemp.ir, sizepr);
+        }
+        MPI_Sendrecv(&(Acsr.ir[0]), Acsr.jc[sizeA], MPI_INT, next, tag1,
+                     &(Acsrtemp.ir[0]), Acsrtemp.jc[sizeA], MPI_INT, previous, tag1,
+                     MPI_COMM_WORLD, &status);
+        
+        MPI_Sendrecv(&(Acsr.pr[0]), Acsr.jc[sizeA], mpi_type(value_type()), next, tag3, 
+                     &(Acsrtemp.pr[0]), Acsrtemp.jc[sizeA], mpi_type(value_type()), previous, tag3,
+                     MPI_COMM_WORLD, &status);
+        gmm::copy(Acsrtemp, Acsr);
+      }
+    }
+      t_final=MPI_Wtime();
+    cout<<"temps boucle precond "<< t_final-t_ref<<endl;
+#endif
+    if (iter.get_noisy()) cout << "\n";
+  }
+  
+  template <typename Matrix1, typename Matrix2, typename Precond,
+	    typename Vector2, typename Vector3, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+	    const Vector2 &p, Vector3 &q) {  // q = sum_i vB[i] * localsolve_i(vB[i]^T * (A * p))
+    size_type itebilan = 0;  // largest local iteration count seen in this call
+#ifdef GMM_USES_MPI
+    static double tmult_tot = 0.0;
+    double t_ref = MPI_Wtime();
+#endif
+    // cout << "tmult AS begin " << endl;
+    mult(*(M.A), p, q);  // q temporarily holds A * p
+#ifdef GMM_USES_MPI
+    tmult_tot += MPI_Wtime()-t_ref;
+    cout << "tmult_tot = " << tmult_tot << endl;
+#endif
+    std::vector<double> qbis(gmm::vect_size(q));
+    std::vector<double> qter(gmm::vect_size(q));  // NOTE(review): qbis/qter are only used in the MPI branches
+#ifdef GMM_USES_MPI
+    //    MPI_Status status;
+    //    MPI_Request request,request1;
+    //    int tag=111;
+    int size,tranche,borne_sup,borne_inf,rank;
+    size_type nb_sub=M.fi.size();
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    tranche=nb_sub/size;
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;  // this rank handles indices [borne_inf, borne_sup)
+    // if (rank==size-1) borne_sup=nb_sub;
+    //    int next = (rank + 1) % size;
+    //    int previous = (rank + size - 1) % size;
+    t_ref = MPI_Wtime();
+     for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+//        for (size_type i = 0; i < nb_sub/size; ++i)
+      // for (size_type j = 0; j < nb_sub; ++j)
+#else
+    for (size_type i = 0; i < M.fi.size(); ++i)
+#endif
+      {
+#ifdef GMM_USES_MPI
+	// size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+#endif
+	gmm::mult(gmm::transposed((*(M.vB))[i]), q, M.fi[i]);  // fi[i] = vB[i]^T * q
+       M.iter.init();
+       AS_local_solve(local_solver(), (M.vAloc)[i], (M.gi)[i],
+		      (M.fi)[i],(M.precond1)[i],M.iter);
+       itebilan = std::max(itebilan, M.iter.get_iteration());
+       }
+    // First loop done: every gi[i] now holds the result of its local solve.
+#ifdef GMM_USES_MPI
+    cout << "First  AS loop time " <<  MPI_Wtime() - t_ref << endl;
+#endif
+
+    gmm::clear(q);  // q is rebuilt below from the vB[i] * gi[i] products
+#ifdef GMM_USES_MPI
+    t_ref = MPI_Wtime();
+    // for (size_type j = 0; j < nb_sub; ++j)
+    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+
+#else
+      for (size_type i = 0; i < M.gi.size(); ++i)
+#endif
+	{
+
+#ifdef GMM_USES_MPI
+	  // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+// 	  gmm::mult((*(M.vB))[i], M.gi[i], qbis,qbis);
+	  gmm::mult((*(M.vB))[i], M.gi[i], qter);
+	  add(qter,qbis,qbis);  // per-rank partial sum kept in qbis
+#else
+	  gmm::mult((*(M.vB))[i], M.gi[i], q, q);  // accumulate vB[i] * gi[i] into q
+#endif
+	}
+#ifdef GMM_USES_MPI
+     //WARNING this add only if you use the ring pattern below
+  // need to do this below if using a n explicit ring pattern communication
+
+//      add(qbis,q,q);
+    cout << "Second AS loop time " <<  MPI_Wtime() - t_ref << endl;
+#endif
+
+
+#ifdef GMM_USES_MPI
+    //    int tag1=11;
+    static double t_tot = 0.0;
+    double t_final;
+    t_ref=MPI_Wtime();
+//     int next = (rank + 1) % size;
+//     int previous = (rank + size - 1) % size;
+    //communication of local information on ring pattern
+    //Each process receive  Nproc-1 contributions 
+
+//     if (size > 1) {
+//     for (int nproc = 0; nproc < size-1; ++nproc) 
+//       {
+
+// 	MPI_Sendrecv(&(qbis[0]), gmm::vect_size(q), MPI_DOUBLE, next, tag1,
+// 		   &(qter[0]), gmm::vect_size(q),MPI_DOUBLE,previous,tag1,
+// 		   MPI_COMM_WORLD,&status);
+// 	gmm::copy(qter, qbis);
+// 	add(qbis,q,q);
+//       }
+//     }
+    MPI_Allreduce(&(qbis[0]), &(q[0]),gmm::vect_size(q), MPI_DOUBLE,
+		  MPI_SUM,MPI_COMM_WORLD);  // NOTE(review): MPI_DOUBLE is hard-coded; q's value type is not checked here
+    t_final=MPI_Wtime();
+    t_tot += t_final-t_ref;
+     cout<<"["<< rank<<"] temps reduce Resol "<< t_final-t_ref << " t_tot = " << t_tot << endl;
+#endif 
+
+    if (M.iter.get_noisy() > 0) cout << "itebloc = " << itebilan << endl;
+    M.itebilan += itebilan;
+    M.iter.set_resmax((M.iter.get_resmax() + M.residual) * 0.5);  // new resmax = mean of old resmax and M.residual
+  }
+
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename Vector2, typename Vector3, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &q)
+  // q is the output even though it is passed as const: strip the qualifier.
+  { mult(M, p, const_cast<Vector3 &>(q)); }
+
+  template <typename Matrix1, typename Matrix2, typename Precond, typename Vector2,
+            typename Vector3, typename Vector4, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &p2, Vector4 &q) {
+    mult(M, p, q); add(p2, q);  // apply M to p into q, then add(p2, q)
+  }
+
+  template <typename Matrix1, typename Matrix2, typename Precond, typename Vector2,
+            typename Vector3, typename Vector4, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &p2, const Vector4 &q) {
+    mult(M, p, const_cast<Vector4 &>(q)); add(p2, q);  // q is an output despite const
+  }
+
+  /* ******************************************************************** */
+  /*		Additive Schwarz interfaced global solvers                */
+  /* ******************************************************************** */
+
+  // Outer solve with cg(); *(ASM.A) presumably defines the scalar product (confirm).
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_cg, const ASM_type &ASM, Vect &x, const Vect &b,
+                       iteration &iter) { cg(ASM, x, b, *(ASM.A), identity_matrix(), iter); }
+
+  // Outer solve with gmres(), identity preconditioner; 100 is presumably the restart length.
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_gmres, const ASM_type &ASM, Vect &x, const Vect &b,
+                       iteration &iter) { gmres(ASM, x, b, identity_matrix(), 100, iter); }
+
+  // Outer solve with bicgstab() and an identity preconditioner.
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_bicgstab, const ASM_type &ASM, Vect &x, const Vect &b,
+                       iteration &iter) { bicgstab(ASM, x, b, identity_matrix(), iter); }
+
+  // Outer solve with qmr() and an identity preconditioner.
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_qmr, const ASM_type &ASM, Vect &x, const Vect &b,
+                       iteration &iter) { qmr(ASM, x, b, identity_matrix(), iter); }
+
+#if defined(GMM_USES_SUPERLU)
+  // using_superlu is not supported as global solver: this overload only raises GMM_ASSERT1.
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_superlu, const ASM_type &, Vect &, const Vect &, iteration &) {
+    GMM_ASSERT1(false, "You cannot use SuperLU as "
+                "global solver in additive Schwarz method");
+  }
+#endif
+  
+  /* ******************************************************************** */
+  /*	            Linear Additive Schwarz method                        */
+  /* ******************************************************************** */
+  /* ref : Domain decomposition algorithms for the p-version finite       */
+  /*       element method for elliptic problems, Luca F. Pavarino,        */
+  /*       PhD thesis, Courant Institute of Mathematical Sciences, 1992.  */
+  /* ******************************************************************** */
+
+  /** Function to call if the ASM matrix is precomputed for successive solve
+   * with the same system.
+   */
+  template <typename Matrix1, typename Matrix2,
+	    typename Vector2, typename Vector3, typename Precond,
+	    typename local_solver, typename global_solver>
+  void additive_schwarz(
+    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &ASM, Vector3 &u,
+    const Vector2 &f, iteration &iter, const global_solver&) {
+    // Builds g = sum_i vB[i] * gi[i] from local solves on vB[i]^T * f, then solves for u.
+    typedef typename linalg_traits<Matrix1>::value_type value_type;
+
+    size_type nb_sub = ASM.vB->size(), nb_dof = gmm::vect_size(f);
+    ASM.itebilan = 0;
+    std::vector<value_type> g(nb_dof);
+    std::vector<value_type> gbis(nb_dof);  // NOTE(review): only used when GMM_USES_MPI is defined
+#ifdef GMM_USES_MPI
+    double t_init=MPI_Wtime();
+    int size,tranche,borne_sup,borne_inf,rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    tranche=nb_sub/size;
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;  // this rank handles indices [borne_inf, borne_sup)
+    // if (rank==size-1) borne_sup=nb_sub*size;
+    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+//     for (size_type i = 0; i < nb_sub/size; ++i)
+      // for (size_type j = 0; j < nb_sub; ++j)
+      // for (size_type i = rank; i < nb_sub; i+=size)
+#else
+    for (size_type i = 0; i < nb_sub; ++i)
+#endif
+    {
+
+#ifdef GMM_USES_MPI
+      // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+#endif
+      gmm::mult(gmm::transposed((*(ASM.vB))[i]), f, ASM.fi[i]);  // fi[i] = vB[i]^T * f
+      ASM.iter.init();
+      AS_local_solve(local_solver(), ASM.vAloc[i], ASM.gi[i], ASM.fi[i],
+		     ASM.precond1[i], ASM.iter);
+      ASM.itebilan = std::max(ASM.itebilan, ASM.iter.get_iteration());  // keep the largest count
+#ifdef GMM_USES_MPI
+    gmm::mult((*(ASM.vB))[i], ASM.gi[i], gbis,gbis);
+#else   
+    gmm::mult((*(ASM.vB))[i], ASM.gi[i], g, g);  // accumulate vB[i] * gi[i] into g
+#endif
+    }
+#ifdef GMM_USES_MPI
+    cout<<"temps boucle init "<< MPI_Wtime()-t_init<<endl;
+    double t_ref,t_final;
+    t_ref=MPI_Wtime();
+    MPI_Allreduce(&(gbis[0]), &(g[0]),gmm::vect_size(g), MPI_DOUBLE,
+		  MPI_SUM,MPI_COMM_WORLD);  // NOTE(review): MPI_DOUBLE is hard-coded although g holds value_type
+    t_final=MPI_Wtime();
+    cout<<"temps reduce init "<< t_final-t_ref<<endl;
+#endif
+#ifdef GMM_USES_MPI
+    t_ref=MPI_Wtime();
+    cout<<"begin global AS"<<endl;
+#endif
+    AS_global_solve(global_solver(), ASM, u, g, iter);  // outer solve: operator ASM, right-hand side g
+#ifdef GMM_USES_MPI
+    t_final=MPI_Wtime();
+    cout<<"temps AS Global Solve "<< t_final-t_ref<<endl;
+#endif
+    if (iter.get_noisy())
+      cout << "Total number of internal iterations : " << ASM.itebilan << endl;
+  }
+
+  /** Global function. Compute the ASM matrix and call the previous function.
+   *  The ASM matrix represents the preconditioned linear system.
+   */
+  template <typename Matrix1, typename Matrix2,
+            typename Vector2, typename Vector3, typename Precond,
+            typename local_solver, typename global_solver>
+  void additive_schwarz(const Matrix1 &A, Vector3 &u, const Vector2 &f,
+                        const Precond &P, const std::vector<Matrix2> &vB,
+                        iteration &iter, local_solver, global_solver) {
+    typedef add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> schwarz_type;
+    iter.set_rhsnorm(vect_norm2(f));
+    if (iter.get_rhsnorm() == 0.0) { gmm::clear(u); return; }  // zero rhs: u is cleared
+    // The local solves get a copy of iter after reduce_noisy(), with maxiter = size_type(-1).
+    iteration local_iter = iter;
+    local_iter.reduce_noisy();
+    local_iter.set_maxiter(size_type(-1));
+    schwarz_type schwarz_op(A, vB, local_iter, P, iter.get_resmax());
+    additive_schwarz(schwarz_op, u, f, iter, global_solver());
+  }
+
+  /* ******************************************************************** */
+  /*		Sequential Non-Linear Additive Schwarz method             */
+  /* ******************************************************************** */
+  /* ref : Nonlinearly Preconditioned Inexact Newton Algorithms,          */
+  /*       Xiao-Chuan Cai, David E. Keyes,                                */
+  /*       SIAM J. Sci. Comp. 24: p183-200.                               */
+  /* ******************************************************************** */
+
+  template <typename Matrixt, typename MatrixBi> 
+  class NewtonAS_struct {
+    // Abstract problem interface: every operation below is pure virtual.
+  public :
+    typedef Matrixt tangent_matrix_type;
+    typedef MatrixBi B_matrix_type;
+    typedef typename linalg_traits<Matrixt>::value_type value_type;
+    typedef std::vector<value_type> Vector;
+    // NOTE(review): size() is presumably the number of unknowns -- confirm with implementers.
+    virtual size_type size(void) = 0;
+    virtual const std::vector<MatrixBi> &get_vB() = 0;
+    // f and M are the outputs of the next two calls (presumably F(x) and its gradient).
+    virtual void compute_F(Vector &f, Vector &x) = 0;
+    virtual void compute_tangent_matrix(Matrixt &M, Vector &x) = 0;
+    // compute Bi^T grad(F(X)) Bi
+    virtual void compute_sub_tangent_matrix(Matrixt &Mloc, Vector &x,
+					    size_type i) = 0;
+    // compute Bi^T F(X)
+    virtual void compute_sub_F(Vector &fi, Vector &x, size_type i) = 0;
+
+    virtual ~NewtonAS_struct() {}
+  };
+
+  template <typename Matrixt, typename MatrixBi>
+  struct AS_exact_gradient {
+    const std::vector<MatrixBi> &vB;  // reference member: the vector must outlive this object
+    std::vector<Matrixt> vM;          // one square matrix per vB[k], sized by its row count
+    std::vector<Matrixt> vMloc;       // vB[k]^T * vM[k] * vB[k], filled by init()
+    // Recompute every vMloc[k] from the current vM[k].
+    void init(void) {
+      for (size_type k = 0; k < vB.size(); ++k) {
+        Matrixt aux(gmm::mat_ncols(vB[k]), gmm::mat_ncols(vM[k]));
+        gmm::resize(vMloc[k], gmm::mat_ncols(vB[k]), gmm::mat_ncols(vB[k]));
+        gmm::mult(gmm::transposed(vB[k]), vM[k], aux);
+        gmm::mult(aux, vB[k], vMloc[k]);
+      }
+    }
+    AS_exact_gradient(const std::vector<MatrixBi> &vB_) : vB(vB_) {
+      const size_type nsub = vB.size();
+      vM.resize(nsub); vMloc.resize(nsub);
+      for (size_type k = 0; k < nsub; ++k)
+        gmm::resize(vM[k], gmm::mat_nrows(vB[k]), gmm::mat_nrows(vB[k]));
+    }
+  };
+
+  template <typename Matrixt, typename MatrixBi,
+            typename Vector2, typename Vector3>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+            const Vector2 &p, Vector3 &q) {
+    // q = sum over k of vB[k] * sol_k, with vMloc[k] * sol_k = vB[k]^T * vM[k] * p
+    gmm::clear(q);
+    typedef typename gmm::linalg_traits<Vector3>::value_type T;
+    std::vector<T> full(gmm::vect_size(p)), rhs, sol;
+    for (size_type k = 0; k < M.vB.size(); ++k) {
+      const MatrixBi &B = M.vB[k];
+      rhs.resize(gmm::mat_ncols(B));
+      sol.resize(gmm::mat_ncols(B));
+      gmm::mult(M.vM[k], p, full);
+      gmm::mult(gmm::transposed(B), full, rhs);
+      double rcond;  // output argument of SuperLU_solve, not used afterwards
+      SuperLU_solve(M.vMloc[k], sol, rhs, rcond);
+      gmm::mult_add(B, sol, q);
+    }
+  }
+
+  template <typename Matrixt, typename MatrixBi,
+	    typename Vector2, typename Vector3>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+	    const Vector2 &p, const Vector3 &q) {  // const-q convenience form
+    mult(M, p, const_cast<Vector3 &>(q));  // const is cast away: q IS written
+  }
+
+  template <typename Matrixt, typename MatrixBi,
+	    typename Vector2, typename Vector3, typename Vector4>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+	    const Vector2 &p, const Vector3 &p2, Vector4 &q)
+  { mult(M, p, q); add(p2, q); }  // q = (M applied to p) + p2
+
+  template <typename Matrixt, typename MatrixBi,
+	    typename Vector2, typename Vector3, typename Vector4>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+	    const Vector2 &p, const Vector3 &p2, const Vector4 &q)  // q IS written
+  { mult(M, p, const_cast<Vector4 &>(q)); add(p2, q); }
+
+  struct S_default_newton_line_search {
+    // Step-shrinking line search: next_try() proposes, is_converged() decides.
+    double conv_alpha, conv_r;  // step retained as the result, and its residual
+    size_t it, itmax, glob_it;  // it counts next_try() calls since init_search()
+
+    double alpha, alpha_old, alpha_mult, first_res, alpha_max_ratio;
+    double alpha_min_ratio, alpha_min;
+    size_type count, count_pat;  // count_pat survives init_search() when git > 1
+    bool max_ratio_reached;
+    double alpha_max_ratio_reached, r_max_ratio_reached;
+    size_type it_max_ratio_reached;
+
+    // Accessors for the step and residual selected by is_converged().
+    double converged_value(void) { return conv_alpha; };
+    double converged_residual(void) { return conv_r; };
+    // Starts a search from residual r; git is the caller's iteration number.
+    virtual void init_search(double r, size_t git, double = 0.0) {
+      alpha_min_ratio = 0.9;
+      alpha_min = 1e-10;
+      alpha_max_ratio = 10.0;
+      alpha_mult = 0.25;  // shrink factor applied once alpha < 0.4
+      itmax = size_type(-1);
+      glob_it = git; if (git <= 1) count_pat = 0;
+      conv_alpha = alpha = alpha_old = 1.;
+      conv_r = first_res = r; it = 0;
+      count = 0;
+      max_ratio_reached = false;
+    }
+    virtual double next_try(void) {  // returns the current alpha, then shrinks it
+      alpha_old = alpha;
+      if (alpha >= 0.4) alpha *= 0.5; else alpha *= alpha_mult; ++it;
+      return alpha_old;
+    }
+    virtual bool is_converged(double r, double = 0.0) {  // true: stop trying
+      // cout << "r = " << r << " alpha = " << alpha / alpha_mult << " count_pat = " << count_pat << endl;
+      if (!max_ratio_reached && r < first_res * alpha_max_ratio) {
+	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
+	it_max_ratio_reached = it; max_ratio_reached = true; 
+      }
+      if (max_ratio_reached && r < r_max_ratio_reached * 0.5
+	  && r > first_res * 1.1 && it <= it_max_ratio_reached+1) {
+	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
+	it_max_ratio_reached = it;
+      }
+      if (count == 0 || r < conv_r)  // remember the lowest residual seen
+	{ conv_r = r; conv_alpha = alpha_old; count = 1; }
+      if (conv_r < first_res) ++count;
+      // Success: residual reduced below alpha_min_ratio * first_res.
+      if (r < first_res *  alpha_min_ratio)
+	{ count_pat = 0; return true; }      
+      if (count >= 5 || (alpha < alpha_min && max_ratio_reached)) {
+	if (conv_r < first_res * 0.99) count_pat = 0;
+	if (/*gmm::random() * 50. < -log(conv_alpha)-4.0 ||*/ count_pat >= 3)
+	  { conv_r=r_max_ratio_reached; conv_alpha=alpha_max_ratio_reached; }
+	if (conv_r >= first_res * 0.9999) count_pat++;
+	return true;
+      }
+      return false;
+    }
+    S_default_newton_line_search(void) { count_pat = 0; }
+  };
+
+
+  
+  template <typename Matrixt, typename MatrixBi, typename Vector,
+	    typename Precond, typename local_solver, typename global_solver>
+  void Newton_additive_Schwarz(NewtonAS_struct<Matrixt, MatrixBi> &NS,
+			       const Vector &u_,
+			       iteration &iter, const Precond &P,
+			       local_solver, global_solver) {  // tag types
+    Vector &u = const_cast<Vector &>(u_);  // u_ is updated in place
+    typedef typename linalg_traits<Vector>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type mtype;
+    typedef actual_precond<Precond, local_solver, Matrixt> chgt_precond;
+    // Each pass: local nonlinear solves, one global linear solve, line search.
+    double residual = iter.get_resmax();
+    // Independent line-search state for the local and the global Newton loops.
+    S_default_newton_line_search internal_ls;
+    S_default_newton_line_search external_ls;
+
+    typename chgt_precond::APrecond PP = chgt_precond::transform(P);
+    iter.set_rhsnorm(mtype(1));
+    iteration iternc(iter);
+    iternc.reduce_noisy(); iternc.set_maxiter(size_type(-1));
+    iteration iter2(iternc);
+    iteration iter3(iter2); iter3.reduce_noisy();
+    iteration iter4(iter3);
+    iternc.set_name("Local Newton");
+    iter2.set_name("Linear System for Global Newton");
+    iternc.set_resmax(residual/100.0);
+    iter3.set_resmax(residual/10000.0);
+    iter2.set_resmax(residual/1000.0);
+    iter4.set_resmax(residual/1000.0);
+    std::vector<value_type> rhs(NS.size()), x(NS.size()), d(NS.size());
+    std::vector<value_type> xi, xii, fi, di;
+    // vx[i]: last point reached by local solve i (input of the exact gradient).
+    std::vector< std::vector<value_type> > vx(NS.get_vB().size());
+    for (size_type i = 0; i < NS.get_vB().size(); ++i) // for exact gradient
+      vx[i].resize(NS.size()); // for exact gradient
+
+    Matrixt Mloc, M(NS.size(), NS.size());
+    NS.compute_F(rhs, u);
+    mtype act_res=gmm::vect_norm2(rhs), act_res_new(0), precond_res = act_res;
+    mtype alpha;
+    
+    while(!iter.finished(std::min(act_res, precond_res))) {
+      for (int SOR_step = 0;  SOR_step >= 0; --SOR_step) {  // single pass
+	gmm::clear(rhs);
+	for (size_type isd = 0; isd < NS.get_vB().size(); ++isd) {
+	  const MatrixBi &Bi = (NS.get_vB())[isd];
+	  size_type si = mat_ncols(Bi);
+	  gmm::resize(Mloc, si, si);
+	  xi.resize(si); xii.resize(si); fi.resize(si); di.resize(si);
+	  // NOTE(review): xii, unlike xi, is not reset per problem -- confirm.
+	  iternc.init();
+	  iternc.set_maxiter(30); // ?
+	  if (iternc.get_noisy())
+	    cout << "Non-linear local problem " << isd << endl;
+	  gmm::clear(xi);
+	  gmm::copy(u, x);
+	  NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+	  mtype r = gmm::vect_norm2(fi), r_t(r);
+	  if (r > value_type(0)) {
+	    iternc.set_rhsnorm(std::max(r, mtype(1)));
+	    while(!iternc.finished(r)) {
+	      NS.compute_sub_tangent_matrix(Mloc, x, isd);
+
+	      PP.build_with(Mloc);
+	      iter3.init();
+	      AS_local_solve(local_solver(), Mloc, di, fi, PP, iter3);
+	      // Line search along the local Newton direction di.
+	      internal_ls.init_search(r, iternc.get_iteration());
+	      do {
+		alpha = internal_ls.next_try();
+		gmm::add(xi, gmm::scaled(di, -alpha), xii);
+		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
+		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+		r_t = gmm::vect_norm2(fi);
+	      } while (!internal_ls.is_converged(r_t));
+	      // Re-evaluate if the retained step is not the last one tried.
+	      if (alpha != internal_ls.converged_value()) {
+		alpha = internal_ls.converged_value();
+		gmm::add(xi, gmm::scaled(di, -alpha), xii);
+		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
+		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+		r_t = gmm::vect_norm2(fi);
+	      }
+	      gmm::copy(x, vx[isd]); // for exact gradient
+
+	      if (iternc.get_noisy()) cout << "(step=" << alpha << ")\t";
+	      ++iternc; r = r_t; gmm::copy(xii, xi); 
+	    }
+	    if (SOR_step) gmm::mult(Bi, gmm::scaled(xii, -1.0), u, u);
+	    gmm::mult(Bi, gmm::scaled(xii, -1.0), rhs, rhs);
+	  }
+	}
+	precond_res = gmm::vect_norm2(rhs);
+	if (SOR_step) cout << "SOR step residual = " << precond_res << endl;
+	if (precond_res < residual) break;
+	cout << "Precond residual = " << precond_res << endl;  // always printed
+      }
+
+      iter2.init();
+      // solving linear system for the global Newton method
+      if (0) {  // disabled: assembled additive Schwarz operator
+	NS.compute_tangent_matrix(M, u);
+	add_schwarz_mat<Matrixt, MatrixBi, Precond, local_solver>
+	  ASM(M, NS.get_vB(), iter4, P, iter.get_resmax());
+	AS_global_solve(global_solver(), ASM, d, rhs, iter2);
+      }
+      else {  // for exact gradient
+	AS_exact_gradient<Matrixt, MatrixBi> eg(NS.get_vB());
+	for (size_type i = 0; i < NS.get_vB().size(); ++i) {
+	  NS.compute_tangent_matrix(eg.vM[i], vx[i]);
+	}
+	eg.init();
+	gmres(eg, d, rhs, gmm::identity_matrix(), 50, iter2);
+      }
+
+      //      gmm::add(gmm::scaled(rhs, 0.1), u); ++iter;
+      external_ls.init_search(act_res, iter.get_iteration());
+      do {
+	alpha = external_ls.next_try();
+	gmm::add(gmm::scaled(d, alpha), u, x);
+	NS.compute_F(rhs, x);
+	act_res_new = gmm::vect_norm2(rhs);
+      } while (!external_ls.is_converged(act_res_new));
+      // Re-evaluate F if the retained step is not the last one tried.
+      if (alpha != external_ls.converged_value()) {
+	alpha = external_ls.converged_value();
+	gmm::add(gmm::scaled(d, alpha), u, x);
+	NS.compute_F(rhs, x);
+	act_res_new = gmm::vect_norm2(rhs);
+      }
+
+      if (iter.get_noisy() > 1) cout << endl;
+      act_res = act_res_new; 
+      if (iter.get_noisy()) cout << "(step=" << alpha << ")\t unprecond res = " << act_res << " ";
+      
+      
+      ++iter; gmm::copy(x, u);
+    }
+  }
+
+}
+
+
+#endif //  GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
diff --git a/gmm/gmm_solver_bfgs.h b/gmm/gmm_solver_bfgs.h
new file mode 100644
index 000000000..28a1bc01f
--- /dev/null
+++ b/gmm/gmm_solver_bfgs.h
@@ -0,0 +1,210 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_bfgs.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 14 2004.
+   @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
+ */
+#ifndef GMM_BFGS_H
+#define GMM_BFGS_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
+  // Quasi Newton method for optimization problems.
+  // with Wolfe Line search.
+
+
+  // delta[k] = x[k+1] - x[k]
+  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
+  // H[0] = I
+  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
+  // DFP  : zeta[k] = H[k] gamma[k]
+  // tau[k] = gamma[k]^T zeta[k]
+  // rho[k] = 1 / gamma[k]^T delta[k]
+  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
+  //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
+  // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T 
+  //                 - (1/tau[k])zeta[k] zeta[k]^T 
+
+  // Object representing the inverse of the Hessian
+  template <typename VECTOR> struct bfgs_invhessian {
+    // Keeps the update history; with no stored update H is the identity.
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    std::vector<VECTOR> delta, gamma, zeta;
+    std::vector<T> tau, rho;
+    int version;  // 0 = BFGS, 1 = DFP
+    // Y = H*X: starts from a copy of X and adds one correction per update k.
+    template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
+      copy(X, Y);
+      for (size_type k = 0 ; k < delta.size(); ++k) {
+	T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
+	switch (version) {
+	case 0 : // BFGS
+	  add(scaled(zeta[k], rho[k]*xdelta), Y);
+	  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
+	  break;
+	case 1 : // DFP
+	  add(scaled(delta[k], rho[k]*xdelta), Y);
+	  add(scaled(zeta[k], -xzeta/tau[k]), Y);
+	  break;
+	}
+      }
+    }
+    // Drops every stored update, so that hmult() is the identity again.
+    void restart(void) {
+      delta.resize(0); gamma.resize(0); zeta.resize(0); 
+      tau.resize(0); rho.resize(0);
+    }
+    // Stores a new (deltak, gammak) pair; does nothing if their product is 0.
+    template<typename VECT1, typename VECT2>
+    void update(const VECT1 &deltak, const VECT2 &gammak) {
+      T vsp = vect_sp(deltak, gammak);
+      if (vsp == T(0)) return;
+      size_type N = vect_size(deltak), k = delta.size();
+      VECTOR Y(N);
+      hmult(gammak, Y);  // Y = H * gammak with the updates stored so far
+      delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
+      tau.resize(k+1); rho.resize(k+1);
+      resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N); 
+      gmm::copy(deltak, delta[k]);
+      gmm::copy(gammak, gamma[k]);
+      rho[k] = R(1) / vsp;
+      if (version == 0)
+	add(delta[k], scaled(Y, -1), zeta[k]);  // zeta = delta - H gamma
+      else
+	gmm::copy(Y, zeta[k]);  // zeta = H gamma
+      tau[k] = vect_sp(gammak,  zeta[k]);
+    }
+    // v selects the formula: 0 = BFGS (default), 1 = DFP.
+    bfgs_invhessian(int v = 0) { version = v; }
+  };
+
+
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
+  void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+	    int restart, iteration& iter, int version = 0,
+	    double lambda_init=0.001, double print_norm=1.0) {
+    // Minimizes f starting from x (updated in place); grad(x, r) fills r.
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    // version: 0 = BFGS, 1 = DFP; restart == 0 disables periodic restarts.
+    bfgs_invhessian<VECTOR> invhessian(version);
+    VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
+    grad(x, r);
+    R lambda = lambda_init, valx = f(x), valy;
+    int nb_restart(0);
+    // print_norm only scales the values written to cout.
+    if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
+    while (! iter.finished_vect(r)) {
+      // Search direction: d = -H * r.
+      invhessian.hmult(r, d); gmm::scale(d, T(-1));
+      
+      // Wolfe Line search
+      R derivative = gmm::vect_sp(r, d);    
+      R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
+      bool unbounded = true, blocked = false, grad_computed = false;
+      
+      for(;;) {
+	add(x, scaled(d, lambda), y);
+	valy = f(y);
+	if (iter.get_noisy() >= 2) {
+	  cout.precision(15);
+	  cout << "Wolfe line search, lambda = " << lambda 
+ 	       << " value = " << valy /print_norm << endl;
+// 	       << " derivative = " << derivative
+// 	       << " lambda min = " << lambda_min << " lambda max = "
+// 	       << lambda_max << endl; getchar();
+	}
+	if (valy <= valx + m1 * lambda * derivative) {
+	  grad(y, r2); grad_computed = true;
+	  T derivative2 = gmm::vect_sp(r2, d);
+	  if (derivative2 >= m2*derivative) break;
+	  lambda_min = lambda;
+	}
+	else {
+	  lambda_max = lambda;
+	  unbounded = false;
+	}
+	if (unbounded) lambda *= R(10);
+	else  lambda = (lambda_max + lambda_min) / R(2);
+	if (lambda == lambda_max || lambda == lambda_min) break;
+	// valy <= R(2)*valx replaced by
+	// valy <= valx + gmm::abs(derivative)*lambda_init
+	// for compatibility with negative values (08.24.07).
+	if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
+	    (lambda < R(lambda_init*1E-8) ||
+	     (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
+	{ blocked = true; lambda = lambda_init; break; }
+      }
+
+      // Rank two update (restart == 0: no periodic restart, avoids % 0)
+      ++iter;
+      if (!grad_computed) grad(y, r2);
+      gmm::add(scaled(r2, -1), r);  // r = old gradient - new gradient
+      if ((restart != 0 && (iter.get_iteration() % restart) == 0) || blocked) {
+	if (iter.get_noisy() >= 1) cout << "Restart\n";
+	invhessian.restart();
+	if (++nb_restart > 10) {  // 11 restarts in a row: give up
+	  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
+	  return;
+	}
+      }
+      else {
+	invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
+	nb_restart = 0;
+      }
+      copy(r2, r); copy(y, x); valx = valy;
+      if (iter.get_noisy() >= 1)
+	cout << "BFGS value " << valx/print_norm << "\t";
+    }
+
+  }
+
+
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
+  inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+	    int restart, iteration& iter, int version = 1) {  // 1 selects DFP
+    bfgs(f, grad, x, restart, iter, version);
+    // lambda_init and print_norm keep the defaults declared by bfgs.
+  }
+
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_solver_bicgstab.h b/gmm/gmm_solver_bicgstab.h
new file mode 100644
index 000000000..858478fbe
--- /dev/null
+++ b/gmm/gmm_solver_bicgstab.h
@@ -0,0 +1,160 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of bicgstab.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_bicgstab.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief BiCGStab iterative solver.
+*/
+
+#ifndef GMM_SOLVER_BICGSTAB_H__
+#define GMM_SOLVER_BICGSTAB_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		BiConjugate Gradient Stabilized               		  */
+  /* (preconditioned, with parameterizable scalar product)     	  */
+  /* ******************************************************************** */
+
+  template <typename Matrix, typename Vector, typename VectorB,
+	    typename Preconditioner>
+  void bicgstab(const Matrix& A, Vector& x, const VectorB& b,
+	       const Preconditioner& M, iteration &iter) {
+    // Solves A x = b; x is the initial guess on entry, the solution on exit.
+    typedef typename linalg_traits<Vector>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename temporary_dense_vector<Vector>::vector_type temp_vector;
+    
+    T rho_1, rho_2(0), alpha(0), beta, omega(0);
+    temp_vector p(vect_size(x)), phat(vect_size(x)), s(vect_size(x)),
+      shat(vect_size(x)), 
+      t(vect_size(x)), v(vect_size(x)), r(vect_size(x)), rtilde(vect_size(x));
+    // r = b - A x (initial residual); rtilde keeps a copy of it.
+    gmm::mult(A, gmm::scaled(x, -T(1)), b, r);	  
+    gmm::copy(r, rtilde);
+    R norm_r = gmm::vect_norm2(r);
+    iter.set_rhsnorm(gmm::vect_norm2(b));
+    // Zero right-hand side: the solution is x = 0.
+    if (iter.get_rhsnorm() == 0.0) { clear(x); return; }
+    
+    while (!iter.finished(norm_r)) {
+      // On failure: GMM_ASSERT1 if maxiter == size_type(-1), else warn + return.
+      rho_1 = gmm::vect_sp(rtilde, r);
+      if (rho_1 == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+	else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+      }
+      
+      if (iter.first())
+	gmm::copy(r, p);
+      else {
+	if (omega == T(0)) {
+	  if (iter.get_maxiter() == size_type(-1))
+	    { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+	  else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+	}
+	
+	beta = (rho_1 / rho_2) * (alpha / omega);
+	// p = r + beta * (p - omega * v)
+	gmm::add(gmm::scaled(v, -omega), p);
+	gmm::add(r, gmm::scaled(p, beta), p);      
+      }
+      gmm::mult(M, p, phat);  // phat = M p
+      gmm::mult(A, phat, v);	
+      alpha = rho_1 / gmm::vect_sp(v, rtilde);
+      gmm::add(r, gmm::scaled(v, -alpha), s);  // s = r - alpha v
+      // Early exit: s is already small enough, only the alpha step is applied.
+      if (iter.finished_vect(s)) 
+	{ gmm::add(gmm::scaled(phat, alpha), x); break; }
+      
+      gmm::mult(M, s, shat);	
+      gmm::mult(A, shat, t);
+      omega = gmm::vect_sp(t, s) / gmm::vect_norm2_sqr(t);
+      // x += alpha*phat + omega*shat;  r = s - omega*t
+      gmm::add(gmm::scaled(phat, alpha), x); 
+      gmm::add(gmm::scaled(shat, omega), x);
+      gmm::add(s, gmm::scaled(t, -omega), r); 
+      norm_r = gmm::vect_norm2(r);
+      rho_2 = rho_1;
+      
+      ++iter;
+    }
+  }
+  
+  template <typename Matrix, typename Vector, typename VectorB,
+	    typename Preconditioner>
+  void bicgstab(const Matrix& A, const Vector& x, const VectorB& b,
+	       const Preconditioner& M, iteration &iter)  // const-x form
+  { bicgstab(A, linalg_const_cast(x), b, M, iter); }  // x IS written
+  
+}
+
+
+#endif //  GMM_SOLVER_BICGSTAB_H__
diff --git a/gmm/gmm_solver_cg.h b/gmm/gmm_solver_cg.h
new file mode 100644
index 000000000..a2876786a
--- /dev/null
+++ b/gmm/gmm_solver_cg.h
@@ -0,0 +1,180 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of cg.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_cg.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>
+   @author  Lie-Quan Lee <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Conjugate gradient iterative solver. 
+*/
+#ifndef GMM_SOLVER_CG_H__
+#define GMM_SOLVER_CG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		conjugate gradient                           		  */
+  /* (preconditioned, with parameterizable additional scalar product)    */
+  /* ******************************************************************** */
+
+  template <typename Matrix, typename Matps, typename Precond, 
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+	  const Precond &P, iteration &iter) {
+    // Solves A x = b from the initial x; PS: scalar product, P: preconditioner.
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x)),
+      z(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+    // Zero right-hand side: return x = 0 without iterating.
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      mult(A, scaled(x, T(-1)), b, r);  // r = b - A x
+      mult(P, r, z);
+      rho = vect_hp(PS, z, r);
+      copy(z, p);
+
+      while (!iter.finished_vect(r)) {
+	// After the first pass: refresh z and rho, then p = z + (rho/rho_1) p.
+	if (!iter.first()) { 
+	  mult(P, r, z);
+	  rho = vect_hp(PS, z, r);
+	  add(z, scaled(p, rho / rho_1), p);
+	}
+	mult(A, p, q);
+	// a = rho / vect_hp(PS, q, p);  x += a p;  r -= a q
+	a = rho / vect_hp(PS, q, p);	
+	add(scaled(p, a), x);
+	add(scaled(q, -a), r);
+	rho_1 = rho;
+
+	++iter;
+      }
+    }
+  }
+
+  template <typename Matrix, typename Matps, typename Precond, 
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+	  const gmm::identity_matrix &, iteration &iter) {
+    // Variant for an identity preconditioner: no separate z vector is kept.
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+    // NOTE(review): Precond is in no parameter (not deducible) -- ever selected?
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+    // Zero right-hand side: return x = 0 without iterating.
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      mult(A, scaled(x, T(-1)), b, r);  // r = b - A x
+      rho = vect_hp(PS, r, r);
+      copy(r, p);
+
+      while (!iter.finished_vect(r)) {
+	// After the first pass: refresh rho, then p = r + (rho/rho_1) p.
+	if (!iter.first()) { 
+	  rho = vect_hp(PS, r, r);
+	  add(r, scaled(p, rho / rho_1), p);
+	}	
+	mult(A, p, q);
+	a = rho / vect_hp(PS, q, p);	
+	add(scaled(p, a), x);  // x += a p
+	add(scaled(q, -a), r);  // r -= a q
+	rho_1 = rho;
+	++iter;
+      }
+    }
+  }
+
+  template <typename Matrix, typename Matps, typename Precond, 
+            typename Vector1, typename Vector2> inline 
+  void cg(const Matrix& A, const Vector1& x, const Vector2& b, const Matps& PS,
+	 const Precond &P, iteration &iter)  // const-x form of the solver
+  { cg(A, linalg_const_cast(x), b, PS, P, iter); }  // x IS written
+
+  template <typename Matrix, typename Precond, 
+            typename Vector1, typename Vector2> inline
+  void cg(const Matrix& A, Vector1& x, const Vector2& b,
+	 const Precond &P, iteration &iter)  // form without a PS argument
+  { cg(A, x , b, identity_matrix(), P, iter); }  // identity_matrix() as PS
+
+  template <typename Matrix, typename Precond, 
+            typename Vector1, typename Vector2> inline
+  void cg(const Matrix& A, const Vector1& x, const Vector2& b,
+	 const Precond &P, iteration &iter)  // const-x form, no PS argument
+  { cg(A, x , b , identity_matrix(), P , iter); }  // identity_matrix() as PS
+
+}
+
+
+#endif //  GMM_SOLVER_CG_H__
diff --git a/gmm/gmm_solver_constrained_cg.h b/gmm/gmm_solver_constrained_cg.h
new file mode 100644
index 000000000..44716bffe
--- /dev/null
+++ b/gmm/gmm_solver_constrained_cg.h
@@ -0,0 +1,165 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_constrained_cg.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Constrained conjugate gradient. */
+//  NOTE: preconditioning does not work
+
+#ifndef GMM_SOLVER_CCG_H__
+#define GMM_SOLVER_CCG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /** Row-by-row computation of a pseudo inverse of the non-square matrix C.
+   *
+   *  Per the original author's note the result is meant to satisfy
+   *  CINV = inv(C * trans(C)) * C.  For every row index a conjugate
+   *  gradient loop is run with the corresponding unit vector as right-hand
+   *  side; its result is multiplied by trans(C), cleaned with a 1E-15
+   *  threshold and stored as that row of CINV.
+   *
+   *  The third and fourth arguments are unnamed and never read; the fourth
+   *  only fixes the working vector type VectorX.
+   *
+   *  Possible optimisations noted by the original author: copy of the row,
+   *  precomputation of C * trans(C).
+   */
+  template <typename CMatrix, typename CINVMatrix, typename Matps,
+            typename VectorX>
+  void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV,
+                      const Matps& /* PS */, VectorX&) {
+
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+    typedef VectorX TmpVec;
+
+    size_type n_rows = mat_nrows(C), n_cols = mat_ncols(C);
+
+    TmpVec unit(n_rows), sol(n_rows), img(n_cols),
+      dir(n_rows), Adir(n_rows), res(n_rows);
+    value_type rho, rho_prev, step;
+
+    clear(unit);
+    clear(CINV);
+
+    for (size_type row = 0; row < n_rows; ++row) {
+      // Right-hand side: unit vector number `row`; start from sol = 0.
+      unit[row] = 1.0;
+      rho = 1.0;
+      clear(sol);
+      copy(unit, res);
+      copy(unit, dir);
+
+      // Conjugate gradient on C * trans(C); sol becomes row number `row`
+      // of inv(C * trans(C)).  Stops once rho falls below 1E-38.
+      while (rho >= 1E-38) {
+        mult(gmm::transposed(C), dir, img);
+        mult(C, img, Adir);
+        step = rho / vect_sp(dir, Adir);
+        add(scaled(dir, step), sol);
+        add(scaled(Adir, -step), res);
+        rho_prev = rho;
+        rho = vect_sp(res, res);
+        add(res, scaled(dir, rho / rho_prev), dir);
+      }
+
+      // img is row number `row` of CINV.
+      mult(transposed(C), sol, img);
+      clean(img, 1E-15);
+      copy(img, mat_row(CINV, row));
+
+      // Restore the all-zero vector for the next row.
+      unit[row] = 0.0;
+    }
+  }
+  
+  /** Compute the minimum of @f$ 1/2((Ax).x) - bx @f$ under the constraint @f$ Cx <= f @f$.
+   *
+   *  x is used as the starting point and is updated in place.  The loop
+   *  ends when iter.finished(rho) reports completion, where rho is the
+   *  PS-scalar product of the residual r and the projected, preconditioned
+   *  residual z.  A pseudo inverse of C is built once up front with
+   *  pseudo_inverse().
+   *
+   *  NOTE(review): the locals q2 and memox do not influence the result:
+   *  q2 is never referenced after construction and memox is only written.
+   *  NOTE(review): std::max(0.0, ...) / std::min(lambda, ...) below require
+   *  value_type to be usable as double -- confirm for other scalar types.
+   */
+  template < typename Matrix,  typename CMatrix, typename Matps,
+	     typename VectorX, typename VectorB, typename VectorF,
+	     typename Preconditioner >
+  void constrained_cg(const Matrix& A, const CMatrix& C, VectorX& x,
+		      const VectorB& b, const VectorF& f,const Matps& PS,
+		      const Preconditioner& M, iteration &iter) {
+    typedef typename temporary_dense_vector<VectorX>::vector_type TmpVec;
+    typedef typename temporary_vector<CMatrix>::vector_type TmpCVec;
+    typedef row_matrix<TmpCVec> TmpCmat;
+    
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+    value_type rho = 1.0, rho_1, lambda, gamma;
+    TmpVec p(vect_size(x)), q(vect_size(x)), q2(vect_size(x)),
+      r(vect_size(x)), old_z(vect_size(x)), z(vect_size(x)),
+      memox(vect_size(x));
+    // satured[i] records whether constraint row i was found active
+    // (C_i.x - f[i] >= -1.0E-15) during the latest residual computation.
+    std::vector<bool> satured(mat_nrows(C));
+    clear(p);
+    iter.set_rhsnorm(sqrt(vect_sp(PS, b, b)));
+    // A zero right-hand-side norm is replaced by 1.0.
+    if (iter.get_rhsnorm() == 0.0) iter.set_rhsnorm(1.0);
+   
+    TmpCmat CINV(mat_nrows(C), mat_ncols(C));
+    pseudo_inverse(C, CINV, PS, x);
+    
+    while(true) {
+      // computation of the residual
+      copy(z, old_z);
+      copy(x, memox);
+      mult(A, scaled(x, -1.0), b, r);
+      mult(M, r, z); // preconditioner not coherent
+      bool transition = false;
+      // For every active constraint, remove from z the component along
+      // row i of C when the CINV-row product with z is positive.
+      for (size_type i = 0; i < mat_nrows(C); ++i) {
+	value_type al = vect_sp(mat_row(C, i), x) - f[i];
+	if (al >= -1.0E-15) {
+	  if (!satured[i]) { satured[i] = true; transition = true; }
+	  value_type bb = vect_sp(mat_row(CINV, i), z);
+	  if (bb > 0.0) add(scaled(mat_row(C, i), -bb), z);
+	}
+	else
+	  satured[i] = false;
+      }
+    
+      // descent direction
+      rho_1 = rho; rho = vect_sp(PS, r, z); // ...
+      
+      if (iter.finished(rho)) break;
+      
+      if (iter.get_noisy() > 0 && transition) std::cout << "transition\n";
+      // The direction is restarted (gamma = 0) on the first iteration and
+      // whenever a constraint has just become active.
+      if (transition || iter.first()) gamma = 0.0;
+      else gamma = std::max(0.0, (rho - vect_sp(PS, old_z, z) ) / rho_1);
+      // std::cout << "gamma = " << gamma << endl;
+      // itl::add(r, itl::scaled(p, gamma), p);
+      add(z, scaled(p, gamma), p); // ...
+      
+      ++iter;
+      // one-dimensional optimization
+      mult(A, p, q);
+      lambda = rho / vect_sp(PS, q, p);
+      // Shorten the step for constraints that are not currently active.
+      // NOTE(review): bb subtracts f[i] from C_i.p, whereas the step bound
+      // below divides the remaining slack f[i] - C_i.x by bb -- confirm
+      // that the "- f[i]" term is intended.
+      for (size_type i = 0; i < mat_nrows(C); ++i)
+	if (!satured[i]) {
+	  value_type bb = vect_sp(mat_row(C, i), p) - f[i];
+	  if (bb > 0.0)
+	    lambda = std::min(lambda, (f[i]-vect_sp(mat_row(C, i), x)) / bb);
+	}
+      add(x, scaled(p, lambda), x);
+      // memox now holds (previous x) - (new x); it is not read afterwards.
+      add(memox, scaled(x, -1.0), memox);
+      
+    }
+  }
+  
+}
+
+#endif //  GMM_SOLVER_CCG_H__
diff --git a/gmm/gmm_solver_gmres.h b/gmm/gmm_solver_gmres.h
new file mode 100644
index 000000000..b124905e2
--- /dev/null
+++ b/gmm/gmm_solver_gmres.h
@@ -0,0 +1,173 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of gmres.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_gmres.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>
+   @author  Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief GMRES (Generalized Minimum Residual) iterative solver.
+*/
+#ifndef GMM_KRYLOV_GMRES_H
+#define GMM_KRYLOV_GMRES_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_modified_gram_schmidt.h"
+
+namespace gmm {
+
+  /** Generalized Minimum Residual
+
+      Solves the unsymmetric linear system Ax = b using restarted GMRES.
+
+      See: Y. Saad and M. Schultz. GMRES: A generalized minimum residual
+      algorithm for solving nonsymmetric linear systems, SIAM
+      J. Sci. Statist. Comp.  7(1986), pp. 856-869
+
+      @param A system matrix.
+      @param x initial guess on entry, updated in place.
+      @param b right hand side.
+      @param M preconditioner, applied through mult(M, ., .).
+      @param restart number of inner iterations between two restarts.
+      @param outer iteration object driving the outer loop; it is also
+             incremented once per inner iteration.
+      @param KS basis object indexed as KS[i] and passed to orthogonalize()
+             and combine().
+  */
+  template <typename Mat, typename Vec, typename VecB, typename Precond,
+            typename Basis >
+  void gmres(const Mat &A, Vec &x, const VecB &b, const Precond &M,
+             int restart, iteration &outer, Basis& KS) {
+
+    typedef typename linalg_traits<Vec>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    std::vector<T> raw_res(vect_size(x)), prec_res(vect_size(x)),
+      Av(vect_size(x));
+    std::vector<T> cosines(restart+1), sines(restart+1), rhs(restart+1);
+    gmm::dense_matrix<T> hess(restart+1, restart);
+#ifdef GMM_USES_MPI
+    double t_ref, t_prec = MPI_Wtime(), t_tot = 0;
+    static double tmult_tot = 0.0;
+    t_ref = MPI_Wtime();
+    cout << "GMRES " << endl;
+#endif
+
+    // The reference norm is the norm of the preconditioned right hand side.
+    mult(M, b, prec_res);
+    outer.set_rhsnorm(gmm::vect_norm2(prec_res));
+    if (outer.get_rhsnorm() == 0.0) {
+      clear(x);
+      return;
+    }
+
+    // Preconditioned initial residual.
+    mult(A, scaled(x, T(-1)), b, raw_res);
+    mult(M, raw_res, prec_res);
+    R res_norm = gmm::vect_norm2(prec_res), best_norm = res_norm;
+    int stalled = 0;
+
+    // The inner iteration is a copy of the outer one, capped at `restart`.
+    iteration inner = outer;
+    inner.reduce_noisy();
+    inner.set_maxiter(restart);
+    inner.set_name("GMRes inner");
+
+    while (! outer.finished(res_norm)) {
+
+      gmm::copy(gmm::scaled(prec_res, R(1)/res_norm), KS[0]);
+      gmm::clear(rhs);
+      rhs[0] = res_norm;
+
+      size_type col = 0;
+      inner.init();
+
+      do {
+        // New basis vector: precondition A * KS[col], orthogonalize it
+        // against the previous ones and normalize.
+        mult(A, KS[col], Av);
+        mult(M, Av, KS[col+1]);
+        orthogonalize(KS, mat_col(hess, col), col);
+        R nrm = gmm::vect_norm2(KS[col+1]);
+        hess(col+1, col) = T(nrm);
+        gmm::scale(KS[col+1], T(1) / nrm);
+
+        // Replay the stored rotations on the new column ...
+        for (size_type k = 0; k < col; ++k)
+          Apply_Givens_rotation_left(hess(k,col), hess(k+1,col),
+                                     cosines[k], sines[k]);
+
+        // ... then compute a new one and apply it to column and rhs.
+        Givens_rotation(hess(col,col), hess(col+1,col),
+                        cosines[col], sines[col]);
+        Apply_Givens_rotation_left(hess(col,col), hess(col+1,col),
+                                   cosines[col], sines[col]);
+        Apply_Givens_rotation_left(rhs[col], rhs[col+1],
+                                   cosines[col], sines[col]);
+
+        ++inner;
+        ++outer;
+        ++col;
+      } while (! inner.finished(gmm::abs(rhs[col])));
+
+      // Solve the small triangular system and update x and the residual.
+      upper_tri_solve(hess, rhs, col, false);
+      combine(KS, rhs, x, col);
+      mult(A, gmm::scaled(x, T(-1)), b, raw_res);
+      mult(M, raw_res, prec_res);
+      best_norm = std::min(res_norm, best_norm);
+      res_norm = gmm::vect_norm2(prec_res);
+
+      // Count consecutive restarts that ended early or did not improve.
+      const bool no_progress =
+        int(inner.get_iteration()) < restart -1 || best_norm <= res_norm;
+      if (no_progress)
+        ++stalled;
+      else
+        stalled = 0;
+
+      if (stalled > 10) {
+        if (outer.get_noisy()) cout << "Gmres is blocked, exiting\n";
+        break;
+      }
+#ifdef GMM_USES_MPI
+      t_tot = MPI_Wtime() - t_ref;
+      cout << "temps GMRES : " << t_tot << endl;
+#endif
+    }
+  }
+
+
+  /** Convenience overload of gmres(): builds a modified_gram_schmidt basis
+   *  of `restart` vectors of size vect_size(x) and forwards to the overload
+   *  taking an explicit basis object. */
+  template <typename Mat, typename Vec, typename VecB, typename Precond >
+  void gmres(const Mat &A, Vec &x, const VecB &b,
+             const Precond &M, int restart, iteration& outer) {
+    typedef typename linalg_traits<Vec>::value_type value_type;
+
+    modified_gram_schmidt<value_type> basis(restart, vect_size(x));
+    gmres(A, x, b, M, restart, outer, basis);
+  }
+
+}
+
+#endif
diff --git a/gmm/gmm_solver_idgmres.h b/gmm/gmm_solver_idgmres.h
new file mode 100644
index 000000000..79bb9064d
--- /dev/null
+++ b/gmm/gmm_solver_idgmres.h
@@ -0,0 +1,805 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_idgmres.h
+   @author  Caroline Lecalvez <Caroline.Lecalvez@gmm.insa-tlse.fr>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 6, 2003.
+   @brief Implicitly restarted and deflated Generalized Minimum Residual.
+*/
+#ifndef GMM_IDGMRES_H
+#define GMM_IDGMRES_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_dense_sylvester.h"
+
+namespace gmm {
+
+  /** Comparison functor on (value, index) pairs: a is ordered before b
+   *  when the modulus of a.first is strictly greater than the modulus of
+   *  b.first.  The index part of the pair is ignored. */
+  template <typename T> struct compare_vp {
+    bool operator()(const std::pair<T, size_type> &a,
+		    const std::pair<T, size_type> &b) const
+    { return (gmm::abs(a.first) > gmm::abs(b.first)); }
+  };
+
+  /** Bookkeeping counters shared by the stages of idgmres().
+   *
+   *  The constructor stores the three sizes it receives in m, p and k,
+   *  sets tb_deb to 1, ok to false and every other counter to 0.
+   */
+  struct idgmres_state {
+    size_type m, tb_deb, tb_def, p, k, nb_want, nb_unwant;
+    size_type nb_nolong, tb_deftot, tb_defwant, conv, nb_un, fin;
+    bool ok;
+
+    idgmres_state(size_type mm, size_type pp, size_type kk)
+      : m(mm), tb_deb(1), tb_def(0), p(pp), k(kk), nb_want(0),
+	nb_unwant(0), nb_nolong(0), tb_deftot(0), tb_defwant(0),
+	conv(0), nb_un(0), fin(0), ok(false) {}
+  };
+  
+
+  /** Permute the elements of cont in place according to ind.
+   *
+   *  On exit, cont[l] holds the element that was stored at cont[ind[l]] on
+   *  entry, for every l in [0, ind.end() - ind.begin()).  The permutation
+   *  is applied cycle by cycle, using a single temporary element per cycle.
+   *
+   *  @param cont random-access container exposing value_type and
+   *              operator[]; modified in place.
+   *  @param ind  index container; assumed to be a permutation of
+   *              0 .. size-1 (not checked).
+   */
+  template <typename CONT, typename IND>
+  void apply_permutation(CONT &cont, const IND &ind) {
+    size_type m = ind.end() - ind.begin();
+    std::vector<bool> sorted(m, false);
+    
+    for (size_type l = 0; l < m; ++l)
+      if (!sorted[l] && ind[l] != l) {
+
+	// Walk the cycle that starts at l, shifting each element into place.
+	typename CONT::value_type aux = cont[l];
+	size_type k = ind[l];
+	cont[l] = cont[k];
+	sorted[l] = true;
+	
+	for (size_type k2 = ind[k]; k2 != l; k2 = ind[k]) {
+	  cont[k] = cont[k2];
+	  sorted[k] = true;
+	  k = k2;
+	}
+	cont[k] = aux;
+	// The last slot of the cycle must be flagged too, otherwise the
+	// outer loop would start from it again and permute the cycle twice.
+	sorted[k] = true;
+      }
+  }
+
+
+  /** Implicitly restarted and deflated Generalized Minimum Residual
+
+      See: C. Le Calvez, B. Molina, Implicitly restarted and deflated
+      FOM and GMRES, numerical applied mathematics,
+      (30) 2-3 (1999) pp191-212.
+      
+      @param A Real or complex unsymmetric matrix.
+      @param x initial guess vector and final result.
+      @param b right hand side
+      @param M preconditioner
+      @param m size of the subspace between two restarts
+      @param p number of converged Ritz values sought
+      @param k size of the remaining Krylov subspace when the p Ritz values
+      have not yet converged 0 <= p <= k < m.
+      @param tol_vp : tolerance on the Ritz values (not referenced in the
+      body of this function).
+      @param outer iteration object driving the outer loop; also
+      incremented once per inner iteration.
+      @param KS Krylov basis object, accessed through KS[i] and KS.mat().
+
+      NOTE(review): this function looks unfinished.  It refers to
+      `eval_sort` and to a bare `ok`, neither of which is declared in this
+      function, and it calls Lock() with five arguments and uses its
+      result, while the Lock() defined later in this file takes six
+      parameters and is declared void.
+  */
+  template < typename Mat, typename Vec, typename VecB, typename Precond,
+	     typename Basis >
+  void idgmres(const Mat &A, Vec &x, const VecB &b, const Precond &M,
+	     size_type m, size_type p, size_type k, double tol_vp,
+	     iteration &outer, Basis& KS) {
+
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    
+    R a, beta;
+    idgmres_state st(m, p, k);
+
+    std::vector<T> w(vect_size(x)), r(vect_size(x)), u(vect_size(x));
+    std::vector<T> c_rot(m+1), s_rot(m+1), s(m+1);
+    std::vector<T> y(m+1), ztest(m+1), gam(m+1);
+    std::vector<T> gamma(m+1);
+    // H receives the Givens rotations; Hess and Hobl keep copies of the
+    // columns of H taken before the rotations are applied.
+    gmm::dense_matrix<T> H(m+1, m), Hess(m+1, m),
+      Hobl(m+1, m), W(vect_size(x), m+1);
+
+    gmm::clear(H);
+
+    outer.set_rhsnorm(gmm::vect_norm2(b));
+    if (outer.get_rhsnorm() == 0.0) { clear(x); return; }
+    
+    // Preconditioned initial residual and its norm.
+    mult(A, scaled(x, -1.0), b, w);
+    mult(M, w, r);
+    beta = gmm::vect_norm2(r);
+
+    iteration inner = outer;
+    inner.reduce_noisy();
+    inner.set_maxiter(m);
+    inner.set_name("GMRes inner iter");
+    
+    while (! outer.finished(beta)) {
+      
+      gmm::copy(gmm::scaled(r, 1.0/beta), KS[0]);
+      gmm::clear(s);
+      s[0] = beta;
+      gmm::copy(s, gamma);
+
+      // The inner loop starts at column st.tb_deb - 1 and is allowed
+      // m - st.tb_deb + 1 iterations.
+      inner.set_maxiter(m - st.tb_deb + 1);
+      size_type i = st.tb_deb - 1; inner.init();
+      
+      do {
+	mult(A, KS[i], u);
+	mult(M, u, KS[i+1]);
+	orthogonalize_with_refinment(KS, mat_col(H, i), i);
+	H(i+1, i) = a = gmm::vect_norm2(KS[i+1]);
+	gmm::scale(KS[i+1], R(1) / a);
+
+	gmm::copy(mat_col(H, i), mat_col(Hess, i));
+	gmm::copy(mat_col(H, i), mat_col(Hobl, i));
+	
+
+	for (size_type l = 0; l < i; ++l)
+	  Apply_Givens_rotation_left(H(l,i), H(l+1,i), c_rot[l], s_rot[l]);
+	
+	Givens_rotation(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	Apply_Givens_rotation_left(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	H(i+1, i) = T(0); 
+	Apply_Givens_rotation_left(s[i], s[i+1], c_rot[i], s_rot[i]);
+	
+	++inner, ++outer, ++i;
+      } while (! inner.finished(gmm::abs(s[i])));
+
+      // Converged inside the inner loop: build the solution and leave.
+      if (inner.converged()) {
+	gmm::copy(s, y);
+	upper_tri_solve(H, y, i, false);
+	combine(KS, y, x, i);
+	mult(A, gmm::scaled(x, T(-1)), b, w);
+	mult(M, w, r);
+	beta = gmm::vect_norm2(r); // + check on beta ... still to be done
+	break;
+      }
+
+      // Apply the stored rotations in reverse order to s[i] placed in the
+      // last slot of gam.
+      gmm::clear(gam); gam[m] = s[i];
+      for (size_type l = m; l > 0; --l)
+	Apply_Givens_rotation_left(gam[l-1], gam[l], gmm::conj(c_rot[l-1]),
+				   -s_rot[l-1]);
+
+      mult(KS.mat(), gam, r);
+      beta = gmm::vect_norm2(r);
+      
+      // NOTE(review): y is only assigned in the converged branch above,
+      // which leaves the loop; here it still holds its initial contents.
+      mult(Hess, scaled(y, T(-1)), gamma, ztest);
+      // In fact, according to Caroline (who knows the subject), ztest and
+      // gam should coincide.
+      // Once this has been checked to work, gam will have to be used in
+      // place of ztest.
+      if (st.tb_def < p) {
+        T nss = H(m,m-1) / ztest[m];
+	nss /= gmm::abs(nss); // ns also to be computed later
+	gmm::copy(KS.mat(), W); gmm::copy(scaled(r, nss /beta), mat_col(W, m));
+	
+	// Computation of the oblique matrix
+	sub_interval SUBI(0, m);
+	add(scaled(sub_vector(ztest, SUBI), -Hobl(m, m-1) / ztest[m]),
+	    sub_vector(mat_col(Hobl, m-1), SUBI));
+	Hobl(m, m-1) *= nss * beta / ztest[m]; 
+
+	/* **************************************************************** */
+	/*  Locking                                                         */
+	/* **************************************************************** */
+
+	// Computation of the Ritz eigenpairs.
+	std::vector<std::complex<R> > eval(m);
+	dense_matrix<T> YB(m-st.tb_def, m-st.tb_def);
+	std::vector<char> pure(m-st.tb_def, 0);
+	gmm::clear(YB);
+
+	select_eval(Hobl, eval, YB, pure, st);
+
+	if (st.conv != 0) {
+	  // DEFLATION using the QR Factorization of YB
+	  
+	  // NOTE(review): five arguments are passed and a value is expected
+	  // back; see the Lock() definition later in this file.
+	  T alpha = Lock(W, Hobl,
+			 sub_matrix(YB,  sub_interval(0, m-st.tb_def)),
+			 sub_interval(st.tb_def, m-st.tb_def), 
+			 (st.tb_defwant < p)); 
+	  // ns *= alpha; // to be computed later ??
+	  //  V(:,m+1) = alpha*V(:, m+1); this was meant to be used for something ...
+
+
+	  //       Clean the portions below the diagonal corresponding
+	  //       to the lock Schur vectors
+
+	  // pure[] == 0 clears below the diagonal of one column, pure[] == 1
+	  // clears below a two-column block and skips the second column.
+	  for (size_type j = st.tb_def; j < st.tb_deftot; ++j) {
+	    if ( pure[j-st.tb_def] == 0)
+	      gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j)));
+	    else if (pure[j-st.tb_def] == 1) {
+	      gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1),
+				    sub_interval(j, 2))); 
+	      ++j;
+	    }
+	    else GMM_ASSERT3(false, "internal error");
+	  }
+	  
+	  if (!st.ok) {
+
+	    // careful if m = 0;
+	    size_type mm = std::min(k+st.nb_unwant+st.nb_nolong, m-1);
+
+	    // NOTE(review): eval_sort is not declared anywhere in this
+	    // function.
+	    if (eval_sort[m-mm-1].second != R(0)
+		&& eval_sort[m-mm-1].second == -eval_sort[m-mm].second) ++mm;
+
+	    // NOTE(review): `complex` is unqualified here, whereas
+	    // std::complex is spelled out for `eval` above.
+	    std::vector<complex<R> > shifts(m-mm);
+	    for (size_type i = 0; i < m-mm; ++i)
+	      shifts[i] = eval_sort[i].second;
+
+	    apply_shift_to_Arnoldi_factorization(W, Hobl, shifts, mm,
+						 m-mm, true);
+
+	    st.fin = mm;
+	  }
+	  else
+	    st.fin = st.tb_deftot;
+
+
+	  /* ************************************************************** */
+	  /*  Purge                                                         */
+	  /* ************************************************************** */
+
+	  if (st.nb_nolong + st.nb_unwant > 0) {
+
+	    // These declarations shadow eval, YB and pure of the enclosing
+	    // scope.
+	    std::vector<std::complex<R> > eval(m);
+	    dense_matrix<T> YB(st.fin, st.tb_deftot);
+	    std::vector<char> pure(st.tb_deftot, 0);
+	    gmm::clear(YB);
+	    st.nb_un = st.nb_nolong + st.nb_unwant;
+	    
+	    select_eval_for_purging(Hobl, eval, YB, pure, st);
+	    
+	    // NOTE(review): `ok` is not declared in this scope; the state
+	    // object has a member st.ok -- confirm which was intended.
+	    T alpha = Lock(W, Hobl, YB, sub_interval(0, st.fin), ok);
+
+	    //       Clean the portions below the diagonal corresponding
+	    //       to the unwanted lock Schur vectors
+	    
+	    for (size_type j = 0; j < st.tb_deftot; ++j) {
+	      if ( pure[j] == 0)
+		gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j)));
+	      else if (pure[j] == 1) {
+		gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1),
+				      sub_interval(j, 2))); 
+		++j;
+	      }
+	      else GMM_ASSERT3(false, "internal error");
+	    }
+
+	    // The result z of the Sylvester solve is not used afterwards in
+	    // this function.
+	    gmm::dense_matrix<T> z(st.nb_un, st.fin - st.nb_un);
+	    sub_interval SUBI(0, st.nb_un), SUBJ(st.nb_un, st.fin - st.nb_un);
+	    sylvester(sub_matrix(Hobl, SUBI),
+		      sub_matrix(Hobl, SUBJ),
+		      sub_matrix(gmm::scaled(Hobl, -T(1)), SUBI, SUBJ), z);
+	    
+	  }
+
+	}
+	
+      }
+    }
+  }
+  
+
+  /** Short overload of idgmres() taking only the subspace size m.
+   *
+   *  It builds a modified_gram_schmidt basis of m vectors of size
+   *  vect_size(x) and hands the work to gmres(), with m in the restart
+   *  position; the deflated variant above is not invoked from here.
+   */
+  template < typename Mat, typename Vec, typename VecB, typename Precond >
+  void idgmres(const Mat &A, Vec &x, const VecB &b,
+               const Precond &M, size_type m, iteration& outer) {
+    typedef typename linalg_traits<Mat>::value_type value_type;
+
+    modified_gram_schmidt<value_type> basis(m, vect_size(x));
+    gmres(A, x, b, M, m, outer, basis);
+  }
+
+
+  // Lock stage of an implicit restarted Arnoldi process.
+  // 1- QR factorization of YB through Householder matrices
+  //    Q(Rl) = YB
+  //     (0 )
+  // 2- Update of the Arnoldi factorization.
+  //    H <- Q*HQ,  W <- WQ
+  // 3- Restore the Hessenberg form of H.
+  //
+  // NOTE(review): this function looks unfinished and references several
+  // names that are not declared in it: `st`, `w`, `SUBI`, `fin`, `n_rows`,
+  // `gmmm::copy`, `submatrix` and `end` (the `;` after min(SUB) ends the
+  // declaration before `end`).  `gmm::dense_matrix tab_p` has no template
+  // argument, the function is declared void but ends with `return alpha;`,
+  // and the parameter `ns` is never used.
+
+  template <typename T, typename MATYB>
+    void Lock(gmm::dense_matrix<T> &W, gmm::dense_matrix<T> &H,
+	      const MATYB &YB, const sub_interval SUB,
+	      bool restore, T &ns) {
+
+    size_type n = mat_nrows(W), m = mat_ncols(W) - 1;
+    size_type ncols = mat_ncols(YB), nrows = mat_nrows(YB);
+    size_type begin = min(SUB); end = max(SUB) - 1;
+    sub_interval SUBR(0, nrows), SUBC(0, ncols);
+    T alpha(1);
+
+    GMM_ASSERT2(((end-begin) == ncols) && (m == mat_nrows(H)) 
+		&& (m+1 == mat_ncols(H)), "dimensions mismatch");
+    
+    // DEFLATION using the QR Factorization of YB
+	  
+    // YB is copied into the leading columns of a square work matrix, the
+    // remaining columns are cleared, and the whole is QR-factorized.
+    dense_matrix<T> QR(n_rows, n_rows);
+    gmmm::copy(YB, sub_matrix(QR, SUBR, SUBC));
+    gmm::clear(submatrix(QR, SUBR, sub_interval(ncols, nrows-ncols)));
+    qr_factor(QR); 
+
+
+    // Apply the Householder reflectors to H (both sides) and to W (right).
+    apply_house_left(QR, sub_matrix(H, SUB));
+    apply_house_right(QR, sub_matrix(H, SUBR, SUB));
+    apply_house_right(QR, sub_matrix(W, sub_interval(0, n), SUB));
+    
+    //       Restore to the initial block hessenberg form
+    
+    if (restore) {
+      
+      // check the case m = 0 ...
+      gmm::dense_matrix tab_p(end - st.tb_deftot, end - st.tb_deftot);
+      gmm::copy(identity_matrix(), tab_p);
+      
+      // NOTE(review): j is unsigned and the loop counts down with
+      // `j >= st.tb_deftot+2`; confirm it cannot wrap around.
+      for (size_type j = end-1; j >= st.tb_deftot+2; --j) {
+	
+	size_type jm = j-1;
+	std::vector<T> v(jm - st.tb_deftot);
+	sub_interval SUBtot(st.tb_deftot, jm - st.tb_deftot);
+	sub_interval SUBtot2(st.tb_deftot, end - st.tb_deftot);
+	gmm::copy(sub_vector(mat_row(H, j), SUBtot), v);
+	house_vector_last(v);
+	// w is resized before each update to the size that update needs.
+	w.resize(end);
+	col_house_update(sub_matrix(H, SUBI, SUBtot), v, w);
+	w.resize(end - st.tb_deftot);
+	row_house_update(sub_matrix(H, SUBtot, SUBtot2), v, w);
+	gmm::clear(sub_vector(mat_row(H, j),
+			      sub_interval(st.tb_deftot, j-1-st.tb_deftot)));
+	w.resize(end - st.tb_deftot);
+	col_house_update(sub_matrix(tab_p, sub_interval(0, end-st.tb_deftot),
+				    sub_interval(0, jm-st.tb_deftot)), v, w);
+	w.resize(n);
+	col_house_update(sub_matrix(W, sub_interval(0, n), SUBtot), v, w);
+      }
+      
+      //       restore positive subdiagonal elements
+      
+      std::vector<T> d(fin-st.tb_deftot); d[0] = T(1);
+      
+      // We compute d[i+1] in order 
+      // (d[i+1] * H(st.tb_deftot+i+1,st.tb_deftot+i)) / d[i] 
+      // be equal to |H(st.tb_deftot+i+1,st.tb_deftot+i))|.
+      // NOTE(review): the element read below is
+      // H(st.tb_deftot+j, st.tb_deftot+j-1), i.e. one row and one column
+      // before the element named in the comment above, and its column
+      // index is st.tb_deftot-1 when j == 0 -- confirm the indices.
+      for (size_type j = 0; j+1 < end-st.tb_deftot; ++j) {
+	T e = H(st.tb_deftot+j, st.tb_deftot+j-1);
+	d[j+1] = (e == T(0)) ? T(1) :  d[j] * gmm::abs(e) / e;
+	scale(sub_vector(mat_row(H, st.tb_deftot+j+1),
+			 sub_interval(st.tb_deftot, m-st.tb_deftot)), d[j+1]);
+	scale(mat_col(H, st.tb_deftot+j+1), T(1) / d[j+1]);
+	scale(mat_col(W, st.tb_deftot+j+1), T(1) / d[j+1]);
+      }
+
+      // alpha is reduced to modulus one before scaling the last column
+      // of W.
+      alpha = tab_p(end-st.tb_deftot-1, end-st.tb_deftot-1) / d[end-st.tb_deftot-1];
+      alpha /= gmm::abs(alpha);
+      scale(mat_col(W, m), alpha);
+	    
+    }
+	 
+    return alpha;
+  }
+
+
+
+
+
+
+
+
+  // Apply p implicit shifts to the Arnoldi factorization
+  // AV = VH+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}*
+  // and produces the following new Arnoldi factorization
+  // A(VQ) = (VQ)(Q*HQ)+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}* Q
+  // where only the first k columns are relevant.
+  //
+  // Dan Sorensen and Richard J. Radke, 11/95
+  template<typename T, typename C>
+    apply_shift_to_Arnoldi_factorization(dense_matrix<T> V, dense_matrix<T> H,
+					 std::vector<C> Lambda, size_type &k,
+					 size_type p, bool true_shift = false) {
+
+
+    size_type k1 = 0, num = 0, kend = k+p, kp1 = k + 1;
+    bool mark = false;
+    T c, s, x, y, z;
+
+    dense_matrix<T> q(1, kend);
+    gmm::clear(q); q(0,kend-1) = T(1);
+    std::vector<T> hv(3), w(std::max(kend, mat_nrows(V)));
+
+    for(size_type jj = 0; jj < p; ++jj) {
+      //     compute and apply a bulge chase sweep initiated by the
+      //     implicit shift held in w(jj)
+   
+      if (abs(Lambda[jj].real()) == 0.0) {
+	//       apply a real shift using 2 by 2 Givens rotations
+
+	for (size_type k1 = 0, k2 = 0; k2 != kend-1; k1 = k2+1) {
+	  k2 = k1;
+	  while (h(k2+1, k2) != T(0) && k2 < kend-1) ++k2;
+
+	  Givens_rotation(H(k1, k1) - Lambda[jj], H(k1+1, k1), c, s);
+	  
+	  for (i = k1; i <= k2; ++i) {
+            if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s);
+            
+	    // Ne pas oublier de nettoyer H(i+1,i-1) (le mettre � z�ro).
+	    // V�rifier qu'au final H(i+1,i) est bien un r�el positif.
+
+            // apply rotation from left to rows of H
+	    row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)),
+		    c, s, 0, 0);
+	    
+	    // apply rotation from right to columns of H
+            size_type ip2 = std::min(i+2, kend);
+            col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2))
+		    c, s, 0, 0);
+            
+            // apply rotation from right to columns of V
+	    col_rot(V, c, s, i, i+1);
+            
+            // accumulate e'  Q so residual can be updated k+p
+	    Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s);
+	    // peut �tre que nous utilisons G au lieu de G* et que
+	    // nous allons trop loin en k2.
+	  }
+	}
+	
+	num = num + 1;
+      }
+      else {
+      
+	// Apply a double complex shift using 3 by 3 Householder 
+	// transformations
+      
+	if (jj == p || mark)
+	  mark = false;     // skip application of conjugate shift
+	else {
+	  num = num + 2;    // mark that a complex conjugate
+	  mark = true;      // pair has been applied
+
+	  // Indices de fin de boucle � surveiller... de pr�s !
+	  for (size_type k1 = 0, k3 = 0; k3 != kend-2; k1 = k3+1) {
+	    k3 = k1;
+	    while (h(k3+1, k3) != T(0) && k3 < kend-2) ++k3;
+	    size_type k2 = k1+1;
+
+
+            x = H(k1,k1) * H(k1,k1) + H(k1,k2) * H(k2,k1)
+	      - 2.0*Lambda[jj].real() * H(k1,k1) + gmm::abs_sqr(Lambda[jj]);
+	    y = H(k2,k1) * (H(k1,k1) + H(k2,k2) - 2.0*Lambda[jj].real());
+	    z = H(k2+1,k2) * H(k2,k1);
+
+	    for (size_type i = k1; i <= k3; ++i) {
+	      if (i > k1) {
+		x = H(i, i-1);
+		y = H(i+1, i-1);
+		z = H(i+2, i-1);
+		// Ne pas oublier de nettoyer H(i+1,i-1) et H(i+2,i-1) 
+		// (les mettre � z�ro).
+	      }
+
+	      hv[0] = x; hv[1] = y; hv[2] = z;
+	      house_vector(v);
+
+	      // V�rifier qu'au final H(i+1,i) est bien un r�el positif
+
+	      // apply transformation from left to rows of H
+	      w.resize(kend-i);
+	      row_house_update(sub_matrix(H, sub_interval(i, 2),
+					  sub_interval(i, kend-i)), v, w);
+               
+	      // apply transformation from right to columns of H
+               
+	      size_type ip3 = std::min(kend, i + 3);
+	      w.resize(ip3);
+              col_house_update(sub_matrix(H, sub_interval(0, ip3),
+					  sub_interval(i, 2)), v, w);
+               
+	      // apply transformation from right to columns of V
+	      
+	      w.resize(mat_nrows(V));
+	      col_house_update(sub_matrix(V, sub_interval(0, mat_nrows(V)),
+					  sub_interval(i, 2)), v, w);
+               
+	      // accumulate e' Q so residual can be updated  k+p
+
+	      w.resize(1);
+	      col_house_update(sub_matrix(q, sub_interval(0,1),
+					  sub_interval(i,2)), v, w);
+               
+	    }
+	  }
+         
+	  //           clean up step with Givens rotation
+
+	  i = kend-2;
+	  c = x; s = y;
+	  if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s);
+            
+	  // Do not forget to clean up H(i+1,i-1) (set it to zero).
+	  // Check that H(i+1,i) ends up being a positive real.
+
+	  // apply rotation from left to rows of H
+	  row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)),
+		    c, s, 0, 0);
+	    
+	  // apply rotation from right to columns of H
+	  size_type ip2 = std::min(i+2, kend);
+	  col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2))
+		  c, s, 0, 0);
+            
+	  // apply rotation from right to columns of V
+	  col_rot(V, c, s, i, i+1);
+            
+	  // accumulate e'  Q so residual can be updated k+p
+	  Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s);
+
+	}
+      }
+    }
+
+    //  update residual and store in the k+1 -st column of v
+
+    k = kend - num;
+    scale(mat_col(V, kend), q(0, k));
+    
+    if (k < mat_nrows(H)) {
+      if (true_shift)
+	gmm::copy(mat_col(V, kend), mat_col(V, k));
+      else
+	   //   v(:,k+1) = v(:,kend+1) + v(:,k+1)*h(k+1,k);
+	   //   v(:,k+1) = v(:,kend+1) ;
+	gmm::add(scaled(mat_col(V, kend), H(kend, kend-1)), 
+		 scaled(mat_col(V, k), H(k, k-1)), mat_col(V, k));
+    }
+
+    H(k, k-1) = vect_norm2(mat_col(V, k));
+    scale(mat_col(V, kend), T(1) / H(k, k-1));
+  }
+
+
+
+  // Sorts the Ritz values of Hobl and records the converged ones in YB/pure/st.
+  template<typename MAT, typename EVAL, typename PURE>
+  void select_eval(const MAT &Hobl, EVAL &eval, MAT &YB, PURE &pure,
+		   idgmres_state &st) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type m = st.m;
+    // NOTE(review): p, tol_vp and compare_vp are not declared in this block.
+    // Computation of the Ritz eigenpairs.
+    // evect only covers the trailing block [tb_def, m) of Hobl.
+    col_matrix< std::vector<T> > evect(m-st.tb_def, m-st.tb_def);
+    // std::vector<std::complex<R> > eval(m);
+    std::vector<R> ritznew(m, R(-1)); // R(-1): no residual estimate available
+    // Eigenvectors of the leading block are computed but not read afterwards.
+    dense_matrix<T> evect_lock(st.tb_def, st.tb_def);
+    
+    sub_interval SUB1(st.tb_def, m-st.tb_def);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB1),
+			  sub_vector(eval, SUB1), evect);
+    sub_interval SUB2(0, st.tb_def);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB2),
+			  sub_vector(eval, SUB2), evect_lock);
+    // Residual estimate: |last eigenvector component * Hobl(m, m-1)|.
+    for (size_type l = st.tb_def; l < m; ++l)
+      ritznew[l] = gmm::abs(evect(m-st.tb_def-1, l-st.tb_def) * Hobl(m, m-1));
+    
+    std::vector< std::pair<T, size_type> > eval_sort(m);
+    for (size_type l = 0; l < m; ++l)
+      eval_sort[l] = std::pair<T, size_type>(eval[l], l);
+    std::sort(eval_sort.begin(), eval_sort.end(), compare_vp());
+    // index[l] is the original position of the l-th sorted eigenvalue.
+    std::vector<size_type> index(m);
+    for (size_type l = 0; l < m; ++l) index[l] = eval_sort[l].second;
+    // The first tb_def entries start out flagged as kept.
+    std::vector<bool> kept(m, false);
+    std::fill(kept.begin(), kept.begin()+st.tb_def, true);
+
+    apply_permutation(eval, index);
+    apply_permutation(evect, index);
+    apply_permutation(ritznew, index);
+    apply_permutation(kept, index);
+
+    //	Which are the eigenvalues that converged ?
+    //
+    //	nb_want is the number of eigenvalues of 
+    //	Hess(tb_def+1:n,tb_def+1:n) that converged and are WANTED
+    //
+    //	nb_unwant is the number of eigenvalues of 
+    //	Hess(tb_def+1:n,tb_def+1:n) that converged and are UNWANTED
+    //
+    //	nb_nolong is the number of eigenvalues of 
+    //	Hess(1:tb_def,1:tb_def) that are NO LONGER WANTED. 
+    //
+    //	tb_deftot is the number of the deflated eigenvalues
+    //	that is tb_def + nb_want + nb_unwant
+    //
+    //	tb_defwant is the number of the wanted deflated eigenvalues
+    //	that is tb_def + nb_want - nb_nolong
+    
+    st.nb_want = 0, st.nb_unwant = 0, st.nb_nolong = 0;
+    size_type j, ind;
+    // First pass: the m-p leading entries of the sorted list.
+    for (j = 0, ind = 0; j < m-p; ++j) {
+      if (ritznew[j] == R(-1)) {
+	if (std::imag(eval[j]) != R(0)) {
+	  st.nb_nolong += 2; ++j; //  to be adapted for the complex case ...
+	} 
+	else st.nb_nolong++;
+      }
+      else {
+	if (ritznew[j]
+	    < tol_vp * gmm::abs(eval[j])) {
+	  
+	  for (size_type l = 0; l < m-st.tb_def; ++l)
+	    YB(l, ind) = std::real(evect(l, j));
+	  kept[j] = true;
+	  ++j; ++st.nb_unwant; ind++;
+	  
+	  if (std::imag(eval[j]) != R(0)) {
+	    for (size_type l = 0; l < m-st.tb_def; ++l)
+	      YB(l, ind) = std::imag(evect(l, j));
+	    pure[ind-1] = 1;
+	    pure[ind] = 2;
+	    
+	    kept[j] = true;
+	    
+	    st.nb_unwant++;
+	    ++ind;
+	  }
+	}
+      }
+    }
+    
+    // Second pass: the remaining entries, counted in nb_want.
+    for (; j < m; ++j) {
+      if (ritznew[j] != R(-1)) {
+
+	for (size_type l = 0; l < m-st.tb_def; ++l)
+	  YB(l, ind) = std::real(evect(l, j));
+	pure[ind] = 1;
+	++ind;
+	kept[j] = true;
+	++st.nb_want;
+	
+	if (ritznew[j]
+	    < tol_vp * gmm::abs(eval[j])) {
+	  for (size_type l = 0; l < m-st.tb_def; ++l)
+	    YB(l, ind) = std::imag(evect(l, j));
+	  pure[ind] = 2;
+	  
+	  j++;
+	  kept[j] = true;
+	  
+	  st.nb_want++;
+	  ++ind;	      
+	}
+      }
+    }
+    // NOTE(review): shift is filled here but not read again in this block.
+    std::vector<T> shift(m - st.tb_def - st.nb_want - st.nb_unwant);
+    for (size_type j = 0, i = 0; j < m; ++j)
+      if (!kept[j]) shift[i++] = eval[j];
+    
+    // st.conv (st.nb_want+st.nb_unwant) is the number of eigenpairs that
+    //   have just converged.
+    // st.tb_deftot is the total number of eigenpairs that have converged.
+    // These are members of st, so they are assigned rather than declared.
+    st.conv = ind;
+    st.tb_deftot = st.tb_def + st.conv;
+    st.tb_defwant = st.tb_def + st.nb_want - st.nb_nolong;
+    
+    sub_interval SUBYB(0, st.conv);
+    
+    if ( st.tb_defwant >= p ) { // An invariant subspace has been found.
+      
+      st.nb_unwant = 0;
+      st.nb_want = p + st.nb_nolong - st.tb_def;
+      st.tb_defwant = p;
+      
+      if ( pure[st.conv - st.nb_want + 1] == 2 ) {
+	++st.nb_want; st.tb_defwant = ++p;// this ought to be a local p
+      }
+      
+      SUBYB = sub_interval(st.conv - st.nb_want, st.nb_want);
+      // YB = YB(:, st.conv-st.nb_want+1 : st.conv); // left pending ..
+      // pure = pure(st.conv-st.nb_want+1 : st.conv,1); // left pending ..
+      st.conv = st.nb_want;
+      st.tb_deftot = st.tb_def + st.conv;
+      st.ok = true;
+    }
+    
+  }
+
+
+
+  // Eigen-decomposes the leading st.tb_deftot block of Hobl, sorts the
+  // eigenvalues, and writes the eigenvectors (real/imag parts) into YB.
+  template<typename MAT, typename EVAL, typename PURE>
+  void select_eval_for_purging(const MAT &Hobl, EVAL &eval, MAT &YB,
+			       PURE &pure, idgmres_state &st) {
+
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type m = st.m;
+
+    // Computation of the Ritz eigenpairs.
+    
+    col_matrix< std::vector<T> > evect(st.tb_deftot, st.tb_deftot);
+    
+    sub_interval SUB1(0, st.tb_deftot);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB1),
+			  sub_vector(eval, SUB1), evect);
+    // eval entries from tb_deftot onwards are reset to zero before sorting.
+    std::fill(eval.begin() + st.tb_deftot, eval.end(), std::complex<R>(0));
+    
+    // NOTE(review): compare_vp is not declared in this block.
+    std::vector< std::pair<T, size_type> > eval_sort(m);
+    for (size_type l = 0; l < m; ++l)
+      eval_sort[l] = std::pair<T, size_type>(eval[l], l);
+    std::sort(eval_sort.begin(), eval_sort.end(), compare_vp());
+
+    // ind[l] is the original position of the l-th sorted eigenvalue.
+    std::vector<size_type> ind(m);
+    for (size_type l = 0; l < m; ++l) ind[l] = eval_sort[l].second;
+    
+    apply_permutation(eval, ind);
+    apply_permutation(evect, ind);
+    
+    // A real eigenvalue fills one column of YB. A complex one fills two
+    // (real part, then imaginary part), tagged 1 and 2 in pure.
+    for (size_type j = 0; j < st.tb_deftot; ++j) {
+	  
+      for (size_type l = 0; l < st.tb_deftot; ++l)
+	YB(l, j) = std::real(evect(l, j));
+      
+      if (std::imag(eval[j]) != R(0)) {
+	// evect has tb_deftot rows, so both loops share the same bound.
+	for (size_type l = 0; l < st.tb_deftot; ++l)
+	  YB(l, j+1) = std::imag(evect(l, j));
+	pure[j] = 1;
+	pure[j+1] = 2;
+	// Step over column j+1; the loop header then moves on to j+2.
+	++j;
+      }
+    }
+  }
+  
+
+
+
+
+
+}
+
+#endif
diff --git a/gmm/gmm_solver_qmr.h b/gmm/gmm_solver_qmr.h
new file mode 100644
index 000000000..ca6b8e075
--- /dev/null
+++ b/gmm/gmm_solver_qmr.h
@@ -0,0 +1,210 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of qmr.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1997-2001, The Trustees of Indiana University.
+// All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_qmr.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Quasi-Minimal Residual iterative solver.
+*/
+#ifndef GMM_QMR_H
+#define GMM_QMR_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /** Quasi-Minimal Residual.
+     
+     This routine solves the unsymmetric linear system Ax = b using
+     the Quasi-Minimal Residual method. x holds the starting guess on
+     entry and is updated in place; iter drives the stopping test.
+     See: R. W. Freund and N. M. Nachtigal, A quasi-minimal residual
+     method for non-Hermitian linear systems, Numerical Math.,
+     60(1991), pp. 315-339
+     M1 is applied through left_mult/right_mult and their transposed forms.
+     Preconditioner -  Incomplete LU, Incomplete LU with threshold,
+                       SSOR or identity_preconditioner.
+  */
+  template <typename Matrix, typename Vector, typename VectorB,
+	    typename Precond1>
+  void qmr(const Matrix &A, Vector &x, const VectorB &b, const Precond1 &M1,
+	   iteration& iter) {
+    // T: scalar type of x; R: the matching magnitude type.
+    typedef typename linalg_traits<Vector>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    T delta(0), ep(0), beta(0), theta_1(0), gamma_1(0);
+    T theta(0), gamma(1), eta(-1);
+    R rho_1(0), rho, xi;
+
+    typedef typename temporary_vector<Vector>::vector_type TmpVec;
+    size_type nn = vect_size(x);
+    TmpVec r(nn), v_tld(nn), y(nn), w_tld(nn), z(nn), v(nn), w(nn);
+    TmpVec y_tld(nn), z_tld(nn), p(nn), q(nn), p_tld(nn), d(nn), s(nn);
+    // A zero right-hand side is answered directly by clearing x.
+    iter.set_rhsnorm(double(gmm::vect_norm2(b)));
+    if (iter.get_rhsnorm() == 0.0) { clear(x); return; }
+    // Initial residual r = b - A*x; v_tld and w_tld both start as copies of r.
+    gmm::mult(A, gmm::scaled(x, T(-1)), b, r);
+    gmm::copy(r, v_tld);
+
+    gmm::left_mult(M1, v_tld, y);
+    rho = gmm::vect_norm2(y);
+
+    gmm::copy(r, w_tld);
+    gmm::transposed_right_mult(M1, w_tld, z);
+    xi = gmm::vect_norm2(z);
+  
+    while (! iter.finished_vect(r)) {
+      // Zero rho or xi: GMM_ASSERT1 if maxiter is size_type(-1), else warn+return.
+      if (rho == R(0) || xi == R(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "QMR failed to converge"); }
+	else { GMM_WARNING1("QMR failed to converge"); return; }
+      }
+      gmm::copy(gmm::scaled(v_tld, T(R(1)/rho)), v);
+      gmm::scale(y, T(R(1)/rho));
+
+      gmm::copy(gmm::scaled(w_tld, T(R(1)/xi)), w);
+      gmm::scale(z, T(R(1)/xi));
+
+      delta = gmm::vect_sp(z, y);
+      if (delta == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "QMR failed to converge"); }
+	else { GMM_WARNING1("QMR failed to converge"); return; }
+      }
+      gmm::right_mult(M1, y, y_tld);		
+      gmm::transposed_left_mult(M1, z, z_tld);
+
+      if (iter.first()) {
+	gmm::copy(y_tld, p);
+	gmm::copy(z_tld, q);
+      } else {
+	gmm::add(y_tld, gmm::scaled(p, -(T(xi  * delta) / ep)), p);
+	gmm::add(z_tld, gmm::scaled(q, -(T(rho * delta) / ep)), q);
+      }
+      // p_tld = A*p and ep = q.p_tld; a zero ep is handled like the cases above.
+      gmm::mult(A, p, p_tld);
+
+      ep = gmm::vect_sp(q, p_tld);
+      if (ep == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "QMR failed to converge"); }
+	else { GMM_WARNING1("QMR failed to converge"); return; }
+      }
+      beta = ep / delta;
+      if (beta == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "QMR failed to converge"); }
+	else { GMM_WARNING1("QMR failed to converge"); return; }
+      }
+      gmm::add(p_tld, gmm::scaled(v, -beta), v_tld);
+      gmm::left_mult(M1, v_tld, y);
+      // Keep the previous rho: the eta update below still reads it.
+      rho_1 = rho;
+      rho = gmm::vect_norm2(y);
+
+      gmm::mult(gmm::transposed(A), q, w_tld);
+      gmm::add(w_tld, gmm::scaled(w, -beta), w_tld);
+      gmm::transposed_right_mult(M1, w_tld, z);
+
+      xi = gmm::vect_norm2(z);
+      // Save the previous gamma and theta before they are overwritten.
+      gamma_1 = gamma;
+      theta_1 = theta;
+
+      theta = rho / (gamma_1 * beta);
+      gamma = T(1) / gmm::sqrt(T(1) + gmm::sqr(theta));
+
+      if (gamma == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "QMR failed to converge"); }
+	else { GMM_WARNING1("QMR failed to converge"); return; }
+      }
+      eta = -eta * T(rho_1) * gmm::sqr(gamma) / (beta * gmm::sqr(gamma_1));
+      // d is the increment added to x; s is the amount subtracted from r.
+      if (iter.first()) {
+	gmm::copy(gmm::scaled(p, eta), d);
+	gmm::copy(gmm::scaled(p_tld, eta), s);
+      } else {
+	T tmp = gmm::sqr(theta_1 * gamma);
+	gmm::add(gmm::scaled(p, eta), gmm::scaled(d, tmp), d);
+	gmm::add(gmm::scaled(p_tld, eta), gmm::scaled(s, tmp), s);
+      }
+      gmm::add(d, x);
+      gmm::add(gmm::scaled(s, T(-1)), r);
+
+      ++iter;
+    }
+  }
+
+
+}
+
+#endif 
+
diff --git a/gmm/gmm_std.h b/gmm/gmm_std.h
new file mode 100644
index 000000000..8727e059b
--- /dev/null
+++ b/gmm/gmm_std.h
@@ -0,0 +1,424 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_std.h
+@author  Yves Renard <Yves.Renard@insa-lyon.fr>,
+@author  Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+@date June 01, 1995.
+@brief basic setup for gmm (includes, typedefs etc.)
+*/
+#ifndef GMM_STD_H__
+#define GMM_STD_H__
+
+//#include <getfem/getfem_arch_config.h>
+
+// Feature-selection macros; each one is defined only if not already set.
+#if !defined(__USE_STD_IOSTREAM)
+#  define __USE_STD_IOSTREAM
+#endif
+#if !defined(__USE_BSD)
+#  define __USE_BSD
+#endif
+
+#if !defined(__USE_ISOC99)
+#  define __USE_ISOC99
+#endif
+
+#if !defined(_MSC_VER) || _MSC_VER < 1400 // standard C library calls
+# define SECURE_NONCHAR_SSCANF sscanf
+# define SECURE_NONCHAR_FSCANF fscanf
+# define SECURE_STRNCPY(a, la, b, lb) strncpy(a, b, lb)
+# define SECURE_FOPEN(F, filename, mode) ((*(F)) = fopen(filename, mode))
+# define SECURE_SPRINTF1(S, l, st, p1) sprintf(S, st, p1)
+# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf(S, st, p1, p2)
+# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf(S, st, p1, p2, p3, p4)
+# define SECURE_STRDUP(s) strdup(s)
+#else // _MSC_VER >= 1400: secure versions for VC++
+# define GMM_SECURE_CRT
+# define SECURE_NONCHAR_SSCANF sscanf_s
+# define SECURE_NONCHAR_FSCANF fscanf_s
+# define SECURE_STRNCPY(a, la, b, lb) strncpy_s(a, la, b, lb)
+# define SECURE_FOPEN(F, filename, mode) (*(F) = 0,  fopen_s(F, filename, mode))
+# define SECURE_SPRINTF1(S, l, st, p1) sprintf_s(S, l, st, p1)
+# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf_s(S, l, st, p1, p2)
+# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf_s(S, l, st, p1, p2, p3, p4)
+# define SECURE_STRDUP(s) _strdup(s)
+# ifndef _SCL_SECURE_NO_DEPRECATE
+#   error Add the option /D_SCL_SECURE_NO_DEPRECATE to the compilation command
+# endif
+#endif
+
+inline void GMM_NOPERATION_(int) { } // empty sink called by GMM_NOPERATION
+#define GMM_NOPERATION(a) { GMM_NOPERATION_(abs(&(a) != &(a))); }
+
+/* ********************************************************************** */
+/*	Compilers detection.						  */
+/* ********************************************************************** */
+
+/* for sun CC 5.0 ...
+#if defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x500
+# include <stdcomp.h>
+# undef _RWSTD_NO_CLASS_PARTIAL_SPEC
+# undef _RWSTD_NO_NAMESPACE
+#endif
+*/
+/* for VISUAL C++ ...
+#if defined(_MSC_VER) //  && !defined(__MWERKS__)
+#define _GETFEM_MSVCPP_ _MSC_VER
+#endif
+*/
+
+// g++ floor: only the major version is tested here, although the message
+// asks for 4.8 or later.
+#if defined(__GNUC__) && (__GNUC__ < 4)
+#  error : PLEASE UPDATE g++ TO AT LEAST 4.8 VERSION
+#endif
+
+/* ********************************************************************** */
+/*	C++ Standard Headers.						  */
+/* ********************************************************************** */
+#include <clocale>
+#include <cstdlib>
+#include <cstddef>
+#include <cmath>
+#include <cstring>
+#include <cctype>
+#include <cassert>
+#include <climits>
+#include <iostream>
+//#include <ios>
+#include <fstream>
+#include <ctime>
+#include <exception>
+#include <typeinfo>
+#include <stdexcept>
+#include <iterator>
+#include <algorithm>
+#include <vector>
+#include <deque>
+#include <string>
+#include <complex>
+#include <limits>
+#include <sstream>
+#include <numeric>
+#include <memory>
+#include <array>
+#include <locale.h>
+
+namespace std {
+#if defined(__GNUC__) && (__cplusplus <= 201103L)
+  // _MakeUniq selects the one make_unique overload valid for each type shape.
+  template<typename _Tp> struct _MakeUniq {
+    using __single_object = unique_ptr<_Tp>;
+  };
+  template<typename _Tp> struct _MakeUniq<_Tp[]> {
+    using __array = unique_ptr<_Tp[]>;
+  };
+  template<typename _Tp, size_t _Bound> struct _MakeUniq<_Tp[_Bound]> {
+    struct __invalid_type { };
+  };
+  /// std::make_unique for single objects
+  template<typename _Tp, typename... _Args>
+  inline typename _MakeUniq<_Tp>::__single_object
+  make_unique(_Args&&... __args) {
+    return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...));
+  }
+  /// std::make_unique for arrays of unknown bound
+  template<typename _Tp>
+  inline typename _MakeUniq<_Tp>::__array make_unique(size_t __num) {
+    return unique_ptr<_Tp>(new typename remove_extent<_Tp>::type[__num]());
+  }
+  /// Disable std::make_unique for arrays of known bound
+  template<typename _Tp, typename... _Args>
+  inline typename _MakeUniq<_Tp>::__invalid_type
+  make_unique(_Args&&...) = delete;
+#endif
+  // Shared pointer to an array, released through default_delete<T[]>. To be
+  // replaced by std::shared_ptr<T[]> once the STL supports it.
+  template <typename T> class shared_array_ptr : shared_ptr<T> {
+    using base_type = shared_ptr<T>;
+  public:
+    shared_array_ptr() {}
+    shared_array_ptr(T *q) : base_type(q, default_delete<T[]>()) {}
+    template <typename Y> shared_array_ptr(const shared_ptr<Y> &p, T *q)
+      : base_type(p, q) {}
+    T *get() const { return base_type::get(); }
+    T &operator*() const { return base_type::operator*(); }
+    T *operator->() const { return base_type::operator->(); }
+  };
+  template <typename T> shared_array_ptr<T> make_shared_array(size_t num)
+  { return shared_array_ptr<T>(new T[num]); }
+}
+
+
+
+
+#ifndef GETFEM_HAVE_OPENMP
+	// Without OpenMP: a single thread, numbered 0, never in a parallel section.
+	inline size_t num_threads() { return 1; }
+	inline size_t this_thread() { return 0; }
+	inline bool me_is_multithreaded_now() { return false; }
+#else
+#include <omp.h>
+	/**number of OpenMP threads*/
+	inline size_t num_threads() { return size_t(omp_get_max_threads()); }
+	/**index of the current thread*/
+	inline size_t this_thread() { return size_t(omp_get_thread_num()); }
+	/**is the program running in the parallel section*/
+	inline bool me_is_multithreaded_now() { return omp_in_parallel() != 0; }
+#endif
+
+namespace gmm {
+
+	using std::endl; using std::cout; using std::cerr;
+        using std::ends; using std::cin; using std::isnan;
+
+#ifdef _WIN32
+
+	// Switches LC_NUMERIC to "C" for the lifetime of the object; nothing is
+	// touched when me_is_multithreaded_now() reports true.
+	class standard_locale {
+		std::string cloc;    // LC_NUMERIC name saved by the constructor
+		std::locale cinloc;  // locale cin had at construction (stored only)
+	public :
+		inline standard_locale(void) : cinloc(cin.getloc()) {
+			if (me_is_multithreaded_now()) return;
+			cloc = setlocale(LC_NUMERIC, 0);
+			setlocale(LC_NUMERIC, "C");
+		}
+
+		// Puts the saved LC_NUMERIC name back.
+		inline ~standard_locale() {
+			if (me_is_multithreaded_now()) return;
+			setlocale(LC_NUMERIC, cloc.c_str());
+		}
+	};
+#else
+	/** This is the Linux counterpart of the solution above; it still needs to be tested. */
+	//class standard_locale {
+	//	locale_t oldloc;
+	//	locale_t temploc;
+
+	//public :
+	//	inline standard_locale(void) : oldloc(uselocale((locale_t)0))
+	//	{
+	//			temploc = newlocale(LC_NUMERIC, "C", NULL);
+    //              uselocale(temploc);
+	//	}
+
+	//	inline ~standard_locale()
+	//	{
+	//		    uselocale(oldloc);
+	//			freelocale(temploc);
+	//	}
+	//};
+
+
+  class standard_locale { // scoped switch of LC_NUMERIC and cin to "C"
+    std::string cloc;    // LC_NUMERIC name found at construction
+    std::locale cinloc;  // locale cin had at construction
+  public :
+    inline standard_locale(void)
+      : cloc(setlocale(LC_NUMERIC, 0)), cinloc(cin.getloc()) {
+      setlocale(LC_NUMERIC, "C"); cin.imbue(std::locale("C"));
+    }
+    inline ~standard_locale()  // puts both settings back
+    { setlocale(LC_NUMERIC, cloc.c_str()); cin.imbue(cinloc); }
+  };
+
+
+#endif
+
+  // Imbues a stream with the "C" locale for the lifetime of this object.
+  class stream_standard_locale {
+    std::locale cloc;  // locale of the stream before the switch
+    std::ios &io;      // stream whose locale is switched
+  public :
+    inline stream_standard_locale(std::ios &i) : cloc(i.getloc()), io(i)
+    { io.imbue(std::locale("C")); }
+    inline ~stream_standard_locale() { io.imbue(cloc); }
+  };
+
+
+
+
+  /* ******************************************************************* */
+  /*       Clock functions.                                              */
+  /* ******************************************************************* */
+
+# if !defined(HAVE_SYS_TIMES)
+  inline double uclock_sec(void) // clock() ticks converted to seconds
+  { return double(clock()) / double(CLOCKS_PER_SEC); }
+# else
+  inline double uclock_sec(void) { // tms_utime from times(), in seconds
+    static double ticks_per_sec = 0.;
+    if (ticks_per_sec == 0.) ticks_per_sec = sysconf(_SC_CLK_TCK);
+    tms now; times(&now); return double(now.tms_utime) / ticks_per_sec;
+  }
+# endif
+
+  /* ******************************************************************** */
+  /*	Fixed size integer types.                     			  */
+  /* ******************************************************************** */
+  // Remark : the test program dynamic_array tests the length of
+  //          resulting integers
+
+  template <size_t s> struct fixed_size_integer_generator { // no match: void
+    using int_base_type = void;
+    using uint_base_type = void;
+  };
+
+  template <> struct fixed_size_integer_generator<sizeof(char)> {
+    using int_base_type = signed char;
+    using uint_base_type = unsigned char;
+  };
+  // A size shared with a narrower type is shifted away to avoid a clash.
+  template <> struct fixed_size_integer_generator<
+    sizeof(short int) - ((sizeof(short int) == sizeof(char)) ? 78 : 0)> {
+    using int_base_type = signed short int;
+    using uint_base_type = unsigned short int;
+  };
+
+  template <> struct fixed_size_integer_generator<
+    sizeof(int) - ((sizeof(int) == sizeof(short int)) ? 59 : 0)> {
+    using int_base_type = signed int;
+    using uint_base_type = unsigned int;
+  };
+
+  template <> struct fixed_size_integer_generator<
+    sizeof(long) - ((sizeof(int) == sizeof(long)) ? 93 : 0)> {
+    using int_base_type = signed long;
+    using uint_base_type = unsigned long;
+  };
+
+  template <> struct fixed_size_integer_generator<
+    sizeof(long long) - ((sizeof(long long) == sizeof(long)) ? 99 : 0)> {
+    using int_base_type = signed long long;
+    using uint_base_type = unsigned long long;
+  };
+
+  using int8_type = fixed_size_integer_generator<1>::int_base_type;
+  using uint8_type = fixed_size_integer_generator<1>::uint_base_type;
+  using int16_type = fixed_size_integer_generator<2>::int_base_type;
+  using uint16_type = fixed_size_integer_generator<2>::uint_base_type;
+  using int32_type = fixed_size_integer_generator<4>::int_base_type;
+  using uint32_type = fixed_size_integer_generator<4>::uint_base_type;
+  using int64_type = fixed_size_integer_generator<8>::int_base_type;
+  using uint64_type = fixed_size_integer_generator<8>::uint_base_type;
+
+// #if INT_MAX == 32767
+//   typedef signed int    int16_type;
+//   typedef unsigned int uint16_type;
+// #elif  SHRT_MAX == 32767
+//   typedef signed short int    int16_type;
+//   typedef unsigned short int uint16_type;
+// #else
+// # error "impossible to build a 16 bits integer"
+// #endif
+
+// #if INT_MAX == 2147483647
+//   typedef signed int    int32_type;
+//   typedef unsigned int uint32_type;
+// #elif  SHRT_MAX == 2147483647
+//   typedef signed short int    int32_type;
+//   typedef unsigned short int uint32_type;
+// #elif LONG_MAX == 2147483647
+//   typedef signed long int    int32_type;
+//   typedef unsigned long int uint32_type;
+// #else
+// # error "impossible to build a 32 bits integer"
+// #endif
+
+// #if INT_MAX == 9223372036854775807L || INT_MAX == 9223372036854775807
+//   typedef signed int    int64_type;
+//   typedef unsigned int uint64_type;
+// #elif LONG_MAX == 9223372036854775807L || LONG_MAX == 9223372036854775807
+//   typedef signed long int    int64_type;
+//   typedef unsigned long int uint64_type;
+// #elif LLONG_MAX == 9223372036854775807LL || LLONG_MAX == 9223372036854775807L || LLONG_MAX == 9223372036854775807
+//   typedef signed long long int int64_type;
+//   typedef unsigned long long int uint64_type;
+// #else
+// # error "impossible to build a 64 bits integer"
+// #endif
+
+#if !defined(__GNUC__) || defined(__ICC)
+// Other compilers: both markers expand to nothing.
+# define IS_DEPRECATED
+# define ALWAYS_INLINE
+#else
+/*
+   IS_DEPRECATED: g++ can issue a warning at each usage of a function declared
+   with this special attribute (also works with typedefs and variable
+   declarations).
+   ALWAYS_INLINE: the specified function is inlined at any optimization level.
+*/
+# define IS_DEPRECATED __attribute__ ((__deprecated__))
+# define ALWAYS_INLINE __attribute__((always_inline))
+#endif
+
+}
+
+  /* ******************************************************************** */
+  /*	Import/export classes and interfaces from a shared library          */
+  /* ******************************************************************** */
+// Library build (EXPORTED_TO_SHARED_LIB): APIDECL marks exported symbols.
+#if defined(EXPORTED_TO_SHARED_LIB)
+#  if defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#     define APIDECL __declspec(dllexport)
+#  elif defined(__GNUC__)
+#     define APIDECL __attribute__((visibility("default")))
+#  else
+#     define APIDECL
+#  endif
+#   if defined(IMPORTED_FROM_SHARED_LIB)
+#	  error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE
+#   endif
+#endif
+
+#ifdef IMPORTED_FROM_SHARED_LIB
+// Client of the shared library: only MSVC/Intel get an import decoration.
+#  if !defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#     define APIDECL
+#  else
+#     define APIDECL __declspec(dllimport)
+#  endif
+#  ifdef EXPORTED_TO_SHARED_LIB
+#	  error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE
+#  endif
+#endif
+
+// Static linking (neither macro defined): APIDECL expands to nothing.
+#if !defined(EXPORTED_TO_SHARED_LIB) && !defined(IMPORTED_FROM_SHARED_LIB)
+#  define APIDECL
+#endif
+
+#endif /* GMM_STD_H__ */
diff --git a/gmm/gmm_sub_index.h b/gmm/gmm_sub_index.h
new file mode 100644
index 000000000..f1f0097ce
--- /dev/null
+++ b/gmm/gmm_sub_index.h
@@ -0,0 +1,224 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_index.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief sub-indices.
+*/
+
+#ifndef GMM_SUB_INDEX_H__
+#define GMM_SUB_INDEX_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		sub indices                               		  */
+  /* ******************************************************************** */
+
+  // Index list with a manual reference count (managed by index_generator).
+  struct basic_index : public std::vector<size_t> {
+    mutable size_type nb_ref;  // number of holders of this object
+    // size_type key1; (idea: store the sum of the components)
+    // const basic_index *rind; (idea: the reverse index, if it exists)
+    // Checked read: positions past the end yield size_type(-1).
+    size_t operator[](size_type i) const {
+      if (i >= size()) return size_type(-1);
+      return std::vector<size_t>::operator[](i);
+    }
+    basic_index() : nb_ref(1) {}
+    basic_index(size_type j) : std::vector<size_t>(j), nb_ref(1) {}
+    template <typename IT> basic_index(IT b, IT e)
+      : std::vector<size_t>(e-b), nb_ref(1) { std::copy(b, e, begin()); }
+    // Reverse map of *pbi: slot v receives the position of value v in *pbi,
+    // slots for values that do not occur keep size_type(-1).
+    basic_index(const basic_index *pbi) : nb_ref(1) {
+      size_type top = 0, pos = 0;
+      const_iterator cur = pbi->begin(), stop = pbi->end();
+      for ( ; cur != stop; ++cur) if (top < *cur) top = *cur;
+      assign(top + 1, size_type(-1));
+      for (cur = pbi->begin(); cur != stop; ++cur, ++pos)
+        std::vector<size_t>::operator[](*cur) = pos;
+    }
+    void swap(size_type i, size_type j) {  // exchange entries i and j
+      std::swap(std::vector<size_t>::operator[](i),
+                std::vector<size_t>::operator[](j));
+    }
+  };
+
+  typedef basic_index *pbasic_index;
+
+  // Allocates basic_index objects and maintains their nb_ref holder count.
+  struct index_generator {
+    template <typename IT> static pbasic_index create_index(IT begin, IT end)
+    { pbasic_index made = new basic_index(begin, end); return made; }
+    static pbasic_index create_rindex(pbasic_index pbi)
+    { pbasic_index made = new basic_index(pbi); return made; }
+    // attach adds a holder; unattach drops one and deletes the object at zero.
+    static void attach(pbasic_index pbi) { if (pbi) ++pbi->nb_ref; }
+    static void unattach(pbasic_index pbi)
+    { if (!pbi) return; if (--pbi->nb_ref == 0) delete pbi; }
+  };
+
+  // Index list shared between copies through the nb_ref count of basic_index.
+  struct sub_index {
+    size_type first_, last_;  // smallest / largest stored index (0 if empty)
+    typedef basic_index base_type;
+    typedef base_type::const_iterator const_iterator;
+
+    mutable pbasic_index ind;   // the index list itself
+    mutable pbasic_index rind;  // reverse index, created on demand by test_rind()
+
+    void comp_extr(void) {
+      std::vector<size_t>::const_iterator it = ind->begin(), ite = ind->end();
+      if (it != ite) { first_=last_= *it; ++it; } else { first_=last_= 0; }
+      for (; it != ite; ++it) 
+	{ first_ = std::min(first_, *it); last_ = std::max(last_, *it); }
+    }
+
+    inline void test_rind(void) const
+    { if (!rind) rind = index_generator::create_rindex(ind); }
+    size_type size(void) const { return ind->size(); }
+    size_type first(void) const { return first_; }
+    size_type last(void) const { return last_; }
+    size_type index(size_type i) const { return (*ind)[i]; }
+    size_type rindex(size_type i) const {
+      test_rind();
+      if (i < rind->size()) return (*rind)[i]; else return size_type(-1);
+    }
+
+    const_iterator  begin(void) const { return  ind->begin(); }
+    const_iterator    end(void) const { return  ind->end();   }
+    const_iterator rbegin(void) const { test_rind(); return rind->begin(); }
+    const_iterator   rend(void) const { test_rind(); return rind->end();   }
+    // Members are zeroed so that copying an empty sub_index is well defined.
+    sub_index() : first_(0), last_(0), ind(0), rind(0) {}
+    template <typename IT> sub_index(IT it, IT ite)
+      : ind(index_generator::create_index(it, ite)),
+	rind(0) { comp_extr(); }
+    template <typename CONT> sub_index(const CONT &c)
+      : ind(index_generator::create_index(c.begin(), c.end())),
+	rind(0) { comp_extr(); }
+    ~sub_index() {
+      index_generator::unattach(rind);
+      index_generator::unattach(ind);
+    }
+    sub_index(const sub_index &si) : first_(si.first_), last_(si.last_),
+				     ind(si.ind), rind(si.rind)
+    { index_generator::attach(rind); index_generator::attach(ind); }
+    // Attach to the source before releasing the current indices: releasing
+    // first would free them on self-assignment while they are still needed.
+    sub_index &operator =(const sub_index &si) {
+      index_generator::attach(si.rind); index_generator::attach(si.ind);
+      index_generator::unattach(rind); index_generator::unattach(ind);
+      ind = si.ind; rind = si.rind;
+      first_ = si.first_; last_ = si.last_;
+      return *this;
+    }
+  };
+
+  struct unsorted_sub_index : public sub_index {  // entries may be swapped
+    typedef basic_index base_type;
+    typedef base_type::const_iterator const_iterator;
+    template <typename IT> unsorted_sub_index(IT it, IT ite)
+      : sub_index(it, ite) {}
+    template <typename CONT> unsorted_sub_index(const CONT &c)
+      : sub_index(c) {}
+    unsorted_sub_index() {}
+    unsorted_sub_index(const unsorted_sub_index &si)
+      : sub_index(static_cast<const sub_index &>(si)) { }
+    unsorted_sub_index &operator =(const unsorted_sub_index &si)
+    { sub_index &base = *this; base = si; return *this; }
+    void swap(size_type i, size_type j) {  // asserts the index is not shared
+      GMM_ASSERT2(ind->nb_ref <= 1, "Operation not allowed on this index");
+      if (rind) rind->swap((*ind)[i], (*ind)[j]);
+      ind->swap(i, j);
+    }
+  };
+
+  // Prints the stored indices as "sub_index(i0, i1, ...)".
+  inline std::ostream &operator << (std::ostream &o, const sub_index &si) {
+    const size_type count = si.size();
+    o << "sub_index(";
+    for (size_type k = 0; k < count; ++k) o << (k ? ", " : "") << si.index(k);
+    return o << ")";
+  }
+
+  struct sub_interval {  // contiguous index range [min, max)
+    size_type min, max;  // max is exclusive: max == min + size()
+    size_type size(void) const { return max - min; }
+    size_type first(void) const { return min; }
+    size_type last(void) const { return max; }
+    size_type index(size_type i) const { return min + i; }
+    size_type step(void) const { return 1; }
+    // Position of i inside the range, or size_type(-1) when i is outside it.
+    size_type rindex(size_type i) const
+    { if (i >= min && i < max) return i - min; return size_type(-1); }
+    sub_interval(size_type mi, size_type l) : min(mi), max(mi+l) {}
+    sub_interval() : min(0), max(0) {}  // empty range instead of garbage
+  };
+
+  inline std::ostream &operator << (std::ostream &o, const sub_interval &si)
+  { return o << "sub_interval(" << si.min << ", " << si.size() << ")"; }
+
+  struct sub_slice {  // strided index set: min, min+N, min+2N, ... below max
+    size_type min, max, N;  // N is the step; max == min + size()*N (exclusive)
+    size_type size(void) const { return (max - min) / N; }
+    size_type first(void) const { return min; }
+    size_type last(void) const { return (min == max) ? max : max+1-N; }
+    size_type step(void) const { return N; }
+    size_type index(size_type i) const { return min + N * i; }
+    // Position of i in the slice; size_type(-1) if i is outside or off-stride.
+    size_type rindex(size_type i) const {
+      if (i >= min && i < max)
+	{ size_type j = (i - min); if (j % N == 0) return j / N; }
+      return size_type(-1);
+    }
+    sub_slice(size_type mi, size_type l, size_type n)
+      : min(mi), max(mi+l*n), N(n) {}
+    sub_slice(void) : min(0), max(0), N(1) {}  // empty slice, size() stays defined
+  };
+
+  inline std::ostream &operator << (std::ostream &o, const sub_slice &si) {
+    // Output format: sub_slice(min, size, step)
+    return o << "sub_slice(" << si.min << ", " << si.size() << ", " << si.step() << ")";
+  }
+
+  template<class SUBI> struct index_is_sorted  // default: linalg_true
+  {  typedef linalg_true bool_type; };
+  template<> struct index_is_sorted<unsorted_sub_index>  // the one exception
+  {  typedef linalg_false bool_type; };
+
+}
+
+#endif //  GMM_SUB_INDEX_H__
diff --git a/gmm/gmm_sub_matrix.h b/gmm/gmm_sub_matrix.h
new file mode 100644
index 000000000..e79883c31
--- /dev/null
+++ b/gmm/gmm_sub_matrix.h
@@ -0,0 +1,406 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_matrix.h
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Generic sub-matrices.
+*/
+
+#ifndef GMM_SUB_MATRIX_H__
+#define GMM_SUB_MATRIX_H__
+
+#include "gmm_sub_vector.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		sub row matrices type                                      */
+  /* ********************************************************************* */
+
+  // View on rows si1 and columns si2 of a matrix reached through row iterators.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_row_matrix {
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_row_iterator, typename linalg_traits<M>::row_iterator,
+	    PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    SUBI1 si1;
+    SUBI2 si2;
+    iterator begin_;
+    porigin_type origin;
+    // Entry (i, j) of the view: row si1.index(i), column si2.index(j) of M.
+    reference operator()(size_type i, size_type j) const 
+    { return linalg_traits<M>::access(begin_ + si1.index(i), si2.index(j)); }
+    size_type nrows(void) const { return si1.size(); }
+    size_type ncols(void) const { return si2.size(); }
+    // Binds the view to m; the default constructor does not bind any matrix.
+    gen_sub_row_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2)
+      : si1(s1), si2(s2), begin_(mat_row_begin(m)),
+	origin(linalg_origin(m)) {}
+    gen_sub_row_matrix() {}
+    gen_sub_row_matrix(const gen_sub_row_matrix<CPT, SUBI1, SUBI2> &cr) :
+      si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {}
+  };
+
+  // Row iterator of gen_sub_row_matrix: ii is a position inside si1.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_row_matrix_iterator {
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_row_iterator, typename linalg_traits<M>::row_iterator,
+	    PT>::ref_type ITER;
+    typedef ITER value_type;
+    typedef ITER *pointer;
+    typedef ITER &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_sub_row_matrix_iterator<PT, SUBI1, SUBI2> iterator;
+
+    ITER it;
+    SUBI1 si1;
+    SUBI2 si2;
+    size_type ii;
+    // Moving the iterator only changes the position ii; it stays fixed.
+    iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; ii--; return tmp; }
+    iterator &operator ++()   { ii++; return *this; }
+    iterator &operator --()   { ii--; return *this; }
+    iterator &operator +=(difference_type i) { ii += i; return *this; }
+    iterator &operator -=(difference_type i) { ii -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { return ii - i.ii; }
+    // Dereferencing maps the position through si1 onto a row iterator of M.
+    ITER operator *() const { return it + si1.index(ii); }
+    ITER operator [](int i) { return it + si1.index(ii+i); }
+    // Comparisons look at ii only, not at it, si1 or si2.
+    bool operator ==(const iterator &i) const { return (ii == i.ii); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (ii < i.ii); }
+
+    gen_sub_row_matrix_iterator(void) {}
+    gen_sub_row_matrix_iterator(const 
+	     gen_sub_row_matrix_iterator<MPT, SUBI1, SUBI2> &itm)
+      : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {}
+    gen_sub_row_matrix_iterator(const ITER &iter, const SUBI1 &s1,
+				const SUBI2 &s2, size_type i)
+      : it(iter), si1(s1), si2(s2), ii(i) { }
+  };
+  // linalg_traits of gen_sub_row_matrix: a row_major view without column types.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct linalg_traits<gen_sub_row_matrix<PT, SUBI1, SUBI2> > {
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_col_type;        // column types are null types
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef typename sub_vector_type<const typename org_type<typename
+	    linalg_traits<M>::const_sub_row_type>::t *, SUBI2>::vector_type
+            const_sub_row_type;
+    typedef typename select_ref<abstract_null_type, 
+	    typename sub_vector_type<typename org_type<typename linalg_traits<M>::sub_row_type>::t *,
+	    SUBI2>::vector_type, PT>::ref_type sub_row_type;
+    typedef gen_sub_row_matrix_iterator<typename const_pointer<PT>::pointer,
+	    SUBI1, SUBI2> const_row_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_sub_row_matrix_iterator<PT, SUBI1, SUBI2>, PT>::ref_type
+            row_iterator;
+    typedef typename linalg_traits<const_sub_row_type>::storage_type
+            storage_type;
+    typedef row_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it)  // row of M seen through si2
+    { return const_sub_row_type(linalg_traits<M>::row(*it), it.si2); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(linalg_traits<M>::row(*it), it.si2); }
+    static const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m.begin_, m.si1, m.si2, 0); }
+    static row_iterator row_begin(this_type &m)
+    { return row_iterator(m.begin_, m.si1, m.si2, 0); }
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m.begin_, m.si1, m.si2,  m.nrows()); }
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m.begin_, m.si1, m.si2, m.nrows()); }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m) {  // clears each row view returned by row()
+      row_iterator it = mat_row_begin(m), ite = mat_row_end(m);
+      for (; it != ite; ++it) clear(row(it));
+    }
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(*itrow, itrow.si2.index(i)); }
+    static reference access(const row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(*itrow, itrow.si2.index(i)); }
+  };
+  
+  template <typename PT, typename SUBI1, typename SUBI2>
+  std::ostream &operator <<(std::ostream &o,
+			    const gen_sub_row_matrix<PT, SUBI1, SUBI2>& m)
+  { gmm::write(o,m); return o; }  // formatting is delegated to gmm::write
+
+
+  /* ********************************************************************* */
+  /*		sub column matrices type                                   */
+  /* ********************************************************************* */
+
+  // View on rows si1 and columns si2 of a matrix reached through column iterators.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_col_matrix {
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_col_iterator, typename linalg_traits<M>::col_iterator,
+	    PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    SUBI1 si1;
+    SUBI2 si2;
+    iterator begin_;
+    porigin_type origin;
+    // Entry (i, j) of the view: column si2.index(j), row si1.index(i) of M.
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_ + si2.index(j), si1.index(i)); }
+    size_type nrows(void) const { return si1.size(); }
+    size_type ncols(void) const { return si2.size(); }
+    // Binds the view to m; the default constructor does not bind any matrix.
+    gen_sub_col_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2)
+      : si1(s1), si2(s2), begin_(mat_col_begin(m)),
+        origin(linalg_origin(m)) {}
+    gen_sub_col_matrix() {}
+    gen_sub_col_matrix(const gen_sub_col_matrix<CPT, SUBI1, SUBI2> &cr) :
+      si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {}
+  };
+
+  // Column iterator of gen_sub_col_matrix: ii is a position inside si2.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_col_matrix_iterator {
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename select_ref<typename linalg_traits<M>::const_col_iterator,
+				typename linalg_traits<M>::col_iterator,
+				PT>::ref_type ITER;
+    typedef ITER value_type;
+    typedef ITER *pointer;
+    typedef ITER &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_sub_col_matrix_iterator<PT, SUBI1, SUBI2> iterator;
+
+    ITER it;
+    SUBI1 si1;
+    SUBI2 si2;
+    size_type ii;
+    // Moving the iterator only changes the position ii; it stays fixed.
+    iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; ii--; return tmp; }
+    iterator &operator ++()   { ii++; return *this; }
+    iterator &operator --()   { ii--; return *this; }
+    iterator &operator +=(difference_type i) { ii += i; return *this; }
+    iterator &operator -=(difference_type i) { ii -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { return ii - i.ii; }
+    // Dereferencing maps the position through si2 onto a column iterator of M.
+    ITER operator *() const { return it + si2.index(ii); }
+    ITER operator [](int i) { return it + si2.index(ii+i); }
+    // Comparisons look at ii only, not at it, si1 or si2.
+    bool operator ==(const iterator &i) const { return (ii == i.ii); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (ii < i.ii); }
+    gen_sub_col_matrix_iterator(void) {}
+    gen_sub_col_matrix_iterator(const 
+	gen_sub_col_matrix_iterator<MPT, SUBI1, SUBI2> &itm)
+      : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {}
+    gen_sub_col_matrix_iterator(const ITER &iter, const SUBI1 &s1,
+				const SUBI2 &s2, size_type i)
+      : it(iter), si1(s1), si2(s2), ii(i) { }
+  };
+  // linalg_traits of gen_sub_col_matrix: a col_major view without row types.
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct linalg_traits<gen_sub_col_matrix<PT, SUBI1, SUBI2> > {
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_row_type;        // row types are null types
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef typename sub_vector_type<const typename org_type<typename linalg_traits<M>::const_sub_col_type>::t *, SUBI1>::vector_type const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, typename sub_vector_type<typename org_type<typename linalg_traits<M>::sub_col_type>::t *, SUBI1>::vector_type, PT>::ref_type sub_col_type;
+    typedef gen_sub_col_matrix_iterator<typename const_pointer<PT>::pointer,
+	    SUBI1, SUBI2> const_col_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_sub_col_matrix_iterator<PT, SUBI1, SUBI2>, PT>::ref_type
+            col_iterator;
+    typedef col_major sub_orientation;
+    typedef linalg_true index_sorted;
+    typedef typename linalg_traits<const_sub_col_type>::storage_type
+    storage_type;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_col_type col(const const_col_iterator &it)  // column of M seen through si1
+    { return const_sub_col_type(linalg_traits<M>::col(*it), it.si1); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(linalg_traits<M>::col(*it), it.si1); }
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m.begin_, m.si1, m.si2, 0); }
+    static col_iterator col_begin(this_type &m)
+    { return col_iterator(m.begin_, m.si1, m.si2, 0); }
+    static const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m.begin_, m.si1, m.si2,  m.ncols()); }
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m.begin_, m.si1, m.si2, m.ncols()); } 
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m) {  // clears each column view returned by col()
+      col_iterator it = mat_col_begin(m), ite = mat_col_end(m);
+      for (; it != ite; ++it) clear(col(it));
+    }
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(*itcol, itcol.si1.index(i)); }
+    static reference access(const col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(*itcol, itcol.si1.index(i)); }
+  };
+
+  template <typename PT, typename SUBI1, typename SUBI2> std::ostream &operator <<
+  (std::ostream &o, const gen_sub_col_matrix<PT, SUBI1, SUBI2>& m)
+  { gmm::write(o,m); return o; }  // formatting is delegated to gmm::write
+
+  /* ******************************************************************** */
+  /*		sub matrices                                              */
+  /* ******************************************************************** */
+  // sub_matrix_type picks the row or column view from M's principal orientation.
+  template <typename PT, typename SUBI1, typename SUBI2, typename ST>
+  struct sub_matrix_type_ {
+    typedef abstract_null_type return_type;  // NOTE(review): named return_type, not matrix_type like the specializations -- confirm
+  };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type_<PT, SUBI1, SUBI2, col_major>
+  { typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> matrix_type; };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type_<PT, SUBI1, SUBI2, row_major>
+  { typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> matrix_type; };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type {
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename sub_matrix_type_<PT, SUBI1, SUBI2,
+        typename principal_orientation_type<typename
+        linalg_traits<M>::sub_orientation>::potype>::matrix_type matrix_type;
+  };
+  // View on the rows si1 and columns si2 of m; asserts that both fit in m.
+  template <typename M, typename SUBI1, typename SUBI2>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI2>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type,
+    M *>::return_type
+  sub_matrix(M &m, const SUBI1 &si1, const SUBI2 &si2) {
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m),
+		"sub matrix too large");
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI2>::matrix_type cv;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type mv;
+    return typename select_return<cv, mv, M *>::return_type(linalg_cast(m), si1, si2);
+  }
+  // Same view for a const matrix m; asserts that si1 and si2 fit in m.
+  template <typename M, typename SUBI1, typename SUBI2>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI2>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type,
+    const M *>::return_type
+  sub_matrix(const M &m, const SUBI1 &si1, const SUBI2 &si2) {
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m),
+		"sub matrix too large");
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI2>::matrix_type cv;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type mv;
+    return typename select_return<cv, mv, const M *>::return_type(linalg_cast(m), si1, si2);
+  }
+  // View using the same index si1 for rows and columns of m.
+  template <typename M, typename SUBI1>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    M *>::return_type
+  sub_matrix(M &m, const SUBI1 &si1) {
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m),
+		"sub matrix too large");
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI1>::matrix_type cv;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type mv;
+    return typename select_return<cv, mv, M *>::return_type(linalg_cast(m), si1, si1);
+  }
+  // Const-matrix view using the same index si1 for rows and columns of m.
+  template <typename M, typename SUBI1>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    const M *>::return_type
+  sub_matrix(const M &m, const SUBI1 &si1) {
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m),
+		"sub matrix too large");
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI1>::matrix_type cv;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type mv;
+    return typename select_return<cv, mv, const M *>::return_type(linalg_cast(m), si1, si1);
+  }
+
+}
+
+#endif //  GMM_SUB_MATRIX_H__
diff --git a/gmm/gmm_sub_vector.h b/gmm/gmm_sub_vector.h
new file mode 100644
index 000000000..d35f908d5
--- /dev/null
+++ b/gmm/gmm_sub_vector.h
@@ -0,0 +1,560 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_vector.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Generic sub-vectors.
+*/
+
+#ifndef GMM_SUB_VECTOR_H__
+#define GMM_SUB_VECTOR_H__
+
+#include "gmm_interface.h"
+#include "gmm_sub_index.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		sparse sub-vectors                                         */
+  /* ********************************************************************* */
+
+  template <typename IT, typename MIT, typename SUBI>
+  struct sparse_sub_vector_iterator {
+    // Iterator adaptor over IT; forward()/backward() skip entries whose index() is size_type(-1).
+    IT itb, itbe;  // itb: current position; itbe: bound tested by forward()/backward()
+    SUBI si;       // sub-index object; index() applies si.rindex() to itb.index()
+
+    typedef std::iterator_traits<IT>                traits_type;
+    typedef typename traits_type::value_type        value_type;
+    typedef typename traits_type::pointer           pointer;
+    typedef typename traits_type::reference         reference;
+    typedef typename traits_type::difference_type   difference_type;
+    typedef std::bidirectional_iterator_tag         iterator_category;
+    typedef size_t                                  size_type;
+    typedef sparse_sub_vector_iterator<IT, MIT, SUBI>    iterator;
+    // index(): the underlying index itb.index() translated by si.rindex().
+    size_type index(void) const { return si.rindex(itb.index()); }
+    void forward(void);   // defined out of line, after this struct
+    void backward(void);  // defined out of line, after this struct
+    iterator &operator ++()
+    { ++itb; forward(); return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --()
+    { --itb; backward(); return *this; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    reference operator *() const { return *itb; }
+    // Comparison looks at the current position itb only (itbe and si are ignored).
+    bool operator ==(const iterator &i) const { return itb == i.itb; }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    // Constructors: default; range plus sub-index (runs forward() once); from the <MIT, MIT, SUBI> instantiation.
+    sparse_sub_vector_iterator(void) {}
+    sparse_sub_vector_iterator(const IT &it, const IT &ite, const SUBI &s)
+      : itb(it), itbe(ite), si(s) { forward(); }
+    sparse_sub_vector_iterator(const sparse_sub_vector_iterator<MIT, MIT,
+	 SUBI> &it) : itb(it.itb), itbe(it.itbe), si(it.si) {}
+  };
+
+  template <typename IT, typename MIT, typename SUBI>  /* step up to itbe or a mapped entry */
+  void  sparse_sub_vector_iterator<IT, MIT, SUBI>::forward(void)
+  { for (; itb != itbe && index() == size_type(-1); ++itb) {} }
+
+  template <typename IT, typename MIT, typename SUBI>  /* step down; only itbe bounds the walk */
+  void  sparse_sub_vector_iterator<IT, MIT, SUBI>::backward(void)
+  { for (; itb != itbe && index() == size_type(-1); --itb) {} }
+
+  template <typename PT, typename SUBI> struct sparse_sub_vector {
+    typedef sparse_sub_vector<PT, SUBI> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;  // the underlying vector type
+    typedef V * CPT;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Stored state: iterator range and origin of the underlying vector, plus the sub-index.
+    iterator begin_, end_;
+    porigin_type origin;
+    SUBI si;
+
+    size_type size(void) const { return si.size(); }
+    // Element i is read through linalg_traits<V>::access at position si.index(i).
+    reference operator[](size_type i) const
+    { return linalg_traits<V>::access(origin, begin_, end_, si.index(i)); }
+
+    sparse_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)),
+       end_(vect_end(v)), origin(linalg_origin(v)), si(s) {}
+    sparse_sub_vector(const V &v, const SUBI &s) // const is cast away before calling vect_begin & co.
+      : begin_(vect_begin(const_cast<V &>(v))),
+       end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), si(s) {}
+    sparse_sub_vector() {}
+    sparse_sub_vector(const sparse_sub_vector<CPT, SUBI> &cr) // copies from the V * instantiation
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {} 
+  };
+
+  // Point it.itb / it.itbe at the begin / end obtained from origin o, then run forward().
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_begin(sparse_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                           sparse_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  // Overload taken for a const sparse_sub_vector *: itb <- begin, itbe <- end, then forward().
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_begin(sparse_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                           const sparse_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  
+  // Set both it.itb and it.itbe to the end obtained from origin o, then run forward().
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_end(sparse_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                         sparse_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::V_reference base_ref;
+    set_to_end(it.itb, o, typename traits::pV(), base_ref());
+    set_to_end(it.itbe, o, typename traits::pV(), base_ref());
+    it.forward();
+  }
+  // Overload taken for a const sparse_sub_vector *: itb and itbe both become the end.
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_end(sparse_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                         const sparse_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+
+  template <typename PT, typename SUBI>
+  struct linalg_traits<sparse_sub_vector<PT, SUBI> > {  // traits specialization for sparse_sub_vector
+    typedef sparse_sub_vector<PT, SUBI> this_type;
+    typedef this_type * pthis_type;
+    typedef PT pV;
+    typedef typename std::iterator_traits<PT>::value_type V;  // underlying vector type
+    typedef typename linalg_and<typename index_is_sorted<SUBI>::bool_type,
+	    typename linalg_traits<V>::index_sorted>::bool_type index_sorted;  // linalg_and of the SUBI flag and V's flag
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, typename
+            linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+	    typename linalg_traits<V>::iterator, PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    sparse_sub_vector_iterator<pre_iterator, pre_iterator, SUBI>,
+	    PT>::ref_type iterator;
+    typedef sparse_sub_vector_iterator<typename linalg_traits<V>
+            ::const_iterator, pre_iterator, SUBI> const_iterator;
+    typedef abstract_sparse storage_type;  // the sub-vector is always tagged as sparse storage
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) {  // start from v's stored range, then refresh it
+      iterator it;
+      it.itb = v.begin_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))  // not a const reference: re-derive from v.origin
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      else it.forward();  // const reference: keep the stored iterators, just skip ahead
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {  // same steps as the non-const begin()
+      const_iterator it; it.itb = v.begin_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      else it.forward();
+      return it;
+    }
+    static iterator end(this_type &v) {  // both itb and itbe start at v.end_
+      iterator it;
+      it.itb = v.end_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      else it.forward();
+      return it;
+    }
+    static const_iterator end(const this_type &v) {  // same steps as the non-const end()
+      const_iterator it; it.itb = v.end_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      else it.forward();
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_) {  // two passes: collect indices, then zero them
+      std::deque<size_type> ind;
+      iterator it = begin_;
+      for (; it != end_; ++it) ind.push_front(it.index());  // first pass only reads the indices
+      for (; !(ind.empty()); ind.pop_back())  // back() yields them in the order they were visited
+	access(o, begin_, end_, ind.back()) = value_type(0);
+    }
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i)  // i is mapped through it.si.index()
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)  // i is mapped through it.si.index()
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+  };
+
+  template <typename PT, typename SUBI>   /* prints m through gmm::write */
+  std::ostream &operator <<(std::ostream &o, const sparse_sub_vector<PT, SUBI>& m)
+  { gmm::write(o, m); return o; }
+
+  /* ********************************************************************* */
+  /*		skyline sub-vectors                                        */
+  /* ********************************************************************* */
+
+    template <typename IT, typename MIT, typename SUBI>
+  struct skyline_sub_vector_iterator {
+    // Iterator adaptor that moves the underlying iterator itb in strides of si.step().
+    IT itb;
+    SUBI si;
+
+    typedef std::iterator_traits<IT>                traits_type;
+    typedef typename traits_type::value_type        value_type;
+    typedef typename traits_type::pointer           pointer;
+    typedef typename traits_type::reference         reference;
+    typedef typename traits_type::difference_type   difference_type;
+    typedef std::bidirectional_iterator_tag         iterator_category;
+    typedef size_t                                  size_type;
+    typedef skyline_sub_vector_iterator<IT, MIT, SUBI>    iterator;
+    // index(): (itb.index() - si.min) divided by si.step(), rounded up via the "+ step - 1" term.
+    size_type index(void) const
+    { return (itb.index() - si.min + si.step() - 1) / si.step(); }
+    void backward(void);  // NOTE(review): declared only; no definition is visible in this part of the file
+    iterator &operator ++()
+    { itb += si.step(); return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --()
+    { itb -= si.step(); return *this; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    // Offset arithmetic: every offset is multiplied by si.step() before it is applied to itb.
+    iterator &operator +=(difference_type i)
+    { itb += si.step() * i; return *this; }
+    iterator &operator -=(difference_type i)
+    { itb -= si.step() * i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator ii = *this; return (ii += i); }
+    iterator operator -(difference_type i) const
+    { iterator ii = *this; return (ii -= i); }
+    difference_type operator -(const iterator &i) const
+    { return (itb - i.itb) / si.step(); }
+
+    reference operator *() const  { return *itb; }
+    reference operator [](int ii) { return *(itb + ii * si.step());  }
+    // Comparisons are made on index(), not on the raw iterator itb.
+    bool operator ==(const iterator &i) const { return index() == i.index();}
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return index()  < i.index();}
+
+    skyline_sub_vector_iterator(void) {}
+    skyline_sub_vector_iterator(const IT &it, const SUBI &s)
+      : itb(it), si(s) {}
+    skyline_sub_vector_iterator(const skyline_sub_vector_iterator<MIT, MIT,
+	 SUBI> &it) : itb(it.itb), si(it.si) {}  // conversion from the <MIT, MIT, SUBI> instantiation
+  };
+
+  template <typename IT, typename SUBI>  // narrows the range [it, ite) using si.min, si.max and si.step()
+  void update_for_sub_skyline(IT &it, IT &ite, const SUBI &si) {
+    if (it.index() >= si.max || ite.index() <= si.min) { it = ite; return; }  // no overlap: make the range empty
+    ptrdiff_t dec1 = si.min - it.index(), dec2 = ite.index() - si.max;  // signed gaps at the front and at the back
+    it  += (dec1 < 0) ? ((si.step()-((-dec1) % si.step())) % si.step()) : dec1;  // dec1 >= 0: skip dec1 entries; else pad up to a multiple of step
+    ite -= (dec2 < 0) ? -((-dec2) % si.step()) : dec2;  // dec2 >= 0: drop dec2 entries; NOTE(review): mixed signed/unsigned operands, confirm step() type
+  }
+
+  template <typename PT, typename SUBI> struct skyline_sub_vector {
+    typedef skyline_sub_vector<PT, SUBI> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;  // the underlying vector type
+    typedef V * pV;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Stored state: range of the underlying vector (already narrowed by the constructors), origin, sub-index.
+    iterator begin_, end_;
+    porigin_type origin;
+    SUBI si;
+
+    size_type size(void) const { return si.size(); }
+    // Element i is read through linalg_traits<V>::access at position si.index(i).
+    reference operator[](size_type i) const
+    { return linalg_traits<V>::access(origin, begin_, end_, si.index(i)); }
+
+    skyline_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)),
+       end_(vect_end(v)), origin(linalg_origin(v)), si(s) {
+      update_for_sub_skyline(begin_, end_, si);  // narrow the stored range to si
+    }
+    skyline_sub_vector(const V &v, const SUBI &s)  // const is cast away before calling vect_begin & co.
+      : begin_(vect_begin(const_cast<V &>(v))),
+	end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), si(s) {
+      update_for_sub_skyline(begin_, end_, si);  // narrow the stored range to si
+    }
+    skyline_sub_vector() {}
+    skyline_sub_vector(const skyline_sub_vector<pV, SUBI> &cr)  // copies from the V * instantiation
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {}
+  };
+
+  // Recompute it.itb from origin o: fetch the base begin/end, then narrow them with it.si.
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_begin(skyline_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                           skyline_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    IT base_end = it.itb;  // starting value only; handed to set_to_end just below
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(base_end, o, base_ptr(), base_ref());
+    update_for_sub_skyline(it.itb, base_end, it.si);
+  }
+  // Overload taken for a const skyline_sub_vector *: base begin/end from o, narrowed with it.si.
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_begin(skyline_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                           const skyline_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    IT base_end = it.itb;  // starting value only; handed to set_to_end just below
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(base_end, o, base_ptr(), base_ref());
+    update_for_sub_skyline(it.itb, base_end, it.si);
+  }
+  
+  // Recompute it.itb as the end: fetch the base begin/end from o, then narrow them with it.si.
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_end(skyline_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                         skyline_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    IT base_begin = it.itb;  // starting value only; handed to set_to_begin just below
+    set_to_begin(base_begin, o, base_ptr(), base_ref());
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    update_for_sub_skyline(base_begin, it.itb, it.si);
+  }
+  // Overload taken for a const skyline_sub_vector *: it.itb becomes the narrowed end.
+  template <typename IT, typename MIT, typename SUBI, typename ORG, typename PT>
+  inline void set_to_end(skyline_sub_vector_iterator<IT, MIT, SUBI> &it, ORG o,
+                         const skyline_sub_vector<PT, SUBI> *, linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > traits;
+    typedef typename traits::pV base_ptr;
+    typedef typename traits::V_reference base_ref;
+    IT base_begin = it.itb;  // starting value only; handed to set_to_begin just below
+    set_to_begin(base_begin, o, base_ptr(), base_ref());
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    update_for_sub_skyline(base_begin, it.itb, it.si);
+  }
+
+
+  template <typename PT, typename SUBI>
+  struct linalg_traits<skyline_sub_vector<PT, SUBI> > {  // traits specialization for skyline_sub_vector
+    typedef skyline_sub_vector<PT, SUBI> this_type;
+    typedef this_type *pthis_type;
+    typedef typename std::iterator_traits<PT>::value_type V;  // underlying vector type
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef V * pV;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, typename
+            linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<V>::const_iterator const_V_iterator;
+    typedef typename linalg_traits<V>::iterator V_iterator;    
+    typedef typename select_ref<const_V_iterator, V_iterator, 
+				PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    skyline_sub_vector_iterator<pre_iterator, pre_iterator, SUBI>,
+	    PT>::ref_type iterator;
+    typedef skyline_sub_vector_iterator<const_V_iterator, pre_iterator, SUBI>
+            const_iterator;
+    typedef abstract_skyline storage_type;  // the sub-vector keeps the skyline storage tag
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) {  // start from v.begin_, refresh unless PT is a const reference
+      iterator it;
+      it.itb = v.begin_; it.si = v.si;
+      if (!is_const_reference(is_reference()))  // not a const reference: re-derive from v.origin
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {  // same steps as the non-const begin()
+      const_iterator it; it.itb = v.begin_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      return it;
+    }
+    static iterator end(this_type &v) {  // start from v.end_, refresh unless PT is a const reference
+      iterator it;
+      it.itb = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator end(const this_type &v) {  // same steps as the non-const end()
+      const_iterator it; it.itb = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite)  // origin is unused here
+    { std::fill(it, ite, value_type(0)); }
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i)  // i is mapped through it.si.index()
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)  // i is mapped through it.si.index()
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+  };
+
+  template <typename PT, typename SUBI>   /* prints m through gmm::write */
+  std::ostream &operator <<(std::ostream &o, const skyline_sub_vector<PT, SUBI>& m)
+  { gmm::write(o, m); return o; }
+
+  /* ******************************************************************** */
+  /*		sub vector.                                               */
+  /* ******************************************************************** */
+  /* sub_vector_type<PT, SUBI>::vector_type is the sub vector type        */
+  /* returned by sub_vector(v, sub_index)                                 */
+  /************************************************************************/
+
+  template <typename PT, typename SUBI, typename st_type> struct svrt_ir {  // fallback for unlisted combinations
+    typedef abstract_null_type vector_type;
+  };
+  // Dense storage: one specialization per sub-index kind, each picking a tab_ref_* type.
+  template <typename PT>
+  struct svrt_ir<PT, sub_index, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_index_ref_with_origin<iterator,
+      sub_index::const_iterator, V> vector_type;
+  }; 
+
+  template <typename PT>
+  struct svrt_ir<PT, unsorted_sub_index, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_index_ref_with_origin<iterator,
+      unsorted_sub_index::const_iterator, V> vector_type;
+  }; 
+
+  template <typename PT>
+  struct svrt_ir<PT, sub_interval, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_with_origin<iterator, V> vector_type;
+  }; 
+
+  template <typename PT>
+  struct svrt_ir<PT, sub_slice, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_reg_spaced_with_origin<iterator, V> vector_type;
+  };
+  // Skyline storage: skyline_sub_vector for a generic SUBI ...
+  template <typename PT, typename SUBI>
+  struct svrt_ir<PT, SUBI, abstract_skyline> {
+    typedef skyline_sub_vector<PT, SUBI> vector_type;
+  };
+  // ... except sub_index and unsorted_sub_index, which map to sparse_sub_vector.
+  template <typename PT>
+  struct svrt_ir<PT, sub_index, abstract_skyline> {
+    typedef sparse_sub_vector<PT, sub_index> vector_type;
+  };
+
+  template <typename PT>
+  struct svrt_ir<PT, unsorted_sub_index, abstract_skyline> {
+    typedef sparse_sub_vector<PT, unsorted_sub_index> vector_type;
+  };
+
+  // Sparse storage: sparse_sub_vector for every SUBI.
+  template <typename PT, typename SUBI>
+  struct svrt_ir<PT, SUBI, abstract_sparse> {
+    typedef sparse_sub_vector<PT, SUBI> vector_type;
+  };
+
+  template <typename PT, typename SUBI>  // maps (pointer type, sub-index type) to the sub-vector type
+  struct sub_vector_type {
+    typedef typename std::iterator_traits<PT>::value_type V;  // the vector type PT points to
+    typedef typename svrt_ir<PT, SUBI,
+      typename linalg_traits<V>::storage_type>::vector_type vector_type;  // chosen by V's storage_type via svrt_ir
+  };
+
+  // Sub-vector of a const v restricted to the indices described by si.
+  template <typename V, typename SUBI>
+  typename select_return<typename sub_vector_type<const V *, SUBI>::vector_type,
+    typename sub_vector_type<V *, SUBI>::vector_type, const V *>::return_type
+  sub_vector(const V &v, const SUBI &si) {
+    GMM_ASSERT2(si.last() <= vect_size(v),
+                "sub vector too large, " << si.last() << " > " << vect_size(v));
+    typedef typename select_return<
+      typename sub_vector_type<const V *, SUBI>::vector_type,
+      typename sub_vector_type<V *, SUBI>::vector_type, const V *>::return_type result_type;
+    return result_type(linalg_cast(v), si);
+  }
+
+  // Sub-vector of a mutable v restricted to the indices described by si.
+  template <typename V, typename SUBI>
+  typename select_return<typename sub_vector_type<const V *, SUBI>::vector_type,
+    typename sub_vector_type<V *, SUBI>::vector_type, V *>::return_type
+  sub_vector(V &v, const SUBI &si) {
+    GMM_ASSERT2(si.last() <= vect_size(v),
+                "sub vector too large, " << si.last() << " > " << vect_size(v));
+    typedef typename select_return<
+      typename sub_vector_type<const V *, SUBI>::vector_type,
+      typename sub_vector_type<V *, SUBI>::vector_type, V *>::return_type result_type;
+    return result_type(linalg_cast(v), si);
+  }
+
+}
+
+#endif //  GMM_SUB_VECTOR_H__
diff --git a/gmm/gmm_superlu_interface.h b/gmm/gmm_superlu_interface.h
new file mode 100644
index 000000000..b732445e7
--- /dev/null
+++ b/gmm/gmm_superlu_interface.h
@@ -0,0 +1,410 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_superlu_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 17, 2003.
+   @brief Interface with SuperLU (LU direct solver for sparse matrices).
+*/
+#if defined(GMM_USES_SUPERLU) && !defined(GETFEM_VERSION)
+
+#ifndef GMM_SUPERLU_INTERFACE_H
+#define GMM_SUPERLU_INTERFACE_H
+
+#include "gmm_kernel.h"
+
+typedef int int_t;
+
+/* because SRC/util.h defines TRUE and FALSE ... */
+#ifdef TRUE
+# undef TRUE
+#endif
+#ifdef FALSE
+# undef FALSE
+#endif
+
+#include "superlu/slu_Cnames.h"
+#include "superlu/supermatrix.h"
+#include "superlu/slu_util.h"
+
+namespace SuperLU_S {
+#include "superlu/slu_sdefs.h"
+}
+namespace SuperLU_D {
+#include "superlu/slu_ddefs.h"
+}
+namespace SuperLU_C {
+#include "superlu/slu_cdefs.h"
+}
+namespace SuperLU_Z {
+#include "superlu/slu_zdefs.h" 
+}
+
+
+
+namespace gmm {
+
+  /*  interface for Create_CompCol_Matrix */
+
+  // float overload: forwards to SuperLU_S::sCreate_CompCol_Matrix with SLU_NC, SLU_S, SLU_GE.
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    float *a, int *ir, int *jc)
+  { SuperLU_S::sCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
+                                      SLU_NC, SLU_S, SLU_GE); }
+  
+  // double overload: forwards to SuperLU_D::dCreate_CompCol_Matrix with SLU_NC, SLU_D, SLU_GE.
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    double *a, int *ir, int *jc)
+  { SuperLU_D::dCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
+                                      SLU_NC, SLU_D, SLU_GE); }
+  
+  // std::complex<float> overload: a is reinterpreted as SuperLU_C::complex * before forwarding.
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    std::complex<float> *a, int *ir, int *jc)
+  { SuperLU_C::cCreate_CompCol_Matrix(A, m, n, nnz, reinterpret_cast<SuperLU_C::complex *>(a),
+                                      ir, jc, SLU_NC, SLU_C, SLU_GE); }
+  
+  // std::complex<double> overload: a is reinterpreted as SuperLU_Z::doublecomplex * before forwarding.
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    std::complex<double> *a, int *ir, int *jc) {
+    SuperLU_Z::doublecomplex *values = reinterpret_cast<SuperLU_Z::doublecomplex *>(a);
+    SuperLU_Z::zCreate_CompCol_Matrix(A, m, n, nnz, values, ir, jc, SLU_NC, SLU_Z, SLU_GE);
+  }
+
+  /*  interface for Create_Dense_Matrix */
+
+  // One overload per element type of a. Each forwards to the SuperLU routine of
+  // the matching namespace, passing SLU_DN, the SLU_S/D/C/Z tag and SLU_GE.
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, float *a, int k) {
+    SuperLU_S::sCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_S, SLU_GE);
+  }
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, double *a, int k) {
+    SuperLU_D::dCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_D, SLU_GE);
+  }
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, std::complex<float> *a, int k) {
+    SuperLU_C::cCreate_Dense_Matrix(A, m, n, reinterpret_cast<SuperLU_C::complex *>(a), k, SLU_DN, SLU_C, SLU_GE);
+  }
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, std::complex<double> *a, int k) {
+    SuperLU_Z::zCreate_Dense_Matrix(A, m, n, reinterpret_cast<SuperLU_Z::doublecomplex *>(a), k, SLU_DN, SLU_Z, SLU_GE);
+  }
+
+  /*  interface for gssv */
+
+#define DECL_GSSV(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
+  inline void SuperLU_gssv(superlu_options_t *options, SuperMatrix *A, int *p, \
+  int *q, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,               \
+  SuperLUStat_t *stats, int *info, KEYTYPE) {                           \
+  NAMESPACE::FNAME(options, A, p, q, L, U, B, stats, info);             \
+  }
+  // Each expansion defines a SuperLU_gssv overload selected by its unnamed KEYTYPE argument; FLOATTYPE is unused by this macro.
+  DECL_GSSV(SuperLU_S,sgssv,float,float)
+  DECL_GSSV(SuperLU_C,cgssv,float,std::complex<float>)
+  DECL_GSSV(SuperLU_D,dgssv,double,double)
+  DECL_GSSV(SuperLU_Z,zgssv,double,std::complex<double>)
+
+  /*  interface for gssvx */
+
+#define DECL_GSSVX(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
+    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,	\
+		     int *perm_c, int *perm_r, int *etree, char *equed,  \
+		     FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,         \
+		     SuperMatrix *U, void *work, int lwork,              \
+		     SuperMatrix *B, SuperMatrix *X,                     \
+		     FLOATTYPE *recip_pivot_growth,                      \
+		     FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \
+		     SuperLUStat_t *stats, int *info, KEYTYPE) {         \
+    NAMESPACE::mem_usage_t mem_usage;                                    \
+    NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
+		     U, work, lwork, B, X, recip_pivot_growth, rcond,    \
+		     ferr, berr, &mem_usage, stats, info);               \
+    return mem_usage.for_lu; /* bytes used by the factor storage */     \
+  }
+  // Each expansion defines a SuperLU_gssvx overload selected by its unnamed KEYTYPE argument; FLOATTYPE types the R, C, recip_pivot_growth, rcond, ferr and berr pointers.
+  DECL_GSSVX(SuperLU_S,sgssvx,float,float)
+  DECL_GSSVX(SuperLU_C,cgssvx,float,std::complex<float>)
+  DECL_GSSVX(SuperLU_D,dgssvx,double,double)
+  DECL_GSSVX(SuperLU_Z,zgssvx,double,std::complex<double>)
+
+  /* ********************************************************************* */
+  /*   SuperLU solve interface                                             */
+  /* ********************************************************************* */
+
+  template <typename MAT, typename VECTX, typename VECTB>
+  int SuperLU_solve(const MAT &A, const VECTX &X_, const VECTB &B,
+                    double& rcond_, int permc_spec = 3) {
+    VECTX &X = const_cast<VECTX &>(X_);
+    /* Solves A x = B through SuperLU_gssvx: X_ receives the solution, rcond_
+     * the reciprocal condition estimate, and the SuperLU `info` is returned.
+     *   permc_spec = 0: use the natural ordering
+     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
+     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
+     *   permc_spec = 3: use approximate minimum degree column ordering
+     */
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    int m = mat_nrows(A), n = mat_ncols(A), nrhs = 1, info = 0;
+
+    csc_matrix<T> csc_A(m, n); gmm::copy(A, csc_A);
+    std::vector<T> rhs(m), sol(m);
+    gmm::copy(B, rhs);
+
+    int nz = nnz(csc_A);
+    if ((2 * nz / n) >= m)
+      GMM_WARNING2("CAUTION : it seems that SuperLU has a problem"
+                   " for nearly dense sparse matrices");
+
+    superlu_options_t options;
+    set_default_options(&options);
+    options.ColPerm = NATURAL;
+    options.PrintStat = NO;
+    options.ConditionNumber = YES;
+    switch (permc_spec) {
+    case 1 : options.ColPerm = MMD_ATA; break;
+    case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
+    case 3 : options.ColPerm = COLAMD; break;
+    }
+    SuperLUStat_t stat;
+    StatInit(&stat);
+    // Values go in as T*: a fixed double* cast mislabels float/complex data.
+    SuperMatrix SA, SL, SU, SB, SX; // SuperLU format.
+    Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])),
+                          (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0])));
+    Create_Dense_Matrix(&SB, m, nrhs, &rhs[0], m);
+    Create_Dense_Matrix(&SX, m, nrhs, &sol[0], m);
+    memset(&SL,0,sizeof SL);
+    memset(&SU,0,sizeof SU);
+
+    std::vector<int> etree(n);
+    char equed[] = "B";
+    std::vector<R> Rscale(m),Cscale(n); // row and column scale factors
+    std::vector<R> ferr(nrhs), berr(nrhs);
+    R recip_pivot_gross, rcond;
+    std::vector<int> perm_r(m), perm_c(n);
+
+    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
+                  &etree[0] /* output */, equed /* output         */,
+                  &Rscale[0] /* row scale factors (output)        */,
+                  &Cscale[0] /* col scale factors (output)        */,
+                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
+                  NULL /* work                                    */,
+                  0 /* lwork: superlu auto allocates (input)      */,
+                  &SB /* rhs */, &SX /* solution                  */,
+                  &recip_pivot_gross /* reciprocal pivot growth   */
+                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
+                  &rcond /*estimate of the reciprocal condition   */
+                  /* number of the matrix A after equilibration   */,
+                  &ferr[0] /* estimated forward error             */,
+                  &berr[0] /* relative backward error             */,
+                  &stat, &info, T());
+    rcond_ = rcond;
+    Destroy_SuperMatrix_Store(&SB);
+    Destroy_SuperMatrix_Store(&SX);
+    Destroy_SuperMatrix_Store(&SA);
+    Destroy_SuperNode_Matrix(&SL);
+    Destroy_CompCol_Matrix(&SU);
+    StatFree(&stat);
+    GMM_ASSERT1(info >= 0, "SuperLU solve failed: info =" << info);
+    if (info > 0) GMM_WARNING1("SuperLU solve failed: info =" << info);
+    gmm::copy(sol, X);
+    return info;
+  }
+
+  template <class T> class SuperLU_factor {
+    typedef typename number_traits<T>::magnitude_type R;
+    // Holds a SuperLU factorization built by build_with() and reused by solve().
+    csc_matrix<T> csc_A;
+    mutable SuperMatrix SA, SL, SB, SU, SX;
+    mutable SuperLUStat_t stat;
+    mutable superlu_options_t options;
+    float memory_used; // value returned by SuperLU_gssvx in build_with()
+    mutable std::vector<int> etree, perm_r, perm_c;
+    mutable std::vector<R> Rscale, Cscale;
+    mutable std::vector<R> ferr, berr;
+    mutable std::vector<T> rhs;
+    mutable std::vector<T> sol;
+    mutable bool is_init; // true once build_with() has succeeded
+    mutable char equed;
+
+  public :
+    enum { LU_NOTRANSP, LU_TRANSP, LU_CONJUGATED };
+    void free_supermatrix(void);
+    template <class MAT> void build_with(const MAT &A,  int permc_spec = 3);
+    template <typename VECTX, typename VECTB> 
+    /* transp = LU_NOTRANSP   -> solves Ax = B
+       transp = LU_TRANSP     -> solves A'x = B
+       transp = LU_CONJUGATED -> SuperLU CONJ: conjugate transpose, A^H x = B */
+    void solve(const VECTX &X_, const VECTB &B, int transp=LU_NOTRANSP) const;
+    SuperLU_factor(void) : memory_used(0), is_init(false) {}
+    SuperLU_factor(const SuperLU_factor& other)
+      : memory_used(0), is_init(false) { // memsize() is defined before a build
+      GMM_ASSERT2(!(other.is_init),
+                  "copy of initialized SuperLU_factor is forbidden");
+    }
+    SuperLU_factor& operator=(const SuperLU_factor& other) {
+      GMM_ASSERT2(!(other.is_init) && !is_init,
+                  "assignment of initialized SuperLU_factor is forbidden");
+      return *this;
+    }
+    ~SuperLU_factor() { free_supermatrix(); }
+    float memsize() const { return memory_used; }
+  };
+
+
+  template <class T> void SuperLU_factor<T>::free_supermatrix(void) {
+      if (!is_init) return; // nothing has been built, nothing to release
+      if (SB.Store) Destroy_SuperMatrix_Store(&SB);
+      if (SX.Store) Destroy_SuperMatrix_Store(&SX);
+      if (SA.Store) Destroy_SuperMatrix_Store(&SA);
+      if (SL.Store) Destroy_SuperNode_Matrix(&SL);
+      if (SU.Store) Destroy_CompCol_Matrix(&SU);
+      is_init = false; // a later call (or the destructor) must not free again
+    }
+
+    
+    template <class T> template <class MAT>
+    void SuperLU_factor<T>::build_with(const MAT &A,  int permc_spec) {
+    /* Factorizes A with SuperLU_gssvx (zero right-hand sides) and keeps the
+     * factors for later solve() calls; free_supermatrix() is called first.
+     *   permc_spec = 0: use the natural ordering
+     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
+     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
+     *   permc_spec = 3: use approximate minimum degree column ordering
+     */
+      free_supermatrix();
+      int n = mat_nrows(A), m = mat_ncols(A), info = 0;
+      csc_A.init_with(A);
+      // NOTE(review): n holds the row count and m the column count here.
+      rhs.resize(m); sol.resize(m);
+      gmm::clear(rhs);
+      int nz = nnz(csc_A);
+
+      set_default_options(&options);
+      options.ColPerm = NATURAL;
+      options.PrintStat = NO;
+      options.ConditionNumber = NO;
+      switch (permc_spec) {
+      case 1 : options.ColPerm = MMD_ATA; break;
+      case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
+      case 3 : options.ColPerm = COLAMD; break;
+      }
+      StatInit(&stat);
+      // Values go in as T*: a fixed double* cast mislabels float/complex data.
+      Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])),
+                            (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0])));
+
+      Create_Dense_Matrix(&SB, m, 0, &rhs[0], m);
+      Create_Dense_Matrix(&SX, m, 0, &sol[0], m);
+      memset(&SL,0,sizeof SL);
+      memset(&SU,0,sizeof SU);
+      equed = 'B';
+      Rscale.resize(m); Cscale.resize(n); etree.resize(n);
+      ferr.resize(1); berr.resize(1);
+      R recip_pivot_gross, rcond;
+      perm_r.resize(m); perm_c.resize(n);
+      memory_used = SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
+                    &etree[0] /* output */, &equed /* output        */,
+                    &Rscale[0] /* row scale factors (output)        */,
+                    &Cscale[0] /* col scale factors (output)        */,
+                    &SL /* fact L (output)*/, &SU /* fact U (output)*/,
+                    NULL /* work                                    */,
+                    0 /* lwork: superlu auto allocates (input)      */,
+                    &SB /* rhs */, &SX /* solution                  */,
+                    &recip_pivot_gross /* reciprocal pivot growth   */
+                    /* factor max_j( norm(A_j)/norm(U_j) ).         */,
+                    &rcond /*estimate of the reciprocal condition   */
+                    /* number of the matrix A after equilibration   */,
+                    &ferr[0] /* estimated forward error             */,
+                    &berr[0] /* relative backward error             */,
+                    &stat, &info, T());
+      // Swap the zero-column dense wrappers for one-column ones used by solve().
+      Destroy_SuperMatrix_Store(&SB);
+      Destroy_SuperMatrix_Store(&SX);
+      Create_Dense_Matrix(&SB, m, 1, &rhs[0], m);
+      Create_Dense_Matrix(&SX, m, 1, &sol[0], m);
+      StatFree(&stat);
+
+      GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
+      is_init = true;
+    }
+    
+    template <class T> template <typename VECTX, typename VECTB> 
+    void SuperLU_factor<T>::solve(const VECTX &X_, const VECTB &B,
+                                  int transp) const {
+      // Solve step on the stored factors: B is copied into rhs, gssvx runs
+      // with Fact = FACTORED and no iterative refinement, and sol is copied
+      // into X_ (written through a const_cast).
+      gmm::copy(B, rhs);
+      options.Fact = FACTORED;
+      options.IterRefine = NOREFINE;
+
+      // Translate the transp selector into the SuperLU option.
+      if (transp == LU_NOTRANSP) options.Trans = NOTRANS;
+      else if (transp == LU_TRANSP) options.Trans = TRANS;
+      else if (transp == LU_CONJUGATED) options.Trans = CONJ;
+      else { GMM_ASSERT1(false, "invalid value for transposition option"); }
+
+      StatInit(&stat);
+      int info = 0;
+      R pivot_growth, rcond_estimate; // outputs of gssvx, not used afterwards
+      SuperLU_gssvx(&options, &SA,
+                    &perm_c[0], &perm_r[0], &etree[0], &equed,
+                    &Rscale[0], &Cscale[0], // row / column scale factors
+                    &SL, &SU,               // factors L and U
+                    NULL, 0,                // work, lwork
+                    &SB, &SX,               // right-hand side, solution
+                    &pivot_growth,          // reciprocal pivot growth
+                    &rcond_estimate,        // reciprocal condition estimate
+                    &ferr[0], &berr[0],     // forward / backward error
+                    &stat, &info, T());
+      StatFree(&stat);
+
+      GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
+      VECTX &X = const_cast<VECTX &>(X_);
+      gmm::copy(sol, X);
+    }
+
+  // Applies the stored factorization: v2 receives the solution for rhs v1.
+  template <typename T, typename V1, typename V2>
+  inline void mult(const SuperLU_factor<T>& P, const V1 &v1, const V2 &v2)
+  { P.solve(v2, v1, SuperLU_factor<T>::LU_NOTRANSP); }
+
+  // Same call with the LU_TRANSP mode of solve().
+  template <typename T, typename V1, typename V2> inline
+  void transposed_mult(const SuperLU_factor<T>& P, const V1 &v1, const V2 &v2)
+  { P.solve(v2, v1, SuperLU_factor<T>::LU_TRANSP); }
+
+}
+
+  
+#endif // GMM_SUPERLU_INTERFACE_H
+
+#endif // GMM_USES_SUPERLU
diff --git a/gmm/gmm_transposed.h b/gmm/gmm_transposed.h
new file mode 100644
index 000000000..d9b6a8182
--- /dev/null
+++ b/gmm/gmm_transposed.h
@@ -0,0 +1,244 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_transposed.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 10, 2002.
+   @brief Generic transposed matrices
+*/
+#ifndef GMM_TRANSPOSED_H__
+#define GMM_TRANSPOSED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		transposed reference                    		   */
+  /* ********************************************************************* */
+  
+  template <typename PT> struct  transposed_row_ref {
+    // Transposed view of a matrix M accessed by rows: rows of M act as columns.
+    typedef transposed_row_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_col_iterator, typename linalg_traits<this_type>
+            ::col_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Row range of the viewed matrix, its origin, and the view's dimensions.
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+    // Dimensions are swapped: nr = mat_ncols(m), nc = mat_nrows(m).
+    transposed_row_ref(ref_M m)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {}
+    // Conversion from the transposed_row_ref<M *> instantiation.
+    transposed_row_ref(const transposed_row_ref<CPT> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+    // Entry (i, j) of the view is entry i of row j of the viewed matrix.
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_+j, i); }
+  };
+  // linalg_traits for transposed_row_ref: the rows of M are exposed as columns.
+  template <typename PT> struct linalg_traits<transposed_row_ref<PT> > {
+    typedef transposed_row_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_row_type; // row types are null on this view
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef typename linalg_traits<M>::const_sub_row_type const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, typename
+	    linalg_traits<M>::sub_row_type, PT>::ref_type sub_col_type;
+    typedef typename linalg_traits<M>::const_row_iterator const_col_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::row_iterator, PT>::ref_type col_iterator;
+    typedef col_major sub_orientation; // column types map to M's row types
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type ncols(const this_type &v) { return v.nc; }
+    static size_type nrows(const this_type &v) { return v.nr; }
+    static const_sub_col_type col(const const_col_iterator &it)
+    { return linalg_traits<M>::row(it); } // a column of the view is a row of M
+    static sub_col_type col(const col_iterator &it)
+    { return linalg_traits<M>::row(it); }
+    static col_iterator col_begin(this_type &m) { return m.begin_; }
+    static col_iterator col_end(this_type &m) { return m.end_; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return m.begin_; }
+    static const_col_iterator col_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &v); // defined after the struct
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(itcol, i); } // forwarded to M's access
+    static reference access(const col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(itcol, i); }
+  };
+  // Clears the view one column at a time (each column is a row of M).
+  template <typename PT>
+  void linalg_traits<transposed_row_ref<PT> >::do_clear(this_type &v) {
+    col_iterator cur = mat_col_begin(v), last = mat_col_end(v);
+    while (cur != last) { clear(col(cur)); ++cur; }
+  }
+  // Stream insertion for the view: delegates to gmm::write.
+  template <typename PT>
+  std::ostream &operator<<(std::ostream &o, const transposed_row_ref<PT> &m)
+  { gmm::write(o, m); return o; }
+
+  template <typename PT> struct  transposed_col_ref {
+    // Transposed view of a matrix M accessed by columns: columns act as rows.
+    typedef transposed_col_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_row_iterator, typename linalg_traits<this_type>
+            ::row_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    // Column range of the viewed matrix, its origin, and the view's dimensions.
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+    // Dimensions are swapped: nr = mat_ncols(m), nc = mat_nrows(m).
+    transposed_col_ref(ref_M m)
+      : begin_(mat_col_begin(m)), end_(mat_col_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {}
+    // Conversion from the transposed_col_ref<M *> instantiation.
+    transposed_col_ref(const transposed_col_ref<CPT> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+    // Entry (i, j) of the view is entry j of column i of the viewed matrix.
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_+i, j); }
+  };
+  // linalg_traits for transposed_col_ref: the columns of M are exposed as rows.
+  template <typename PT> struct linalg_traits<transposed_col_ref<PT> > {
+    typedef transposed_col_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_col_type; // column types are null on this view
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef typename linalg_traits<M>::const_sub_col_type const_sub_row_type;
+    typedef typename select_ref<abstract_null_type, typename
+	    linalg_traits<M>::sub_col_type, PT>::ref_type sub_row_type;
+    typedef typename linalg_traits<M>::const_col_iterator const_row_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::col_iterator, PT>::ref_type row_iterator;
+    typedef row_major sub_orientation; // row types map to M's column types
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type nrows(const this_type &v)
+    { return v.nr; }
+    static size_type ncols(const this_type &v)
+    { return v.nc; }
+    static const_sub_row_type row(const const_row_iterator &it)
+    { return linalg_traits<M>::col(it); } // a row of the view is a column of M
+    static sub_row_type row(const row_iterator &it)
+    { return linalg_traits<M>::col(it); }
+    static row_iterator row_begin(this_type &m) { return m.begin_; }
+    static row_iterator row_end(this_type &m) { return m.end_; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return m.begin_; }
+    static const_row_iterator row_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m); // defined after the struct
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(itrow, i); } // forwarded to M's access
+    static reference access(const row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(itrow, i); }
+  };
+  // Clears the view one row at a time (each row is a column of M).
+  template <typename PT>
+  void linalg_traits<transposed_col_ref<PT> >::do_clear(this_type &v) {
+    row_iterator cur = mat_row_begin(v), last = mat_row_end(v);
+    while (cur != last) { clear(row(cur)); ++cur; }
+  }
+  // Stream insertion for the view: delegates to gmm::write.
+  template <typename PT>
+  std::ostream &operator<<(std::ostream &o, const transposed_col_ref<PT> &m)
+  { gmm::write(o, m); return o; }
+  // Type selection for transposed(): picks the view type from the orientation.
+  template <typename TYPE, typename PT> struct transposed_return_ {
+    typedef abstract_null_type return_type; // any tag without a specialization
+  };
+  template <typename PT> struct transposed_return_<row_major, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<transposed_row_ref<const L *>,
+            transposed_row_ref< L *>, PT>::return_type return_type;
+  };
+  template <typename PT> struct transposed_return_<col_major, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<transposed_col_ref<const L *>,
+            transposed_col_ref< L *>, PT>::return_type return_type;
+  };
+  template <typename PT> struct transposed_return { // entry point used below
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename transposed_return_<typename principal_orientation_type<
+            typename linalg_traits<L>::sub_orientation>::potype,
+	    PT>::return_type return_type;
+  };
+  template <typename L> inline // overload for a const L
+  typename transposed_return<const L *>::return_type transposed(const L &l) {
+    typedef typename transposed_return<const L *>::return_type view_type;
+    return view_type(linalg_cast(const_cast<L &>(l)));
+  }
+  template <typename L> inline // overload for a non-const L
+  typename transposed_return<L *>::return_type transposed(L &l) {
+    typedef typename transposed_return<L *>::return_type view_type;
+    return view_type(linalg_cast(l));
+  }
+
+}
+
+#endif //  GMM_TRANSPOSED_H__
diff --git a/gmm/gmm_tri_solve.h b/gmm/gmm_tri_solve.h
new file mode 100644
index 000000000..d05520eb3
--- /dev/null
+++ b/gmm/gmm_tri_solve.h
@@ -0,0 +1,222 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_tri_solve.h
+   @author Yves Renard
+   @date October 13, 2002.
+   @brief Solve triangular linear system for dense matrices.
+*/
+
+#ifndef GMM_TRI_SOLVE_H__
+#define GMM_TRI_SOLVE_H__
+
+#include "gmm_interface.h"
+
+namespace gmm {
+  // Upper triangular solve in place, column by column, sparse columns.
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator ITER;
+    for (int col = int(k) - 1; col >= 0; --col) {
+      COL column = mat_const_col(T, col);
+      ITER e = vect_const_begin(column), last = vect_const_end(column);
+      if (!is_unit) x[col] /= column[col];
+      const typename linalg_traits<TriMatrix>::value_type x_col = x[col];
+      for (; e != last; ++e) // only entries strictly above the diagonal
+        if (int(e.index()) < col) x[e.index()] -= x_col * (*e);
+    }
+  }
+  // Upper triangular solve in place, column by column, dense columns.
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator ITER;
+    for (int col = int(k) - 1; col >= 0; --col) {
+      COL column = mat_const_col(T, col);
+      if (!is_unit) x[col] /= column[col];
+      const typename linalg_traits<TriMatrix>::value_type x_col = x[col];
+      ITER src = vect_const_begin(column), src_end = src + col;
+      typename linalg_traits<VecX>::iterator dst = vect_begin(x);
+      while (src != src_end) { *dst -= x_col * (*src); ++src; ++dst; }
+    }
+  }
+  // Lower triangular solve in place, column by column, sparse columns.
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator ITER;
+    for (int col = 0; col < int(k); ++col) {
+      COL column = mat_const_col(T, col);
+      ITER e = vect_const_begin(column), last = vect_const_end(column);
+      if (!is_unit) x[col] /= column[col];
+      const typename linalg_traits<TriMatrix>::value_type x_col = x[col];
+      for (; e != last; ++e) {
+        // Only entries strictly below the diagonal and with index below k.
+        if (int(e.index()) > col && e.index() < k) x[e.index()] -= x_col*(*e);
+      }
+    }
+  }
+  // Lower triangular solve in place, column by column, dense columns.
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator ITER;
+    for (int col = 0; col < int(k); ++col) {
+      COL column = mat_const_col(T, col);
+      if (!is_unit) x[col] /= column[col];
+      const typename linalg_traits<TriMatrix>::value_type x_col = x[col];
+      ITER src = vect_const_begin(column) + (col+1), src_end = vect_const_begin(column) + k;
+      typename linalg_traits<VecX>::iterator dst = vect_begin(x) + (col+1);
+      while (src != src_end) { *dst -= x_col * (*src); ++src; ++dst; }
+    }
+  }
+
+  // Upper triangular solve in place, row by row, sparse rows, restricted to
+  // indices below k. Row i is fetched by index (mat_const_row), as in the
+  // dense variant: an iterator walked back from mat_row_const_end(T) would
+  // pair x[i] with the wrong row whenever k < mat_nrows(T).
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typename linalg_traits<TriMatrix>::value_type t;
+    for (int i = int(k) - 1; i >= 0; --i) {
+      ROW c = mat_const_row(T, i);
+      typename linalg_traits<typename org_type<ROW>::t>::const_iterator
+        it = vect_const_begin(c), ite = vect_const_end(c);
+      for (t = x[i]; it != ite; ++it)
+        if (int(it.index()) > i && it.index() < k) t -= (*it) * x[it.index()];
+      if (!is_unit) x[i] = t / c[i]; else x[i] = t;
+    }
+  }
+  // Upper triangular solve in place, row by row, dense rows.
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator ITER;
+    for (int row = int(k) - 1; row >= 0; --row) {
+      ROW r = mat_const_row(T, row);
+      // Entries right of the diagonal, up to index k, against x[row+1 ..].
+      ITER a = vect_const_begin(r) + (row + 1), a_end = vect_const_begin(r) + k;
+      typename linalg_traits<VecX>::iterator xs = vect_begin(x) + (row+1);
+      typename linalg_traits<TriMatrix>::value_type acc = x[row];
+      while (a != a_end) { acc -= (*a) * (*xs); ++a; ++xs; }
+      if (is_unit) x[row] = acc;
+      else x[row] = acc / r[row];
+    }
+  }
+  // Lower triangular solve in place, row by row, sparse rows.
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator ITER;
+    for (int row = 0; row < int(k); ++row) {
+      ROW r = mat_const_row(T, row);
+      ITER e = vect_const_begin(r), last = vect_const_end(r);
+      typename linalg_traits<TriMatrix>::value_type acc = x[row];
+      // Only entries strictly left of the diagonal contribute.
+      for (; e != last; ++e)
+        if (int(e.index()) < row) acc -= (*e) * x[e.index()];
+      if (is_unit) x[row] = acc;
+      else x[row] = acc / r[row];
+    }
+  }
+  // Lower triangular solve in place, row by row, dense rows.
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator ITER;
+    for (int row = 0; row < int(k); ++row) {
+      ROW r = mat_const_row(T, row);
+      // The first `row` entries of the row against the first entries of x.
+      ITER a = vect_const_begin(r), a_end = a + row;
+      typename linalg_traits<VecX>::iterator xs = vect_begin(x);
+      typename linalg_traits<TriMatrix>::value_type acc = x[row];
+      while (a != a_end) { acc -= (*a) * (*xs); ++a; ++xs; }
+      if (is_unit) x[row] = acc;
+      else x[row] = acc / r[row];
+    }
+  }
+
+
+// Triangular Solve:  x <-- T^{-1} * x   (x_ is overwritten with the result)
+// These two overloads use k = mat_nrows(T).
+  template <typename TriMatrix, typename VecX>
+  inline void upper_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false)
+  { const size_t k = mat_nrows(T); upper_tri_solve(T, x_, k, is_unit); }
+
+  template <typename TriMatrix, typename VecX>
+  inline void lower_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false)
+  { const size_t k = mat_nrows(T); lower_tri_solve(T, x_, k, is_unit); }
+
+  // Upper triangular solve restricted to the first k rows and columns of T.
+  // The kernel is selected from T's principal orientation and storage type;
+  // x_ must be non-sparse and hold at least k entries.
+  template <typename TriMatrix, typename VecX> inline
+  void upper_tri_solve(const TriMatrix& T, VecX &x_, size_t k, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient_tag;
+    typedef typename linalg_traits<TriMatrix>::storage_type storage_tag;
+    GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x_) >= k
+                && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch");
+    upper_tri_solve__(T, x_, k, orient_tag(), storage_tag(), is_unit);
+  }
+
+  // Lower triangular solve restricted to the first k rows and columns of T.
+  // The kernel is selected from T's principal orientation and storage type;
+  // x_ must be non-sparse and hold at least k entries.
+  template <typename TriMatrix, typename VecX> inline
+  void lower_tri_solve(const TriMatrix& T, VecX &x_, size_t k, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype orient_tag;
+    typedef typename linalg_traits<TriMatrix>::storage_type storage_tag;
+    GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x_) >= k
+                && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch");
+    lower_tri_solve__(T, x_, k, orient_tag(), storage_tag(), is_unit);
+  }
+
+
+ 
+
+
+
+}
+
+
+#endif //  GMM_TRI_SOLVE_H__
diff --git a/gmm/gmm_vector.h b/gmm/gmm_vector.h
new file mode 100644
index 000000000..e69931dbe
--- /dev/null
+++ b/gmm/gmm_vector.h
@@ -0,0 +1,1571 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+/**@file gmm_vector.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Declaration of the vector types (gmm::dsvector, gmm::rsvector,
+     gmm::wsvector, gmm::slvector, ...)
+*/
+#ifndef GMM_VECTOR_H__
+#define GMM_VECTOR_H__
+
+#include <map>
+#include "gmm_interface.h"
+
+namespace gmm {
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* Class ref_elt_vector: reference on a vector component.                */
+  /*                                                                       */
+  /*************************************************************************/
+
+
+  template<typename T, typename V> class ref_elt_vector {
+    // Proxy for component `l` of the vector `*pm`: reads go through V::r(),
+    // plain writes through V::w() and += / -= through V::wa().
+    V *pm;
+    size_type l;
+
+  public:
+    ref_elt_vector(V *p, size_type ll) : pm(p), l(ll) {}
+    operator T() const { return pm->r(l); }
+    bool operator ==(T v) const { return pm->r(l) == v; }
+    bool operator !=(T v) const { return pm->r(l) != v; }
+    bool operator ==(std::complex<T> v) const
+    { return pm->r(l) == v; }
+    bool operator !=(std::complex<T> v) const
+    { return pm->r(l) != v; }
+    ref_elt_vector &operator +=(T v)
+    { pm->wa(l, v); return *this; }
+    ref_elt_vector &operator -=(T v)
+    { pm->wa(l, -v); return *this; }
+    ref_elt_vector &operator /=(T v)
+    { pm->w(l, pm->r(l) / v); return *this; }
+    ref_elt_vector &operator *=(T v)
+    { pm->w(l, pm->r(l) * v); return *this; }
+    ref_elt_vector &operator =(const ref_elt_vector &re)
+    { *this = static_cast<T>(re); return *this; }
+    ref_elt_vector &operator =(T v)
+    { pm->w(l, v); return *this; }
+    T operator +()    { return  static_cast<T>(*this); }
+    T operator -()    { return -static_cast<T>(*this); }
+    T operator +(T v) { return static_cast<T>(*this) + v; }
+    T operator -(T v) { return static_cast<T>(*this) - v; }
+    T operator *(T v) { return static_cast<T>(*this) * v; }
+    T operator /(T v) { return static_cast<T>(*this) / v; }
+    std::complex<T> operator +(std::complex<T> v) { return T(*this) + v; }
+    std::complex<T> operator -(std::complex<T> v) { return T(*this) - v; }
+    std::complex<T> operator *(std::complex<T> v) { return T(*this) * v; }
+    std::complex<T> operator /(std::complex<T> v) { return T(*this) / v; }
+  };
+
+  template<typename T, typename V> class ref_elt_vector<std::complex<T>,V> {
+    // Proxy for a complex component `l` of `*pm`; scalars may be passed as
+    // std::complex<T> or as T.  Reads use V::r(), writes use V::w().
+    V *pm;
+    size_type l;
+    public :
+    operator std::complex<T>() const { return pm->r(l); }
+    ref_elt_vector(V *p, size_type ll) : pm(p), l(ll) {}
+    inline bool operator ==(std::complex<T> v) const
+    { return ((*pm).r(l) == v); }
+    inline bool operator !=(std::complex<T> v) const
+    { return ((*pm).r(l) != v); }
+    inline bool operator ==(T v) const { return ((*pm).r(l) == v); }
+    inline bool operator !=(T v) const { return ((*pm).r(l) != v); }
+    inline ref_elt_vector &operator +=(std::complex<T> v)
+    { (*pm).w(l,(*pm).r(l) + v); return *this; }
+    inline ref_elt_vector &operator -=(std::complex<T> v)
+    { (*pm).w(l,(*pm).r(l) - v); return *this; }
+    inline ref_elt_vector &operator /=(std::complex<T> v)
+    { (*pm).w(l,(*pm).r(l) / v); return *this; }
+    inline ref_elt_vector &operator *=(std::complex<T> v)
+    { (*pm).w(l,(*pm).r(l) * v); return *this; }
+    // Copy the referenced value: the proxy only converts to std::complex<T>.
+    inline ref_elt_vector &operator =(const ref_elt_vector &re)
+    { *this = std::complex<T>(re); return *this; }
+    inline ref_elt_vector &operator =(std::complex<T> v)
+    { (*pm).w(l,v); return *this; }
+    inline ref_elt_vector &operator =(T v)
+    { (*pm).w(l,std::complex<T>(v)); return *this; }
+    inline ref_elt_vector &operator +=(T v)
+    { (*pm).w(l,(*pm).r(l) + v); return *this; }
+    inline ref_elt_vector &operator -=(T v)
+    { (*pm).w(l,(*pm).r(l) - v); return *this; }
+    inline ref_elt_vector &operator /=(T v)
+    { (*pm).w(l,(*pm).r(l) / v); return *this; }
+    inline ref_elt_vector &operator *=(T v)
+    { (*pm).w(l,(*pm).r(l) * v); return *this; }
+    std::complex<T> operator +()    { return  std::complex<T>(*this);   }
+    std::complex<T> operator -()    { return -std::complex<T>(*this);   }
+    std::complex<T> operator +(T v) { return std::complex<T>(*this)+ v; }
+    std::complex<T> operator -(T v) { return std::complex<T>(*this)- v; }
+    std::complex<T> operator *(T v) { return std::complex<T>(*this)* v; }
+    std::complex<T> operator /(T v) { return std::complex<T>(*this)/ v; }
+    std::complex<T> operator +(std::complex<T> v)
+    { return std::complex<T>(*this)+ v; }
+    std::complex<T> operator -(std::complex<T> v)
+    { return std::complex<T>(*this)- v; }
+    std::complex<T> operator *(std::complex<T> v)
+    { return std::complex<T>(*this)* v; }
+    std::complex<T> operator /(std::complex<T> v)
+    { return std::complex<T>(*this)/ v; }
+  };
+
+  
+  template<typename T, typename V> inline bool // scalar on the left-hand side
+  operator ==(T v, const ref_elt_vector<T, V> &re) { return v == T(re); }
+  template<typename T, typename V> inline bool
+  operator !=(T v, const ref_elt_vector<T, V> &re) { return v != T(re); }
+  template<typename T, typename V> inline T & // v (op)= value read from re
+  operator +=(T &v, const ref_elt_vector<T, V> &re)
+  { v += static_cast<T>(re); return v; }
+  template<typename T, typename V> inline T &
+  operator -=(T &v, const ref_elt_vector<T, V> &re)
+  { v -= static_cast<T>(re); return v; }
+  template<typename T, typename V> inline T &
+  operator *=(T &v, const ref_elt_vector<T, V> &re)
+  { v *= static_cast<T>(re); return v; }
+  template<typename T, typename V> inline T &
+  operator /=(T &v, const ref_elt_vector<T, V> &re)
+  { v /= static_cast<T>(re); return v; }
+  template<typename T, typename V> inline T // T (op) proxy of T
+  operator +(T v, const ref_elt_vector<T, V> &re) { return v + T(re); }
+  template<typename T, typename V> inline T
+  operator -(T v, const ref_elt_vector<T, V> &re) { return v - T(re); }
+  template<typename T, typename V> inline T
+  operator *(T v, const ref_elt_vector<T, V> &re) { return v * T(re); }
+  template<typename T, typename V> inline T
+  operator /(T v, const ref_elt_vector<T, V> &re) { return v / T(re); }
+  template<typename T, typename V> inline std::complex<T> // complex (op) T
+  operator +(std::complex<T> v, const ref_elt_vector<T, V> &re)
+  { return v + static_cast<T>(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator -(std::complex<T> v, const ref_elt_vector<T, V> &re)
+  { return v - static_cast<T>(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator *(std::complex<T> v, const ref_elt_vector<T, V> &re)
+  { return v * static_cast<T>(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator /(std::complex<T> v, const ref_elt_vector<T, V> &re)
+  { return v / static_cast<T>(re); }
+  template<typename T, typename V> inline std::complex<T> // T (op) complex
+  operator +(T v, const ref_elt_vector<std::complex<T>, V> &re)
+  { return v + static_cast<std::complex<T> >(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator -(T v, const ref_elt_vector<std::complex<T>, V> &re)
+  { return v - static_cast<std::complex<T> >(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator *(T v, const ref_elt_vector<std::complex<T>, V> &re)
+  { return v * static_cast<std::complex<T> >(re); }
+  template<typename T, typename V> inline std::complex<T>
+  operator /(T v, const ref_elt_vector<std::complex<T>, V> &re)
+  { return v / static_cast<std::complex<T> >(re); }
+  template<typename T, typename V> // each helper reads the value, then forwards
+  inline typename number_traits<T>::magnitude_type abs
+  (const ref_elt_vector<T, V> &re) { return gmm::abs(T(re)); }
+  template<typename T, typename V> inline T sqr
+  (const ref_elt_vector<T, V> &re) { return gmm::sqr(T(re)); }
+  template<typename T, typename V>
+  inline typename number_traits<T>::magnitude_type abs_sqr
+  (const ref_elt_vector<T, V> &re) { return gmm::abs_sqr(T(re)); }
+  template<typename T, typename V> inline T conj
+  (const ref_elt_vector<T, V> &re) { return gmm::conj(T(re)); }
+  template<typename T, typename V> std::ostream &
+  operator <<(std::ostream &o, const ref_elt_vector<T, V> &re) { o << T(re); return o; }
+  template<typename T, typename V>
+  inline typename number_traits<T>::magnitude_type real
+  (const ref_elt_vector<T, V> &re) { return gmm::real(T(re)); }
+  template<typename T, typename V>
+  inline typename number_traits<T>::magnitude_type imag
+  (const ref_elt_vector<T, V> &re) { return gmm::imag(T(re)); }
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* Class dsvector: sparse vector optimized for random write operations   */
+  /* with constant complexity for read and write operations.               */
+  /* Based on distribution sort principle.                                 */
+  /* Cheap for densely populated vectors.                                  */
+  /*                                                                       */
+  /*************************************************************************/
+
+  template<typename T> class dsvector;
+
+  template<typename T> struct dsvector_iterator {
+    size_type i;    // Current index.
+    T* p;           // Pointer to the current position.
+    dsvector<T> *v; // Pointer to the vector.
+
+    typedef T                   value_type;
+    typedef value_type*         pointer;
+    typedef const value_type*   const_pointer;
+    typedef value_type&         reference;
+    typedef ptrdiff_t           difference_type;
+    typedef std::bidirectional_iterator_tag iterator_category;
+    typedef dsvector_iterator<T> iterator;
+
+    reference operator *() const { return *p; }
+    pointer operator->() const { return &(operator*()); }
+
+    // Advance to the next non-zero value: scan the rest of the current
+    // 16-value block, then let the vector search the following blocks.
+    iterator &operator ++() {
+      for (size_type k = (i & 15); k < 15; ++k)
+        { ++p; ++i; if (*p != T(0)) return *this; }
+      const_pointer cp = p; v->next_pos(cp, i); p = const_cast<pointer>(cp);
+      return *this;
+    }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    // Same scheme backwards; from end() (p == 0) only the vector search runs.
+    iterator &operator --() {
+      for (size_type k = (p ? (i & 15) : 0); k > 0; --k)
+        { --p; --i; if (*p != T(0)) return *this; }
+      const_pointer cp = p; v->previous_pos(cp, i); p = const_cast<pointer>(cp);
+      return *this;
+    }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+
+    bool operator ==(const iterator &it) const
+    { return (i == it.i && p == it.p && v == it.v); }
+    bool operator !=(const iterator &it) const
+    { return !(it == *this); }
+    size_type index(void) const { return i; }
+    dsvector_iterator(void) : i(size_type(-1)), p(0), v(0) {}
+    dsvector_iterator(dsvector<T> &w) : i(size_type(-1)), p(0), v(&w) {}
+  };
+
+
+  template<typename T> struct dsvector_const_iterator {
+    size_type i;          // Current index.
+    const T* p;           // Pointer to the current position.
+    const dsvector<T> *v; // Pointer to the vector.
+
+    typedef T                   value_type;
+    typedef const value_type*   pointer;
+    typedef const value_type&   reference;
+    typedef ptrdiff_t           difference_type;
+    typedef std::bidirectional_iterator_tag iterator_category;
+    typedef dsvector_const_iterator<T> iterator;
+
+    reference operator *() const { return *p; }
+    pointer operator->() const { return &(operator*()); }
+    // Advance to the next non-zero value: scan the rest of the current
+    // 16-value block, then let the vector search the following blocks.
+    iterator &operator ++() {
+      for (size_type k = (i & 15); k < 15; ++k)
+        { ++p; ++i; if (*p != T(0)) return *this; }
+      v->next_pos(p, i);
+      return *this;
+    }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    // Same scheme backwards; from end() (p == 0) only the vector search runs.
+    iterator &operator --() {
+      for (size_type k = (p ? (i & 15) : 0); k > 0; --k)
+        { --p; --i; if (*p != T(0)) return *this; }
+      v->previous_pos(p, i);
+      return *this;
+    }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+
+    bool operator ==(const iterator &it) const
+    { return (i == it.i && p == it.p && v == it.v); }
+    bool operator !=(const iterator &it) const
+    { return !(it == *this); }
+    size_type index(void) const { return i; }
+    // v is set to 0 here so that operator== never reads an indeterminate value.
+    dsvector_const_iterator(void) : i(size_type(-1)), p(0), v(0) {}
+    dsvector_const_iterator(const dsvector_iterator<T> &it)
+      : i(it.i), p(it.p), v(it.v) {}
+    dsvector_const_iterator(const dsvector<T> &w)
+      : i(size_type(-1)), p(0), v(&w) {}
+  };
+
+  
+  /**
+     Sparse vector built on distribution sort principle.
+     Read and write access have a constant complexity depending only on the
+     vector size.
+  */
+  template<typename T> class dsvector {
+    // Values are kept in leaf arrays of 16 entries reached through `depth`
+    // levels of 16-entry pointer arrays; a null pointer stands for zeros.
+    typedef dsvector_iterator<T>       iterator;
+    typedef dsvector_const_iterator<T> const_iterator;
+    typedef dsvector<T>                this_type;
+    typedef T *                        pointer;
+    typedef const T *                  const_pointer;
+    typedef void *                     void_pointer;
+    typedef const void *               const_void_pointer;
+
+  protected:
+    size_type    n;         // Potential vector size
+    size_type    depth;     // Number of rows of pointer arrays
+    size_type    mask;      // Mask for the first pointer array
+    size_type    shift;     // Shift for the first pointer array
+    void_pointer root_ptr;  // Root pointer
+
+    // Address of component i, or 0 when its leaf was never allocated.
+    const T *read_access(size_type i) const {
+      GMM_ASSERT1(i < n, "index out of range");
+      size_type my_mask = mask, my_shift = shift;
+      void_pointer p = root_ptr;
+      if (!p) return 0;
+      for (size_type k = 0; k < depth; ++k) {
+        p = ((void **)(p))[(i & my_mask) >> my_shift];
+        if (!p) return 0;
+        my_mask = (my_mask >> 4);
+        my_shift -= 4;
+      }
+      GMM_ASSERT1(my_shift == 0, "internal error");
+      GMM_ASSERT1(my_mask == 15, "internal error");
+      return &(((const T *)(p))[i & 15]);
+    }
+    // Address of component i, allocating the missing nodes on the way down.
+    T *write_access(size_type i) {
+      GMM_ASSERT1(i < n, "index " << i << " out of range (size " << n << ")");
+      size_type my_mask = mask, my_shift = shift;
+      if (!root_ptr) {
+        if (depth) {
+          root_ptr = new void_pointer[16];
+          std::memset(root_ptr, 0, 16*sizeof(void_pointer));
+        } else {
+          root_ptr = new T[16];
+          for (size_type l = 0; l < 16; ++l) ((T *)(root_ptr))[l] = T(0);
+        }
+      }
+
+      void_pointer p = root_ptr;
+      for (size_type k = 0; k < depth; ++k) {
+        size_type j = (i & my_mask) >> my_shift;
+        void_pointer q = ((void_pointer *)(p))[j];
+        if (!q) {
+          if (k+1 != depth) {
+            q = new void_pointer[16];
+            std::memset(q, 0, 16*sizeof(void_pointer));
+          } else {
+            q = new T[16];
+            for (size_type l = 0; l < 16; ++l) ((T *)(q))[l] = T(0);
+          }
+          ((void_pointer *)(p))[j] = q;
+        }
+        p = q;
+        my_mask = (my_mask >> 4);
+        my_shift -= 4;
+      }
+      GMM_ASSERT1(my_shift == 0, "internal error");
+      GMM_ASSERT1(my_mask == 15, "internal error " << my_mask);
+      return &(((T *)(p))[i & 15]);
+    }
+    // Empty vector of size n_; depth, mask and shift are derived from n_.
+    void init(size_type n_) {
+      n = n_; depth = 0; shift = 0; mask = 1; if (n_) --n_;
+      while (n_) { n_ /= 16; ++depth; shift += 4; mask *= 16; }
+      mask--; if (shift) shift -= 4; if (depth) --depth;
+      if (mask < 15) mask = 15; // sizes 0 and 1: still one leaf of 16 values
+      root_ptr = 0;
+    }
+    // Free the sub-tree p lying my_depth pointer levels above the leaves.
+    void rec_del(void_pointer p, size_type my_depth) {
+      if (my_depth) {
+        for (size_type k = 0; k < 16; ++k)
+          if (((void_pointer *)(p))[k])
+            rec_del(((void_pointer *)(p))[k], my_depth-1);
+        delete[] ((void_pointer *)(p));
+      } else {
+        delete[] ((T *)(p));
+      }
+    }
+    // Set to zero every stored value x with gmm::abs(x) <= eps.
+    void rec_clean(void_pointer p, size_type my_depth, double eps) {
+      if (my_depth) {
+        for (size_type k = 0; k < 16; ++k)
+          if (((void_pointer *)(p))[k])
+            rec_clean(((void_pointer *)(p))[k], my_depth-1, eps);
+      } else {
+        for (size_type k = 0; k < 16; ++k)
+          if (gmm::abs(((T *)(p))[k]) <= eps) ((T *)(p))[k] = T(0);
+      }
+    }
+    // Set to zero every stored value of index >= i (base: first index in p).
+    void rec_clean_i(void_pointer p, size_type my_depth, size_type my_mask,
+                     size_type i, size_type base) {
+      if (my_depth) {
+        my_mask = (my_mask >> 4);
+        for (size_type k = 0; k < 16; ++k)
+          if (((void_pointer *)(p))[k] && (base + (k+1)*(my_mask+1)) > i)
+            rec_clean_i(((void_pointer *)(p))[k], my_depth-1, my_mask,
+                        i, base + k*(my_mask+1));
+      } else {
+        for (size_type k = 0; k < 16; ++k)
+          if (base+k >= i) ((T *)(p))[k] = T(0);
+      }
+    }
+    // Number of stored values different from T(0) in the sub-tree p.
+    size_type rec_nnz(void_pointer p, size_type my_depth) const {
+      size_type nn = 0;
+      if (my_depth) {
+        for (size_type k = 0; k < 16; ++k)
+          if (((void_pointer *)(p))[k])
+            nn += rec_nnz(((void_pointer *)(p))[k], my_depth-1);
+      } else {
+        for (size_type k = 0; k < 16; ++k)
+          if (((const T *)(p))[k] != T(0)) nn++;
+      }
+      return nn;
+    }
+    // Deep copy of the sub-tree q (my_depth levels above the leaves) into p.
+    void copy_rec(void_pointer &p, const_void_pointer q, size_type my_depth) {
+      if (my_depth) {
+        p = new void_pointer[16];
+        std::memset(p, 0, 16*sizeof(void_pointer));
+        for (size_type l = 0; l < 16; ++l)
+          if (((const const_void_pointer *)(q))[l])
+            copy_rec(((void_pointer *)(p))[l],
+                     ((const const_void_pointer *)(q))[l], my_depth-1);
+      } else {
+        p = new T[16];
+        for (size_type l = 0; l < 16; ++l) ((T *)(p))[l] = ((const T *)(q))[l];
+      }
+    }
+
+    void copy(const dsvector<T> &v) {
+      if (this == &v) return; // self-assignment would free the data first
+      if (root_ptr) rec_del(root_ptr, depth);
+      root_ptr = 0;
+      mask = v.mask; depth = v.depth; n = v.n; shift = v.shift;
+      if (v.root_ptr) copy_rec(root_ptr, v.root_ptr, depth);
+    }
+    // First non-zero value of index > i in sub-tree p; i = -1 when none.
+    void next_pos_rec(void_pointer p, size_type my_depth, size_type my_mask,
+                      const_pointer &pp, size_type &i, size_type base) const {
+      size_type ii = i;
+      if (my_depth) {
+        my_mask = (my_mask >> 4);
+        for (size_type k = 0; k < 16; ++k)
+          if (((void_pointer *)(p))[k] && (base + (k+1)*(my_mask+1)) >= i) {
+            next_pos_rec(((void_pointer *)(p))[k], my_depth-1, my_mask,
+                         pp, i, base + k*(my_mask+1));
+            if (i != size_type(-1)) return; else i = ii;
+        }
+        i = size_type(-1); pp = 0;
+      } else {
+        for (size_type k = 0; k < 16; ++k)
+          if (base+k > i && ((const_pointer)(p))[k] != T(0))
+            { i = base+k; pp = &(((const_pointer)(p))[k]); return; }
+        i = size_type(-1); pp = 0;
+      }
+    }
+    // Last non-zero value of index < i in sub-tree p; i = -1 when none.
+    void previous_pos_rec(void_pointer p, size_type my_depth, size_type my_mask,
+                          const_pointer &pp, size_type &i,
+                          size_type base) const {
+      size_type ii = i;
+      if (my_depth) {
+        my_mask = (my_mask >> 4);
+        for (size_type k = 15; k != size_type(-1); --k)
+          if (((void_pointer *)(p))[k] && ((base + k*(my_mask+1)) < i)) {
+            previous_pos_rec(((void_pointer *)(p))[k], my_depth-1,
+                             my_mask, pp, i, base + k*(my_mask+1));
+            if (i != size_type(-1)) return; else i = ii;
+        }
+        i = size_type(-1); pp = 0;
+      } else {
+        for (size_type k = 15; k != size_type(-1); --k)
+          if (base+k < i && ((const_pointer)(p))[k] != T(0))
+            { i = base+k; pp = &(((const_pointer)(p))[k]); return; }
+        i = size_type(-1); pp = 0;
+      }
+    }
+
+  public:
+    void clean(double eps) { if (root_ptr) rec_clean(root_ptr, depth, eps); }
+    void resize(size_type n_) {
+      if (n_ == n) return;
+      const size_type old_n = n; n = n_; // old size needed for the test below
+      if (n_ < old_n) { // Depth unchanged (a choice)
+        if (root_ptr) rec_clean_i(root_ptr, depth, mask, n_, 0);
+        return;
+      }
+      // may change the depth (add some levels)
+      size_type my_depth = 0, my_shift = 0, my_mask = 1; if (n_) --n_;
+      while (n_) { n_ /= 16; ++my_depth; my_shift += 4; my_mask *= 16; }
+      my_mask--; if (my_shift) my_shift -= 4; if (my_depth) --my_depth;
+      if (my_mask < 15) my_mask = 15; // same floor as in init()
+      if (my_depth > depth || depth == 0) {
+        if (root_ptr) {
+          for (size_type k = depth; k < my_depth; ++k) {
+            void_pointer *q = new void_pointer [16];
+            std::memset(q, 0, 16*sizeof(void_pointer));
+            q[0] = root_ptr; root_ptr = q;
+          }
+        }
+        mask = my_mask; depth = my_depth; shift = my_shift;
+      }
+    }
+
+    void clear(void) { if (root_ptr) rec_del(root_ptr, depth); root_ptr = 0; }
+
+    void next_pos(const_pointer &pp, size_type &i) const {
+      if (!root_ptr || i >= n) { pp = 0, i = size_type(-1); return; }
+      next_pos_rec(root_ptr, depth, mask, pp, i, 0);
+    }
+
+    void previous_pos(const_pointer &pp, size_type &i) const {
+      if (!root_ptr) { pp = 0, i = size_type(-1); return; }
+      if (i == size_type(-1)) { i = n; }
+      previous_pos_rec(root_ptr, depth, mask, pp, i, 0);
+    }
+
+    iterator begin(void) {
+      iterator it(*this);
+      if (n && root_ptr) {
+        it.i = 0; it.p = const_cast<T *>(read_access(0));
+        if (!(it.p) || *(it.p) == T(0))
+          next_pos(*(const_cast<const_pointer *>(&(it.p))), it.i);
+      }
+      return it;
+    }
+    iterator end(void) { return iterator(*this); }
+
+    const_iterator begin(void) const {
+      const_iterator it(*this);
+      if (n && root_ptr) {
+        it.i = 0; it.p = read_access(0);
+        if (!(it.p) || *(it.p) == T(0)) next_pos(it.p, it.i);
+      }
+      return it;
+    }
+    const_iterator end(void) const { return const_iterator(*this); }
+
+    inline ref_elt_vector<T, dsvector<T> > operator [](size_type c)
+    { return ref_elt_vector<T, dsvector<T> >(this, c); }
+
+    inline void w(size_type c, const T &e) {
+      if (e == T(0)) { if (read_access(c)) *(write_access(c)) = e; }
+      else *(write_access(c)) = e;
+    }
+
+    inline void wa(size_type c, const T &e)
+    { if (e != T(0)) { *(write_access(c)) += e; } }
+
+    inline T r(size_type c) const
+    { const T *p = read_access(c); if (p) return *p; else return T(0); }
+
+    inline T operator [](size_type c) const { return r(c); }
+
+    size_type nnz(void) const
+    { if (root_ptr) return rec_nnz(root_ptr, depth); else return 0; }
+    size_type size(void) const { return n; }
+
+    void swap(dsvector<T> &v) {
+      std::swap(n, v.n); std::swap(root_ptr, v.root_ptr);
+      std::swap(depth, v.depth); std::swap(shift, v.shift);
+      std::swap(mask, v.mask);
+    }
+
+    /* Constructors */
+    dsvector(const dsvector<T> &v) { init(0); copy(v); }
+    dsvector<T> &operator =(const dsvector<T> &v) { copy(v); return *this; }
+    explicit dsvector(size_type l){ init(l); }
+    dsvector(void) { init(0); }
+    ~dsvector() { if (root_ptr) rec_del(root_ptr, depth); root_ptr = 0; }
+  };
+
+  template <typename T> struct linalg_traits<dsvector<T>> {
+    // gmm traits for dsvector<T>: a non-reference, sparse, index-sorted
+    // vector type; every operation forwards to the vector itself.
+    using this_type = dsvector<T>;
+    using origin_type = this_type;
+    using is_reference = linalg_false;
+    using linalg_type = abstract_vector;
+    using value_type = T;
+    using reference = ref_elt_vector<T, dsvector<T> >;
+    using iterator = dsvector_iterator<T>;
+    using const_iterator = dsvector_const_iterator<T>;
+    using storage_type = abstract_sparse;
+    using index_sorted = linalg_true;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return &v; }
+    static const origin_type* origin(const this_type &v) { return &v; }
+    static void do_clear(this_type &v) { v.clear(); }
+    static void clear(origin_type* o, const iterator &, const iterator &)
+    { o->clear(); }
+    static void resize(this_type &v, size_type n) { v.resize(n); }
+    static value_type access(const origin_type *o, const const_iterator &,
+        const const_iterator &, size_type i) { return (*o)[i]; }
+    static reference access(origin_type *o, const iterator &, const iterator &,
+        size_type i) { return (*o)[i]; }
+  };
+
+  template<typename T> std::ostream & // prints v through gmm::write
+  operator <<(std::ostream &o, const dsvector<T>& v) { gmm::write(o, v); return o; }
+
+  /******* Optimized operations for dsvector<T> ****************************/
+
+  // Same-type copy: delegates to dsvector's assignment once sizes agree.
+  template <typename T> inline
+  void copy(const dsvector<T> &v1, dsvector<T> &v2) {
+    GMM_ASSERT2(v1.size() == v2.size(), "dimensions mismatch"); v2 = v1;
+  }
+  template <typename T> inline // destination v2 is received as const
+  void copy(const dsvector<T> &v1, const dsvector<T> &v2) {
+    GMM_ASSERT2(v1.size() == v2.size(), "dimensions mismatch");
+    const_cast<dsvector<T> &>(v2) = v1; // strip const from v2, not from v1
+  }
+  template <typename T> inline
+  void copy(const dsvector<T> &v1, const simple_vector_ref<dsvector<T> *> &v2){
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    // Assign through the referenced vector, then refresh begin_ / end_.
+    dsvector<T> *pv = const_cast<dsvector<T> *>((v2.origin)); *pv = v1;
+    simple_vector_ref<dsvector<T> *>
+      &svr = const_cast<simple_vector_ref<dsvector<T> *> &>(v2);
+    svr.begin_ = vect_begin(*pv); svr.end_ = vect_end(*pv);
+  }
+  // Sources wrapped in a simple_vector_ref: unwrap, then reuse copy().
+  template <typename T> inline void
+  copy(const simple_vector_ref<const dsvector<T> *> &v1, dsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+  template <typename T> inline void
+  copy(const simple_vector_ref<dsvector<T> *> &v1, dsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+  template <typename T> inline void
+  copy(const simple_vector_ref<dsvector<T> *> &v1,
+       const simple_vector_ref<dsvector<T> *> &v2)
+  { copy(*v1.origin, v2); }
+  template <typename T> inline void
+  copy(const simple_vector_ref<const dsvector<T> *> &v1,
+       const simple_vector_ref<dsvector<T> *> &v2)
+  { copy(*v1.origin, v2); }
+  
+  template <typename T> inline size_type // forwards to dsvector<T>::nnz()
+  nnz(const dsvector<T>& l) { return l.nnz(); }
+  
+  /*************************************************************************/
+  /*                                                                       */
+  /* Class wsvector: sparse vector optimized for random write operations,  */
+  /* with log(n) complexity for read and write operations.                 */
+  /* Based on std::map                                                     */
+  /*                                                                       */
+  /*************************************************************************/
+  
+  template<typename T> struct wsvector_iterator
+    : public std::map<size_type, T>::iterator {
+    // Map iterator that dereferences to the mapped value; index() is the key.
+    using base_it_type = typename std::map<size_type, T>::iterator;
+    using value_type = T;
+    using pointer = value_type*;
+    using reference = value_type&;
+    using difference_type = ptrdiff_t;
+    using iterator_category = std::bidirectional_iterator_tag;
+
+    wsvector_iterator(void) {}
+    wsvector_iterator(const base_it_type &it) : base_it_type(it) {}
+
+    reference operator *() const { return base_it_type::operator*().second; }
+    pointer operator->() const { return &(operator*()); }
+    size_type index(void) const { return base_it_type::operator*().first; }
+  };
+
+  template<typename T> struct wsvector_const_iterator
+    : public std::map<size_type, T>::const_iterator {
+    // Const map iterator dereferencing to the mapped value; index() is the key.
+    using base_it_type = typename std::map<size_type, T>::const_iterator;
+    using value_type = T;
+    using pointer = const value_type*;
+    using reference = const value_type&;
+    using difference_type = ptrdiff_t;
+    using iterator_category = std::bidirectional_iterator_tag;
+
+    wsvector_const_iterator(void) {}
+    wsvector_const_iterator(const base_it_type &it) : base_it_type(it) {}
+    wsvector_const_iterator(const wsvector_iterator<T> &it)
+      : base_it_type(it) {}
+
+    reference operator *() const { return base_it_type::operator*().second; }
+    pointer operator->() const { return &(operator*()); }
+    size_type index(void) const { return base_it_type::operator*().first; }
+  };
+
+
+  /**
+     sparse vector built upon std::map.
+     Read and write access are quite fast (log n)
+  */
+  template<typename T> class wsvector : public std::map<size_type, T> {
+  public:
+    typedef typename std::map<int, T>::size_type size_type;
+    typedef std::map<size_type, T> base_type;
+    typedef typename base_type::iterator iterator;
+    typedef typename base_type::const_iterator const_iterator;
+
+  protected:
+    size_type nbl; // Vector size; r(), w() and wa() check indices against it.
+
+  public:
+    /* Constructors */
+    void init(size_type l) { nbl = l; this->clear(); }
+    wsvector(void) { init(0); }
+    explicit wsvector(size_type l){ init(l); }
+
+    void clean(double eps);
+    void resize(size_type);
+
+    size_type size(void) const { return nbl; }
+    size_type nb_stored(void) const { return base_type::size(); }
+
+    // Read: a component without a map entry reads as T(0).
+    inline T r(size_type c) const {
+      GMM_ASSERT2(c < nbl, "out of range");
+      const_iterator pos = this->find(c);
+      if (pos == this->end()) return T(0);
+      return pos->second;
+    }
+    inline T operator [](size_type c) const { return r(c); }
+
+    // Write: storing T(0) erases the entry instead of keeping a zero.
+    inline void w(size_type c, const T &e) {
+      GMM_ASSERT2(c < nbl, "out of range");
+      if (e == T(0)) { this->erase(c); return; }
+      base_type::operator [](c) = e;
+    }
+
+    // Accumulate: adds e to component c, creating the entry if needed.
+    inline void wa(size_type c, const T &e) {
+      GMM_ASSERT2(c < nbl, "out of range");
+      if (!(e != T(0))) return;
+      iterator pos = this->lower_bound(c);
+      if (pos == this->end() || !(pos->first == c))
+        base_type::operator [](c) = e;
+      else pos->second += e;
+    }
+
+    inline ref_elt_vector<T, wsvector<T> > operator [](size_type c)
+    { return ref_elt_vector<T, wsvector<T> >(this, c); }
+
+    void swap(wsvector<T> &v)
+    { std::map<size_type, T>::swap(v); std::swap(nbl, v.nbl); }
+  };
+
+  template<typename T>  void wsvector<T>::clean(double eps) {
+    // Erase the entries with gmm::abs(value) <= eps.  `nx` is advanced
+    // before the erase so the traversal never uses an erased iterator.
+    for (iterator it = this->begin(), nx = it; it != this->end(); it = nx)
+      { ++nx; if (gmm::abs(it->second) <= eps) this->erase(it); }
+  }
+
+  template<typename T>  void wsvector<T>::resize(size_type n) {
+    // Shrinking drops the entries of index >= n.  The map is ordered by
+    // index, so they form the tail range [lower_bound(n), end()).
+    if (n < nbl)
+      this->erase(this->lower_bound(n), this->end());
+    nbl = n;
+  }
+
+  template <typename T> struct linalg_traits<wsvector<T> > {
+    // gmm traits for wsvector<T>: a non-reference, sparse, index-sorted
+    // vector type; every operation forwards to the vector itself.
+    using this_type = wsvector<T>;
+    using origin_type = this_type;
+    using is_reference = linalg_false;
+    using linalg_type = abstract_vector;
+    using value_type = T;
+    using reference = ref_elt_vector<T, wsvector<T> >;
+    using iterator = wsvector_iterator<T>;
+    using const_iterator = wsvector_const_iterator<T>;
+    using storage_type = abstract_sparse;
+    using index_sorted = linalg_true;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) { return v.begin(); }
+    static const_iterator begin(const this_type &v) { return v.begin(); }
+    static iterator end(this_type &v) { return v.end(); }
+    static const_iterator end(const this_type &v) { return v.end(); }
+    static origin_type* origin(this_type &v) { return &v; }
+    static const origin_type* origin(const this_type &v) { return &v; }
+    static void do_clear(this_type &v) { v.clear(); }
+    static void clear(origin_type* o, const iterator &, const iterator &)
+    { o->clear(); }
+    static void resize(this_type &v, size_type n) { v.resize(n); }
+    static value_type access(const origin_type *o, const const_iterator &,
+        const const_iterator &, size_type i) { return (*o)[i]; }
+    static reference access(origin_type *o, const iterator &, const iterator &,
+        size_type i) { return (*o)[i]; }
+  };
+
+  template<typename T> std::ostream &operator << // prints v with gmm::write
+  (std::ostream &o, const wsvector<T>& v) { gmm::write(o,v); return o; }
+
+  /******* Optimized BLAS for wsvector<T> **********************************/
+
+  template <typename T> inline
+  void copy(const wsvector<T> &v1, wsvector<T> &v2) { // same-type fast path
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    v2 = v1;  // whole-container assignment
+  }
+  template <typename T> inline
+  void copy(const wsvector<T> &v1, const simple_vector_ref<wsvector<T> *> &v2){
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    simple_vector_ref<wsvector<T> *> &ref
+      = const_cast<simple_vector_ref<wsvector<T> *> &>(v2);
+    wsvector<T> &target = *const_cast<wsvector<T> *>(v2.origin);
+    target = v1;  // v2 is const only formally; its cached range is refreshed
+    ref.begin_ = vect_begin(target); ref.end_ = vect_end(target);
+  }
+  // Source given through a const vector reference: copy what it refers to.
+  template <typename T> inline
+  void copy(const simple_vector_ref<const wsvector<T> *> &v1, wsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+  template <typename T> inline  // source given through a vector reference
+  void copy(const simple_vector_ref<wsvector<T> *> &v1, wsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+
+  template <typename T> inline void clean(wsvector<T> &v, double eps) {
+    typedef typename number_traits<T>::magnitude_type R;
+    const R threshold = R(eps);  // compare in the magnitude type of T
+    typename wsvector<T>::iterator cur = v.begin(), last = v.end(), old;
+    while (cur != last)  // step past an entry before erasing it
+      { old = cur++; if (gmm::abs(old->second) <= threshold) v.erase(old); }
+  }
+
+  template <typename T>
+  inline void clean(const simple_vector_ref<wsvector<T> *> &l, double eps) {
+    // Constness is cast away so that the referenced vector can be modified.
+    simple_vector_ref<wsvector<T> *> &ref
+      = const_cast<simple_vector_ref<wsvector<T> *> &>(l);
+    wsvector<T> &target = *const_cast<wsvector<T> *>(l.origin);
+    clean(target, eps);  // may erase entries: refresh the cached range
+    ref.begin_ = vect_begin(target); ref.end_ = vect_end(target);
+  }
+
+  template <typename T>  // number of entries actually stored
+  inline size_type nnz(const wsvector<T>& l) { return l.nb_stored(); }
+
+  /*************************************************************************/
+  /*                                                                       */
+  /*    rsvector: sparse vector optimized for linear algebra operations.   */
+  /*                                                                       */
+  /*************************************************************************/
+
+  template<typename T> struct elt_rsvector_ {
+    size_type c; T e;  /* c: index of the entry, e: its value. */
+    /* Both members are always initialized. For e this avoids some false
+       warnings of valgrind; from
+       http://valgrind.org/docs/manual/mc-manual.html: when memory is read
+       into the CPU's floating point registers, the relevant V bits are
+       read from memory and immediately checked, and an uninitialised value
+       error is emitted if any is invalid. This precludes copying
+       possibly-uninitialised memory through those registers.
+       c is zeroed as well, so that a default-constructed element never
+       carries an indeterminate index.
+    */
+    elt_rsvector_(void) : c(0), e(0) {}
+    /* Key-only element (value zero), e.g. to search for index cc. */
+    elt_rsvector_(size_type cc) : c(cc), e(0) {}
+    elt_rsvector_(size_type cc, const T &ee) : c(cc), e(ee) {}
+    bool operator < (const elt_rsvector_ &a) const { return c < a.c; }
+    bool operator == (const elt_rsvector_ &a) const { return c == a.c; }
+    bool operator != (const elt_rsvector_ &a) const { return c != a.c; }
+  };
+
+  template<typename T> struct rsvector_iterator {
+    typedef typename std::vector<elt_rsvector_<T> >::iterator IT;
+    typedef T                   value_type;
+    typedef value_type*         pointer;
+    typedef value_type&         reference;
+    typedef size_t              size_type;
+    typedef ptrdiff_t           difference_type;
+    typedef std::bidirectional_iterator_tag iterator_category;
+    typedef rsvector_iterator<T> iterator;
+
+    IT it;  /* position in the underlying vector of (index, value) elements */
+    /* Dereferencing yields the value; index() yields the matching index. */
+    reference operator *() const { return (*it).e; }
+    pointer operator->() const { return &((*it).e); }
+    size_type index(void) const { return (*it).c; }
+
+    iterator &operator ++() { ++it; return *this; }
+    iterator operator ++(int) { iterator old(*this); ++it; return old; }
+    iterator &operator --() { --it; return *this; }
+    iterator operator --(int) { iterator old(*this); --it; return old; }
+
+    bool operator ==(const iterator &i) const { return it == i.it; }
+    bool operator !=(const iterator &i) const { return it != i.it; }
+
+    rsvector_iterator(void) {}
+    rsvector_iterator(const IT &i) : it(i) {}
+  };
+
+  template<typename T> struct rsvector_const_iterator {
+    typedef typename std::vector<elt_rsvector_<T> >::const_iterator IT;
+    typedef T                   value_type;
+    typedef const value_type*   pointer;
+    typedef const value_type&   reference;
+    typedef size_t              size_type;
+    typedef ptrdiff_t           difference_type;
+    /* Bidirectional, like rsvector_iterator: operator-- is provided below. */
+    typedef std::bidirectional_iterator_tag iterator_category;
+    typedef rsvector_const_iterator<T> iterator;
+
+    IT it;  /* position in the underlying vector of (index, value) elements */
+    reference operator *() const { return it->e; }
+    pointer operator->() const { return &(operator*()); }
+    size_type index(void) const { return it->c; }
+
+    iterator &operator ++() { ++it; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --() { --it; return *this; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+
+    bool operator ==(const iterator &i) const { return it == i.it; }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+
+    rsvector_const_iterator(void) {}
+    rsvector_const_iterator(const rsvector_iterator<T> &i) : it(i.it) {}
+    rsvector_const_iterator(const IT &i) : it(i) {}
+  };
+
+  /**
+     Sparse vector stored as a std::vector of (index, value) elements kept
+     sorted by index. Read access is fast, but insertion is O(n).
+  */
+  template<typename T> class rsvector : public std::vector<elt_rsvector_<T> > {
+  public:
+
+    typedef std::vector<elt_rsvector_<T> > base_type_;
+    typedef typename base_type_::iterator iterator;
+    typedef typename base_type_::const_iterator const_iterator;
+    typedef typename base_type_::size_type size_type;
+    typedef T value_type;
+
+  protected:
+    size_type nbl;  /* logical size of the vector (not the stored count) */
+
+  public:
+
+    void sup(size_type j);                  /* removes the entry of index j */
+    void base_resize(size_type n) { base_type_::resize(n); }
+    void resize(size_type);                 /* changes the logical size     */
+
+    ref_elt_vector<T, rsvector<T> > operator [](size_type c)
+    { return ref_elt_vector<T, rsvector<T> >(this, c); }
+
+    void w(size_type c, const T &e);        /* writes e at index c          */
+    void wa(size_type c, const T &e);       /* adds e to the entry c        */
+    T r(size_type c) const;                 /* reads the entry c            */
+    void swap_indices(size_type i, size_type j);
+
+    inline T operator [](size_type c) const { return r(c); }
+
+    size_type nb_stored(void) const { return base_type_::size(); }
+    size_type size(void) const { return nbl; }
+    void clear(void) { base_type_::resize(0); }
+    void swap(rsvector<T> &v)
+    { base_type_::swap(v); std::swap(nbl, v.nbl); }
+
+    /* Constructors */
+    explicit rsvector(size_type l) : nbl(l) { }
+    rsvector(void) : nbl(0) { }
+  };
+
+  template <typename T>
+  void rsvector<T>::swap_indices(size_type i, size_type j) {
+    // Exchanges the values held at indices i and j while keeping the stored
+    // elements sorted by index. An index that is not stored holds zero, so
+    // when only one of the two is stored its element is relabelled and
+    // moved to the position its new index requires.
+    if (i > j) std::swap(i, j);
+    if (i == j) return;
+    iterator iti = std::lower_bound(this->begin(), this->end(),
+                                    elt_rsvector_<T>(i));
+    iterator itj = std::lower_bound(this->begin(), this->end(),
+                                    elt_rsvector_<T>(j));
+    const bool has_i = (iti != this->end()) && (iti->c == i);
+    const bool has_j = (itj != this->end()) && (itj->c == j);
+
+    if (has_i && has_j) {
+      std::swap(iti->e, itj->e);
+    } else if (has_i) {
+      // itj is the first element of index > j: the relabelled element goes
+      // right before it, the elements in between shift one slot left.
+      iti->c = j;
+      std::rotate(iti, iti + 1, itj);
+    } else if (has_j) {
+      // iti is the first element of index > i: the relabelled element takes
+      // its place, the elements in between shift one slot right.
+      itj->c = i;
+      std::rotate(iti, itj, itj + 1);
+    }
+  }
+
+  template <typename T> void rsvector<T>::sup(size_type j) {
+    // Removes the stored entry of index j, if there is one.
+    if (nb_stored() == 0) return;
+    const elt_rsvector_<T> key(j);
+    iterator pos = std::lower_bound(this->begin(), this->end(), key);
+    if (pos == this->end() || pos->c != j) return;  // index j is not stored
+    // Close the gap: shift the tail one slot left, then drop the last slot.
+    for (iterator nx = pos + 1; nx != this->end(); ++pos, ++nx) *pos = *nx;
+    base_resize(nb_stored() - 1);
+  }
+
+  template<typename T>  void rsvector<T>::resize(size_type n) {
+    // Shrinking truncates the storage at the first entry of index >= n.
+    size_type keep = 0, stored = (n < nbl) ? nb_stored() : 0;
+    while (keep < stored && base_type_::operator[](keep).c < n) ++keep;
+    if (keep < stored) base_resize(keep);
+    nbl = n;
+  }
+
+  template <typename T> void rsvector<T>::w(size_type c, const T &e) {
+    // Writes e at index c (c must be smaller than size()).
+    //  - e == 0: the entry is removed, zeros are never stored;
+    //  - index c already stored: its value is replaced by e;
+    //  - otherwise a new element (c, e) is inserted at its sorted position,
+    //    which costs one move per element stored after it.
+    GMM_ASSERT2(c < nbl, "out of range");
+    if (e == T(0)) { sup(c); return; }
+
+    // The elements are kept sorted by index: binary search for c.
+    const elt_rsvector_<T> ev(c, e);
+    iterator it = std::lower_bound(this->begin(), this->end(), ev);
+    if (it != this->end() && it->c == c) { it->e = e; return; }
+
+    // Number of elements that have to move one slot to the right; a large
+    // count is reported as an inefficient insertion.
+    const size_type shifted = size_type(this->end() - it);
+    if (shifted > 1100)
+      GMM_WARNING2("Inefficient addition of element in rsvector with "
+                   << shifted << " non-zero entries");
+
+    // vector::insert performs the grow-and-shift that used to be written by
+    // hand (push_back followed by a backward copy loop); inserting at end(),
+    // in particular into an empty vector, amounts to a push_back.
+    base_type_::insert(it, ev);
+
+  }
+
+  template <typename T> void rsvector<T>::wa(size_type c, const T &e) {
+    // Adds e to the entry of index c (c must be smaller than size()).
+    //  - e == 0: nothing to do;
+    //  - index c already stored: e is added to its value;
+    //  - otherwise a new element (c, e) is inserted at its sorted position,
+    //    which costs one move per element stored after it.
+    GMM_ASSERT2(c < nbl, "out of range");
+    if (e == T(0)) return;
+
+    // The elements are kept sorted by index: binary search for c.
+    const elt_rsvector_<T> ev(c, e);
+    iterator it = std::lower_bound(this->begin(), this->end(), ev);
+    if (it != this->end() && it->c == c) { it->e += e; return; }
+
+    // Number of elements that have to move one slot to the right; a large
+    // count is reported as an inefficient insertion.
+    const size_type shifted = size_type(this->end() - it);
+    if (shifted > 1100)
+      GMM_WARNING2("Inefficient addition of element in rsvector with "
+                   << shifted << " non-zero entries");
+
+    // vector::insert performs the grow-and-shift that used to be written by
+    // hand (push_back followed by a backward copy loop); inserting at end(),
+    // in particular into an empty vector, amounts to a push_back.
+    base_type_::insert(it, ev);
+  }
+  
+  template <typename T> T rsvector<T>::r(size_type c) const {
+    GMM_ASSERT2(c < nbl, "out of range. Index " << c
+                << " for a length of " << nbl);
+    // Binary search on the index; an index that is not stored reads as zero.
+    if (nb_stored() == 0) return T(0);
+    const elt_rsvector_<T> key(c);
+    const_iterator pos = std::lower_bound(this->begin(), this->end(), key);
+    const bool found = (pos != this->end()) && (pos->c == c);
+    return found ? pos->e : T(0);
+  }
+
+  template <typename T> struct linalg_traits<rsvector<T> > {
+    typedef rsvector<T> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_vector linalg_type;
+    typedef T value_type;
+    typedef ref_elt_vector<T, rsvector<T> > reference;
+    typedef rsvector_iterator<T>  iterator;
+    typedef rsvector_const_iterator<T> const_iterator;
+    typedef abstract_sparse storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &vec) { return vec.size(); }
+    static iterator begin(this_type &vec) { return iterator(vec.begin()); }
+    static const_iterator begin(const this_type &vec)
+    { return const_iterator(vec.begin()); }
+    static iterator end(this_type &vec) { return iterator(vec.end()); }
+    static const_iterator end(const this_type &vec)
+    { return const_iterator(vec.end()); }
+    static origin_type* origin(this_type &vec) { return &vec; }
+    static const origin_type* origin(const this_type &vec) { return &vec; }
+    /* The iterator pair is ignored: the whole vector is emptied. */
+    static void clear(origin_type* org, const iterator &, const iterator &)
+    { org->clear(); }
+    static void do_clear(this_type &vec) { vec.clear(); }
+    /* Indexed access: by value when const, via ref_elt_vector otherwise. */
+    static value_type access(const origin_type *org, const const_iterator &,
+                             const const_iterator &, size_type i) { return (*org)[i]; }
+    static reference access(origin_type *org, const iterator &,
+                            const iterator &, size_type i) { return (*org)[i]; }
+    static void resize(this_type &vec, size_type n) { vec.resize(n); }
+  };
+
+  template<typename T> std::ostream &operator << // prints v with gmm::write
+  (std::ostream &o, const rsvector<T>& v) { gmm::write(o,v); return o; }
+
+  /******* Optimized operations for rsvector<T> ****************************/
+
+  template <typename T> inline
+  void copy(const rsvector<T> &v1, rsvector<T> &v2) { // same-type fast path
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    v2 = v1;  // whole-container assignment
+  }
+  template <typename T> inline
+  void copy(const rsvector<T> &v1, const simple_vector_ref<rsvector<T> *> &v2){
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    simple_vector_ref<rsvector<T> *> &ref
+      = const_cast<simple_vector_ref<rsvector<T> *> &>(v2);
+    rsvector<T> &target = *const_cast<rsvector<T> *>(v2.origin);
+    target = v1;  // v2 is const only formally; its cached range is refreshed
+    ref.begin_ = vect_begin(target); ref.end_ = vect_end(target);
+  }
+  // Source given through a const vector reference: copy what it refers to.
+  template <typename T> inline
+  void copy(const simple_vector_ref<const rsvector<T> *> &v1, rsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+  template <typename T> inline  // source given through a vector reference
+  void copy(const simple_vector_ref<rsvector<T> *> &v1, rsvector<T> &v2)
+  { copy(*v1.origin, v2); }
+
+  template <typename V, typename T> inline
+  void add(const V &v1, rsvector<T> &v2) {  // v2 += v1
+    // NOTE(review): add(v, v) is a no-op here (v is not doubled) -- intended?
+    if ((const void *)(&v1) == (const void *)(&v2)) return;
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    add_rsvector(v1, v2, typename linalg_traits<V>::storage_type());
+  }
+
+  template <typename V, typename T> // dense v1: add() with (dense, sparse) tags
+  inline void add_rsvector(const V &v1, rsvector<T> &v2, abstract_dense)
+  { add(v1, v2, abstract_dense(), abstract_sparse()); }
+
+  template <typename V, typename T> // skyline v1: add() with (skyline, sparse) tags
+  inline void add_rsvector(const V &v1, rsvector<T> &v2, abstract_skyline)
+  { add(v1, v2, abstract_skyline(), abstract_sparse()); }
+
+  template <typename V, typename T> // sparse v1: dispatch again on index_sorted
+  void add_rsvector(const V &v1, rsvector<T> &v2, abstract_sparse) {
+    add_rsvector(v1, v2, typename linalg_traits<V>::index_sorted());
+  }
+
+  template <typename V, typename T> // unsorted sparse v1: add() with (sparse, sparse) tags
+  void add_rsvector(const V &v1, rsvector<T> &v2, linalg_false) {
+    add(v1, v2, abstract_sparse(), abstract_sparse());
+  }
+
+  template <typename V, typename T> // index-sorted sparse v1: merged into v2 in place
+  void add_rsvector(const V &v1, rsvector<T> &v2, linalg_true) {
+    typename linalg_traits<V>::const_iterator it1 = vect_const_begin(v1),
+      ite1 = vect_const_end(v1);
+    typename rsvector<T>::iterator it2 = v2.begin(), ite2 = v2.end(), it3;
+    size_type nbc = 0, old_nbc = v2.nb_stored(); // nbc: size of the index union
+    for (; it1 != ite1 && it2 != ite2 ; ++nbc) // pass 1: count the union
+      if (it1.index() == it2->c) { ++it1; ++it2; }
+      else if (it1.index() < it2->c) ++it1; else ++it2;
+    for (; it1 != ite1; ++it1) ++nbc;
+    for (; it2 != ite2; ++it2) ++nbc;
+    // Pass 2: merge from the back, so unread entries of v2 are not overwritten.
+    v2.base_resize(nbc);
+    it3 = v2.begin() + old_nbc;
+    it2 = v2.end(); ite2 = v2.begin();
+    it1 = vect_end(v1); ite1 = vect_const_begin(v1);
+    while (it1 != ite1 && it3 != ite2) {
+      --it3; --it1; --it2; // it3: old v2 entry, it1: v1 entry, it2: output slot
+      if (it3->c > it1.index()) { *it2 = *it3; ++it1; } // v2 only (undo --it1)
+      else if (it3->c == it1.index()) { *it2=*it3; it2->e+=*it1; } // both: sum
+      else { it2->c = it1.index(); it2->e = *it1; ++it3; } // v1 only (undo --it3)
+    }
+    while (it1 != ite1) { --it1; --it2; it2->c = it1.index(); it2->e = *it1; }
+  }
+
+  template <typename V, typename T> void copy(const V &v1, rsvector<T> &v2) {
+    if ((const void *)(&v1) == (const void *)(&v2)) return;  // self-copy
+    GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
+    // Warn when v1 and v2 are reported to share an origin, then copy anyway.
+    if (same_origin(v1, v2))
+      GMM_WARNING2("a conflict is possible in vector copy\n");
+    copy_rsvector(v1, v2, typename linalg_traits<V>::storage_type());
+  }
+
+  template <typename V, typename T> // dense v1: copy_vect() with (dense, sparse) tags
+  void copy_rsvector(const V &v1, rsvector<T> &v2, abstract_dense)
+  { copy_vect(v1, v2, abstract_dense(), abstract_sparse()); }
+
+  template <typename V, typename T> // skyline v1: copy_vect() with (skyline, sparse) tags
+  void copy_rsvector(const V &v1, rsvector<T> &v2, abstract_skyline)
+  { copy_vect(v1, v2, abstract_skyline(), abstract_sparse()); }
+
+  template <typename V, typename T> // sparse v1: dispatch again on index_sorted
+  void copy_rsvector(const V &v1, rsvector<T> &v2, abstract_sparse) {
+    copy_rsvector(v1, v2, typename linalg_traits<V>::index_sorted());
+  }
+  
+  template <typename V, typename T2>
+  void copy_rsvector(const V &v1, rsvector<T2> &v2, linalg_true) {
+    typedef typename linalg_traits<V>::value_type T1;
+    typedef typename linalg_traits<V>::const_iterator src_iterator;
+    src_iterator src = vect_const_begin(v1), src_end = vect_const_end(v1);
+    // One slot per stored entry; zeros are skipped, the unused tail trimmed.
+    v2.base_resize(nnz(v1));
+    typename rsvector<T2>::iterator dst = v2.begin();
+    for (; src != src_end; ++src)
+      if ((*src) != T1(0)) { dst->c = src.index(); dst->e = *src; ++dst; }
+    v2.base_resize(size_type(dst - v2.begin()));
+  }
+
+  template <typename V, typename T2>
+  void copy_rsvector(const V &v1, rsvector<T2> &v2, linalg_false) {
+    typedef typename linalg_traits<V>::value_type T1;
+    typedef typename linalg_traits<V>::const_iterator src_iterator;
+    src_iterator src = vect_const_begin(v1), src_end = vect_const_end(v1);
+    // One slot per stored entry; zeros are skipped, the unused tail trimmed.
+    v2.base_resize(nnz(v1));
+    typename rsvector<T2>::iterator dst = v2.begin();
+    for (; src != src_end; ++src)
+      if ((*src) != T1(0)) { dst->c = src.index(); dst->e = *src; ++dst; }
+    v2.base_resize(size_type(dst - v2.begin()));
+    std::sort(v2.begin(), v2.end());  // v1 is not index-sorted: sort here
+  }
+  
+  template <typename T> inline void clean(rsvector<T> &v, double eps) {
+    typedef typename number_traits<T>::magnitude_type R;
+    // Removes the entries of magnitude <= eps. One threshold, R(eps), is used
+    // for every entry: the scan for the first removal used to compare against
+    // the raw double, so a borderline entry was kept or removed depending on
+    // its position whenever R is narrower than double.
+    const R threshold = R(eps);
+    typename rsvector<T>::iterator it = v.begin(), ite = v.end(), itc = it;
+    for (; it != ite; ++it)  // compact the kept entries at the front
+      if (!(gmm::abs(it->e) <= threshold)) { if (itc != it) *itc = *it; ++itc; }
+    v.base_resize(size_type(itc - v.begin()));
+  }
+
+  template <typename T>
+  inline void clean(const simple_vector_ref<rsvector<T> *> &l, double eps) {
+    // Constness is cast away so that the referenced vector can be modified.
+    simple_vector_ref<rsvector<T> *> &ref
+      = const_cast<simple_vector_ref<rsvector<T> *> &>(l);
+    rsvector<T> &target = *const_cast<rsvector<T> *>(l.origin);
+    clean(target, eps);  // may remove entries: refresh the cached range
+    ref.begin_ = vect_begin(target); ref.end_ = vect_end(target);
+  }
+  
+  template <typename T>  // number of entries actually stored
+  inline size_type nnz(const rsvector<T>& l) { return l.nb_stored(); }
+
+  /*************************************************************************/
+  /*                                                                       */
+  /* Class slvector: 'sky-line' vector.                                    */
+  /*                                                                       */
+  /*************************************************************************/
+
+  template<typename T> struct slvector_iterator {
+    typedef T value_type;
+    typedef T *pointer;
+    typedef T &reference;
+    typedef ptrdiff_t difference_type;
+    typedef std::random_access_iterator_tag iterator_category;
+    typedef size_t size_type;
+    typedef slvector_iterator<T> iterator;
+    typedef typename std::vector<T>::iterator base_iterator;
+
+    base_iterator it;  /* position in the stored values */
+    size_type shift;   /* index reported by index() for that position */
+
+    /* Every move updates the position and the index together. */
+    iterator &operator ++()
+    { ++it; ++shift; return *this; }
+    iterator &operator --()
+    { --it; --shift; return *this; }
+    iterator operator ++(int)
+    { iterator tmp = *this; ++(*(this)); return tmp; }
+    iterator operator --(int)
+    { iterator tmp = *this; --(*(this)); return tmp; }
+    iterator &operator +=(difference_type i)
+    { it += i; shift += i; return *this; }
+    iterator &operator -=(difference_type i)
+    { it -= i; shift -= i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator tmp = *this; return (tmp += i); }
+    iterator operator -(difference_type i) const
+    { iterator tmp = *this; return (tmp -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+    /* operator[] is const and takes a difference_type (was int, non-const). */
+    reference operator *() const
+    { return *it; }
+    reference operator [](difference_type ii) const
+    { return *(it + ii); }
+    /* Comparisons look at the position only. */
+    bool operator ==(const iterator &i) const
+    { return it == i.it; }
+    bool operator !=(const iterator &i) const
+    { return !(i == *this); }
+    bool operator < (const iterator &i) const
+    { return it < i.it; }
+    size_type index(void) const { return shift; }
+
+    slvector_iterator(void) : it(), shift(0) {}
+    slvector_iterator(const base_iterator &iter, size_type s)
+      : it(iter), shift(s) {}
+  };
+
+  template<typename T> struct slvector_const_iterator {
+    typedef T value_type;
+    typedef const T *pointer;
+    typedef value_type reference;
+    typedef ptrdiff_t difference_type;
+    typedef std::random_access_iterator_tag iterator_category;
+    typedef size_t size_type;
+    typedef slvector_const_iterator<T> iterator;
+    typedef typename std::vector<T>::const_iterator base_iterator;
+
+    base_iterator it;  /* position in the stored values */
+    size_type shift;   /* index reported by index() for that position */
+
+    /* Every move updates the position and the index together. */
+    iterator &operator ++()
+    { ++it; ++shift; return *this; }
+    iterator &operator --()
+    { --it; --shift; return *this; }
+    iterator operator ++(int)
+    { iterator tmp = *this; ++(*(this)); return tmp; }
+    iterator operator --(int)
+    { iterator tmp = *this; --(*(this)); return tmp; }
+    iterator &operator +=(difference_type i)
+    { it += i; shift += i; return *this; }
+    iterator &operator -=(difference_type i)
+    { it -= i; shift -= i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator tmp = *this; return (tmp += i); }
+    iterator operator -(difference_type i) const
+    { iterator tmp = *this; return (tmp -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+    /* Values are returned by copy; operator[] is const (was int, non-const). */
+    value_type operator *() const
+    { return *it; }
+    value_type operator [](difference_type ii) const
+    { return *(it + ii); }
+    /* Comparisons look at the position only. */
+    bool operator ==(const iterator &i) const
+    { return it == i.it; }
+    bool operator !=(const iterator &i) const
+    { return !(i == *this); }
+    bool operator < (const iterator &i) const
+    { return it < i.it; }
+    size_type index(void) const { return shift; }
+
+    slvector_const_iterator(void) : it(), shift(0) {}
+    slvector_const_iterator(const slvector_iterator<T>& iter)
+      : it(iter.it), shift(iter.shift) {}
+    slvector_const_iterator(const base_iterator &iter, size_type s)
+      : it(iter), shift(s) {}
+  };
+
+
+  /** Skyline vector: a contiguous band of values starting at index shift;
+      the indices outside of the band read as zero. */
+  template <typename T> class slvector {
+
+  public :
+    typedef slvector_iterator<T> iterators;
+    typedef slvector_const_iterator<T> const_iterators;
+    typedef typename std::vector<T>::size_type size_type;
+    typedef T value_type;
+
+  protected :
+    std::vector<T> data;  /* values of the band, data[k] has index shift+k */
+    size_type shift;      /* index of the first stored value               */
+    size_type size_;      /* logical size of the vector                    */
+
+
+  public :
+
+    size_type size(void) const { return size_; }
+    size_type first(void) const { return shift; }
+    size_type last(void) const { return shift + data.size(); }
+    ref_elt_vector<T, slvector<T> > operator [](size_type c)
+    { return ref_elt_vector<T, slvector<T> >(this, c); }
+
+    typename std::vector<T>::iterator data_begin(void) { return data.begin(); }
+    typename std::vector<T>::iterator data_end(void) { return data.end(); }
+    typename std::vector<T>::const_iterator data_begin(void) const
+      { return data.begin(); }
+    typename std::vector<T>::const_iterator data_end(void) const
+      { return data.end(); }
+
+    void w(size_type c, const T &e);   /* writes e at index c    */
+    void wa(size_type c, const T &e);  /* adds e to the entry c  */
+    T r(size_type c) const {           /* reads the entry c      */
+      GMM_ASSERT2(c < size_, "out of range");
+      const bool stored = (c >= shift) && (c < shift + data.size());
+      return stored ? data[c - shift] : T(0);
+    }
+
+    inline T operator [](size_type c) const { return r(c); }
+    void resize(size_type);
+    void clear(void) { data.resize(0); shift = 0; }  /* size_ is kept */
+    void swap(slvector<T> &v) {
+      data.swap(v.data);
+      std::swap(shift, v.shift);
+      std::swap(size_, v.size_);
+    }
+
+
+    slvector(void) : data(0), shift(0), size_(0) {}
+    explicit slvector(size_type l) : data(0), shift(0), size_(l) {}
+    slvector(size_type l, size_type d, size_type s)
+      : data(d), shift(s), size_(l) {}
+
+  };
+
+  template<typename T>  void slvector<T>::resize(size_type n) {
+    // Shrinking below last() trims the band, or empties it if it starts >= n.
+    if (n < last() && shift >= n) clear();
+    else if (n < last()) data.resize(n - shift);
+    size_ = n;
+  }
+
+  template<typename T>  void slvector<T>::w(size_type c, const T &e) {
+    // Writes e at index c, zero-extending the stored band to reach c if needed.
+    GMM_ASSERT2(c < size_, "out of range");
+    size_type s = data.size();
+    if (!s) { data.resize(1); shift = c; }
+    else if (c < shift) {
+      // Grow by gap slots at the front. copy_backward replaces a manual loop
+      // that stepped an iterator before begin() and formed out-of-range
+      // intermediates (it2 - shift, it + shift): undefined behaviour.
+      const size_type gap = shift - c;
+      data.resize(s + gap);
+      std::copy_backward(data.begin(), data.begin() + s, data.end());
+      std::fill(data.begin(), data.begin() + gap, T(0));
+      shift = c;
+    }
+    else if (c >= shift + s) data.resize(c - shift + 1, T(0));
+    data[c - shift] = e;
+  }
+
+  template<typename T>  void slvector<T>::wa(size_type c, const T &e) {
+    // Adds e to the entry c, zero-extending the stored band to reach c if
+    // needed; a slot created by the extension simply receives e.
+    GMM_ASSERT2(c < size_, "out of range");
+    size_type s = data.size();
+    if (!s) { data.resize(1, e); shift = c; return; }
+    if (c < shift) {
+      // Grow by gap slots at the front. copy_backward replaces a manual loop
+      // that stepped an iterator before begin() and formed out-of-range
+      // intermediates (it2 - shift, it + shift): undefined behaviour.
+      const size_type gap = shift - c;
+      data.resize(s + gap);
+      std::copy_backward(data.begin(), data.begin() + s, data.end());
+      std::fill(data.begin(), data.begin() + gap, T(0));
+      shift = c;
+      data[0] = e;  // c == shift now
+      return;
+    }
+    if (c >= shift + s)
+      { data.resize(c - shift + 1, T(0)); data[c - shift] = e; return; }
+    data[c - shift] += e;
+  }
+  
+  
+  template <typename T> struct linalg_traits<slvector<T> > {
+    typedef slvector<T> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_vector linalg_type;
+    typedef T value_type;
+    typedef ref_elt_vector<T, slvector<T> > reference;
+    typedef slvector_iterator<T>  iterator;
+    typedef slvector_const_iterator<T> const_iterator;
+    typedef abstract_skyline storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &vec) { return vec.size(); }
+    static iterator begin(this_type &vec)
+      { return iterator(vec.data_begin(), vec.first()); }
+    static const_iterator begin(const this_type &vec)
+      { return const_iterator(vec.data_begin(), vec.first()); }
+    static iterator end(this_type &vec)
+      { return iterator(vec.data_end(), vec.last()); }
+    static const_iterator end(const this_type &vec)
+      { return const_iterator(vec.data_end(), vec.last()); }
+    static origin_type* origin(this_type &vec) { return &vec; }
+    static const origin_type* origin(const this_type &vec) { return &vec; }
+    /* The iterator pair is ignored: the whole vector is emptied. */
+    static void clear(origin_type* org, const iterator &, const iterator &)
+    { org->clear(); }
+    static void do_clear(this_type &vec) { vec.clear(); }
+    /* Indexed access: by value when const, via ref_elt_vector otherwise. */
+    static value_type access(const origin_type *org, const const_iterator &,
+                             const const_iterator &, size_type i) { return (*org)[i]; }
+    static reference access(origin_type *org, const iterator &,
+                            const iterator &, size_type i) { return (*org)[i]; }
+    static void resize(this_type &vec, size_type n) { vec.resize(n); }
+  };
+
+  template<typename T> std::ostream &operator << // prints v with gmm::write
+  (std::ostream &o, const slvector<T>& v) { gmm::write(o,v); return o; }
+
+  template <typename T>  // width of the stored band [first(), last())
+  inline size_type nnz(const slvector<T>& l) { return l.last() - l.first(); }
+
+}
+
+namespace std { // NOTE(review): these are overloads added to std, not specializations
+  template <typename T> void swap(gmm::wsvector<T> &v, gmm::wsvector<T> &w)
+  { v.swap(w);} // forwards to the member swap
+  template <typename T> void swap(gmm::rsvector<T> &v, gmm::rsvector<T> &w)
+  { v.swap(w);} // forwards to the member swap
+  template <typename T> void swap(gmm::slvector<T> &v, gmm::slvector<T> &w)
+  { v.swap(w);} // forwards to the member swap
+}
+
+
+
+#endif /* GMM_VECTOR_H__ */
diff --git a/gmm/gmm_vector_to_matrix.h b/gmm/gmm_vector_to_matrix.h
new file mode 100644
index 000000000..83fc0c54f
--- /dev/null
+++ b/gmm/gmm_vector_to_matrix.h
@@ -0,0 +1,340 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_vector_to_matrix.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date December 6, 2003.
+   @brief View vectors as row or column matrices. */
+#ifndef GMM_VECTOR_TO_MATRIX_H__
+#define GMM_VECTOR_TO_MATRIX_H__
+
+#include "gmm_interface.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*	     row vector -> transform a vector into a (1, n) matrix.        */
+  /* ********************************************************************* */
+
+  template <typename PT> struct gen_row_vector {
+    typedef gen_row_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename std::iterator_traits<PT>::reference ref_V;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef V * CPT; // pointer flavour of the view accepted by the last ctor
+
+    simple_vector_ref<PT> vec; // the wrapped vector, i.e. the single row
+
+    gen_row_vector() {}
+    gen_row_vector(ref_V v) : vec(v) {}
+    gen_row_vector(const gen_row_vector<CPT> &cr) : vec(cr.vec) {}
+
+    size_type nrows() const { return 1; }
+    size_type ncols() const { return vect_size(vec); }
+    // The row index is ignored: the matrix has exactly one row.
+    reference operator()(size_type, size_type j) const { return vec[j]; }
+  };
+
+  template <typename PT>
+  struct gen_row_vector_iterator { // iterates the rows of a one-row view
+    typedef gen_row_vector<PT> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef simple_vector_ref<PT> value_type;
+    typedef const simple_vector_ref<PT> *pointer;
+    typedef const simple_vector_ref<PT> &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_row_vector_iterator<PT> iterator;
+
+    simple_vector_ref<PT> vec; // the only row; returned by * and []
+    bool isend;                // false: on the row, true: one past it
+
+    iterator &operator ++()   { isend = true; return *this; }
+    iterator &operator --()   { isend = false; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    // Only two positions exist: any forward move lands on end, any backward
+    // move on begin, in agreement with ++ and -- (i == 0 changes nothing).
+    iterator &operator +=(difference_type i)
+    { if (i) isend = (i > 0); return *this; }
+    iterator &operator -=(difference_type i)
+    { if (i) isend = (i < 0); return *this; }
+    iterator operator +(difference_type i) const
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const // 1, 0 or -1
+    { return difference_type(isend) - difference_type(i.isend); }
+
+    const simple_vector_ref<PT>& operator *() const { return vec; }
+    const simple_vector_ref<PT>& operator [](int) const { return vec; }
+
+    bool operator ==(const iterator &i) const { return (isend == i.isend); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (*this - i < 0); }
+
+    gen_row_vector_iterator(void) : isend(false) {} // defined default state
+    gen_row_vector_iterator(const gen_row_vector_iterator<MPT> &itm)
+      : vec(itm.vec), isend(itm.isend) {}
+    gen_row_vector_iterator(const gen_row_vector<PT> &m, bool iis_end)
+      : vec(m.vec), isend(iis_end) { }
+
+  };
+
+  template <typename PT>
+  struct linalg_traits<gen_row_vector<PT> > { // traits of the one-row view
+    typedef gen_row_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type; // the view is declared as a matrix
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_col_type; // no column access is offered
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef simple_vector_ref<const V *> const_sub_row_type;
+    typedef typename select_ref<abstract_null_type, // NOTE(review): presumably the null type for const PT - confirm
+            simple_vector_ref<V *>, PT>::ref_type sub_row_type;
+    typedef gen_row_vector_iterator<typename const_pointer<PT>::pointer>
+            const_row_iterator;
+    typedef typename select_ref<abstract_null_type, // NOTE(review): same selection as sub_row_type - confirm
+	    gen_row_vector_iterator<PT>, PT>::ref_type row_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type nrows(const this_type &) { return 1; } // always one row
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it) { return *it; }
+    static sub_row_type row(const row_iterator &it) { return *it; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m, false); } // isend == false
+    static row_iterator row_begin(this_type &m)
+    { return row_iterator(m, false); } // isend == false
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m, true); } // isend == true
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m, true); } // isend == true
+    static origin_type* origin(this_type &m) { return m.vec.origin; }
+    static const origin_type* origin(const this_type &m)
+    { return m.vec.origin; }
+    static void do_clear(this_type &m)
+    { clear(row(mat_row_begin(m))); } // calls clear on the single row
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return itrow.vec[i]; } // i indexes into the wrapped vector
+    static reference access(const row_iterator &itrow, size_type i)
+    { return itrow.vec[i]; } // i indexes into the wrapped vector
+  };
+  
+  template <typename PT>
+  std::ostream &operator <<(std::ostream &os, const gen_row_vector<PT>& mat)
+  { gmm::write(os, mat); return os; } // formatting is delegated to gmm::write
+
+  /* ********************************************************************* */
+  /*	     col vector -> transform a vector into a (n, 1) matrix.        */
+  /* ********************************************************************* */
+
+  template <typename PT> struct gen_col_vector {
+    typedef gen_col_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename std::iterator_traits<PT>::reference ref_V;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef V * CPT; // pointer flavour of the view accepted by the last ctor
+
+    simple_vector_ref<PT> vec; // the wrapped vector, i.e. the single column
+
+    gen_col_vector() {}
+    gen_col_vector(ref_V v) : vec(v) {}
+    gen_col_vector(const gen_col_vector<CPT> &cr) : vec(cr.vec) {}
+
+    size_type ncols() const { return 1; }
+    size_type nrows() const { return vect_size(vec); }
+    // The column index is ignored: the matrix has exactly one column.
+    reference operator()(size_type i, size_type) const { return vec[i]; }
+  };
+
+  template <typename PT>
+  struct gen_col_vector_iterator { // iterates the columns of a one-column view
+    typedef gen_col_vector<PT> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef simple_vector_ref<PT> value_type;
+    typedef const simple_vector_ref<PT> *pointer;
+    typedef const simple_vector_ref<PT> &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_col_vector_iterator<PT> iterator;
+
+    simple_vector_ref<PT> vec; // the only column; returned by * and []
+    bool isend;                // false: on the column, true: one past it
+
+    iterator &operator ++()   { isend = true; return *this; }
+    iterator &operator --()   { isend = false; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    // Only two positions exist: any forward move lands on end, any backward
+    // move on begin, in agreement with ++ and -- (i == 0 changes nothing).
+    iterator &operator +=(difference_type i)
+    { if (i) isend = (i > 0); return *this; }
+    iterator &operator -=(difference_type i)
+    { if (i) isend = (i < 0); return *this; }
+    iterator operator +(difference_type i) const
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const // 1, 0 or -1
+    { return difference_type(isend) - difference_type(i.isend); }
+
+    const simple_vector_ref<PT>& operator *() const { return vec; }
+    const simple_vector_ref<PT>& operator [](int) const { return vec; }
+
+    bool operator ==(const iterator &i) const { return (isend == i.isend); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (*this - i < 0); }
+
+    gen_col_vector_iterator(void) : isend(false) {} // defined default state
+    gen_col_vector_iterator(const gen_col_vector_iterator<MPT> &itm)
+      : vec(itm.vec), isend(itm.isend) {}
+    gen_col_vector_iterator(const gen_col_vector<PT> &m, bool iis_end)
+      : vec(m.vec), isend(iis_end) { }
+
+  };
+
+  template <typename PT>
+  struct linalg_traits<gen_col_vector<PT> > { // traits of the one-column view
+    typedef gen_col_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type; // the view is declared as a matrix
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_row_type; // no row access is offered
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef simple_vector_ref<const V *> const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, // NOTE(review): presumably the null type for const PT - confirm
+            simple_vector_ref<V *>, PT>::ref_type sub_col_type;
+    typedef gen_col_vector_iterator<typename const_pointer<PT>::pointer>
+            const_col_iterator;
+    typedef typename select_ref<abstract_null_type, // NOTE(review): same selection as sub_col_type - confirm
+	    gen_col_vector_iterator<PT>, PT>::ref_type col_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type ncols(const this_type &) { return 1; } // always one column
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static const_sub_col_type col(const const_col_iterator &it) { return *it; }
+    static sub_col_type col(const col_iterator &it) { return *it; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m, false); } // isend == false
+    static col_iterator col_begin(this_type &m)
+    { return col_iterator(m, false); } // isend == false
+    static const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m, true); } // isend == true
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m, true); } // isend == true
+    static origin_type* origin(this_type &m) { return m.vec.origin; }
+    static const origin_type* origin(const this_type &m)
+    { return m.vec.origin; }
+    static void do_clear(this_type &m)
+    { clear(col(mat_col_begin(m))); } // calls clear on the single column
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return itcol.vec[i]; } // i indexes into the wrapped vector
+    static reference access(const col_iterator &itcol, size_type i)
+    { return itcol.vec[i]; } // i indexes into the wrapped vector
+  };
+  
+  template <typename PT>
+  std::ostream &operator <<(std::ostream &os, const gen_col_vector<PT>& mat)
+  { gmm::write(os, mat); return os; } // formatting is delegated to gmm::write
+
+  /* ******************************************************************** */
+  /*		col and row vectors                                       */
+  /* ******************************************************************** */
+
+  
+  template <class V> inline typename select_return<
+    gen_row_vector<const V *>, gen_row_vector<V *>, const V *>::return_type
+  row_vector(const V& v) { // one-row matrix view of a const vector
+    typedef typename select_return< gen_row_vector<const V *>,
+      gen_row_vector<V *>, const V *>::return_type row_view;
+    return row_view(linalg_cast(v));
+  }
+
+  template <class V> inline typename select_return<
+    gen_row_vector<const V *>, gen_row_vector<V *>, V *>::return_type
+  row_vector(V& v) { // one-row matrix view of a non-const vector
+    typedef typename select_return< gen_row_vector<const V *>,
+      gen_row_vector<V *>, V *>::return_type row_view;
+    return row_view(linalg_cast(v));
+  }
+ 
+  // Always builds the gen_row_vector<const V *> flavour of the row view.
+  template <class V> inline gen_row_vector<const V *> const_row_vector(V& v)
+  { return gen_row_vector<const V *>(v); }
+ 
+
+  template <class V> inline typename select_return<
+    gen_col_vector<const V *>, gen_col_vector<V *>, const V *>::return_type
+  col_vector(const V& v) { // one-column matrix view of a const vector
+    typedef typename select_return< gen_col_vector<const V *>,
+      gen_col_vector<V *>, const V *>::return_type col_view;
+    return col_view(linalg_cast(v));
+  }
+
+  template <class V> inline typename select_return<
+    gen_col_vector<const V *>, gen_col_vector<V *>, V *>::return_type
+  col_vector(V& v) { // one-column matrix view of a non-const vector
+    typedef typename select_return< gen_col_vector<const V *>,
+      gen_col_vector<V *>, V *>::return_type col_view;
+    return col_view(linalg_cast(v));
+  }
+ 
+  // Always builds the gen_col_vector<const V *> flavour of the column view.
+  template <class V> inline gen_col_vector<const V *> const_col_vector(V& v)
+  { return gen_col_vector<const V *>(v); }
+ 
+
+}
+
+#endif //  GMM_VECTOR_TO_MATRIX_H__
diff --git a/hecl b/hecl
index 34e28fe18..f949aabf5 160000
--- a/hecl
+++ b/hecl
@@ -1 +1 @@
-Subproject commit 34e28fe18c77efe661e04742f9b3350eba880267
+Subproject commit f949aabf5c4632df97746c273cab27a1ea1bffe4