DCLN cooking and various bug fixes

2025-07-15 02:05:53 +00:00 · 2017-10-16 19:51:53 -10:00 · 2017-10-16 19:51:53 -10:00 · 5149128b60
commit 5149128b60
parent 4af2d975f4
84 changed files with 23876 additions and 52 deletions
--- a/DataSpec/DNACommon/CMakeLists.txt
+++ b/DataSpec/DNACommon/CMakeLists.txt
@ -34,6 +34,7 @@ set(DNACOMMON_SOURCES
    BabeDead.hpp BabeDead.cpp
    RigInverter.hpp RigInverter.cpp
    AROTBuilder.hpp AROTBuilder.cpp
+    OBBTreeBuilder.hpp OBBTreeBuilder.cpp
    Tweaks/ITweak.hpp
    Tweaks/TweakWriter.hpp
    Tweaks/ITweakGame.hpp
--- a/DataSpec/DNACommon/DeafBabe.cpp
+++ b/DataSpec/DNACommon/DeafBabe.cpp
@ -83,16 +83,40 @@ template void DeafBabeSendToBlender<DNAMP1::DeafBabe>(hecl::BlenderConnection::P
 template void DeafBabeSendToBlender<DNAMP2::DeafBabe>(hecl::BlenderConnection::PyOutStream& os, const DNAMP2::DeafBabe& db, bool isDcln, atInt32 idx);
 template void DeafBabeSendToBlender<DNAMP1::DCLN::Collision>(hecl::BlenderConnection::PyOutStream& os, const DNAMP1::DCLN::Collision& db, bool isDcln, atInt32 idx);

+// Area-collision overload, enabled only when DEAFBABE is DNAMP1::DeafBabe or
+// DNAMP2::DeafBabe. Builds the collision octree with AROTBuilder, stores it in
+// db, then fills the header fields (version 3) and the bounding box taken
+// from fullAABB.
+template<class DEAFBABE>
+static void PopulateAreaFields(DEAFBABE& db,
+    const hecl::BlenderConnection::DataStream::ColMesh& colMesh,
+    const zeus::CAABox& fullAABB,
+    std::enable_if_t<std::is_same<DEAFBABE, DNAMP1::DeafBabe>::value ||
+                     std::is_same<DEAFBABE, DNAMP2::DeafBabe>::value, int>* = 0)
+{
+    AROTBuilder builder;
+    // buildCol returns a (buffer, size) pair; the buffer is moved into db.bspTree
+    auto octree = builder.buildCol(colMesh, db.rootNodeType);
+    static_cast<std::unique_ptr<atUint8[]>&>(db.bspTree) = std::move(octree.first);
+    db.bspSize = octree.second;
+
+    db.unk1 = 0x1000000;
+    // Evaluated after the tree size is stored. NOTE(review): the -8 presumably
+    // excludes leading header bytes from the stored length -- confirm against
+    // the file format.
+    db.length = db.binarySize(0) - 8;
+    db.magic = 0xDEAFBABE;
+    db.version = 3;
+    db.aabb[0] = fullAABB.min;
+    db.aabb[1] = fullAABB.max;
+}
+
+// DCLN overload, enabled only when DEAFBABE is DNAMP1::DCLN::Collision.
+// Only the magic, version (2) and a zeroed memSize are written here; colMesh
+// and fullAABB are accepted for signature parity with the area overload and
+// are not read.
+template<class DEAFBABE>
+static void PopulateAreaFields(DEAFBABE& db,
+    const hecl::BlenderConnection::DataStream::ColMesh& colMesh,
+    const zeus::CAABox& fullAABB,
+    std::enable_if_t<std::is_same<DEAFBABE, DNAMP1::DCLN::Collision>::value, int>* = 0)
+{
+    db.magic = 0xDEAFBABE;
+    db.version = 2;
+    db.memSize = 0;
+}
+
 template<class DEAFBABE>
 void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh)
 {
-    {
-        AROTBuilder builder;
-        auto octree = builder.buildCol(colMesh, db.rootNodeType);
-        static_cast<std::unique_ptr<atUint8[]>&>(db.bspTree) = std::move(octree.first);
-        db.bspSize = octree.second;
-    }
-
    db.materials.reserve(colMesh.materials.size());
    for (const hecl::BlenderConnection::DataStream::ColMesh::Material& mat : colMesh.materials)
    {
@ -186,15 +210,11 @@ void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataS
    db.triMatsCount = colMesh.trianges.size();
    db.triangleEdgesCount = colMesh.trianges.size() * 3;

-    db.unk1 = 0x1000000;
-    db.length = db.binarySize(0) - 8;
-    db.magic = 0xDEAFBABE;
-    db.version = 3;
-    db.aabb[0] = fullAABB.min;
-    db.aabb[1] = fullAABB.max;
+    PopulateAreaFields(db, colMesh, fullAABB);
 }

 template void DeafBabeBuildFromBlender<DNAMP1::DeafBabe>(DNAMP1::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);
 template void DeafBabeBuildFromBlender<DNAMP2::DeafBabe>(DNAMP2::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);
+template void DeafBabeBuildFromBlender<DNAMP1::DCLN::Collision>(DNAMP1::DCLN::Collision& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh);

 }
--- a/DataSpec/DNACommon/OBBTreeBuilder.cpp
+++ b/DataSpec/DNACommon/OBBTreeBuilder.cpp
@ -0,0 +1,256 @@
+#include <athena/Types.hpp>
+#include "OBBTreeBuilder.hpp"
+#include "zeus/CTransform.hpp"
+#include "DataSpec/DNAMP1/DCLN.hpp"
+#include "gmm/gmm.h"
+
+namespace DataSpec
+{
+
+using ColMesh = hecl::BlenderConnection::DataStream::ColMesh;
+
+// Result of fitting an oriented bounding box to a set of triangles.
+struct FittedOBB
+{
+    // box orientation (basis) and center (origin)
+    zeus::CTransform xf;
+    // half of the box size along each of the box's own axes
+    zeus::CVector3f he;
+};
+
+// Returns the list [0, 1, ..., N-1] where N is the number of triangles in
+// mesh; this is the triangle set handed to the root of the OBB tree.
+static std::vector<int> MakeRootTriangleIndex(const ColMesh& mesh)
+{
+    const size_t triCount = mesh.trianges.size();
+    std::vector<int> ret;
+    ret.reserve(triCount);
+    // iterate with an unsigned counter to avoid a signed/unsigned comparison
+    for (size_t i = 0; i < triCount; ++i)
+        ret.push_back(int(i));
+    return ret;
+}
+
+// Gathers the distinct vertex indices referenced by the three edges of
+// triangle triIdx. Each edge contributes both of its endpoints; duplicates
+// collapse inside the set.
+static std::unordered_set<uint32_t> GetTriangleVerts(const ColMesh& mesh, int triIdx)
+{
+    std::unordered_set<uint32_t> uniqueVerts;
+    const ColMesh::Triangle& tri = mesh.trianges[triIdx];
+    for (int e = 0; e < 3; ++e)
+    {
+        const auto& edge = mesh.edges[tri.edges[e]];
+        uniqueVerts.insert(edge.verts[0]);
+        uniqueVerts.insert(edge.verts[1]);
+    }
+    return uniqueVerts;
+}
+
+// Derives an oriented bounding box from the covariance matrix C: the
+// eigenvectors of C become the box axes, and the box is sized so that it
+// contains every vertex of the triangles listed in index.
+static FittedOBB BuildFromCovarianceMatrix(gmm::dense_matrix<float>& C,
+                                           const ColMesh& mesh, const std::vector<int>& index)
+{
+    // eigen-decompose C
+    std::vector<float> eigval(3);
+    gmm::dense_matrix<float> eigvec(3,3);
+    gmm::symmetric_qr_algorithm(C, eigval, eigvec);
+
+    // columns 0, 1 and 2 of the eigenvector matrix act as the right, up and
+    // forward axes of the box
+    zeus::CVector3f r(eigvec(0,0), eigvec(1,0), eigvec(2,0));
+    zeus::CVector3f u(eigvec(0,1), eigvec(1,1), eigvec(2,1));
+    zeus::CVector3f f(eigvec(0,2), eigvec(1,2), eigvec(2,2));
+    r.normalize();
+    u.normalize();
+    f.normalize();
+
+    // store the axes in the rotation basis
+    FittedOBB ret;
+    ret.xf.basis[0][0]=r.x; ret.xf.basis[0][1]=r.y; ret.xf.basis[0][2]=r.z;
+    ret.xf.basis[1][0]=u.x; ret.xf.basis[1][1]=u.y; ret.xf.basis[1][2]=u.z;
+    ret.xf.basis[2][0]=f.x; ret.xf.basis[2][1]=f.y; ret.xf.basis[2][2]=f.z;
+
+    // measure the extents of the indexed triangles in the rotated frame
+    zeus::CVector3f lo(1e10f, 1e10f, 1e10f), hi(-1e10f, -1e10f, -1e10f);
+    for (int triIdx : index)
+    {
+        for (uint32_t v : GetTriangleVerts(mesh, triIdx))
+        {
+            const zeus::CVector3f& p = mesh.verts[v].val;
+            zeus::CVector3f projected(r.dot(p), u.dot(p), f.dot(p));
+            lo = zeus::min(lo, projected);
+            hi = zeus::max(hi, projected);
+        }
+    }
+
+    // the box center is the midpoint of the measured range, rotated back
+    // through the basis; the half-extents are half of the range
+    zeus::CVector3f midpoint = (hi + lo) * 0.5f;
+    ret.xf.origin = ret.xf.basis * midpoint;
+    ret.he = (hi - lo) * 0.5f;
+
+    return ret;
+}
+
+// Fits an oriented bounding box around the triangles of mesh listed in index.
+// Forms the area-weighted covariance matrix of those triangles, then uses
+// BuildFromCovarianceMatrix() to orient the box along its eigenvectors and
+// size it around the vertices of the indexed triangles (vertices that no
+// indexed triangle references are not considered).
+static FittedOBB FitOBB(const ColMesh& mesh, const std::vector<int>& index)
+{
+    float Ai, Am=0.0;
+    zeus::CVector3f mu, mui;
+    gmm::dense_matrix<float> C(3,3);
+    float cxx=0.0, cxy=0.0, cxz=0.0, cyy=0.0, cyz=0.0, czz=0.0;
+
+    // loop over the triangles to accumulate the area-weighted mean location
+    // and second moments
+    for (int i : index)
+    {
+        std::unordered_set<uint32_t> verts = GetTriangleVerts(mesh, i);
+        // a triangle with fewer than three distinct vertices has no area and
+        // would advance the iterator below past the end of the set
+        if (verts.size() < 3)
+            continue;
+        auto it = verts.begin();
+        zeus::CVector3f p = mesh.verts[*it++].val;
+        zeus::CVector3f q = mesh.verts[*it++].val;
+        zeus::CVector3f r = mesh.verts[*it++].val;
+        mui = (p+q+r)/3.f;
+        Ai = (q-p).cross(r-p).magnitude() / 2.f;
+        mu += mui*Ai;
+        Am += Ai;
+
+        // these bits set the c terms to Am*E[xx], Am*E[xy], Am*E[xz]....
+        cxx += ( 9.0*mui.x*mui.x + p.x*p.x + q.x*q.x + r.x*r.x )*(Ai/12.0);
+        cxy += ( 9.0*mui.x*mui.y + p.x*p.y + q.x*q.y + r.x*r.y )*(Ai/12.0);
+        cxz += ( 9.0*mui.x*mui.z + p.x*p.z + q.x*q.z + r.x*r.z )*(Ai/12.0);
+        cyy += ( 9.0*mui.y*mui.y + p.y*p.y + q.y*q.y + r.y*r.y )*(Ai/12.0);
+        cyz += ( 9.0*mui.y*mui.z + p.y*p.z + q.y*q.z + r.y*r.z )*(Ai/12.0);
+        czz += ( 9.0*mui.z*mui.z + p.z*p.z + q.z*q.z + r.z*r.z )*(Ai/12.0);
+    }
+    // divide out the Am fraction from the average position and covariance
+    // terms; skipped when the total area is zero so that no NaNs reach the
+    // eigen solver (all terms are zero in that case)
+    if (Am > 0.f)
+    {
+        mu = mu / Am;
+        cxx /= Am; cxy /= Am; cxz /= Am; cyy /= Am; cyz /= Am; czz /= Am;
+    }
+
+    // now subtract off the E[x]*E[x], E[x]*E[y], ... terms
+    cxx -= mu.x*mu.x; cxy -= mu.x*mu.y; cxz -= mu.x*mu.z;
+    cyy -= mu.y*mu.y; cyz -= mu.y*mu.z; czz -= mu.z*mu.z;
+
+    // now build the (symmetric) covariance matrix
+    C(0,0)=cxx; C(0,1)=cxy; C(0,2)=cxz;
+    C(1,0)=cxy; C(1,1)=cyy; C(1,2)=cyz;
+    C(2,0)=cxz; C(2,1)=cyz; C(2,2)=czz;
+
+    // set the obb parameters from the covariance matrix
+    return BuildFromCovarianceMatrix(C, mesh, index);
+}
+
+// Turns node n into a leaf: clears both children and attaches leaf data that
+// lists the three edge indices of every triangle in index, in index order.
+template <typename Node>
+static void MakeLeaf(const ColMesh& mesh, const std::vector<int>& index, Node& n)
+{
+    auto leaf = std::make_unique<typename Node::LeafData>();
+    leaf->edgeIndexCount = atUint32(index.size() * 3);
+    leaf->edgeIndices.reserve(leaf->edgeIndexCount);
+    for (int triIdx : index)
+    {
+        const ColMesh::Triangle& tri = mesh.trianges[triIdx];
+        leaf->edgeIndices.push_back(tri.edges[0]);
+        leaf->edgeIndices.push_back(tri.edges[1]);
+        leaf->edgeIndices.push_back(tri.edges[2]);
+    }
+
+    n.left.reset();
+    n.right.reset();
+    n.isLeaf = true;
+    n.leafData = std::move(leaf);
+}
+
+// Builds the OBB tree node that covers the triangles listed in index, and
+// recursively its children.
+//
+// The node becomes a leaf when the product of the fitted half-extents is
+// below 1.0, or when no axis splits the triangles so that the larger side
+// holds fewer than 3/4 of them. Otherwise the triangles are split about the
+// box center along the axis whose larger side is smallest; a triangle with
+// vertices on both sides of the split is placed in both children.
+template <typename Node>
+static std::unique_ptr<Node> RecursiveMakeNode(const ColMesh& mesh, const std::vector<int>& index)
+{
+    // calculate root OBB
+    FittedOBB obb = FitOBB(mesh, index);
+
+    // make results row-major and also invert the rotation basis
+    obb.xf.basis.transpose();
+
+    std::unique_ptr<Node> n = std::make_unique<Node>();
+    for (int i = 0; i < 3; ++i)
+    {
+        n->xf[i] = zeus::CVector4f{obb.xf.basis[i]};
+        n->xf[i].vec[3] = obb.xf.origin[i];
+    }
+    n->halfExtent = obb.he;
+
+    // terminate branch when the product of the half-extents is < 1.0
+    if (obb.he[0] * obb.he[1] * obb.he[2] < 1.f)
+    {
+        MakeLeaf(mesh, index, *n);
+        return n;
+    }
+
+    n->isLeaf = false;
+
+    std::vector<int> indexNeg[3];
+    std::vector<int> indexPos[3];
+    for (int c = 0; c < 3; ++c)
+    {
+        indexNeg[c].reserve(index.size());
+        indexPos[c].reserve(index.size());
+    }
+
+    // Classify each triangle against all three axes in a single pass, so the
+    // vertex set and the box-space transform are computed once per triangle
+    // rather than once per axis and side. A triangle lands on the negative
+    // side of axis c if any vertex has a negative c coordinate, and on the
+    // positive side if any vertex has a non-negative one.
+    for (int i : index)
+    {
+        bool anyNeg[3] = {false, false, false};
+        bool anyPos[3] = {false, false, false};
+        for (uint32_t vtx : GetTriangleVerts(mesh, i))
+        {
+            zeus::CVector3f v = mesh.verts[vtx].val;
+            v = obb.xf.basis * (v - obb.xf.origin);
+            for (int c = 0; c < 3; ++c)
+            {
+                if (v[c] < 0.f)
+                    anyNeg[c] = true;
+                if (v[c] >= 0.f)
+                    anyPos[c] = true;
+            }
+        }
+        for (int c = 0; c < 3; ++c)
+        {
+            if (anyNeg[c])
+                indexNeg[c].push_back(i);
+            if (anyPos[c])
+                indexPos[c].push_back(i);
+        }
+    }
+
+    // pick the axis whose larger side is smallest, as long as that side
+    // holds fewer than 3/4 of the triangles
+    size_t idxMin = index.size();
+    int minComp = -1;
+    for (int c = 0; c < 3; ++c)
+    {
+        size_t test = std::max(indexNeg[c].size(), indexPos[c].size());
+        if (test < idxMin && test < index.size() * 3 / 4)
+        {
+            minComp = c;
+            idxMin = test;
+        }
+    }
+
+    // no axis separates the triangles well enough: stop subdividing
+    if (minComp == -1)
+    {
+        MakeLeaf(mesh, index, *n);
+        return n;
+    }
+
+    n->left = RecursiveMakeNode<Node>(mesh, indexNeg[minComp]);
+    n->right = RecursiveMakeNode<Node>(mesh, indexPos[minComp]);
+
+    return n;
+}
+
+// Builds an OBB tree over every triangle of mesh and returns its root node.
+template <typename Node>
+std::unique_ptr<Node> OBBTreeBuilder::buildCol(const ColMesh& mesh)
+{
+    return RecursiveMakeNode<Node>(mesh, MakeRootTriangleIndex(mesh));
+}
+
+template std::unique_ptr<DNAMP1::DCLN::Collision::Node>
+OBBTreeBuilder::buildCol<DNAMP1::DCLN::Collision::Node>(const ColMesh& mesh);
+
+}
--- a/DataSpec/DNACommon/OBBTreeBuilder.hpp
+++ b/DataSpec/DNACommon/OBBTreeBuilder.hpp
@ -0,0 +1,18 @@
+#ifndef DNACOMMON_OBBTREEBUILDER_HPP
+#define DNACOMMON_OBBTREEBUILDER_HPP
+
+#include "DNACommon.hpp"
+
+namespace DataSpec
+{
+
+// Builder for oriented-bounding-box trees over a Blender collision mesh.
+struct OBBTreeBuilder
+{
+    using ColMesh = hecl::BlenderConnection::DataStream::ColMesh;
+    // Returns the root node of the tree built for mesh. Node is the node
+    // type to produce; the definition and its explicit instantiations live
+    // in OBBTreeBuilder.cpp.
+    template <typename Node>
+    static std::unique_ptr<Node> buildCol(const ColMesh& mesh);
+};
+
+}
+
+#endif // DNACOMMON_OBBTREEBUILDER_HPP
--- a/DataSpec/DNAMP1/DCLN.hpp
+++ b/DataSpec/DNAMP1/DCLN.hpp
@ -1,8 +1,10 @@
 #ifndef __DNAMP1_DCLN_HPP__
 #define __DNAMP1_DCLN_HPP__

+#include <athena/Types.hpp>
 #include "../DNACommon/DeafBabe.hpp"
 #include "../DNACommon/PAK.hpp"
+#include "../DNACommon/OBBTreeBuilder.hpp"
 #include "DNAMP1.hpp"
 #include "DeafBabe.hpp"

@ -13,6 +15,8 @@ namespace DNAMP1

 struct DCLN : BigDNA
 {
+    using Mesh = hecl::BlenderConnection::DataStream::ColMesh;
+
    DECL_DNA
    Value<atUint32> colCount;
    struct Collision : BigDNA
@ -40,19 +44,20 @@ struct DCLN : BigDNA
        Value<atUint32> vertCount;
        Vector<atVec3f, DNA_COUNT(vertCount)> verts;

-        struct LeafData : BigDNA
-        {
-            DECL_DNA
-            Value<atUint32> edgeIndexCount;
-            Vector<atUint16, DNA_COUNT(edgeIndexCount)> edgeIndices;
-            size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; }
-        };
-
        struct Node : BigDNA
        {
            Delete _d;
+
+            struct LeafData : BigDNA
+            {
+                DECL_DNA
+                Value<atUint32> edgeIndexCount;
+                Vector<atUint16, DNA_COUNT(edgeIndexCount)> edgeIndices;
+                size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; }
+            };
+
            Value<atVec4f> xf[3];
-            Value<atVec3f> origin;
+            Value<atVec3f> halfExtent;
            Value<bool> isLeaf;
            std::unique_ptr<LeafData> leafData;
            std::unique_ptr<Node> left;
@ -63,7 +68,7 @@ struct DCLN : BigDNA
                xf[0] = __dna_reader.readVec4fBig();
                xf[1] = __dna_reader.readVec4fBig();
                xf[2] = __dna_reader.readVec4fBig();
-                origin = __dna_reader.readVec3fBig();
+                halfExtent = __dna_reader.readVec3fBig();
                isLeaf = __dna_reader.readBool();
                if (isLeaf)
                {
@ -84,7 +89,7 @@ struct DCLN : BigDNA
                __dna_writer.writeVec4fBig(xf[0]);
                __dna_writer.writeVec4fBig(xf[1]);
                __dna_writer.writeVec4fBig(xf[2]);
-                __dna_writer.writeVec3fBig(origin);
+                __dna_writer.writeVec3fBig(halfExtent);
                __dna_writer.writeBool(isLeaf);
                if (isLeaf && leafData)
                    leafData->write(__dna_writer);
@ -121,6 +126,30 @@ struct DCLN : BigDNA

                return (ret + 3) & ~3;
            }
+
+            // Emits Python that creates a cube-shaped empty for this node,
+            // placed with the node transform and scaled by halfExtent, then
+            // recurses into the children of a branch node. Leaf objects get
+            // their name shown in the viewport.
+            void sendToBlender(hecl::BlenderConnection::PyOutStream& os) const
+            {
+                os.format("obj = bpy.data.objects.new('%s', None)\n"
+                          "obj.empty_draw_type = 'CUBE'\n"
+                          "bpy.context.scene.objects.link(obj)\n"
+                          "mtx = Matrix(((%f,%f,%f,%f),(%f,%f,%f,%f),(%f,%f,%f,%f),(0.0,0.0,0.0,1.0)))\n"
+                          "mtxd = mtx.decompose()\n"
+                          "obj.rotation_mode = 'QUATERNION'\n"
+                          "obj.location = mtxd[0]\n"
+                          "obj.rotation_quaternion = mtxd[1]\n"
+                          "obj.scale = (%f,%f,%f)\n", isLeaf ? "leaf" : "branch",
+                          xf[0].vec[0], xf[0].vec[1], xf[0].vec[2], xf[0].vec[3],
+                          xf[1].vec[0], xf[1].vec[1], xf[1].vec[2], xf[1].vec[3],
+                          xf[2].vec[0], xf[2].vec[1], xf[2].vec[2], xf[2].vec[3],
+                          halfExtent.vec[0], halfExtent.vec[1], halfExtent.vec[2]);
+                if (isLeaf)
+                {
+                    os << "obj.show_name = True\n";
+                    return;
+                }
+
+                // a branch whose child was never populated must not be
+                // dereferenced
+                if (left)
+                    left->sendToBlender(os);
+                if (right)
+                    right->sendToBlender(os);
+            }
        };
        Node root;
        size_t getMemoryUsage()
@ -141,7 +170,8 @@ struct DCLN : BigDNA
        hecl::BlenderConnection::PyOutStream os = conn.beginPythonOut(true);
        os.format("import bpy\n"
                  "import bmesh\n"
-                  "from mathutils import Vector\n"
+                  "from mathutils import Vector, Matrix\n"
+
                  "\n"
                  "bpy.context.scene.name = '%s'\n"
                  "# Clear Scene\n"
@ -154,7 +184,10 @@ struct DCLN : BigDNA
        DeafBabe::BlenderInit(os);
        atInt32 idx = 0;
        for (const Collision& col : collision)
+        {
            DeafBabeSendToBlender(os, col, true, idx++);
+            col.root.sendToBlender(os);
+        }
        os.centerView();
        os.close();
    }
@ -171,12 +204,33 @@ struct DCLN : BigDNA
        DCLN dcln;
        dcln.read(rs);
        hecl::BlenderConnection& conn = btok.getBlenderConnection();
-        if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::Mesh))
+        if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::ColMesh))
            return false;

        dcln.sendToBlender(conn, pakRouter.getBestEntryName(entry, false));
        return conn.saveBlend();
    }
+
+    // Cooks a DCLN resource from Blender collision meshes: one Collision
+    // entry (collision data plus OBB tree) per mesh, written to outPath.
+    // inPath and conn are not read here, and the function always returns
+    // true.
+    static bool Cook(const hecl::ProjectPath& outPath,
+                     const hecl::ProjectPath& inPath,
+                     const std::vector<Mesh>& meshes,
+                     hecl::BlenderConnection* conn = nullptr)
+    {
+        DCLN cooked;
+        cooked.colCount = atUint32(meshes.size());
+        for (const Mesh& srcMesh : meshes)
+        {
+            cooked.collision.emplace_back();
+            Collision& col = cooked.collision.back();
+            DeafBabeBuildFromBlender(col, srcMesh);
+
+            // the tree root is moved into the collision entry by value
+            std::unique_ptr<Collision::Node> tree = OBBTreeBuilder::buildCol<Collision::Node>(srcMesh);
+            col.root = std::move(*tree);
+            col.memSize = atUint32(col.root.getMemoryUsage());
+        }
+
+        athena::io::FileWriter writer(outPath.getAbsolutePath());
+        cooked.write(writer);
+        return true;
+    }
 };

 }
--- a/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp
+++ b/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp
@ -22,7 +22,7 @@ struct CameraHint : IScriptObject
    {
        DECL_YAML
        Value<atUint32> propertyCount;
-        Value<bool> unknown1; // 0x1
+        Value<bool> calculateCamPos; // 0x1
        Value<bool> chaseAllowed; // 0x2
        Value<bool> boostAllowed; // 0x4
        Value<bool> obscureAvoidance; // 0x8
--- a/DataSpec/SpecBase.cpp
+++ b/DataSpec/SpecBase.cpp
@ -249,6 +249,12 @@ void SpecBase::doCook(const hecl::ProjectPath& path, const hecl::ProjectPath& co
            cookMesh(cookedPath, path, ds, fast, btok, progress);
            break;
        }
+        case hecl::BlenderConnection::BlendType::ColMesh:
+        {
+            hecl::BlenderConnection::DataStream ds = conn.beginData();
+            cookColMesh(cookedPath, path, ds, fast, btok, progress);
+            break;
+        }
        case hecl::BlenderConnection::BlendType::Actor:
        {
            hecl::BlenderConnection::DataStream ds = conn.beginData();
--- a/DataSpec/SpecBase.hpp
+++ b/DataSpec/SpecBase.hpp
@ -71,6 +71,9 @@ struct SpecBase : hecl::Database::IDataSpec
    virtual void cookMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                          BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                          FCookProgress progress)=0;
+    virtual void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                             BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                             FCookProgress progress)=0;
    virtual void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                           BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                           FCookProgress progress)=0;
--- a/DataSpec/SpecMP1.cpp
+++ b/DataSpec/SpecMP1.cpp
@ -10,6 +10,7 @@
 #include "DNAMP1/STRG.hpp"
 #include "DNAMP1/SCAN.hpp"
 #include "DNAMP1/CMDL.hpp"
+#include "DNAMP1/DCLN.hpp"
 #include "DNAMP1/MREA.hpp"
 #include "DNAMP1/ANCS.hpp"
 #include "DNAMP1/AGSC.hpp"
@ -555,6 +556,8 @@ struct SpecMP1 : SpecBase
            {
            case hecl::BlenderConnection::BlendType::Mesh:
                return {SBIG('CMDL'), path.hash().val32()};
+            case hecl::BlenderConnection::BlendType::ColMesh:
+                return {SBIG('DCLN'), path.hash().val32()};
            case hecl::BlenderConnection::BlendType::Actor:
                if (path.getAuxInfo().size())
                {
@ -728,6 +731,14 @@ struct SpecMP1 : SpecBase
            DNAMP1::CMDL::Cook(out, in, mesh);
    }

+    // Cooks a collision-mesh blend: pulls the compiled collision meshes from
+    // the Blender stream, closes the stream, then writes them out as DCLN.
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast,
+                     hecl::BlenderToken& btok, FCookProgress progress)
+    {
+        std::vector<ColMesh> colMeshes = ds.compileColMeshes();
+        ds.close();
+        DNAMP1::DCLN::Cook(out, in, colMeshes);
+    }
+
    void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast,
                   hecl::BlenderToken& btok, FCookProgress progress)
    {
--- a/DataSpec/SpecMP2.cpp
+++ b/DataSpec/SpecMP2.cpp
@ -329,6 +329,12 @@ struct SpecMP2 : SpecBase
    {
    }

+    // Collision-mesh cooking hook; intentionally empty in this spec, so
+    // nothing is written for ColMesh blends.
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                     BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                     FCookProgress progress)
+    {
+    }
+
    void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                   BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                   FCookProgress progress)
--- a/DataSpec/SpecMP3.cpp
+++ b/DataSpec/SpecMP3.cpp
@ -523,6 +523,12 @@ struct SpecMP3 : SpecBase
    {
    }

+    // Collision-mesh cooking hook; intentionally empty in this spec, so
+    // nothing is written for ColMesh blends.
+    void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
+                     BlendStream& ds, bool fast, hecl::BlenderToken& btok,
+                     FCookProgress progress)
+    {
+    }
+
    void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in,
                   BlendStream& ds, bool fast, hecl::BlenderToken& btok,
                   FCookProgress progress)
--- a/Editor/ViewManager.cpp
+++ b/Editor/ViewManager.cpp
@ -37,6 +37,7 @@ void ViewManager::BuildTestPART()
 void ViewManager::InitMP1(MP1::CMain& main)
 {
    main.Init(m_fileStoreManager, m_mainWindow.get(), m_voiceEngine.get(), *m_amuseAllocWrapper);
+    main.WarmupShaders();
 }

 void ViewManager::TestGameView::resized(const boo::SWindowRect& root, const boo::SWindowRect& sub)
--- a/13
+++ b/13
@ -0,0 +1,13 @@
+GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+under  the  terms  of the  GNU  Lesser General Public License as published
+by  the  Free  Software  Foundation;  either version 3 of the License,  or
+(at your option)  any  later  version  along  with the GCC Runtime Library
+Exception either version 3.1 or (at your option) any later version.
+This program  is  distributed  in  the  hope  that it will be useful,  but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License and the GCC Runtime Library Exception for more details.
+You  should  have received a copy of the GNU Lesser General Public License
+along   with    this    program    (see  GNU_GPL_V3,    GNU_LGPL_V3    and
+GNU_GCC_RUNTIME_EXCEPTION files);  if  not,  write  to  the  Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
--- a/Runtime/Camera/CBallCamera.cpp
+++ b/Runtime/Camera/CBallCamera.cpp
@ -2423,9 +2423,9 @@ void CBallCamera::ApplyCameraHint(CStateManager& mgr)
            zeus::CVector3f camPos = mgr.GetPlayer().GetBallPosition() + hint->GetHint().GetBallToCam();
            if ((hint->GetHint().GetOverrideFlags() & 0x1) != 0)
            {
-                float f30 = hint->GetHint().GetBallToCam().toVec2f().magnitude();
-                zeus::CVector3f x23c = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized();
-                camPos = FindDesiredPosition(f30, hint->GetHint().GetBallToCam().z, x23c, mgr, false);
+                float distance = hint->GetHint().GetBallToCam().toVec2f().magnitude();
+                zeus::CVector3f camToBall = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized();
+                camPos = FindDesiredPosition(distance, hint->GetHint().GetBallToCam().z, camToBall, mgr, false);
            }
            TeleportCamera(zeus::lookAt(camPos, x1d8_lookPos), mgr);
            break;
--- a/Runtime/Graphics/CBooRenderer.hpp
+++ b/Runtime/Graphics/CBooRenderer.hpp
@ -53,6 +53,8 @@ public:
 class CBooRenderer : public IRenderer
 {
    friend class CBooModel;
+    friend class CModel;
+    friend class CGameArea;
    friend class CWorldTransManager;
    friend class CMorphBallShadow;

--- a/Runtime/Graphics/CModel.hpp
+++ b/Runtime/Graphics/CModel.hpp
@ -73,6 +73,7 @@ struct CBooSurface
 class CBooModel
 {
    friend class CModel;
+    friend class CGameArea;
    friend class CBooRenderer;
    friend class CMetroidModelInstance;
    friend class CSkinnedModel;
@ -151,6 +152,8 @@ private:
    void DrawNormalSurfaces(const CModelFlags& flags) const;
    void DrawSurfaces(const CModelFlags& flags) const;
    void DrawSurface(const CBooSurface& surf, const CModelFlags& flags) const;
+    void WarmupDrawSurfaces() const;
+    void WarmupDrawSurface(const CBooSurface& surf) const;

    static zeus::CVector3f g_PlayerPosition;
    static float g_ModSeconds;
@ -177,6 +180,7 @@ public:
    void RemapMaterialData(SShader& shader);
    bool TryLockTextures() const;
    void UnlockTextures() const;
+    void SyncLoadTextures() const;
    void Touch(int shaderIdx) const;
    void VerifyCurrentShader(int shaderIdx);
    boo::IGraphicsBufferD* UpdateUniformData(const CModelFlags& flags,
@ -214,15 +218,8 @@ public:

    static boo::ITexture* g_shadowMap;
    static zeus::CTransform g_shadowTexXf;
-    static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf)
-    {
-        g_shadowMap = map;
-        g_shadowTexXf = texXf;
-    }
-    static void DisableShadowMaps()
-    {
-        g_shadowMap = nullptr;
-    }
+    static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf);
+    static void DisableShadowMaps();
 };

 class CModel
@ -270,6 +267,9 @@ public:
    zeus::CVector3f GetPoolNormal(size_t idx) const;
    void ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf,
                          const std::vector<std::pair<zeus::CVector3f, zeus::CVector3f>>& vn) const;
+
+    void _WarmupShaders();
+    static void WarmupShaders(const SObjectTag& cmdlTag);
 };

 CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag,
--- a/Runtime/Graphics/CModelBoo.cpp
+++ b/Runtime/Graphics/CModelBoo.cpp
@ -126,6 +126,16 @@ void CBooModel::EnsureViewDepStateCached(const CBooModel& model, const CBooSurfa
 boo::ITexture* CBooModel::g_shadowMap = nullptr;
 zeus::CTransform CBooModel::g_shadowTexXf;

+// Installs map as the global shadow-map texture together with the transform
+// texXf stored alongside it.
+void CBooModel::EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf)
+{
+    g_shadowMap = map;
+    g_shadowTexXf = texXf;
+}
+// Clears the global shadow-map texture; g_shadowTexXf is left as it was.
+void CBooModel::DisableShadowMaps()
+{
+    g_shadowMap = nullptr;
+}
+
 CBooModel::~CBooModel()
 {
    if (m_prev)
@ -488,6 +498,16 @@ void CBooModel::UnlockTextures() const
    const_cast<CBooModel*>(this)->x40_24_texturesLoaded = false;
 }

+// Resolves every texture token of this model via GetObj() and marks the
+// textures as loaded. Does nothing when they are already marked loaded.
+void CBooModel::SyncLoadTextures() const
+{
+    if (x40_24_texturesLoaded)
+        return;
+
+    // the method is const, but resolving a token and setting the flag both
+    // mutate the model
+    auto& textures = const_cast<std::vector<TCachedToken<CTexture>>&>(x1c_textures);
+    for (TCachedToken<CTexture>& tex : textures)
+        tex.GetObj();
+    const_cast<CBooModel*>(this)->x40_24_texturesLoaded = true;
+}
+
 void CBooModel::DrawFlat(ESurfaceSelection sel, EExtendedShader extendedIdx) const
 {
    const CBooSurface* surf;
@ -571,6 +591,39 @@ void CBooModel::DrawSurface(const CBooSurface& surf, const CModelFlags& flags) c
    CGraphics::DrawArrayIndexed(surf.m_data.idxStart, surf.m_data.idxCount);
 }

+// Issues a warm-up draw for every surface of the model: first the unsorted
+// surface list, then the sorted one.
+void CBooModel::WarmupDrawSurfaces() const
+{
+    for (const CBooSurface* s = x38_firstUnsortedSurface; s; s = s->m_next)
+        WarmupDrawSurface(*s);
+
+    for (const CBooSurface* s = x3c_firstSortedSurface; s; s = s->m_next)
+        WarmupDrawSurface(*s);
+}
+
+// Draws at most the first three indices of surf once with every shader data
+// binding of the most recently updated model instance. Does nothing when no
+// instance has been updated yet or the update count exceeds the instance
+// list.
+void CBooModel::WarmupDrawSurface(const CBooSurface& surf) const
+{
+    // m_uniUpdateCount-1 is used as an index below, so a count of zero must
+    // be rejected as well as a count past the end
+    if (m_uniUpdateCount == 0 || m_uniUpdateCount > m_instances.size())
+        return;
+    const ModelInstance& inst = m_instances[m_uniUpdateCount-1];
+
+    for (const std::vector<boo::IShaderDataBinding*>& extendeds : inst.m_shaderDataBindings)
+    {
+        for (boo::IShaderDataBinding* binding : extendeds)
+        {
+            CGraphics::SetShaderDataBinding(binding);
+            CGraphics::DrawArrayIndexed(surf.m_data.idxStart, std::min(u32(3), surf.m_data.idxCount));
+        }
+    }
+}
+
 void CBooModel::UVAnimationBuffer::ProcessAnimation(u8*& bufOut, const UVAnimation& anim)
 {
    zeus::CMatrix4f& texMtxOut = reinterpret_cast<zeus::CMatrix4f&>(*bufOut);
@ -1041,6 +1094,9 @@ CModel::CModel(std::unique_ptr<u8[]>&& in, u32 /* dataLen */, IObjectStore* stor

    m_gfxToken = CGraphics::CommitResources([&](boo::IGraphicsDataFactory::Context& ctx) -> bool
    {
+        /* Index buffer is always static */
+        m_ibo = ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount);
+
        if (!m_hmdlMeta.bankCount)
        {
            /* Non-skinned models use static vertex buffers shared with CBooModel instances */
@ -1056,8 +1112,6 @@ CModel::CModel(std::unique_ptr<u8[]>&& in, u32 /* dataLen */, IObjectStore* stor
            memmove(m_dynamicVertexData.get(), vboData, vboSz);
        }

-        /* Index buffer is always static */
-        m_ibo = ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount);
        return true;
    });

@ -1172,6 +1226,36 @@ void CModel::ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf,
    vertBuf->unmap();
 }

+// Runs a warm-up draw of this model once per material set.
+// Saves the current projection, view and model matrices, installs a fixed
+// orthographic setup with the model translated by the negated center of its
+// bounding box, draws, then restores the saved state.
+void CModel::_WarmupShaders()
+{
+    // shadow maps are enabled for the duration of the warm-up, using the
+    // renderer's sphere ramp texture and an identity transform
+    CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity());
+    CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState();
+    zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix;
+    zeus::CTransform backupModel = CGraphics::g_GXModelMatrix;
+    CGraphics::SetModelMatrix(zeus::CTransform::Translate(-m_aabb.center()));
+    CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f));
+    CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f);
+    CModelFlags defaultFlags;
+    for (CBooModel::SShader& shader : x18_matSets)
+    {
+        // remap to this material set, load its textures synchronously,
+        // refresh the uniform data, then issue the warm-up draws
+        GetInstance().RemapMaterialData(shader);
+        GetInstance().SyncLoadTextures();
+        GetInstance().UpdateUniformData(defaultFlags, nullptr, nullptr);
+        GetInstance().WarmupDrawSurfaces();
+    }
+    CGraphics::SetProjectionState(backupProj);
+    CGraphics::SetViewPointMatrix(backupViewPoint);
+    CGraphics::SetModelMatrix(backupModel);
+    // NOTE(review): this clears the shadow map instead of restoring whatever
+    // was installed before the call -- confirm no caller relies on it.
+    CBooModel::DisableShadowMaps();
+}
+
+// Fetches the model identified by cmdlTag from the simple pool and runs its
+// shader warm-up. The token is held until the warm-up has finished.
+void CModel::WarmupShaders(const SObjectTag& cmdlTag)
+{
+    TToken<CModel> token = g_SimplePool->GetObj(cmdlTag);
+    token.GetObj()->_WarmupShaders();
+}
+
 CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag,
                               std::unique_ptr<u8[]>&& in, u32 len,
                               const urde::CVParamTransfer& vparms,
--- a/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp
@ -93,7 +93,7 @@ static const char* LightingShadowGLSL =
 "                   lights[0].angAtt[1] * angDot +\n"
 "                   lights[0].angAtt[0];\n"
 "    ret += lights[0].color * clamp(angAtt, 0.0, 1.0) * att * clamp(dot(normalize(-delta), mvNormIn.xyz), 0.0, 1.0) *\n"
-"           texture(extTex0, vtf.extTcgs[0]).r;\n"
+"           texture(extTex7, vtf.extTcgs[0]).r;\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
--- a/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp
@ -92,7 +92,7 @@ static const char* LightingShadowHLSL =
 "                   lights[0].angAtt[1] * angDot +\n"
 "                   lights[0].angAtt[0];\n"
 "    ret += lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n"
-"           extTex0.Sample(clampSamp, vtf.extTcgs[0]).r;\n"
+"           extTex7.Sample(clampSamp, vtf.extTcgs[0]).r;\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
--- a/Runtime/Graphics/Shaders/CModelShadersMetal.cpp
+++ b/Runtime/Graphics/Shaders/CModelShadersMetal.cpp
@ -79,7 +79,7 @@ static const char* LightingShadowMetal =
 "};\n"
 "\n"
 "static float4 EXTLightingShadowFunc(constant LightingUniform& lu, float4 mvPosIn, float4 mvNormIn,\n"
-"                                    thread VertToFrag& vtf, texture2d<float> extTex0)\n"
+"                                    thread VertToFrag& vtf, texture2d<float> extTex7)\n"
 "{\n"
 "    float4 ret = lu.ambient;\n"
 "    \n"
@ -93,7 +93,7 @@ static const char* LightingShadowMetal =
 "                   lu.lights[0].angAtt[1] * angDot +\n"
 "                   lu.lights[0].angAtt[0];\n"
 "    ret += lu.lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n"
-"           extTex0.sample(clampSamp, vtf.extTcgs0);\n"
+"           extTex7.sample(clampSamp, vtf.extTcgs0);\n"
 "    \n"
 "    for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n"
 "    {\n"
--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp
@ -27,7 +27,7 @@ BOO_GLSL_BINDING_HEAD
 "void main()\n"
 "{\n"
 "    vtf.color = colorIn;\n"
-"    vtf.uv = uvIn;\n"
+"    vtf.uv = uvIn.xy;\n"
 "    gl_Position = mvp * vec4(posIn.xyz, 1.0);\n"
 "}\n";

--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp
@ -29,7 +29,7 @@ static const char* VS =
 "{\n"
 "    VertToFrag vtf;\n"
 "    vtf.color = v.colorIn;\n"
-"    vtf.uv = v.uvIn;\n"
+"    vtf.uv = v.uvIn.xy;\n"
 "    vtf.pos = mul(mvp, float4(v.posIn.xyz, 1.0));\n"
 "    return vtf;\n"
 "}\n";
--- a/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp
+++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp
@ -31,7 +31,7 @@ static const char* VS =
 "{\n"
 "    VertToFrag vtf;\n"
 "    vtf.color = v.colorIn;\n"
-"    vtf.uv = v.uvIn;\n"
+"    vtf.uv = v.uvIn.xy;\n"
 "    vtf.pos = su.mvp * float4(v.posIn.xyz, 1.0);\n"
 "    return vtf;\n"
 "}\n";
--- a/Runtime/IMain.hpp
+++ b/Runtime/IMain.hpp
@ -40,9 +40,10 @@ public:
    virtual void Draw()=0;
    virtual bool Proc()=0;
    virtual void Shutdown()=0;
-    virtual boo::IWindow* GetMainWindow() const=0;
+    virtual boo::IWindow* GetMainWindow() const= 0;
    virtual void SetFlowState(EFlowState) = 0;
    virtual size_t GetExpectedIdSize() const = 0;
+    virtual void WarmupShaders() = 0;
 };
 }

--- a/Runtime/MP1/MP1.cpp
+++ b/Runtime/MP1/MP1.cpp
@ -17,14 +17,14 @@
 #include "Graphics/Shaders/CFluidPlaneShader.hpp"
 #include "Graphics/Shaders/CAABoxShader.hpp"
 #include "Graphics/Shaders/CWorldShadowShader.hpp"
-#include "Character/CCharLayoutInfo.hpp"
+#include "Graphics/Shaders/CParticleSwooshShaders.hpp"
 #include "Audio/CStreamAudioManager.hpp"
 #include "CGBASupport.hpp"
-#include "CBasics.hpp"
 #include "Audio/CAudioGroupSet.hpp"

 namespace urde
 {
+URDE_DECL_SPECIALIZE_SHADER(CParticleSwooshShaders)
 URDE_DECL_SPECIALIZE_SHADER(CThermalColdFilter)
 URDE_DECL_SPECIALIZE_SHADER(CThermalHotFilter)
 URDE_DECL_SPECIALIZE_SHADER(CSpaceWarpFilter)
@ -223,6 +223,7 @@ CMain::BooSetter::BooSetter(boo::IGraphicsDataFactory* factory,
                            boo::ITextureR* spareTex)
 {
    CGraphics::InitializeBoo(factory, cmdQ, spareTex);
+    TShader<CParticleSwooshShaders>::Initialize();
    TShader<CThermalColdFilter>::Initialize();
    TShader<CThermalHotFilter>::Initialize();
    TShader<CSpaceWarpFilter>::Initialize();
@ -318,8 +319,40 @@ void CMain::Init(const hecl::Runtime::FileStoreManager& storeMgr,
    //CStreamAudioManager::Start(false, "Audio/rui_samusL.dsp|Audio/rui_samusR.dsp", 0x7f, true, 1.f, 1.f);
 }

+static logvisor::Module WarmupLog("Shader Warmup");
+
+/* Collects the tags of every CMDL and MREA resource known to the resource factory
+ * and positions the warmup iterator at the first one. Does nothing when a tag list
+ * is already present. */
+void CMain::WarmupShaders()
+{
+    if (!m_warmupTags.empty())
+        return;
+
+    /* Only models and areas take part in the warmup */
+    const auto isWarmable = [](const SObjectTag& tag) -> bool
+    {
+        return tag.type == FOURCC('CMDL') || tag.type == FOURCC('MREA');
+    };
+
+    /* First enumeration only counts, so the vector can be sized up front */
+    size_t modelCount = 0;
+    g_ResFactory->EnumerateResources([&](const SObjectTag& tag)
+    {
+        if (isWarmable(tag))
+            ++modelCount;
+        return true;
+    });
+    m_warmupTags.reserve(modelCount);
+
+    /* Second enumeration stores the matching tags */
+    g_ResFactory->EnumerateResources([&](const SObjectTag& tag)
+    {
+        if (isWarmable(tag))
+            m_warmupTags.push_back(tag);
+        return true;
+    });
+
+    m_warmupIt = m_warmupTags.begin();
+
+    WarmupLog.report(logvisor::Info, "Began warmup of %" PRISize " objects", modelCount);
+}
+
 bool CMain::Proc()
 {
+    // Warmup cycle overrides update
+    if (m_warmupTags.size())
+        return false;
+
    CGBASupport::GlobalPoll();
    x164_archSupport->UpdateTicks();
    x164_archSupport->Update();
@ -340,6 +373,33 @@ bool CMain::Proc()

+/* Draw entry point. While the warmup tag list is non-empty, a time-limited batch
+ * of shader warmups runs instead of the regular draw; once the list has been
+ * walked to its end it is cleared and later calls draw normally. */
 void CMain::Draw()
 {
+    // Warmup cycle overrides draw
+    if (m_warmupTags.size())
+    {
+        auto startTime = std::chrono::steady_clock::now();
+        while (m_warmupIt != m_warmupTags.end())
+        {
+            WarmupLog.report(logvisor::Info, "Warming %.4s %08X", m_warmupIt->type.getChars(), m_warmupIt->id.Value());
+
+            // Dispatch on resource type; other types are skipped
+            if (m_warmupIt->type == FOURCC('CMDL'))
+                CModel::WarmupShaders(*m_warmupIt);
+            else if (m_warmupIt->type == FOURCC('MREA'))
+                CGameArea::WarmupShaders(*m_warmupIt);
+            ++m_warmupIt;
+
+            // Approximately 3/4 frame of warmups
+            // (stop this batch once more than 12 ms have elapsed since it began)
+            auto curTime = std::chrono::steady_clock::now();
+            if (std::chrono::duration_cast<std::chrono::milliseconds>(curTime - startTime).count() > 12)
+                break;
+        }
+        if (m_warmupIt == m_warmupTags.end())
+        {
+            // Replacing the vector empties it, which ends the warmup cycle
+            m_warmupTags = std::vector<SObjectTag>();
+            WarmupLog.report(logvisor::Info, "Finished warmup");
+        }
+        return;
+    }
+
     x164_archSupport->Draw();
 }

@ -359,6 +419,7 @@ void CMain::Shutdown()
 {
    x164_archSupport.reset();
    ShutdownSubsystems();
+    TShader<CParticleSwooshShaders>::Shutdown();
    TShader<CThermalColdFilter>::Shutdown();
    TShader<CThermalHotFilter>::Shutdown();
    TShader<CSpaceWarpFilter>::Shutdown();
--- a/Runtime/MP1/MP1.hpp
+++ b/Runtime/MP1/MP1.hpp
@ -240,6 +240,10 @@ private:

    boo::IWindow* m_mainWindow = nullptr;

+    // Warmup state
+    std::vector<SObjectTag> m_warmupTags;
+    std::vector<SObjectTag>::iterator m_warmupIt;
+
    void InitializeSubsystems(const hecl::Runtime::FileStoreManager& storeMgr);

 public:
@ -259,6 +263,7 @@ public:
              boo::IWindow* window,
              boo::IAudioVoiceEngine* voiceEngine,
              amuse::IBackendVoiceAllocator& backend);
+    void WarmupShaders();
    bool Proc();
    void Draw();
    void Shutdown();
--- a/Runtime/World/CGameArea.cpp
+++ b/Runtime/World/CGameArea.cpp
@ -411,6 +411,39 @@ CGameArea::CGameArea(CInputStream& in, int idx, int mlvlVersion)
    xec_totalResourcesSize += g_ResFactory->ResourceSize(SObjectTag{FOURCC('MREA'), x84_mrea});
 }

+/* Warmup constructor: loads the MREA identified by mreaId, fills in its static
+ * geometry and issues one warmup draw per model instance. Projection, view and
+ * model matrices are saved before the pass and restored afterwards. */
+CGameArea::CGameArea(CAssetId mreaId)
+: x84_mrea(mreaId)
+{
+    /* Keep calling until StartStreamingMainArea() returns false */
+    while (StartStreamingMainArea()) {}
+
+    /* Block until every queued load transaction has completed */
+    for (auto& req : xf8_loadTransactions)
+        req->WaitForComplete();
+
+    MREAHeader header = VerifyHeader();
+    x12c_postConstructed->x4c_insts.reserve(header.modelCount);
+
+    FillInStaticGeometry();
+
+    CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity());
+    /* Snapshot global graphics state so it can be put back afterwards */
+    CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState();
+    zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix;
+    zeus::CTransform backupModel = CGraphics::g_GXModelMatrix;
+    CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f));
+    CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f);
+    CModelFlags defaultFlags;
+    for (CMetroidModelInstance& inst : x12c_postConstructed->x4c_insts)
+    {
+        /* Translate each instance by the negated center of its own AABB before drawing */
+        CGraphics::SetModelMatrix(zeus::CTransform::Translate(-inst.x34_aabb.center()));
+        inst.m_instance->SyncLoadTextures();
+        inst.m_instance->UpdateUniformData(defaultFlags, nullptr, nullptr);
+        inst.m_instance->WarmupDrawSurfaces();
+    }
+    /* Restore the state captured above */
+    CGraphics::SetProjectionState(backupProj);
+    CGraphics::SetViewPointMatrix(backupViewPoint);
+    CGraphics::SetModelMatrix(backupModel);
+    CBooModel::DisableShadowMaps();
+}
+
 bool CGameArea::IGetScriptingMemoryAlways() const
 {
    return false;
@ -1192,5 +1225,10 @@ bool CGameArea::CAreaObjectList::IsQualified(const CEntity& ent)
 {
    return (ent.GetAreaIdAlways() == x200c_areaIdx);
 }
+/* Warms up the shaders of the area identified by mreaTag by constructing a
+ * temporary CGameArea from the tag's asset id; the area is destroyed on return. */
+void CGameArea::WarmupShaders(const SObjectTag& mreaTag)
+{
+    // Calling this version of the constructor performs warmup implicitly
+    CGameArea area(mreaTag.id);
+}

 }
--- a/Runtime/World/CGameArea.hpp
+++ b/Runtime/World/CGameArea.hpp
@ -289,6 +289,7 @@ private:
 public:

    CGameArea(CInputStream& in, int idx, int mlvlVersion);
+    CGameArea(CAssetId mreaId); // Warmup constructor

    bool IsFinishedOccluding() const;
    void ReadDependencyList();
@ -372,6 +373,8 @@ public:
    CObjectList& GetAreaObjects() const { return *GetPostConstructed()->x10c0_areaObjs.get(); }

    CGameArea* GetNext() const { return x130_next; }
+
+    static void WarmupShaders(const SObjectTag& mreaTag);
 };

 }
--- a/gmm/gmm.h
+++ b/gmm/gmm.h
@ -0,0 +1,54 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Include common gmm files.
+*/
+#ifndef GMM_H__
+#define GMM_H__
+
+#include "gmm_kernel.h"
+#include "gmm_dense_lu.h"
+#include "gmm_dense_qr.h"
+
+#include "gmm_iter_solvers.h"
+#include "gmm_condition_number.h"
+#include "gmm_inoutput.h"
+
+#include "gmm_lapack_interface.h"
+#include "gmm_superlu_interface.h"
+#include "gmm_range_basis.h"
+
+#include "gmm_domain_decomp.h"
+
+#endif //  GMM_H__
--- a/gmm/gmm_MUMPS_interface.h
+++ b/gmm/gmm_MUMPS_interface.h
@ -0,0 +1,355 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Julien Pommier
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_MUMPS_interface.h
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>,
+   @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+   @date December 8, 2005.
+   @brief Interface with MUMPS (LU direct solver for sparse matrices).
+*/
+#if defined(GMM_USES_MUMPS) || defined(HAVE_DMUMPS_C_H)
+
+#ifndef GMM_MUMPS_INTERFACE_H
+#define GMM_MUMPS_INTERFACE_H
+
+#include "gmm_kernel.h"
+
+
+extern "C" {
+
+#include <smumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <dmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <cmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+#include <zmumps_c.h>
+#undef F_INT
+#undef F_DOUBLE
+#undef F_DOUBLE2
+
+}
+
+namespace gmm {
+
+#define ICNTL(I) icntl[(I)-1]
+#define INFO(I) info[(I)-1]
+#define INFOG(I) infog[(I)-1]
+#define RINFOG(I) rinfog[(I)-1]
+
+  /** Coordinate-format copy of a matrix: three parallel arrays holding the
+   *  1-based row index, 1-based column index and value of every stored entry.
+   *  Exact zeros are skipped; when sym is set only entries satisfying the
+   *  ir >= jc test in store() are kept.
+   */
+  template <typename T> struct ij_sparse_matrix {
+    std::vector<int> irn;  // 1-based row indices
+    std::vector<int> jcn;  // 1-based column indices
+    std::vector<T> a;      // values, parallel to irn/jcn
+    bool sym;              // keep only one triangle when true
+    
+    /** Append the non-zero entries of the sparse vector l, which is row
+     *  (or column, see build_from) number i of the matrix. */
+    template <typename L> void store(const L& l, size_type i) {
+       typename linalg_traits<L>::const_iterator it = vect_const_begin(l),
+         ite = vect_const_end(l);
+       for (; it != ite; ++it) {
+         // shift both indices from 0-based to 1-based
+         int ir = (int)i + 1, jc = (int)it.index() + 1;
+         if (*it != T(0) && (!sym || ir >= jc)) 
+         { irn.push_back(ir); jcn.push_back(jc); a.push_back(*it); }
+       }
+    }
+
+    /** Row-major source: walk the rows and store each one. */
+    template <typename L> void build_from(const L& l, row_major) {
+      for (size_type i = 0; i < mat_nrows(l); ++i)
+        store(mat_const_row(l, i), i);
+    }
+
+    /** Column-major source: walk the columns as if they were rows, then swap
+     *  the two index arrays so that irn really holds rows and jcn columns. */
+    template <typename L> void build_from(const L& l, col_major) {
+      for (size_type i = 0; i < mat_ncols(l); ++i)
+        store(mat_const_col(l, i), i);
+      irn.swap(jcn);
+    }
+
+    /** Build the coordinate arrays from A; capacity is reserved from nnz(A)
+     *  and the traversal is chosen from A's principal orientation. */
+    template <typename L> ij_sparse_matrix(const L& A, bool sym_) {
+      size_type nz = nnz(A);
+      sym = sym_;
+      irn.reserve(nz); jcn.reserve(nz); a.reserve(nz);
+      build_from(A,  typename principal_orientation_type<typename
+                 linalg_traits<L>::sub_orientation>::potype());
+    }
+  };
+
+  /* ********************************************************************* */
+  /*   MUMPS solve interface                                               */
+  /* ********************************************************************* */
+
+  /** Traits mapping a gmm scalar type onto the matching MUMPS C binding:
+   *  MUMPS_STRUC_C is the control structure type, value_type the scalar type
+   *  used for the MUMPS array pointers, and mumps_c() forwards to the driver
+   *  routine of that precision. The primary template is intentionally empty.
+   */
+  template <typename T> struct mumps_interf {};
+
+  /** Single precision real (smumps). */
+  template <> struct mumps_interf<float> {
+    typedef SMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef float value_type;
+
+    static void mumps_c(MUMPS_STRUC_C &id) { smumps_c(&id); }
+  };
+
+  /** Double precision real (dmumps). */
+  template <> struct mumps_interf<double> {
+    typedef DMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef double value_type;
+    static void mumps_c(MUMPS_STRUC_C &id) { dmumps_c(&id); }
+  };
+
+  /** Single precision complex (cmumps). */
+  template <> struct mumps_interf<std::complex<float> > {
+    typedef CMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef mumps_complex value_type;
+    static void mumps_c(MUMPS_STRUC_C &id) { cmumps_c(&id); }
+  };
+
+  /** Double precision complex (zmumps). */
+  template <> struct mumps_interf<std::complex<double> > {
+    typedef ZMUMPS_STRUC_C  MUMPS_STRUC_C;
+    typedef mumps_double_complex value_type;
+    static void mumps_c(MUMPS_STRUC_C &id) { zmumps_c(&id); }
+  };
+
+
+  /** Inspect INFO(1) of a MUMPS control structure after a call.
+   *  @return true when INFO(1) is not negative; false, after a warning, when
+   *          it is -6 or -10 (reported as a singular matrix).
+   *  Every other negative code goes through GMM_ASSERT1(false, ...).
+   *  NOTE(review): the cases without a return rely on GMM_ASSERT1 not
+   *  returning (presumably it throws) -- otherwise they would fall through
+   *  to the next case; confirm against gmm_except.h.
+   */
+  template <typename MUMPS_STRUCT>
+  static inline bool mumps_error_check(MUMPS_STRUCT &id) {
+    if (id.INFO(1) < 0) {
+      switch (id.INFO(1)) {
+        case -2:
+          GMM_ASSERT1(false, "Solve with MUMPS failed: NZ = " << id.INFO(2)
+                      << " is out of range");
+        case -6 : case -10 :
+          GMM_WARNING1("Solve with MUMPS failed: matrix is singular");
+          return false;
+        case -9:
+          GMM_ASSERT1(false, "Solve with MUMPS failed: error "
+                      << id.INFO(1) << ", increase ICNTL(14)");
+        case -13 :
+          GMM_ASSERT1(false, "Solve with MUMPS failed: not enough memory");
+        default :
+          GMM_ASSERT1(false, "Solve with MUMPS failed with error "
+                      << id.INFO(1));
+      }
+    }
+    return true;
+  }
+
+
+  /** MUMPS solve interface.
+   *  Works only with sparse or skyline matrices.
+   *
+   *  Solves A X = B: B is copied into a work vector that MUMPS uses as its
+   *  right-hand side, and that vector is copied into X afterwards.
+   *  @param A           square matrix (checked with GMM_ASSERT2).
+   *  @param X_          result vector; taken by const reference and written
+   *                     through a const_cast.
+   *  @param B           right-hand side.
+   *  @param sym         selects id.sym = 2 and keeps only one triangle of A.
+   *  @param distributed hand the matrix over through the *_loc fields.
+   *  @return the result of mumps_error_check(); false for a singular matrix.
+   */
+  template <typename MAT, typename VECTX, typename VECTB>
+  bool MUMPS_solve(const MAT &A, const VECTX &X_, const VECTB &B,
+                   bool sym = false, bool distributed = false) {
+    VECTX &X = const_cast<VECTX &>(X_);
+
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename mumps_interf<T>::value_type MUMPS_T;
+    GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix");
+  
+    std::vector<T> rhs(gmm::vect_size(B)); gmm::copy(B, rhs);
+
+    ij_sparse_matrix<T> AA(A, sym);
+  
+    const int JOB_INIT = -1;
+    const int JOB_END = -2;
+    const int USE_COMM_WORLD = -987654;
+
+    typename mumps_interf<T>::MUMPS_STRUC_C id;
+
+    int rank(0);
+#ifdef GMM_USES_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+    
+    // initialise the MUMPS instance
+    id.job = JOB_INIT;
+    id.par = 1;
+    id.sym = sym ? 2 : 0;
+    id.comm_fortran = USE_COMM_WORLD;
+    mumps_interf<T>::mumps_c(id);
+    
+    // NOTE(review): taking &x[0] below assumes at least one stored entry
+    // and a non-empty rhs -- confirm callers never pass an empty system.
+    if (rank == 0 || distributed) {
+      id.n = int(gmm::mat_nrows(A));
+      if (distributed) {
+        id.nz_loc = int(AA.irn.size());
+        id.irn_loc = &(AA.irn[0]);
+        id.jcn_loc = &(AA.jcn[0]);
+        id.a_loc = (MUMPS_T*)(&(AA.a[0]));
+      } else {
+        id.nz = int(AA.irn.size());
+        id.irn = &(AA.irn[0]);
+        id.jcn = &(AA.jcn[0]);
+        id.a = (MUMPS_T*)(&(AA.a[0]));
+      }
+      if (rank == 0)
+        id.rhs = (MUMPS_T*)(&(rhs[0]));
+    }
+
+    id.ICNTL(1) = -1; // output stream for error messages
+    id.ICNTL(2) = -1; // output stream for other messages
+    id.ICNTL(3) = -1; // output stream for global information
+    id.ICNTL(4) = 0;  // verbosity level
+
+    if (distributed)
+      id.ICNTL(5) = 0;  // assembled input matrix (default)
+
+    id.ICNTL(14) += 80; /* small boost to the workspace size, as we have
+                           encountered some problems that did not fit in
+                           the default settings of MUMPS.
+                           By default, ICNTL(14) = 15 or 20.
+                        */
+    //cout << "ICNTL(14): " << id.ICNTL(14) << "\n";
+
+    if (distributed)
+      id.ICNTL(18) = 3; // strategy for distributed input matrix
+
+    // id.ICNTL(22) = 1;   /* enables out-of-core support */
+
+    // job 6: presumably analysis + factorization + solve in one call -- confirm
+    id.job = 6;
+    mumps_interf<T>::mumps_c(id);
+    bool ok = mumps_error_check(id);
+
+    // release the MUMPS instance (not reached if the check above does not return)
+    id.job = JOB_END;
+    mumps_interf<T>::mumps_c(id);
+
+#ifdef GMM_USES_MPI
+    // NOTE(review): id.n is only assigned above when rank == 0 or distributed,
+    // yet it sizes this broadcast on every rank -- confirm it is valid elsewhere.
+    MPI_Bcast(&(rhs[0]),id.n,gmm::mpi_type(T()),0,MPI_COMM_WORLD);
+#endif
+
+    // rhs is copied into X even when ok is false
+    gmm::copy(rhs, X);
+
+    return ok;
+
+  }
+
+
+
+  /** MUMPS solve interface for distributed matrices.
+   *  Works only with sparse or skyline matrices.
+   *
+   *  Convenience wrapper: forwards to MUMPS_solve() with distributed = true
+   *  and returns its result unchanged.
+   */
+  template <typename MAT, typename VECTX, typename VECTB>
+  bool MUMPS_distributed_matrix_solve(const MAT &A, const VECTX &X_,
+                                      const VECTB &B, bool sym = false) {
+    return MUMPS_solve(A, X_, B, sym, true);
+  }
+
+
+
+  /** Complex argument: returns its real part. */
+  template<typename T>
+  inline T real_or_complex(std::complex<T> a) { return a.real(); }
+  /** Any other argument: returned unchanged. The parameter is a non-const
+   *  lvalue reference, so this overload cannot bind to a temporary. */
+  template<typename T>
+  inline T real_or_complex(T &a) { return a; }
+
+
+  /** Evaluate matrix determinant with MUMPS.
+   *  Works only with sparse or skyline matrices.
+   *
+   *  @param A           square matrix (checked with GMM_ASSERT2).
+   *  @param exponent    reset to 0 on entry, then set from INFOG(34).
+   *  @param sym         selects id.sym = 2 and keeps only one triangle of A.
+   *  @param distributed hand the matrix over through the *_loc fields.
+   *  @return the value built from RINFOG(12); presumably the determinant is
+   *          this value scaled by 2^exponent -- confirm against the MUMPS
+   *          documentation.
+   *  NOTE(review): the temporary std::complex passed to real_or_complex()
+   *  selects the overload that returns a.real(), so RINFOG(13) (the
+   *  imaginary part) does not reach the result even for complex T -- confirm
+   *  whether that is intended.
+   */
+  template <typename MAT, typename T = typename linalg_traits<MAT>::value_type>
+  T MUMPS_determinant(const MAT &A, int &exponent,
+                      bool sym = false, bool distributed = false) {
+    exponent = 0;
+    typedef typename mumps_interf<T>::value_type MUMPS_T;
+    typedef typename number_traits<T>::magnitude_type R;
+    GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix");
+  
+    ij_sparse_matrix<T> AA(A, sym);
+  
+    const int JOB_INIT = -1;
+    const int JOB_END = -2;
+    const int USE_COMM_WORLD = -987654;
+
+    typename mumps_interf<T>::MUMPS_STRUC_C id;
+
+    int rank(0);
+#ifdef GMM_USES_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#endif
+    
+    // initialise the MUMPS instance
+    id.job = JOB_INIT;
+    id.par = 1;
+    id.sym = sym ? 2 : 0;
+    id.comm_fortran = USE_COMM_WORLD;
+    mumps_interf<T>::mumps_c(id);
+    
+    // NOTE(review): taking &x[0] below assumes at least one stored entry.
+    if (rank == 0 || distributed) {
+      id.n = int(gmm::mat_nrows(A));
+      if (distributed) {
+        id.nz_loc = int(AA.irn.size());
+        id.irn_loc = &(AA.irn[0]);
+        id.jcn_loc = &(AA.jcn[0]);
+        id.a_loc = (MUMPS_T*)(&(AA.a[0]));
+      } else {
+        id.nz = int(AA.irn.size());
+        id.irn = &(AA.irn[0]);
+        id.jcn = &(AA.jcn[0]);
+        id.a = (MUMPS_T*)(&(AA.a[0]));
+      }
+    }
+
+    id.ICNTL(1) = -1; // output stream for error messages
+    id.ICNTL(2) = -1; // output stream for other messages
+    id.ICNTL(3) = -1; // output stream for global information
+    id.ICNTL(4) = 0;  // verbosity level
+
+    if (distributed)
+      id.ICNTL(5) = 0;  // assembled input matrix (default)
+
+//    id.ICNTL(14) += 80; // small boost to the workspace size 
+
+    if (distributed)
+      id.ICNTL(18) = 3; // strategy for distributed input matrix
+
+    id.ICNTL(31) = 1;   // only factorization, no solution to follow
+    id.ICNTL(33) = 1;   // request determinant calculation
+
+    id.job = 4; // analysis (job=1) + factorization (job=2)
+    mumps_interf<T>::mumps_c(id);
+    // the boolean result is ignored: on a "singular" warning the values
+    // below are still read
+    mumps_error_check(id);
+
+    T det = real_or_complex(std::complex<R>(id.RINFOG(12),id.RINFOG(13)));
+    exponent = id.INFOG(34);
+
+    // release the MUMPS instance
+    id.job = JOB_END;
+    mumps_interf<T>::mumps_c(id);
+
+    return det;
+  }
+
+#undef ICNTL
+#undef INFO
+#undef INFOG
+#undef RINFOG
+
+}
+
+  
+#endif // GMM_MUMPS_INTERFACE_H
+
+#endif // GMM_USES_MUMPS
--- a/gmm/gmm_algobase.h
+++ b/gmm/gmm_algobase.h
@ -0,0 +1,228 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2000-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_algobase.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 28, 2000.
+    @brief Miscellaneous algorithms on containers.
+*/
+
+#ifndef GMM_ALGOBASE_H__
+#define GMM_ALGOBASE_H__
+#include "gmm_std.h"
+#include "gmm_except.h"
+#include <functional>
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Definition of comparison classes                                      */
+  /* returning an int.                                                     */
+  /* ********************************************************************* */
+  
+  /** Three-way "ascending" comparison functor.
+   *  Returns a negative value when x orders before y, a positive value when
+   *  y orders before x and 0 otherwise. Only the sign of the result is
+   *  meaningful: the generic version yields -1/0/1, while the narrow integer
+   *  specializations yield the difference of the operands.
+   */
+  template <class T>
+    struct less : public std::binary_function<T, T, int> {
+    inline int operator()(const T& x, const T& y) const
+    { return (x < y) ? -1 : ((y < x) ? 1 : 0); }
+  };
+
+  /* int: compare instead of subtracting. x-y overflows (undefined behaviour,
+     and in practice the wrong sign) when the operands are far apart,
+     e.g. INT_MIN and 1. */
+  template<> struct less<int> : public std::binary_function<int, int, int>
+  { int operator()(int x, int y) const
+    { return (x < y) ? -1 : ((y < x) ? 1 : 0); } };
+  /* char, short and unsigned char are promoted to int before the
+     subtraction, so the difference cannot overflow. */
+  template<> struct less<char> : public std::binary_function<char, char, int>
+  { int operator()(char x, char y) const { return int(x-y); } };
+  template<> struct less<short> : public std::binary_function<short,short,int>
+  { int operator()(short x, short y) const { return int(x-y); } };
+  template<> struct less<unsigned char>
+     : public std::binary_function<unsigned char, unsigned char, int> {
+    int operator()(unsigned char x, unsigned char y) const
+    { return int(x)-int(y); }
+  };
+  
+
+  /** Three-way "descending" comparison functor: the mirror image of less.
+   *  Returns a negative value when y orders before x, a positive value when
+   *  x orders before y and 0 otherwise. Only the sign of the result is
+   *  meaningful.
+   */
+  template <class T>
+    struct greater : public std::binary_function<T, T, int> {
+    inline int operator()(const T& x, const T& y) const
+    { return (y < x) ? -1 : ((x < y) ? 1 : 0); }
+  };
+
+  /* int: compare instead of subtracting. y-x overflows (undefined behaviour,
+     and in practice the wrong sign) when the operands are far apart,
+     e.g. 1 and INT_MIN. */
+  template<> struct greater<int> : public std::binary_function<int, int, int>
+  { int operator()(int x, int y) const
+    { return (y < x) ? -1 : ((x < y) ? 1 : 0); } };
+  /* char, short and unsigned char are promoted to int before the
+     subtraction, so the difference cannot overflow. */
+  template<> struct greater<char> : public std::binary_function<char,char,int>
+  { int operator()(char x, char y) const { return int(y-x); } };
+  template<> struct greater<short>
+      : public std::binary_function<short, short, int>
+  { int operator()(short x, short y) const { return int(y-x); } };
+  template<> struct greater<unsigned char>
+    : public std::binary_function<unsigned char, unsigned char, int> {
+    int operator()(unsigned char x, unsigned char y) const
+      { return int(y)-int(x); }
+  };
+
+  /** Absolute value for any type that supports comparison with T(0) and
+   *  unary minus. */
+  template <typename T> inline T my_abs(T a) {
+    if (a < T(0)) return T(-a);
+    return a;
+  }
+  
+  /** Ascending three-way comparison with tolerance: values whose distance
+   *  is at most eps compare equal (0); otherwise -1 when x < y, else 1. */
+  template <class T>
+    struct approx_less : public std::binary_function<T, T, int> {
+    double eps;
+    approx_less(double e = 1E-13) : eps(e) {}
+    inline int operator()(const T &x, const T &y) const {
+      if (my_abs(x - y) <= eps) return 0;
+      return (x < y) ? -1 : 1;
+    }
+  };
+
+  /** Descending three-way comparison with tolerance: values whose distance
+   *  is at most eps compare equal (0); otherwise -1 when x > y, else 1. */
+  template <class T>
+    struct approx_greater : public std::binary_function<T, T, int> {
+    double eps;
+    approx_greater(double e = 1E-13) : eps(e) {}
+    inline int operator()(const T &x, const T &y) const {
+      if (my_abs(x - y) <= eps) return 0;
+      return (x > y) ? -1 : 1;
+    }
+  };
+
+  /** Three-way lexicographical comparison of [b1, e1) and [b2, e2).
+   *  c is a three-way comparator; the first non-zero result it produces is
+   *  returned as is. When one range is a prefix of the other the longer one
+   *  is the greater: 1 if the first range has elements left, -1 if the
+   *  second has, 0 if both ended together.
+   */
+  template<class ITER1, class ITER2, class COMP>
+    int lexicographical_compare(ITER1 b1, const ITER1 &e1,
+                                ITER2 b2, const ITER2 &e2, const COMP &c)  {
+    while (b1 != e1 && b2 != e2) {
+      const int order = c(*b1, *b2);
+      if (order != 0) return order;
+      ++b1;
+      ++b2;
+    }
+    if (b1 != e1) return 1;
+    return (b2 != e2) ? -1 : 0;
+  }
+
+  /** Three-way functor ordering two containers lexicographically, element
+   *  by element, using the stored three-way comparator c. */
+  template<class CONT, class COMP = gmm::less<typename CONT::value_type> >
+    struct lexicographical_less : public std::binary_function<CONT, CONT, int>
+  {
+    COMP c;
+    lexicographical_less(const COMP &d = COMP()) { c = d; }
+    int operator()(const CONT &x, const CONT &y) const {
+      const int order = gmm::lexicographical_compare(x.begin(), x.end(),
+                                                     y.begin(), y.end(), c);
+      return order;
+    }
+  };
+
+  /** Three-way functor giving the reverse of the lexicographical order
+   *  induced by the stored comparator c: the result of
+   *  gmm::lexicographical_compare is negated. */
+  template<class CONT, class COMP = gmm::less<typename CONT::value_type> >
+  struct lexicographical_greater
+    : public std::binary_function<CONT, CONT, int> {
+    COMP c;
+    lexicographical_greater(const COMP &d = COMP()) { c = d; }
+    int operator()(const CONT &x, const CONT &y) const {
+      const int order = gmm::lexicographical_compare(x.begin(), x.end(),
+                                                     y.begin(), y.end(), c);
+      return -order;
+    }
+  };
+  
+
+  /* ********************************************************************* */
+  /* "Virtual" iterators on sequences.                                     */
+  /* The class T represent a class of sequence.                            */
+  /* ********************************************************************* */
+
+  /** Iterator over the sequence U0, U0+1, U0+2, ...: dereferencing yields
+   *  the current term Un and incrementing advances it with ++Un. Two
+   *  iterators are equal when their current terms are equal.
+   */
+  template<class T> struct sequence_iterator {
+    
+    typedef T             value_type;
+    typedef value_type*   pointer;
+    typedef value_type&   reference;
+    typedef const value_type& const_reference;
+    typedef std::forward_iterator_tag iterator_category;
+
+    T Un;  // current term of the sequence
+
+    sequence_iterator(T U0 = T(0)) { Un = U0; }
+    
+    /** Pre-increment: advance and return this iterator. */
+    sequence_iterator &operator ++()
+    { ++Un; return *this; }
+    /** Post-increment: advance and return the previous position. It must
+     *  delegate to the pre-increment form; writing (*this)++ here would
+     *  call this very operator again and never terminate. */
+    sequence_iterator operator ++(int)
+    { sequence_iterator tmp = *this; ++(*this); return tmp; }
+	
+    const_reference operator *() const { return Un; }
+    reference operator *() { return Un; }
+    
+    bool operator ==(const sequence_iterator &i) const { return (i.Un==Un);}
+    bool operator !=(const sequence_iterator &i) const { return (i.Un!=Un);}
+  };
+
+  /* ********************************************************************* */
+  /* generic algorithms.                                                   */
+  /* ********************************************************************* */
+
+  /** Copy count elements starting at first to result and return the
+   *  iterator one past the last element written. Nothing is copied when
+   *  count is not positive. */
+  template <class ITER1, class SIZE, class ITER2>
+  ITER2 copy_n(ITER1 first, SIZE count, ITER2 result) {
+    while (count > 0) {
+      *result = *first;
+      --count;
+      ++first;
+      ++result;
+    }
+    return result;
+  }
+
+  /** Mean of the values in [first, last): the elements are accumulated
+   *  with += and the total is divided by the element count converted to
+   *  float. An empty range is rejected through GMM_ASSERT2. */
+  template<class ITER>
+    typename std::iterator_traits<ITER>::value_type
+      mean_value(ITER first, const ITER &last) {
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    GMM_ASSERT2(first != last, "mean value of empty container");
+    value_type total = *first++;
+    size_t count = 1;
+    for ( ; first != last; ++first, ++count) total += *first;
+    total /= float(count);
+    return total;
+  }
+
+  /** Mean of all the values held by the container c. */
+  template<class CONT>
+    typename CONT::value_type
+  mean_value(const CONT &c) {
+    return mean_value(c.begin(), c.end());
+  }
+
+  /** Component-wise bounding box of the points in [first, last).
+   *  Each point is itself a container of coordinates. pmin and pmax are
+   *  initialised from the first point and then updated coordinate by
+   *  coordinate with every following point; they are left untouched when the
+   *  range is empty. All points are expected to have at least as many
+   *  coordinates as any later point in the range.
+   */
+  template<class ITER>
+    void minmax_box(typename std::iterator_traits<ITER>::value_type &pmin,
+                    typename std::iterator_traits<ITER>::value_type &pmax,
+                    ITER first, const ITER &last) {
+    typedef typename std::iterator_traits<ITER>::value_type PT;
+    if (first != last) { pmin = pmax = *first; ++first; }
+    // ++first is required here: without it the loop never leaves the
+    // second point and spins forever
+    for ( ; first != last; ++first) {
+      typename PT::const_iterator b = (*first).begin(), e = (*first).end();
+      typename PT::iterator b1 = pmin.begin(), b2 = pmax.begin();
+      for ( ; b != e; ++b, ++b1, ++b2)
+        { *b1 = std::min(*b1, *b); *b2 = std::max(*b2, *b); }
+    }
+  }
+
+  /** Comparison predicate over indices: index ia orders before ib when the
+   *  referenced vector satisfies v[ia] < v[ib]. Only a reference to the
+   *  vector is stored. */
+  template<typename VEC> struct sorted_indexes_aux {
+    const VEC &v;
+    sorted_indexes_aux(const VEC& values) : v(values) {}
+    template <typename IDX>
+    bool operator()(const IDX &lhs, const IDX &rhs) const {
+      return v[lhs] < v[rhs];
+    }
+  };
+
+  /** Fill iv with the indices 0 .. v.size()-1 ordered so that the
+   *  referenced values of v are ascending. Any previous content of iv is
+   *  discarded. */
+  template<typename VEC, typename IVEC>
+  void sorted_indexes(const VEC &v, IVEC &iv) {
+    iv.clear();
+    iv.resize(v.size());
+    size_t pos = 0;
+    while (pos < v.size()) {
+      iv[pos] = pos;
+      ++pos;
+    }
+    std::sort(iv.begin(), iv.end(), sorted_indexes_aux<VEC>(v));
+  }
+
+}
+
+
+#endif /* GMM_ALGOBASE_H__ */
--- a/gmm/gmm_blas.h
+++ b/gmm/gmm_blas.h
--- a/gmm/gmm_blas_interface.h
+++ b/gmm/gmm_blas_interface.h
@ -0,0 +1,948 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_blas_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 7, 2003.
+   @brief gmm interface for fortran BLAS.
+*/
+
+#if defined(GETFEM_USES_BLAS) || defined(GMM_USES_BLAS) \
+  || defined(GMM_USES_LAPACK) || defined(GMM_USES_ATLAS)
+
+#ifndef GMM_BLAS_INTERFACE_H
+#define GMM_BLAS_INTERFACE_H
+
+#include "gmm_blas.h"
+#include "gmm_interface.h"
+#include "gmm_matrix.h"
+
+namespace gmm {
+
+  // Use ./configure --enable-blas-interface to activate this interface.
+
+  // Tracing hook placed as the first statement of the BLAS wrappers generated
+  // in this file; it receives the name of the generating macro as a string.
+  // The active definition expands to nothing.
+#define GMMLAPACK_TRACE(f) 
+  // Alternative definition that prints every wrapper call:
+  // #define GMMLAPACK_TRACE(f) cout << "function " << f << " called" << endl;
+
+  /* ********************************************************************* */
+  /* Operations interfaced for T = float, double, std::complex<float>      */
+  /*    or std::complex<double> :                                          */
+  /*                                                                       */
+  /* vect_norm2(std::vector<T>)                                            */
+  /*                                                                       */
+  /* vect_sp(std::vector<T>, std::vector<T>)                               */
+  /* vect_sp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_sp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_sp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* vect_hp(std::vector<T>, std::vector<T>)                               */
+  /* vect_hp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_hp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_hp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* add(std::vector<T>, std::vector<T>)                                   */
+  /* add(scaled(std::vector<T>, a), std::vector<T>)                        */ 
+  /*                                                                       */
+  /* mult(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)               */
+  /* mult(transposed(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, transposed(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(transposed(dense_matrix<T>), transposed(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /* mult(conjugated(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, conjugated(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(conjugated(dense_matrix<T>), conjugated(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>)                 */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)         */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /*                                                                       */
+  /* mult_add(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
+  /* mult_add(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)     */
+  /* mult_add(transposed(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /* mult_add(conjugated(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>, std::vector<T>) */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>,         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(dense_matrix<T>, std::vector<T>, scaled(std::vector<T>),         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), scaled(std::vector<T>), */
+  /*   std::vector<T>)                                                     */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /*                                                                       */
+  /* lower_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* upper_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* lower_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* lower_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /*                                                                       */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>, std::vector<T>)      */
+  /* rank_one_update(dense_matrix<T>, scaled(std::vector<T>),              */
+  /*                                  std::vector<T>)                      */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>,                      */
+  /*                                  scaled(std::vector<T>))              */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /* Basic defines.                                                        */
+  /* ********************************************************************* */
+
+  /* Aliases for the four scalar types the wrappers are instantiated with. */
+  /* The suffix matches the prefix letter of the BLAS routines used with   */
+  /* each type below (s..., d..., c..., z...).                             */
+# define BLAS_S float
+# define BLAS_D double
+# define BLAS_C std::complex<float>
+# define BLAS_Z std::complex<double>
+
+  /* ********************************************************************* */
+  /* BLAS functions used.                                                  */
+  /* ********************************************************************* */
+  /* Declarations of the external BLAS symbols called by the wrappers.    */
+  /* Only daxpy_ and dgemm_ carry a full parameter list; every other      */
+  /* routine is declared variadic, so the compiler cannot check the       */
+  /* arguments passed to it.                                              */
+  /* NOTE(review): calling a non-variadic routine through a variadic      */
+  /* prototype may not be ABI-compatible on every platform -- confirm for */
+  /* each supported target.                                               */
+  extern "C" {
+    /* axpy, double precision (fully prototyped). */
+    void daxpy_(const int *n, const double *alpha, const double *x,
+                const int *incx, double *y, const int *incy);
+    /* gemm, double precision (fully prototyped). */
+    void dgemm_(const char *tA, const char *tB, const int *m,
+                const int *n, const int *k, const double *alpha,
+                const double *A, const int *ldA, const double *B,
+                const int *ldB, const double *beta, double *C,
+                const int *ldC);
+    /* Remaining routines: unchecked variadic prototypes. */
+    void sgemm_(...); void cgemm_(...); void zgemm_(...);
+    void sgemv_(...); void dgemv_(...); void cgemv_(...); void zgemv_(...);
+    void strsv_(...); void dtrsv_(...); void ctrsv_(...); void ztrsv_(...);
+    void saxpy_(...); /*void daxpy_(...); */void caxpy_(...); void zaxpy_(...);
+    BLAS_S sdot_ (...); BLAS_D ddot_ (...);
+    BLAS_C cdotu_(...); BLAS_Z zdotu_(...);
+    BLAS_C cdotc_(...); BLAS_Z zdotc_(...);
+    BLAS_S snrm2_(...); BLAS_D dnrm2_(...);
+    BLAS_S scnrm2_(...); BLAS_D dznrm2_(...);
+    void  sger_(...); void  dger_(...); void  cgerc_(...); void  zgerc_(...); 
+  }
+
+#if 1
+
+  /* ********************************************************************* */
+  /* vect_norm2(x).                                                        */
+  /* ********************************************************************* */
+
+  /* Generates an inline vect_norm2 overload that forwards to blas_name.  */
+  /*   param1(base_type) : expands to the parameter declaration;          */
+  /*   trans1(base_type) : expands to optional set-up statements;         */
+  /*   blas_name         : BLAS routine receiving (&n, &x[0], &inc);      */
+  /*   base_type         : scalar type of the vector.                     */
+  /* The generated body reads a vector named x, which the expansion of    */
+  /* param1 must declare, and passes it with unit stride.                 */
+  /* NOTE(review): &x[0] is evaluated even if x is empty -- confirm that  */
+  /* callers never pass an empty vector.                                  */
+# define nrm2_interface(param1, trans1, blas_name, base_type)		   \
+  inline number_traits<base_type >::magnitude_type			   \
+  vect_norm2(param1(base_type)) {					   \
+    GMMLAPACK_TRACE("nrm2_interface");					   \
+    int inc(1), n(int(vect_size(x))); trans1(base_type);		   \
+    return blas_name(&n, &x[0], &inc);					   \
+  }
+
+  /* Parameter pack for nrm2_interface: a plain std::vector named x, with */
+  /* no set-up code.                                                      */
+# define nrm2_p1(base_type) const std::vector<base_type > &x
+# define nrm2_trans1(base_type)
+
+  /* vect_norm2 overloads for the four scalar types. */
+  nrm2_interface(nrm2_p1, nrm2_trans1, snrm2_ , BLAS_S)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dnrm2_ , BLAS_D)
+  nrm2_interface(nrm2_p1, nrm2_trans1, scnrm2_, BLAS_C)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dznrm2_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* vect_sp(x, y).                                                        */
+  /* ********************************************************************* */
+
+  /* Generates an inline vect_sp overload that forwards to blas_name.     */
+  /*   param1 / param2 : expand to the two parameter declarations;        */
+  /*   trans1 / trans2 : expand to set-up statements; after them vectors  */
+  /*                     named x and y must be in scope;                  */
+  /*   mult1 / mult2   : tokens pasted in front of the BLAS call, either  */
+  /*                     a cast such as (BLAS_S) or a factor such as a*;  */
+  /*   blas_name       : BLAS routine; base_type : scalar type.           */
+  /* The element count is taken from y only, and both vectors are passed  */
+  /* with unit stride.                                                    */
+# define dot_interface(param1, trans1, mult1, param2, trans2, mult2,	   \
+                         blas_name, base_type)                             \
+  inline base_type vect_sp(param1(base_type), param2(base_type)) {         \
+    GMMLAPACK_TRACE("dot_interface");                                      \
+    trans1(base_type); trans2(base_type); int inc(1), n(int(vect_size(y)));\
+    return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc);            \
+  }
+
+  /* First operand of vect_sp as a plain vector named x (no set-up). */
+# define dot_p1(base_type) const std::vector<base_type > &x
+# define dot_trans1(base_type)
+  /* First operand as a scaled vector reference x_: the set-up code binds */
+  /* x to the vector returned by linalg_origin(x_) (const cast away) and  */
+  /* copies the scale factor x_.r into a.                                 */
+# define dot_p1_s(base_type)                                               \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dot_trans1_s(base_type)                                           \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+  /* Second operand of vect_sp as a plain vector named y (no set-up). */
+# define dot_p2(base_type) const std::vector<base_type > &y
+# define dot_trans2(base_type)
+  /* Second operand as a scaled vector reference y_: binds y to the       */
+  /* underlying vector and copies the scale factor y_.r into b.           */
+# define dot_p2_s(base_type)                                               \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dot_trans2_s(base_type)                                           \
+         std::vector<base_type > &y =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));      \
+         base_type b(y_.r)
+
+  /* vect_sp(x, y): both operands plain; the result is only cast. */
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2, dot_trans2, (BLAS_S),
+		sdot_ , BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2, dot_trans2, (BLAS_D),
+		ddot_ , BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2, dot_trans2, (BLAS_C),
+		cdotu_, BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2, dot_trans2, (BLAS_Z),
+		zdotu_, BLAS_Z)
+  
+  /* vect_sp(scaled(x), y): the BLAS result is multiplied by a. */
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_S),
+		sdot_ ,BLAS_S)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_D),
+		ddot_ ,BLAS_D)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_C),
+		cdotu_,BLAS_C)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_Z),
+		zdotu_,BLAS_Z)
+  
+  /* vect_sp(x, scaled(y)): the BLAS result is multiplied by b. */
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2_s, dot_trans2_s, b*,
+		sdot_ ,BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2_s, dot_trans2_s, b*,
+		ddot_ ,BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2_s, dot_trans2_s, b*,
+		cdotu_,BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2_s, dot_trans2_s, b*,
+		  zdotu_,BLAS_Z)
+
+  /* vect_sp(scaled(x), scaled(y)): the BLAS result is multiplied by a*b. */
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,sdot_ ,
+		BLAS_S)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,ddot_ ,
+		BLAS_D)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,cdotu_,
+		BLAS_C)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,zdotu_,
+		BLAS_Z)
+
+
+  /* ********************************************************************* */
+  /* vect_hp(x, y).                                                        */
+  /* ********************************************************************* */
+
+  /* Generates an inline vect_hp overload that forwards to blas_name.     */
+  /* The macro parameters have the same roles as in dot_interface:        */
+  /* param*/trans* declare the operands and bring vectors x and y into    */
+  /* scope, mult1/mult2 are tokens pasted in front of the BLAS call (a    */
+  /* cast or a scale factor). The element count is taken from y only and  */
+  /* both vectors are passed with unit stride.                            */
+# define dotc_interface(param1, trans1, mult1, param2, trans2, mult2,	   \
+			blas_name, base_type)				   \
+  inline base_type vect_hp(param1(base_type), param2(base_type)) {         \
+    GMMLAPACK_TRACE("dotc_interface");                                     \
+    trans1(base_type); trans2(base_type); int inc(1), n(int(vect_size(y)));\
+    return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc);            \
+  }
+
+  /* First operand of vect_hp as a plain vector named x (no set-up). */
+# define dotc_p1(base_type) const std::vector<base_type > &x
+# define dotc_trans1(base_type)
+  /* First operand as a scaled vector reference x_: binds x to the vector */
+  /* returned by linalg_origin(x_) and copies the scale factor into a.    */
+# define dotc_p1_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dotc_trans1_s(base_type)                                          \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+  /* Second operand of vect_hp as a plain vector named y (no set-up). */
+# define dotc_p2(base_type) const std::vector<base_type > &y
+# define dotc_trans2(base_type)
+  /* Second operand as a scaled vector reference y_: binds y to the       */
+  /* underlying vector; unlike dot_trans2_s, b receives the conjugate of  */
+  /* the scale factor (gmm::conj(y_.r)).                                  */
+# define dotc_p2_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dotc_trans2_s(base_type)                                          \
+         std::vector<base_type > &y =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));      \
+         base_type b(gmm::conj(y_.r))
+
+  /* vect_hp(x, y): both operands plain. The real types reuse sdot_/ddot_, */
+  /* the complex types use cdotc_/zdotc_.                                  */
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2, dotc_trans2,
+		 (BLAS_S),sdot_ ,BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2, dotc_trans2,
+		 (BLAS_D),ddot_ ,BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2, dotc_trans2,
+		 (BLAS_C),cdotc_,BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2, dotc_trans2,
+		 (BLAS_Z),zdotc_,BLAS_Z)
+  
+  /* vect_hp(scaled(x), y): the BLAS result is multiplied by a. */
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_S),sdot_, BLAS_S)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_D),ddot_ , BLAS_D)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_C),cdotc_, BLAS_C)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+		 (BLAS_Z),zdotc_, BLAS_Z)
+  
+  /* vect_hp(x, scaled(y)): the BLAS result is multiplied by b. */
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2_s, dotc_trans2_s,
+		 b*,sdot_ , BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2_s, dotc_trans2_s,
+		 b*,ddot_ , BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2_s, dotc_trans2_s,
+		 b*,cdotc_, BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2_s, dotc_trans2_s,
+		   b*,zdotc_, BLAS_Z)
+
+  /* vect_hp(scaled(x), scaled(y)): the BLAS result is multiplied by a*b. */
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,sdot_ ,
+		 BLAS_S)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,ddot_ ,
+		 BLAS_D)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,cdotc_,
+		 BLAS_C)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,zdotc_,
+		 BLAS_Z)
+
+  /* ********************************************************************* */
+  /* add(x, y).                                                            */
+  /* ********************************************************************* */
+
+  /* Generates an inline add(x, y) overload that forwards to blas_name    */
+  /* (an axpy routine), i.e. y receives a*x + y.                          */
+  /*   param1(base_type) : expands to the first parameter declaration;    */
+  /*   trans1(base_type) : set-up code that must leave a vector x and a   */
+  /*                       scalar a in scope.                             */
+  /* The element count is taken from y; the BLAS call is skipped when y   */
+  /* is empty. Both vectors are passed with unit stride.                  */
+# define axpy_interface(param1, trans1, blas_name, base_type)              \
+  inline void add(param1(base_type), std::vector<base_type > &y) {         \
+    GMMLAPACK_TRACE("axpy_interface");                                     \
+    int inc(1), n(int(vect_size(y))); trans1(base_type);	 	   \
+    if (n == 0) return;							   \
+    blas_name(&n, &a, &x[0], &inc, &y[0], &inc);                           \
+  }
+
+  /* Plain first operand: vector x, scale factor a fixed to 1. */
+# define axpy_p1(base_type) const std::vector<base_type > &x
+# define axpy_trans1(base_type) base_type a(1)
+  /* Scaled first operand x_: binds x to the vector returned by           */
+  /* linalg_origin(x_) and copies the scale factor x_.r into a.           */
+# define axpy_p1_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define axpy_trans1_s(base_type)                                          \
+         std::vector<base_type > &x =                                      \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type a(x_.r)
+
+  /* add(x, y) for the four scalar types. */
+  axpy_interface(axpy_p1, axpy_trans1, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1, axpy_trans1, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1, axpy_trans1, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1, axpy_trans1, zaxpy_, BLAS_Z)
+  
+  /* add(scaled(x), y) for the four scalar types. */
+  axpy_interface(axpy_p1_s, axpy_trans1_s, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, zaxpy_, BLAS_Z)
+  
+
+  /* ********************************************************************* */
+  /* mult_add(A, x, z).                                                    */
+  /* ********************************************************************* */
+  
+  /* Generates an inline mult_add_spec(A, x, z, orien) overload computing */
+  /* z <- alpha * op(A) * x + z through the BLAS gemv routine blas_name   */
+  /* (called with beta = 1).                                              */
+  /*   param1 / trans1 : declare the matrix operand and bring a dense     */
+  /*                     matrix A and the transposition flag t into scope;*/
+  /*   param2 / trans2 : declare the vector operand and bring a vector x  */
+  /*                     and the scalar alpha into scope;                 */
+  /*   orien           : orientation tag type used for overload dispatch. */
+  /* m, n and lda always describe the stored matrix A; t selects op().    */
+  /* When A has a zero dimension the product contributes nothing, so z is */
+  /* deliberately left unchanged (it must not be cleared: this is the     */
+  /* accumulating variant).                                               */
+# define gemv_interface(param1, trans1, param2, trans2, blas_name,         \
+			base_type, orien)                                  \
+  inline void mult_add_spec(param1(base_type), param2(base_type),          \
+              std::vector<base_type > &z, orien) {                         \
+    GMMLAPACK_TRACE("gemv_interface");                                     \
+    trans1(base_type); trans2(base_type); base_type beta(1);               \
+    int m(int(mat_nrows(A))), lda(m), n(int(mat_ncols(A))), inc(1);	   \
+    if (m && n) blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc,  \
+                          &beta, &z[0], &inc);                             \
+  }
+
+  // Matrix operand (first parameter) of the gemv/gemm wrappers. Each pair
+  // consists of a parameter declaration and set-up code that leaves a dense
+  // matrix A and a transposition flag t in scope.
+  //   _n  : plain dense matrix,                      t = 'N'
+  //   _t  : transposed reference to a dense matrix,  t = 'T'
+  //   _tc : same as _t for a const dense matrix (shares gem_trans1_t)
+  //   _c  : conjugated reference to a dense matrix,  t = 'C'
+  // For the reference forms, A is bound to the matrix returned by
+  // linalg_origin(A_), with constness cast away.
+# define gem_p1_n(base_type)  const dense_matrix<base_type > &A
+# define gem_trans1_n(base_type) const char t = 'N'
+# define gem_p1_t(base_type)                                               \
+         const transposed_col_ref<dense_matrix<base_type > *> &A_
+# define gem_trans1_t(base_type) dense_matrix<base_type > &A =             \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));     \
+         const char t = 'T'
+# define gem_p1_tc(base_type)                                              \
+         const transposed_col_ref<const dense_matrix<base_type > *> &A_
+# define gem_p1_c(base_type)                                               \
+         const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_
+# define gem_trans1_c(base_type) dense_matrix<base_type > &A =             \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));     \
+         const char t = 'C'
+
+  // Vector operand (second parameter) of the gemv wrappers. The set-up code
+  // leaves a vector x and a scalar alpha in scope.
+  //   _n : plain vector, alpha = 1
+  //   _s : scaled vector reference x_, alpha = x_.r
+# define gemv_p2_n(base_type)  const std::vector<base_type > &x
+# define gemv_trans2_n(base_type) base_type alpha(1)
+# define gemv_p2_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define gemv_trans2_s(base_type) std::vector<base_type > &x =             \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));      \
+         base_type alpha(x_.r)
+
+  // mult_add_spec overloads, one group of four scalar types per operand
+  // combination. Plain matrices are tagged col_major; transposed and
+  // conjugated references are tagged row_major.
+
+  // Z <- AX + Z.
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, col_major)
+
+  // Z <- transposed(A)X + Z.
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- transposed(const A)X + Z.
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- conjugated(A)X + Z.
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+		 BLAS_Z, row_major)
+
+  // Z <- A scaled(X) + Z.
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, col_major)
+
+  // Z <- transposed(A) scaled(X) + Z.
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- transposed(const A) scaled(X) + Z.
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+  
+  // Z <- conjugated(A) scaled(X) + Z.
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+		 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+		 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+		 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+		 BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* mult(A, x, y).                                                        */
+  /* ********************************************************************* */
+  
+  /* Generates an inline mult_spec(A, x, z, orien) overload computing     */
+  /* z <- alpha * op(A) * x through the BLAS gemv routine blas_name       */
+  /* (called with beta = 0, so the previous content of z is not used).    */
+  /* The macro parameters have the same roles as in gemv_interface:       */
+  /* trans1 brings the dense matrix A and the flag t into scope, trans2   */
+  /* the vector x and the scalar alpha.                                   */
+  /* When A has a zero dimension BLAS is not called and z is cleared      */
+  /* instead, which matches an empty product.                             */
+# define gemv_interface2(param1, trans1, param2, trans2, blas_name,        \
+                         base_type, orien)                                 \
+  inline void mult_spec(param1(base_type), param2(base_type),              \
+              std::vector<base_type > &z, orien) {                         \
+    GMMLAPACK_TRACE("gemv_interface2");                                    \
+    trans1(base_type); trans2(base_type); base_type beta(0);               \
+    int m(int(mat_nrows(A))), lda(m), n(int(mat_ncols(A))), inc(1);	   \
+    if (m && n)                                                            \
+      blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc, &beta,     \
+                &z[0], &inc);                                              \
+    else gmm::clear(z);                                                    \
+  }
+
+  // mult_spec overloads, one group of four scalar types per operand
+  // combination. Plain matrices are tagged col_major; transposed and
+  // conjugated references are tagged row_major.
+
+  // Y <- AX.
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, col_major)
+
+  // Y <- transposed(A)X.
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- transposed(const A)X.
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- conjugated(A)X.
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+		  BLAS_Z, row_major)
+
+  // Y <- A scaled(X).
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, col_major)
+
+  // Y <- transposed(A) scaled(X).
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- transposed(const A) scaled(X).
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+  
+  // Y <- conjugated(A) scaled(X).
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+		  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+		  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+		  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+		  BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* Rank one update.                                                      */
+  /* ********************************************************************* */
+
+  /* Generates rank_one_update(A, V, W) for plain vectors, forwarding to  */
+  /* the BLAS routine blas_name with alpha = 1 and unit strides.          */
+  /* A is taken by const reference, yet the address of its first entry is */
+  /* handed to BLAS as the matrix argument, so the update happens in      */
+  /* place on A's storage. BLAS is not called when A has a zero dimension.*/
+# define ger_interface(blas_name, base_type)                               \
+  inline void rank_one_update(const dense_matrix<base_type > &A,           \
+			      const std::vector<base_type > &V,	   	   \
+			      const std::vector<base_type > &W) {	   \
+    GMMLAPACK_TRACE("ger_interface");                                      \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));		   \
+    int incx = 1, incy = 1;						   \
+    base_type alpha(1);                                                    \
+    if (m && n)								   \
+      blas_name(&m, &n, &alpha, &V[0], &incx, &W[0], &incy, &A(0,0), &lda);\
+  }
+
+  /* The real types use ger, the complex types use gerc. */
+  ger_interface(sger_, BLAS_S)
+  ger_interface(dger_, BLAS_D)
+  ger_interface(cgerc_, BLAS_C)
+  ger_interface(zgerc_, BLAS_Z)
+
+  /* Generates rank_one_update(A, scaled(x), W): same as ger_interface,   */
+  /* but the first vector is a scaled reference. gemv_p2_s declares it as */
+  /* x_ and gemv_trans2_s binds x to the underlying vector and alpha to   */
+  /* its scale factor, which is passed to BLAS unchanged.                 */
+  /* The trace label is the same "ger_interface" string as the plain      */
+  /* variant.                                                             */
+# define ger_interface_sn(blas_name, base_type)                            \
+  inline void rank_one_update(const dense_matrix<base_type > &A,	   \
+			      gemv_p2_s(base_type),			   \
+			      const std::vector<base_type > &W) {	   \
+    GMMLAPACK_TRACE("ger_interface");                                      \
+    gemv_trans2_s(base_type); 						   \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));		   \
+    int incx = 1, incy = 1;						   \
+    if (m && n)								   \
+      blas_name(&m, &n, &alpha, &x[0], &incx, &W[0], &incy, &A(0,0), &lda);\
+  }
+
+  /* The real types use ger, the complex types use gerc. */
+  ger_interface_sn(sger_, BLAS_S)
+  ger_interface_sn(dger_, BLAS_D)
+  ger_interface_sn(cgerc_, BLAS_C)
+  ger_interface_sn(zgerc_, BLAS_Z)
+
+# define ger_interface_ns(blas_name, base_type)                            \
+  inline void rank_one_update(const dense_matrix<base_type > &A,	   \
+			      const std::vector<base_type > &V,		   \
+			      gemv_p2_s(base_type)) {			   \
+    GMMLAPACK_TRACE("ger_interface");                                      \
+    gemv_trans2_s(base_type); 						   \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));		   \
+    int incx = 1, incy = 1;						   \
+    base_type al2 = gmm::conj(alpha);					   \
+    if (m && n)								   \
+      blas_name(&m, &n, &al2, &V[0], &incx, &x[0], &incy, &A(0,0), &lda);  \
+  }
+
+  ger_interface_ns(sger_, BLAS_S)
+  ger_interface_ns(dger_, BLAS_D)
+  ger_interface_ns(cgerc_, BLAS_C)
+  ger_interface_ns(zgerc_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* dense matrix x dense matrix multiplication.
+   *
+   * gemm_interface_nn generates mult_spec(A, B, C, c_mult), which hands
+   * the product to the BLAS routine blas_name with both operands flagged
+   * 'N' (not transposed), alpha = 1 and beta = 0.
+   *   m = rows of A, k = columns of A, n = columns of B,
+   *   lda = m, ldb = k, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_nn(blas_name, base_type)                           \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+            const dense_matrix<base_type > &B,                             \
+            dense_matrix<base_type > &C, c_mult) {                         \
+    GMMLAPACK_TRACE("gemm_interface_nn");                                  \
+    const char t = 'N';                                                    \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));  		   \
+    int n(int(mat_ncols(B)));						   \
+    int ldb = k, ldc = m;                                                  \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &t, &m, &n, &k, &alpha,                                \
+	          &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);     \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_nn(sgemm_, BLAS_S)
+  gemm_interface_nn(dgemm_, BLAS_D)
+  gemm_interface_nn(cgemm_, BLAS_C)
+  gemm_interface_nn(zgemm_, BLAS_Z)
+  
+  /* ********************************************************************* */
+  /* transposed(dense matrix) x dense matrix multiplication.
+   *
+   * gemm_interface_tn generates mult_spec(A_, B, C, rcmult) where A_ is a
+   * transposed_col_ref on a pointer to a dense matrix.  The referenced
+   * matrix A is recovered with linalg_origin (constness is cast away) and
+   * passed to blas_name with the flags 'T' for A and 'N' for B, alpha = 1
+   * and beta = 0.
+   *   m = columns of A, k = rows of A, n = columns of B,
+   *   lda = k, ldb = k, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   * The is_const argument selects `dense_matrix` or `const dense_matrix`
+   * as the pointee type, so both kinds of reference get an overload.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_tn(blas_name, base_type, is_const)                 \
+  inline void mult_spec(                                                   \
+         const transposed_col_ref<is_const<base_type > *> &A_,\
+         const dense_matrix<base_type > &B,                                \
+         dense_matrix<base_type > &C, rcmult) {                            \
+    GMMLAPACK_TRACE("gemm_interface_tn");                                  \
+    dense_matrix<base_type > &A                                            \
+         = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));   \
+    const char t = 'T', u = 'N';                                           \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B)));  \
+    int lda = k, ldb = k, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	          &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);     \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_tn(sgemm_, BLAS_S, dense_matrix)
+  gemm_interface_tn(dgemm_, BLAS_D, dense_matrix)
+  gemm_interface_tn(cgemm_, BLAS_C, dense_matrix)
+  gemm_interface_tn(zgemm_, BLAS_Z, dense_matrix)
+  gemm_interface_tn(sgemm_, BLAS_S, const dense_matrix)
+  gemm_interface_tn(dgemm_, BLAS_D, const dense_matrix)
+  gemm_interface_tn(cgemm_, BLAS_C, const dense_matrix)
+  gemm_interface_tn(zgemm_, BLAS_Z, const dense_matrix)
+
+  /* ********************************************************************* */
+  /* dense matrix x transposed(dense matrix) multiplication.
+   *
+   * gemm_interface_nt generates mult_spec(A, B_, C, r_mult) where B_ is a
+   * transposed_col_ref on a pointer to a dense matrix.  The referenced
+   * matrix B is recovered with linalg_origin (constness is cast away) and
+   * passed to blas_name with the flags 'N' for A and 'T' for B, alpha = 1
+   * and beta = 0.
+   *   m = rows of A, k = columns of A, n = rows of B,
+   *   lda = m, ldb = n, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   * The is_const argument selects `dense_matrix` or `const dense_matrix`
+   * as the pointee type, so both kinds of reference get an overload.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_nt(blas_name, base_type, is_const)                 \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+		     const transposed_col_ref<is_const<base_type > *> &B_, \
+         dense_matrix<base_type > &C, r_mult) {                            \
+    GMMLAPACK_TRACE("gemm_interface_nt");                                  \
+    dense_matrix<base_type > &B                                            \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'N', u = 'T';                                           \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));               \
+    int n(int(mat_nrows(B)));						   \
+    int ldb = n, ldc = m;                                                  \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_nt(sgemm_, BLAS_S, dense_matrix)
+  gemm_interface_nt(dgemm_, BLAS_D, dense_matrix)
+  gemm_interface_nt(cgemm_, BLAS_C, dense_matrix)
+  gemm_interface_nt(zgemm_, BLAS_Z, dense_matrix)
+  gemm_interface_nt(sgemm_, BLAS_S, const dense_matrix)
+  gemm_interface_nt(dgemm_, BLAS_D, const dense_matrix)
+  gemm_interface_nt(cgemm_, BLAS_C, const dense_matrix)
+  gemm_interface_nt(zgemm_, BLAS_Z, const dense_matrix)
+
+  /* ********************************************************************* */
+  /* transposed(dense matrix) x transposed(dense matrix) multiplication.
+   *
+   * gemm_interface_tt generates mult_spec(A_, B_, C, r_mult) where both
+   * operands are transposed_col_ref on pointers to dense matrices.  The
+   * referenced matrices A and B are recovered with linalg_origin
+   * (constness is cast away) and passed to blas_name with the flag 'T'
+   * for both, alpha = 1 and beta = 0.
+   *   m = columns of A, k = rows of A, n = rows of B,
+   *   lda = k, ldb = n, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   * isA_const / isB_const select `dense_matrix` or `const dense_matrix`
+   * for each operand; the sixteen instantiations below cover the four
+   * BLAS routines times the four const combinations.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_tt(blas_name, base_type, isA_const, isB_const)     \
+  inline void mult_spec(                                                   \
+	       const transposed_col_ref<isA_const <base_type > *> &A_,	   \
+               const transposed_col_ref<isB_const <base_type > *> &B_,	   \
+	       dense_matrix<base_type > &C, r_mult) {			   \
+    GMMLAPACK_TRACE("gemm_interface_tt");                                  \
+    dense_matrix<base_type > &A                                            \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+    dense_matrix<base_type > &B                                            \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'T', u = 'T';                                           \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_nrows(B)));  \
+    int lda = k, ldb = n, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, const dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, const dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, const dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, const dense_matrix)
+  gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, const dense_matrix)
+  gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, const dense_matrix)
+
+
+  /* ********************************************************************* */
+  /* conjugated(dense matrix) x dense matrix multiplication.
+   *
+   * gemm_interface_cn generates mult_spec(A_, B, C, rcmult) where A_ is a
+   * conjugated_col_matrix_const_ref on a dense matrix.  The referenced
+   * matrix A is recovered with linalg_origin (constness is cast away) and
+   * passed to blas_name with the flags 'C' for A and 'N' for B, alpha = 1
+   * and beta = 0.
+   *   m = columns of A, k = rows of A, n = columns of B,
+   *   lda = k, ldb = k, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_cn(blas_name, base_type)                           \
+  inline void mult_spec(                                                   \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_,\
+      const dense_matrix<base_type > &B,                                   \
+      dense_matrix<base_type > &C, rcmult) {                               \
+    GMMLAPACK_TRACE("gemm_interface_cn");                                  \
+    dense_matrix<base_type > &A                                            \
+          = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));  \
+    const char t = 'C', u = 'N';                                           \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B)));  \
+    int lda = k, ldb = k, ldc = m;					   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_cn(sgemm_, BLAS_S)
+  gemm_interface_cn(dgemm_, BLAS_D)
+  gemm_interface_cn(cgemm_, BLAS_C)
+  gemm_interface_cn(zgemm_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* dense matrix x conjugated(dense matrix) multiplication.
+   *
+   * gemm_interface_nc generates mult_spec(A, B_, C, c_mult, row_major)
+   * where B_ is a conjugated_col_matrix_const_ref on a dense matrix.
+   * Unlike the other gemm wrappers of this file, this overload takes two
+   * tag parameters (c_mult and row_major).  The referenced matrix B is
+   * recovered with linalg_origin (constness is cast away) and passed to
+   * blas_name with the flags 'N' for A and 'C' for B, alpha = 1 and
+   * beta = 0.
+   *   m = rows of A, k = columns of A, n = rows of B,
+   *   lda = m, ldb = n, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_nc(blas_name, base_type)                           \
+  inline void mult_spec(const dense_matrix<base_type > &A,                 \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &B_,\
+      dense_matrix<base_type > &C, c_mult, row_major) {                    \
+    GMMLAPACK_TRACE("gemm_interface_nc");                                  \
+    dense_matrix<base_type > &B                                            \
+         = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));   \
+    const char t = 'N', u = 'C';                                           \
+    int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A)));               \
+    int n(int(mat_nrows(B))), ldb = n, ldc = m;				   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_nc(sgemm_, BLAS_S)
+  gemm_interface_nc(dgemm_, BLAS_D)
+  gemm_interface_nc(cgemm_, BLAS_C)
+  gemm_interface_nc(zgemm_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* conjugated(dense matrix) x conjugated(dense matrix) multiplication.
+   *
+   * gemm_interface_cc generates mult_spec(A_, B_, C, r_mult) where both
+   * operands are conjugated_col_matrix_const_ref on dense matrices.  The
+   * referenced matrices A and B are recovered with linalg_origin
+   * (constness is cast away) and passed to blas_name with the flag 'C'
+   * for both, alpha = 1 and beta = 0.
+   *   m = columns of A, k = rows of A, n = rows of B,
+   *   lda = k, ldb = n, ldc = m.
+   * When any of m, k, n is zero no BLAS call is made and C is cleared.
+   */
+  /* ********************************************************************* */
+
+# define gemm_interface_cc(blas_name, base_type)                           \
+  inline void mult_spec(                                                   \
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_,\
+      const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &B_,\
+      dense_matrix<base_type > &C, r_mult) {                               \
+    GMMLAPACK_TRACE("gemm_interface_cc");                                  \
+    dense_matrix<base_type > &A                                            \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+    dense_matrix<base_type > &B                                            \
+        = const_cast<dense_matrix<base_type > &>(*(linalg_origin(B_)));    \
+    const char t = 'C', u = 'C';                                           \
+    int m(int(mat_ncols(A))), k(int(mat_nrows(A))), lda = k;               \
+    int n(int(mat_nrows(B))), ldb = n, ldc = m;				   \
+    base_type alpha(1), beta(0);                                           \
+    if (m && k && n)                                                       \
+      blas_name(&t, &u, &m, &n, &k, &alpha,                                \
+	        &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc);       \
+    else gmm::clear(C);                                                    \
+  }
+
+  gemm_interface_cc(sgemm_, BLAS_S)
+  gemm_interface_cc(dgemm_, BLAS_D)
+  gemm_interface_cc(cgemm_, BLAS_C)
+  gemm_interface_cc(zgemm_, BLAS_Z)
+   
+  /* ********************************************************************* */
+  /* Tri solve.
+   *
+   * trsv_interface(f_name, loru, param1, trans1, blas_name, base_type)
+   * generates f_name(<matrix parameter>, x, k, is_unit), an in-place
+   * triangular solve on the vector x:
+   *   - param1 and trans1 are macros defined earlier in this file; they
+   *     are expected to declare the matrix parameter and to bring the
+   *     matrix `A` and the transposition character `t` into scope;
+   *   - loru is trsv_upper or trsv_lower and declares the character `l`
+   *     ('U' or 'L') telling BLAS which triangle to use;
+   *   - `d` is 'U' when is_unit is true and 'N' otherwise;
+   *   - blas_name is called with order n = k, lda = number of rows of A
+   *     and a unit stride on x.  The call is skipped when lda is zero.
+   *
+   * In the transposed and conjugated instantiations the triangle is
+   * swapped on purpose: lower_tri_solve is built with trsv_upper and
+   * upper_tri_solve with trsv_lower, because the matrix handed to BLAS
+   * is the one underlying the transposed / conjugated reference.
+   */
+  /* ********************************************************************* */
+
+# define trsv_interface(f_name, loru, param1, trans1, blas_name, base_type)\
+  inline void f_name(param1(base_type), std::vector<base_type > &x,        \
+                              size_type k, bool is_unit) {                 \
+    GMMLAPACK_TRACE("trsv_interface");                                     \
+    loru; trans1(base_type); char d = is_unit ? 'U' : 'N';                 \
+    int lda(int(mat_nrows(A))), inc(1), n = int(k);			   \
+    if (lda) blas_name(&l, &t, &d, &n, &A(0,0), &lda, &x[0], &inc);        \
+  }
+
+# define trsv_upper const char l = 'U'
+# define trsv_lower const char l = 'L'
+
+  // X <- LOWER(A)^{-1}X.
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(A)^{-1}X.
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- LOWER(transposed(A))^{-1}X.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(transposed(A))^{-1}X.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+
+  // X <- LOWER(transposed(const A))^{-1}X.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(transposed(const A))^{-1}X.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t,
+		 ztrsv_, BLAS_Z)
+
+  // X <- LOWER(conjugated(A))^{-1}X.
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 strsv_, BLAS_S)
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c,
+		 ztrsv_, BLAS_Z)
+  
+  // X <- UPPER(conjugated(A))^{-1}X.
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 strsv_, BLAS_S)
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 dtrsv_, BLAS_D) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 ctrsv_, BLAS_C) 
+  trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c,
+		 ztrsv_, BLAS_Z)
+  
+#endif
+}
+
+#endif // GMM_BLAS_INTERFACE_H
+
+#endif // GMM_USES_BLAS
--- a/gmm/gmm_condition_number.h
+++ b/gmm/gmm_condition_number.h
@ -0,0 +1,147 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Julien Pommier
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_condition_number.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>, Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+   @date August 27, 2003.
+   @brief computation of the condition number of dense matrices.
+*/
+#ifndef GMM_CONDITION_NUMBER_H__
+#define GMM_CONDITION_NUMBER_H__
+
+#include "gmm_dense_qr.h"
+
+namespace gmm {
+
+  /** Condition number of a dense matrix, obtained from eigenvalue magnitudes.
+
+      When is_hermitian(M) holds, the eigenvalues of M are computed directly
+      with symmetric_qr_algorithm.  Otherwise the (m+n) x (m+n) matrix with
+      M in its upper right block and conjugated(M) in its lower left block is
+      assembled and its eigenvalues are used instead.
+      Uses symmetric_qr_algorithm => dense matrices only.
+
+      @param M a matrix.
+      @param emin receives the smallest eigenvalue magnitude that was found.
+             It is set to 0 on every path that returns before the spectrum
+             is computed.
+      @param emax receives the largest eigenvalue magnitude that was found.
+             It is set to 0 on every path that returns before the spectrum
+             is computed.
+      @return emax / emin.  gmm::default_max(R()) is returned when emin is
+              zero, or when the value type differs in size from its magnitude
+              type (the complex case) and lu_det(M) is zero.  0 is returned
+              for a matrix with no rows and no columns.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  condition_number(const MAT& M, 
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emin,
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emax) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    // Give the output parameters a defined value before any return: the
+    // early exits below would otherwise leave them untouched.
+    emax = emin = R(0);
+
+    // Added because of errors in complex with zero det
+    if (sizeof(T) != sizeof(R) && gmm::abs(gmm::lu_det(M)) == R(0))
+      return  gmm::default_max(R());
+      
+    size_type m = mat_nrows(M), n = mat_ncols(M);
+    if (m+n == 0) return R(0);
+
+    std::vector<R> eig(m+n);
+    if (is_hermitian(M)) {
+      eig.resize(m);
+      gmm::symmetric_qr_algorithm(M, eig);
+    }
+    else {
+      // NOTE(review): for m != n this augmented matrix is expected to have
+      // |m-n| zero eigenvalues, which makes emin zero - confirm that an
+      // "infinite" result is what callers want for rectangular matrices.
+      dense_matrix<T> B(m+n, m+n); // not very efficient ??
+      gmm::copy(conjugated(M), sub_matrix(B, sub_interval(m, n), sub_interval(0, m)));
+      gmm::copy(M, sub_matrix(B, sub_interval(0, m),
+					  sub_interval(m, n)));
+      gmm::symmetric_qr_algorithm(B, eig);
+    }
+    emin = emax = gmm::abs(eig[0]);
+    for (size_type i = 1; i < eig.size(); ++i) {
+      R e = gmm::abs(eig[i]); 
+      emin = std::min(emin, e);
+      emax = std::max(emax, e);
+    }
+    // cout << "emin = " << emin << " emax = " << emax << endl;
+    if (emin == R(0)) return gmm::default_max(R());
+    return emax / emin;
+  }
+
+  /** Condition number of M when the extreme eigenvalue magnitudes are not
+      needed: forwards to the three argument overload with local outputs.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  condition_number(const MAT& M) { 
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R largest, smallest;
+    return condition_number(M, smallest, largest);
+  }
+
+  /** Product of |trace(B)| and |trace(B^{-1})|, where B is
+      M * conjugated(M) when M has fewer rows than columns and
+      conjugated(M) * M otherwise.  B is inverted in place with lu_inverse.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  Frobenius_condition_number_sqr(const MAT& M) { 
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nrows = mat_nrows(M), ncols = mat_ncols(M);
+    dense_matrix<T> G(std::min(nrows,ncols), std::min(nrows,ncols));
+
+    // Build the smaller of the two possible products.
+    if (nrows < ncols)
+      mult(M, gmm::conjugated(M), G);
+    else
+      mult(gmm::conjugated(M), M, G);
+
+    R trace_before = abs(mat_trace(G));
+    lu_inverse(G);
+    return trace_before * abs(mat_trace(G));
+  }
+
+  /** Square root of Frobenius_condition_number_sqr(M). */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  Frobenius_condition_number(const MAT& M) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R squared = Frobenius_condition_number_sqr(M);
+    return sqrt(squared);
+  }
+
+  /** Condition number estimate.  A dedicated estimator is still to be done:
+      for now this forwards to condition_number with the same arguments.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  condest(const MAT& M, 
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emin,
+	  typename number_traits<typename
+	  linalg_traits<MAT>::value_type>::magnitude_type& emax) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R estimate = condition_number(M, emin, emax);
+    return estimate;
+  }
+  
+  /** Condition number estimate when the extreme eigenvalue magnitudes are
+      not needed: forwards to the three argument condest with local outputs.
+   */
+  template <typename MAT> 
+  typename number_traits<typename 
+  linalg_traits<MAT>::value_type>::magnitude_type
+  condest(const MAT& M) { 
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R largest, smallest;
+    return condest(M, smallest, largest);
+  }
+}
+
+#endif
--- a/gmm/gmm_conjugated.h
+++ b/gmm/gmm_conjugated.h
@ -0,0 +1,398 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_conjugated.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 18, 2003.
+   @brief handle conjugation of complex matrices/vectors.
+*/
+#ifndef GMM_CONJUGATED_H__
+#define GMM_CONJUGATED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*		Conjugated references on vectors            		   */
+  /* ********************************************************************* */
+
+  /* Read-only iterator adaptor: moves exactly like the wrapped iterator   */
+  /* IT and applies gmm::conj to every value it hands out.                 */
+  template <typename IT> struct conjugated_const_iterator {
+    typedef typename std::iterator_traits<IT>::value_type      value_type;
+    typedef typename std::iterator_traits<IT>::pointer         pointer;
+    typedef typename std::iterator_traits<IT>::reference       reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+    iterator_category;
+
+    IT it; // the wrapped iterator
+    
+    conjugated_const_iterator() {}
+    conjugated_const_iterator(const IT &i) : it(i) {}
+    
+    // Forwarded as is; IT has to provide index() for this to be usable.
+    inline size_type index() const { return it.index(); }
+
+    // Single step moves.
+    conjugated_const_iterator &operator ++() { ++it; return *this; }
+    conjugated_const_iterator &operator --() { --it; return *this; }
+    conjugated_const_iterator operator ++(int) {
+      conjugated_const_iterator previous(*this);
+      ++it;
+      return previous;
+    }
+    conjugated_const_iterator operator --(int) {
+      conjugated_const_iterator previous(*this);
+      --it;
+      return previous;
+    }
+
+    // Moves by an arbitrary offset.
+    conjugated_const_iterator &operator +=(difference_type i) {
+      it += i;
+      return *this;
+    }
+    conjugated_const_iterator &operator -=(difference_type i) {
+      it -= i;
+      return *this;
+    }
+    conjugated_const_iterator operator +(difference_type i) const {
+      conjugated_const_iterator shifted(*this);
+      shifted += i;
+      return shifted;
+    }
+    conjugated_const_iterator operator -(difference_type i) const {
+      conjugated_const_iterator shifted(*this);
+      shifted -= i;
+      return shifted;
+    }
+    difference_type operator -(const conjugated_const_iterator &i) const {
+      return difference_type(it - i.it);
+    }
+    
+    // Element access: always by value, conjugated.
+    value_type operator  *() const { return gmm::conj(*it); }
+    value_type operator [](size_type ii) const { return gmm::conj(it[ii]); }
+    
+    // Comparisons are those of the wrapped iterators.
+    bool operator ==(const conjugated_const_iterator &i) const {
+      return (i.it == it);
+    }
+    bool operator !=(const conjugated_const_iterator &i) const {
+      return (i.it != it);
+    }
+    bool operator < (const conjugated_const_iterator &i) const {
+      return (it < i.it);
+    }
+  };
+
+  /* Constant reference on a vector of type V whose elements are read      */
+  /* through gmm::conj.  It stores the const iterator range, the origin    */
+  /* and the size of the referenced vector.                                */
+  template <typename V> struct conjugated_vector_const_ref {
+    typedef conjugated_vector_const_ref<V> this_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename linalg_traits<V>::const_iterator iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_;            // vect_const_begin of the referenced vector
+    iterator end_;              // vect_const_end of the referenced vector
+    const origin_type *origin;  // linalg_origin of the referenced vector
+    size_type size_;            // vect_size of the referenced vector
+
+    conjugated_vector_const_ref(const V &v)
+      : begin_(vect_const_begin(v)),
+	end_(vect_const_end(v)),
+	origin(linalg_origin(v)),
+	size_(vect_size(v)) {}
+
+    // Conjugate of element i of the referenced vector.
+    reference operator[](size_type i) const {
+      return gmm::conj(linalg_traits<V>::access(origin, begin_, end_, i));
+    }
+  };
+
+  /* linalg_traits of conjugated_vector_const_ref<V>: a constant abstract  */
+  /* vector with the storage type of V, whose const iterators and access   */
+  /* function return conjugated values.                                    */
+  template <typename V> struct linalg_traits<conjugated_vector_const_ref<V> > {
+    typedef conjugated_vector_const_ref<V> this_type;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef linalg_const is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef value_type reference;
+    typedef abstract_null_type iterator;
+    typedef conjugated_const_iterator<
+      typename linalg_traits<V>::const_iterator> const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+
+    static size_type size(const this_type &v) { return v.size_; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+
+    // Range boundaries, non const then const flavour.
+    static iterator begin(this_type &v) { return iterator(v.begin_); }
+    static iterator end(this_type &v) { return iterator(v.end_); }
+    static const_iterator begin(const this_type &v) {
+      return const_iterator(v.begin_);
+    }
+    static const_iterator end(const this_type &v) {
+      return const_iterator(v.end_);
+    }
+
+    // Conjugate of what linalg_traits<V>::access returns for the wrapped
+    // iterators.
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i) {
+      return gmm::conj(linalg_traits<V>::access(o, it.it, ite.it, i));
+    }
+  };
+
+  /* Stream output of a conjugated vector reference through gmm::write.    */
+  template<typename V>
+  std::ostream &operator <<(std::ostream &o,
+			    const conjugated_vector_const_ref<V>& m) {
+    gmm::write(o,m);
+    return o;
+  }
+
+  /* ********************************************************************* */
+  /*		Conjugated references on matrices            		   */
+  /* ********************************************************************* */
+
+  /* Iterator adaptor over the const row iterator of M.  It only forwards  */
+  /* the moves and comparisons of the wrapped iterator; dereferencing and  */
+  /* indexing return the wrapped iterator itself (no conjugation is done   */
+  /* at this level).                                                       */
+  template <typename M> struct conjugated_row_const_iterator {
+    typedef conjugated_row_const_iterator<M> iterator;
+    typedef typename linalg_traits<M>::const_row_iterator ITER;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it; // the wrapped row iterator
+
+    // Post increment / decrement return the previous position.
+    iterator operator ++(int) { iterator tmp = *this; ++it; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; --it; return tmp; }
+    // Prefix forms step the wrapped iterator with its own prefix operator,
+    // which avoids building a temporary copy of it.
+    iterator &operator ++()   { ++it; return *this; }
+    iterator &operator --()   { --it; return *this; }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+
+    ITER operator *() const { return it; }
+    // const qualified so that a const iterator can be indexed as well.
+    ITER operator [](int i) const { return it + i; }
+
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+
+    conjugated_row_const_iterator(void) {}
+    conjugated_row_const_iterator(const ITER &i) : it(i) { }
+
+  };
+
+  // Read-only conjugated view of a matrix M that is traversed by rows.
+  // The view swaps the dimensions of M (nr is the column count of M, nc its
+  // row count) and conjugates every value it returns.
+  template <typename M> struct conjugated_row_matrix_const_ref {
+
+    typedef conjugated_row_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::const_row_iterator iterator;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // row range of the referenced matrix
+    const origin_type *origin;  // origin of the referenced matrix
+    size_type nr, nc;           // dimensions of the view (swapped w.r.t. M)
+
+    conjugated_row_matrix_const_ref(const M &m)
+      : begin_(mat_row_begin(m)),
+        end_(mat_row_end(m)),
+        origin(linalg_origin(m)),
+        nr(mat_ncols(m)),
+        nc(mat_nrows(m)) {}
+
+    // Entry (i, j) of the view: conjugate of the value obtained from row
+    // iterator begin_ + j at index i.
+    value_type operator()(size_type i, size_type j) const {
+      return gmm::conj(linalg_traits<M>::access(begin_ + j, i));
+    }
+  };
+
+  // Stream insertion for the row-based conjugated matrix view; printing is
+  // delegated to gmm::write.
+  template <typename M>
+  std::ostream &operator <<(std::ostream &o,
+                            const conjugated_row_matrix_const_ref<M> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+
+  // Thin adaptor around the const column iterator of M. Dereferencing
+  // returns the wrapped iterator itself, not a conjugated value: the
+  // conjugation is applied by the linalg_traits specialisation that consumes
+  // this iterator.
+  template <typename M> struct conjugated_col_const_iterator {
+    typedef conjugated_col_const_iterator<M> iterator;
+    typedef typename linalg_traits<M>::const_col_iterator ITER;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it; // wrapped column iterator of M
+
+    // The default constructor does not give `it` an explicit value.
+    conjugated_col_const_iterator(void) {}
+    conjugated_col_const_iterator(const ITER &i) : it(i) { }
+
+    // Stepping: every operation is forwarded to the wrapped iterator.
+    iterator &operator ++() { it++; return *this; }
+    iterator &operator --() { it--; return *this; }
+    iterator operator ++(int) {
+      iterator previous = *this;
+      it++;
+      return previous;
+    }
+    iterator operator --(int) {
+      iterator previous = *this;
+      it--;
+      return previous;
+    }
+
+    // Random access arithmetic.
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const {
+      iterator shifted = *this;
+      shifted += i;
+      return shifted;
+    }
+    iterator operator -(difference_type i) const {
+      iterator shifted = *this;
+      shifted -= i;
+      return shifted;
+    }
+    difference_type operator -(const iterator &i) const { return it - i.it; }
+
+    // Access: both forms hand back an iterator of the underlying matrix.
+    ITER operator *() const { return it; }
+    ITER operator [](int i) { return it + i; }
+
+    // Comparisons are those of the wrapped iterators.
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i.it == it); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+  };
+
+  // Read-only conjugated view of a matrix M that is traversed by columns.
+  // The view swaps the dimensions of M (nr is the column count of M, nc its
+  // row count) and conjugates every value it returns.
+  template <typename M> struct conjugated_col_matrix_const_ref {
+
+    typedef conjugated_col_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::const_col_iterator iterator;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // column range of the referenced matrix
+    const origin_type *origin;  // origin of the referenced matrix
+    size_type nr, nc;           // dimensions of the view (swapped w.r.t. M)
+
+    conjugated_col_matrix_const_ref(const M &m)
+      : begin_(mat_col_begin(m)),
+        end_(mat_col_end(m)),
+        origin(linalg_origin(m)),
+        nr(mat_ncols(m)),
+        nc(mat_nrows(m)) {}
+
+    // Entry (i, j) of the view: conjugate of the value obtained from column
+    // iterator begin_ + i at index j.
+    value_type operator()(size_type i, size_type j) const {
+      return gmm::conj(linalg_traits<M>::access(begin_ + i, j));
+    }
+  };
+
+
+
+  // Stream insertion for the column-based conjugated matrix view; printing
+  // is delegated to gmm::write.
+  template <typename M>
+  std::ostream &operator <<(std::ostream &o,
+                            const conjugated_col_matrix_const_ref<M> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+
+  // Selects the conjugated matrix view type from an orientation tag SO.
+  // Primary template: every tag other than col_major gets the row-based view.
+  template <typename L, typename SO>
+  struct conjugated_return__ {
+    typedef conjugated_row_matrix_const_ref<L> return_type;
+  };
+  // col_major matrices get the column-based view.
+  template <typename L>
+  struct conjugated_return__<L, col_major> {
+    typedef conjugated_col_matrix_const_ref<L> return_type;
+  };
+  // Selects the result type of conjugated() from the value type T and the
+  // linalg kind LT of L.
+  // Primary template: the object is returned unchanged, by const reference.
+  template <typename L, typename T, typename LT>
+  struct conjugated_return_ {
+    typedef const L & return_type;
+  };
+  // Vectors with a std::complex value type are wrapped in a conjugated view.
+  template <typename L, typename T>
+  struct conjugated_return_<L, std::complex<T>, abstract_vector> {
+    typedef conjugated_vector_const_ref<L> return_type;
+  };
+  // Matrices of any value type are wrapped in the view that matches their
+  // principal orientation.
+  template <typename L, typename T>
+  struct conjugated_return_<L, T, abstract_matrix> {
+    typedef typename conjugated_return__<
+      L,
+      typename principal_orientation_type<
+        typename linalg_traits<L>::sub_orientation>::potype
+      >::return_type return_type;
+  };
+  // Public entry of the return-type computation: forwards the value type and
+  // linalg kind of L to conjugated_return_.
+  template <typename L>
+  struct conjugated_return {
+    typedef typename conjugated_return_<
+      L,
+      typename linalg_traits<L>::value_type,
+      typename linalg_traits<L>::linalg_type
+      >::return_type return_type;
+  };
+
+  ///@endcond
+  /** Return a conjugated view of the input matrix or vector.
+      The call is dispatched on the value type and on the linalg kind
+      (vector or matrix) of L. */
+  template <typename L> inline
+  typename conjugated_return<L>::return_type
+  conjugated(const L &v) {
+    typedef typename linalg_traits<L>::value_type value_tag;
+    typedef typename linalg_traits<L>::linalg_type kind_tag;
+    return conjugated(v, value_tag(), kind_tag());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Fallback overload: no conjugated view is built, the argument is handed
+  // back unchanged.
+  template <typename L, typename T, typename LT> inline
+  const L & conjugated(const L &v, T, LT) {
+    return v;
+  }
+
+  // Vector with a std::complex value type: wrap it in a conjugated view.
+  template <typename L, typename T> inline
+  conjugated_vector_const_ref<L>
+  conjugated(const L &v, std::complex<T>, abstract_vector) {
+    return conjugated_vector_const_ref<L>(v);
+  }
+
+  // Matrix argument: dispatch once more, on the principal orientation of L.
+  template <typename L, typename T> inline
+  typename conjugated_return__<L,
+    typename principal_orientation_type<typename
+    linalg_traits<L>::sub_orientation>::potype>::return_type
+  conjugated(const L &v, T, abstract_matrix) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<L>::sub_orientation>::potype orientation_tag;
+    return conjugated(v, orientation_tag());
+  }
+
+  // Row-major matrix: build the row-based conjugated view.
+  template <typename L> inline
+  conjugated_row_matrix_const_ref<L> conjugated(const L &v, row_major) {
+    return conjugated_row_matrix_const_ref<L>(v);
+  }
+
+  // Column-major matrix: build the column-based conjugated view.
+  template <typename L> inline
+  conjugated_col_matrix_const_ref<L> conjugated(const L &v, col_major) {
+    return conjugated_col_matrix_const_ref<L>(v);
+  }
+
+  // Traits of the conjugated view of a row-traversed matrix M. The rows of M
+  // are exposed as the columns of the view (sub_orientation is col_major),
+  // each wrapped by conjugated(). Row access is not provided: the row
+  // related types are abstract_null_type.
+  template <typename M>
+  struct linalg_traits<conjugated_row_matrix_const_ref<M> > {
+    typedef conjugated_row_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef col_major sub_orientation;
+    typedef value_type reference; // elements are returned by value
+
+    // Column side: conjugated views of the rows of M.
+    typedef typename org_type<
+      typename linalg_traits<M>::const_sub_row_type>::t vector_type;
+    typedef conjugated_vector_const_ref<vector_type> sub_col_type;
+    typedef conjugated_vector_const_ref<vector_type> const_sub_col_type;
+    typedef conjugated_row_const_iterator<M> col_iterator;
+    typedef conjugated_row_const_iterator<M> const_col_iterator;
+
+    // Row side: unavailable.
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+
+    static size_type nrows(const this_type &m) { return m.nr; }
+    static size_type ncols(const this_type &m) { return m.nc; }
+
+    static const_col_iterator col_begin(const this_type &m) {
+      return const_col_iterator(m.begin_);
+    }
+    static const_col_iterator col_end(const this_type &m) {
+      return const_col_iterator(m.end_);
+    }
+    // Column of the view = conjugated row of M.
+    static const_sub_col_type col(const const_col_iterator &it) {
+      return conjugated(linalg_traits<M>::row(it.it));
+    }
+
+    static const origin_type* origin(const this_type &m) {
+      return m.origin;
+    }
+    // Conjugate of the entry of M designated by the wrapped iterator and i.
+    static value_type access(const const_col_iterator &it, size_type i) {
+      return gmm::conj(linalg_traits<M>::access(it.it, i));
+    }
+  };
+  
+  // Traits of the conjugated view of a column-traversed matrix M. The
+  // columns of M are exposed as the rows of the view (sub_orientation is
+  // row_major), each wrapped by conjugated(). Column access is not provided:
+  // the column related types are abstract_null_type.
+  template <typename M>
+  struct linalg_traits<conjugated_col_matrix_const_ref<M> > {
+    typedef conjugated_col_matrix_const_ref<M> this_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef row_major sub_orientation;
+    typedef value_type reference; // elements are returned by value
+
+    // Row side: conjugated views of the columns of M.
+    typedef typename org_type<
+      typename linalg_traits<M>::const_sub_col_type>::t vector_type;
+    typedef conjugated_vector_const_ref<vector_type> sub_row_type;
+    typedef conjugated_vector_const_ref<vector_type> const_sub_row_type;
+    typedef conjugated_col_const_iterator<M> row_iterator;
+    typedef conjugated_col_const_iterator<M> const_row_iterator;
+
+    // Column side: unavailable.
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+
+    static size_type nrows(const this_type &m) { return m.nr; }
+    static size_type ncols(const this_type &m) { return m.nc; }
+
+    static const_row_iterator row_begin(const this_type &m) {
+      return const_row_iterator(m.begin_);
+    }
+    static const_row_iterator row_end(const this_type &m) {
+      return const_row_iterator(m.end_);
+    }
+    // Row of the view = conjugated column of M.
+    static const_sub_row_type row(const const_row_iterator &it) {
+      return conjugated(linalg_traits<M>::col(it.it));
+    }
+
+    static const origin_type* origin(const this_type &m) {
+      return m.origin;
+    }
+    // Conjugate of the entry of M designated by the wrapped iterator and i.
+    static value_type access(const const_row_iterator &it, size_type i) {
+      return gmm::conj(linalg_traits<M>::access(it.it, i));
+    }
+  };
+  
+  ///@endcond
+  
+
+}
+
+#endif //  GMM_CONJUGATED_H__
--- a/gmm/gmm_def.h
+++ b/gmm/gmm_def.h
--- a/gmm/gmm_dense_Householder.h
+++ b/gmm/gmm_dense_Householder.h
@ -0,0 +1,317 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_Householder.h
+   @author Caroline Lecalvez <Caroline.Lecalvez@gmm.insa-toulouse.fr>
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Householder for dense matrices.
+*/
+
+#ifndef GMM_DENSE_HOUSEHOLDER_H
+#define GMM_DENSE_HOUSEHOLDER_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Rank one update  (complex and real version)                        */
+  /* ********************************************************************* */
+
+  // Rank one update, row by row:
+  //   A(i, j) += conj_product(y[j], x[i])
+  // x must hold at least mat_nrows(A) entries and y at least mat_ncols(A).
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_one_update(Matrix &A, const VecX& x,
+                              const VecY& y, row_major) {
+    typedef typename linalg_traits<Matrix>::value_type T;
+    typedef typename linalg_traits<Matrix>::sub_row_type row_type;
+    typedef typename linalg_traits<typename org_type<row_type>::t>::iterator
+      row_iterator;
+    typedef typename linalg_traits<VecX>::const_iterator x_iterator;
+    typedef typename linalg_traits<VecY>::const_iterator y_iterator;
+
+    size_type nrows = mat_nrows(A);
+    GMM_ASSERT2(nrows <= vect_size(x) && mat_ncols(A) <= vect_size(y),
+                "dimensions mismatch");
+
+    x_iterator x_it = vect_const_begin(x);
+    for (size_type i = 0; i < nrows; ++i, ++x_it) {
+      row_type row = mat_row(A, i);
+      row_iterator cell = vect_begin(row), row_end = vect_end(row);
+      y_iterator y_it = vect_const_begin(y);
+      T xi = *x_it; // x[i] is constant along the row
+      while (cell != row_end) {
+        *cell += conj_product(*y_it, xi);
+        ++cell;
+        ++y_it;
+      }
+    }
+  }
+
+  // Rank one update, column by column:
+  //   A(i, j) += conj_product(y[j], x[i])
+  // x must hold at least mat_nrows(A) entries and y at least mat_ncols(A).
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_one_update(Matrix &A, const VecX& x,
+                              const VecY& y, col_major) {
+    typedef typename linalg_traits<Matrix>::value_type T;
+    typedef typename linalg_traits<Matrix>::sub_col_type col_type;
+    typedef typename linalg_traits<typename org_type<col_type>::t>::iterator
+      col_iterator;
+    typedef typename linalg_traits<VecX>::const_iterator x_iterator;
+    typedef typename linalg_traits<VecY>::const_iterator y_iterator;
+
+    size_type ncols = mat_ncols(A);
+    GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && ncols <= vect_size(y),
+                "dimensions mismatch");
+
+    y_iterator y_it = vect_const_begin(y);
+    for (size_type j = 0; j < ncols; ++j, ++y_it) {
+      col_type col = mat_col(A, j);
+      col_iterator cell = vect_begin(col), col_end = vect_end(col);
+      x_iterator x_it = vect_const_begin(x);
+      T yj = *y_it; // y[j] is constant along the column
+      while (cell != col_end) {
+        *cell += conj_product(yj, *x_it);
+        ++cell;
+        ++x_it;
+      }
+    }
+  }
+
+  ///@endcond
+  /** Rank one update of A with the vectors x and y.
+      A is taken by const reference and the constness is cast away so that
+      temporary matrix views can be passed; the work is done by the overload
+      matching the principal orientation of Matrix. */
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_one_update(const Matrix &AA, const VecX& x,
+                              const VecY& y) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<Matrix>::sub_orientation>::potype orientation_tag;
+    Matrix& A = const_cast<Matrix&>(AA);
+    rank_one_update(A, x, y, orientation_tag());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Rank two update  (complex and real version)                        */
+  /* ********************************************************************* */
+
+  // Rank two update, row by row:
+  //   A(i, j) += conj_product(y[j], x[i]) + conj_product(x[j], y[i])
+  // x must hold at least mat_nrows(A) entries and y at least mat_ncols(A).
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_two_update(Matrix &A, const VecX& x,
+                              const VecY& y, row_major) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename linalg_traits<Matrix>::sub_row_type row_type;
+    typedef typename linalg_traits<typename org_type<row_type>::t>::iterator
+      row_iterator;
+    typedef typename linalg_traits<VecX>::const_iterator x_iterator;
+    typedef typename linalg_traits<VecY>::const_iterator y_iterator;
+
+    size_type nrows = mat_nrows(A);
+    GMM_ASSERT2(nrows <= vect_size(x) && mat_ncols(A) <= vect_size(y),
+                "dimensions mismatch");
+
+    // Outer iterators follow the row index.
+    x_iterator x_row = vect_const_begin(x);
+    y_iterator y_row = vect_const_begin(y);
+    for (size_type i = 0; i < nrows; ++i, ++x_row, ++y_row) {
+      row_type row = mat_row(A, i);
+      row_iterator cell = vect_begin(row), row_end = vect_end(row);
+      // Inner iterators follow the column index.
+      x_iterator x_col = vect_const_begin(x);
+      y_iterator y_col = vect_const_begin(y);
+      value_type xi = *x_row, yi = *y_row;
+      while (cell != row_end) {
+        *cell += conj_product(*y_col, xi) + conj_product(*x_col, yi);
+        ++cell;
+        ++y_col;
+        ++x_col;
+      }
+    }
+  }
+
+  // Rank two update, column by column:
+  //   A(i, j) += conj_product(y[j], x[i]) + conj_product(x[j], y[i])
+  // x must hold at least mat_nrows(A) entries and y at least mat_ncols(A).
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_two_update(Matrix &A, const VecX& x,
+                              const VecY& y, col_major) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename linalg_traits<Matrix>::sub_col_type col_type;
+    typedef typename linalg_traits<typename org_type<col_type>::t>::iterator
+      col_iterator;
+    typedef typename linalg_traits<VecX>::const_iterator x_iterator;
+    typedef typename linalg_traits<VecY>::const_iterator y_iterator;
+
+    size_type ncols = mat_ncols(A);
+    GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && ncols <= vect_size(y),
+                "dimensions mismatch");
+
+    // Outer iterators follow the column index.
+    x_iterator x_col = vect_const_begin(x);
+    y_iterator y_col = vect_const_begin(y);
+    for (size_type j = 0; j < ncols; ++j, ++y_col, ++x_col) {
+      col_type col = mat_col(A, j);
+      col_iterator cell = vect_begin(col), col_end = vect_end(col);
+      // Inner iterators follow the row index.
+      x_iterator x_row = vect_const_begin(x);
+      y_iterator y_row = vect_const_begin(y);
+      value_type yj = *y_col, xj = *x_col;
+      while (cell != col_end) {
+        *cell += conj_product(yj, *x_row) + conj_product(xj, *y_row);
+        ++cell;
+        ++x_row;
+        ++y_row;
+      }
+    }
+  }
+
+  ///@endcond
+  /** Rank two update of A with the vectors x and y.
+      A is taken by const reference and the constness is cast away so that
+      temporary matrix views can be passed; the work is done by the overload
+      matching the principal orientation of Matrix. */
+  template <typename Matrix, typename VecX, typename VecY>
+  inline void rank_two_update(const Matrix &AA, const VecX& x,
+                              const VecY& y) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<Matrix>::sub_orientation>::potype orientation_tag;
+    Matrix& A = const_cast<Matrix&>(AA);
+    rank_two_update(A, x, y, orientation_tag());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  /* ********************************************************************* */
+  /*    Householder vector computation (complex and real version)          */
+  /* ********************************************************************* */
+
+  // Turns V, in place, into a Householder vector normalised so that its
+  // first component is 1. V is taken by const reference and cast back so
+  // that temporary vector views can be passed. V must not be empty: V[0]
+  // is read unconditionally.
+  template <typename VECT> void house_vector(const VECT &VV) {
+    typedef typename linalg_traits<VECT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    VECT &V = const_cast<VECT &>(VV);
+
+    R mu = vect_norm2(V);
+    R abs_v0 = gmm::abs(V[0]);
+    if (mu != R(0)) {
+      // A zero vector is left unscaled; otherwise the scaling depends on
+      // whether the leading entry vanishes.
+      const T factor = (abs_v0 == R(0))
+        ? T(R(1) / mu)
+        : (safe_divide(T(abs_v0), V[0]) / (abs_v0 + mu));
+      gmm::scale(V, factor);
+    }
+    // x * 0 != 0 holds only for a non finite x (IEEE arithmetic): in that
+    // case the scaling broke down and the vector is reset.
+    if (gmm::real(V[vect_size(V)-1]) * R(0) != R(0)) gmm::clear(V);
+    V[0] = T(1);
+  }
+
+  // Same as house_vector, but the vector is normalised on its LAST
+  // component, which ends up equal to 1. V must not be empty: V[m-1] is
+  // read unconditionally.
+  template <typename VECT> void house_vector_last(const VECT &VV) {
+    typedef typename linalg_traits<VECT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    VECT &V = const_cast<VECT &>(VV);
+
+    size_type m = vect_size(V);
+    R mu = vect_norm2(V);
+    R abs_v0 = gmm::abs(V[m-1]); // modulus of the last entry
+    if (mu != R(0)) {
+      // A zero vector is left unscaled; otherwise the scaling depends on
+      // whether the last entry vanishes.
+      const T factor = (abs_v0 == R(0))
+        ? T(R(1) / mu)
+        : ((abs_v0 / V[m-1]) / (abs_v0 + mu));
+      gmm::scale(V, factor);
+    }
+    // x * 0 != 0 holds only for a non finite x (IEEE arithmetic): in that
+    // case the scaling broke down and the vector is reset.
+    if (gmm::real(V[0]) * R(0) != R(0)) gmm::clear(V);
+    V[m-1] = T(1);
+  }
+
+  /* ********************************************************************* */
+  /*    Householder updates  (complex and real version)                    */
+  /* ********************************************************************* */
+
+  // Multiply A to the left by the reflector stored in V. W is a temporary:
+  // it receives conjugated(A) * (beta V) with beta = -2 / ||V||^2, and A is
+  // then corrected by rank_one_update(A, V, W).
+  template <typename MAT, typename VECT1, typename VECT2> inline
+  void row_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) {
+    typedef typename linalg_traits<MAT>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    MAT &A = const_cast<MAT &>(AA);
+    VECT2 &W = const_cast<VECT2 &>(WW);
+
+    const value_type beta
+      = value_type(magnitude_type(-2)/vect_norm2_sqr(V));
+    gmm::mult(conjugated(A), scaled(V, beta), W);
+    rank_one_update(A, V, W);
+  }
+
+  // Multiply A to the right by the reflector stored in V. W is a temporary:
+  // it receives A * (beta V) with beta = -2 / ||V||^2, and A is then
+  // corrected by rank_one_update(A, W, V).
+  template <typename MAT, typename VECT1, typename VECT2> inline
+  void col_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) {
+    typedef typename linalg_traits<MAT>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    MAT &A = const_cast<MAT &>(AA);
+    VECT2 &W = const_cast<VECT2 &>(WW);
+
+    const value_type beta
+      = value_type(magnitude_type(-2)/vect_norm2_sqr(V));
+    gmm::mult(A, scaled(V, beta), W);
+    rank_one_update(A, W, V);
+  }
+
+  ///@endcond
+
+  /* ********************************************************************* */
+  /*    Hessenberg reduction with Householder.                             */
+  /* ********************************************************************* */
+
+  // In-place Hessenberg reduction of A by Householder reflectors.
+  // When compute_Q is true, Q is first set to the identity and then receives
+  // every reflector applied to A (right multiplication). Both matrices are
+  // taken by const reference and cast back so that views can be passed.
+  template <typename MAT1, typename MAT2>
+  void Hessenberg_reduction(const MAT1& AA, const MAT2 &QQ, bool compute_Q){
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    MAT1& A = const_cast<MAT1&>(AA);
+    MAT2& Q = const_cast<MAT2&>(QQ);
+
+    if (compute_Q) gmm::copy(identity_matrix(), Q);
+    size_type n = mat_nrows(A);
+    if (n < 2) return;
+
+    std::vector<value_type> v(n), w(n);
+    sub_interval all_rows(0,n);
+    for (size_type k = 1; k+1 < n; ++k) {
+      sub_interval below(k, n-k), from_prev_col(k-1,n-k+1);
+
+      // v <- column k-1 of A, from row k down, turned into a reflector.
+      v.resize(n-k);
+      for (size_type j = k; j < n; ++j) v[j-k] = A(j, k-1);
+      house_vector(v);
+
+      row_house_update(sub_matrix(A, below, from_prev_col), v,
+                       sub_vector(w, from_prev_col));
+      col_house_update(sub_matrix(A, all_rows, below), v, w);
+      // is it possible to "unify" the two on the common part of the matrix?
+      if (compute_Q) col_house_update(sub_matrix(Q, all_rows, below), v, w);
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Householder tridiagonalization for symmetric matrices              */
+  /* ********************************************************************* */
+
+  // In-place Householder tridiagonalization of A.
+  // When compute_q is true, each reflector is also applied to Q (right
+  // multiplication). Q is NOT initialised here: the caller has to provide
+  // its starting value. Both matrices are taken by const reference and cast
+  // back so that views can be passed.
+  template <typename MAT1, typename MAT2>
+  void Householder_tridiagonalization(const MAT1 &AA, const MAT2 &QQ,
+                                      bool compute_q) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    MAT1 &A = const_cast<MAT1 &>(AA);
+    MAT2 &Q = const_cast<MAT2 &>(QQ);
+
+    size_type n = mat_nrows(A);
+    if (n < 2) return;
+
+    std::vector<T> v(n), p(n), w(n), ww(n);
+    sub_interval all_rows(0,n);
+
+    for (size_type k = 1; k+1 < n; ++k) { // not optimized ...
+      sub_interval trailing(k, n-k);
+      v.resize(n-k);
+      p.resize(n-k);
+      w.resize(n-k);
+
+      // Save column k-1 below the diagonal in v and w, then zero it
+      // together with the matching part of row k-1.
+      for (size_type l = k; l < n; ++l) {
+        v[l-k] = w[l-k] = A(l, k-1);
+        A(l, k-1) = A(k-1, l) = T(0);
+      }
+      house_vector(v);
+      R norm = vect_norm2_sqr(v);
+      // New sub-diagonal entry and its conjugate on the super-diagonal.
+      A(k-1, k) = gmm::conj(A(k, k-1) = w[0] - T(2)*v[0]*vect_hp(w, v)/norm);
+
+      // Two sided update of the trailing block, done as a rank two update.
+      gmm::mult(sub_matrix(A, trailing), gmm::scaled(v, T(-2) / norm), p);
+      gmm::add(p, gmm::scaled(v, -vect_hp(v, p) / norm), w);
+      rank_two_update(sub_matrix(A, trailing), v, w);
+      // it should be possible to compute only the upper or lower part
+
+      if (compute_q) col_house_update(sub_matrix(Q, all_rows, trailing), v, ww);
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Real and complex Givens rotations                                  */
+  /* ********************************************************************* */
+
+  // Computes the coefficients c and s of a Givens rotation from the pair
+  // (a, b). Special cases: b == 0 gives (c, s) = (1, 0); a == 0 gives
+  // (0, b / |b|). Otherwise the ratio is formed with the entry of larger
+  // modulus as denominator.
+  template <typename T> void Givens_rotation(T a, T b, T &c, T &s) {
+    typedef typename number_traits<T>::magnitude_type R;
+    R mag_a = gmm::abs(a), mag_b = gmm::abs(b);
+
+    if (mag_b == R(0)) {
+      c = T(1);
+      s = T(0);
+      return;
+    }
+    if (mag_a == R(0)) {
+      c = T(0);
+      s = b / mag_b;
+      return;
+    }
+    if (mag_b > mag_a) {
+      T ratio = -safe_divide(a,b);
+      s = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(ratio))));
+      c = s * ratio;
+    } else {
+      T ratio = -safe_divide(b,a);
+      c = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(ratio))));
+      s = c * ratio;
+    }
+  }
+
+  // Apply Q* v: rotates the pair (x, y) in place,
+  //   x <- conj(c) x - conj(s) y,   y <- c y + s x   (old values on the right)
+  template <typename T> inline
+  void Apply_Givens_rotation_left(T &x, T &y, T c, T s) {
+    T old_x = x, old_y = y;
+    x = gmm::conj(c)*old_x - gmm::conj(s)*old_y;
+    y = c*old_y + s*old_x;
+  }
+
+  // Apply v^T Q: rotates the pair (x, y) in place,
+  //   x <- c x - s y,   y <- conj(c) y + conj(s) x   (old values on the right)
+  template <typename T> inline
+  void Apply_Givens_rotation_right(T &x, T &y, T c, T s) {
+    T old_x = x, old_y = y;
+    x = c*old_x - s*old_y;
+    y = gmm::conj(c)*old_y + gmm::conj(s)*old_x;
+  }
+
+  // Applies the Givens rotation (c, s) to rows i and k of A, one column at
+  // a time. A is taken by const reference and cast back so that views can
+  // be passed.
+  template <typename MAT, typename T>
+  void row_rot(const MAT &AA, T c, T s, size_type i, size_type k) {
+    MAT &A = const_cast<MAT &>(AA); // can be specialized for row matrices
+    size_type col = 0;
+    while (col < mat_ncols(A)) {
+      Apply_Givens_rotation_left(A(i,col), A(k,col), c, s);
+      ++col;
+    }
+  }
+
+  // Applies the Givens rotation (c, s) to columns i and k of A, one row at
+  // a time. A is taken by const reference and cast back so that views can
+  // be passed.
+  template <typename MAT, typename T>
+  void col_rot(const MAT &AA, T c, T s, size_type i, size_type k) {
+    MAT &A = const_cast<MAT &>(AA); // can be specialized for column matrices
+    size_type row = 0;
+    while (row < mat_nrows(A)) {
+      Apply_Givens_rotation_right(A(row,i), A(row,k), c, s);
+      ++row;
+    }
+  }
+
+}
+
+#endif
+
--- a/gmm/gmm_dense_lu.h
+++ b/gmm/gmm_dense_lu.h
@ -0,0 +1,250 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of lu.h from MTL.
+// See http://osl.iu.edu/research/mtl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_dense_lu.h
+   @author  Andrew Lumsdaine, Jeremy G. Siek, Lie-Quan Lee, Y. Renard
+   @date June 5, 2003.
+   @brief LU factorizations and determinant computation for dense matrices.
+*/
+#ifndef GMM_DENSE_LU_H
+#define GMM_DENSE_LU_H
+
+#include "gmm_dense_Householder.h"
+#include "gmm_opt.h"
+
+namespace gmm {
+
+
+  /** LU Factorization of a general (dense) matrix (real or complex).
+
+  This is the outer product (a level-2 operation) form of the LU
+  Factorization with pivoting algorithm. This is equivalent to
+  LAPACK's dgetf2. Also see "Matrix Computations" 3rd Ed.  by Golub
+  and Van Loan section 3.2.5 and especially page 115.
+
+  The pivot indices in ipvt are indexed starting from 1
+  so that this is compatible with LAPACK (Fortran).
+
+  A is overwritten by its factors. ipvt must hold at least
+  min(nrows, ncols) - 1 entries; the last pivot (always the identity)
+  is stored only when ipvt has room for it.
+
+  Returns 0 on success, or j + 1 when the pivot found for column j is
+  exactly zero (the factorization stops there). The last diagonal
+  entry is not tested. A matrix with a zero dimension is left
+  untouched and 0 is returned.
+  */
+  template <typename DenseMatrix, typename Pvector>
+  size_type lu_factor(DenseMatrix& A, Pvector& ipvt) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    typedef typename linalg_traits<Pvector>::value_type int_T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type info(0), i, j, jp, M(mat_nrows(A)), N(mat_ncols(A));
+    size_type NN = std::min(M, N);
+
+    // Nothing to factor. Without this guard a matrix with exactly one zero
+    // dimension would reach ipvt[NN-1] with NN == 0, i.e. an index that
+    // wrapped around.
+    if (NN == 0) return info;
+
+    std::vector<T> c(M), r(N);
+
+    GMM_ASSERT2(ipvt.size()+1 >= NN, "IPVT too small");
+    for (i = 0; i+1 < NN; ++i) ipvt[i] = int_T(i);
+
+    for (j = 0; j+1 < NN; ++j) {
+      R max = gmm::abs(A(j,j)); jp = j;
+      for (i = j+1; i < M; ++i)                  /* find pivot.          */
+        if (gmm::abs(A(i,j)) > max) { jp = i; max = gmm::abs(A(i,j)); }
+      ipvt[j] = int_T(jp + 1);
+
+      if (max == R(0)) { info = j + 1; break; }
+      if (jp != j) for (i = 0; i < N; ++i) std::swap(A(jp, i), A(j, i));
+
+      for (i = j+1; i < M; ++i) { A(i, j) /= A(j,j); c[i-j-1] = -A(i, j); }
+      for (i = j+1; i < N; ++i) r[i-j-1] = A(j, i);  // avoid the copy ?
+      rank_one_update(sub_matrix(A, sub_interval(j+1, M-j-1),
+                                 sub_interval(j+1, N-j-1)), c, conjugated(r));
+    }
+    // The assertion above accepts an ipvt of NN-1 entries, so the last
+    // pivot is written only when that slot exists.
+    if (ipvt.size() >= NN) ipvt[NN-1] = int_T(NN);
+    return info;
+  }
+  
+  /** LU Solve : Solve equation Ax=b, given an LU factored matrix.
+      pvector holds the pivots produced by lu_factor (numbered from 1).
+      b is copied into x, the row exchanges are replayed on x, then the
+      lower and the upper triangular systems are solved in place. */
+  //  Thanks to Valient Gough for this routine!
+  template <typename DenseMatrix, typename VectorB, typename VectorX,
+            typename Pvector>
+  void lu_solve(const DenseMatrix &LU, const Pvector& pvector,
+                VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    copy(b, x);
+    for (size_type i = 0; i < pvector.size(); ++i) {
+      size_type target = pvector[i]-1;   // permutations stored in 1's offset
+      if (i == target) continue;
+      T saved = x[i];
+      x[i] = x[target];
+      x[target] = saved;
+    }
+    /* solve  Ax = b  ->  LUx = b  ->  Ux = L^-1 b.                        */
+    lower_tri_solve(LU, x, true);
+    upper_tri_solve(LU, x, false);
+  }
+
+  // Solves A x = b for a dense matrix A: A is copied into a temporary,
+  // which is factored and then used by the pivoted lu_solve. A itself is
+  // not modified. GMM_ASSERT1 fires when lu_factor reports a zero pivot.
+  template <typename DenseMatrix, typename VectorB, typename VectorX>
+  void lu_solve(const DenseMatrix &A, VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    dense_matrix<T> factors(mat_nrows(A), mat_ncols(A));
+    std::vector<int> pivots(mat_nrows(A));
+    gmm::copy(A, factors);
+    size_type info = lu_factor(factors, pivots);
+    GMM_ASSERT1(!info, "Singular system, pivot = " << info);
+    lu_solve(factors, pivots, x, b);
+  }
+  
+  // Counterpart of lu_solve working on transposed(LU): b is copied into x,
+  // the two triangular solves are done on the transposed factors, and the
+  // row exchanges are then undone by walking the pivots backwards.
+  template <typename DenseMatrix, typename VectorB, typename VectorX,
+            typename Pvector>
+  void lu_solve_transposed(const DenseMatrix &LU, const Pvector& pvector,
+                           VectorX &x, const VectorB &b) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    copy(b, x);
+    lower_tri_solve(transposed(LU), x, false);
+    upper_tri_solve(transposed(LU), x, true);
+    for (size_type i = pvector.size(); i > 0; --i) {
+      size_type target = pvector[i-1]-1; // permutations stored in 1's offset
+      if (i-1 == target) continue;
+      T saved = x[i-1];
+      x[i-1] = x[target];
+      x[target] = saved;
+    }
+
+  }
+
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Builds the inverse column by column: column i of AInv is the solution
+  // of the factored system whose right hand side is the i-th unit vector.
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector,
+                  DenseMatrix& AInv, col_major) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    std::vector<T> unit(pvector.size(), T(0));
+    std::vector<T> solution(pvector.size());
+    for (size_type i = 0; i < pvector.size(); ++i) {
+      unit[i] = T(1);
+      lu_solve(LU, pvector, solution, unit);
+      copy(solution, mat_col(AInv, i));
+      unit[i] = T(0); // restore the zero vector for the next column
+    }
+  }
+
+  // Builds the inverse row by row: row i of AInv is the solution of the
+  // transposed factored system whose right hand side is the i-th unit
+  // vector.
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector,
+                  DenseMatrix& AInv, row_major) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    std::vector<T> unit(pvector.size(), T(0));
+    std::vector<T> solution(pvector.size());
+    for (size_type i = 0; i < pvector.size(); ++i) {
+      unit[i] = T(1); // to be optimized !!
+      // The system could be reduced during the first triangular solve, and
+      // a whole set of vectors could perhaps be solved at once instead of
+      // one vector at a time (accumulating the inverse directly into the
+      // matrix as the computation proceeds ... -> avoids the final copy).
+      lu_solve_transposed(LU, pvector, solution, unit);
+      copy(solution, mat_row(AInv, i));
+      unit[i] = T(0); // restore the zero vector for the next row
+    }
+  }
+  ///@endcond  
+
+  /** Given an LU factored matrix, build the inverse of the matrix.
+      AInv_ is taken by const reference and cast back so that matrix views
+      can be passed; the work is done by the overload matching the
+      principal orientation of DenseMatrix. */
+  template <typename DenseMatrixLU, typename DenseMatrix, typename Pvector>
+  void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector,
+                  const DenseMatrix& AInv_) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<DenseMatrix>::sub_orientation>::potype orientation_tag;
+    DenseMatrix& AInv = const_cast<DenseMatrix&>(AInv_);
+    lu_inverse(LU, pvector, AInv, orientation_tag());
+  }
+
+  /** Given a dense matrix, build the inverse of the matrix, and
+      return the determinant.
+      The matrix is overwritten by its inverse (the const qualifier is cast
+      away). When lu_factor reports a zero pivot, the matrix is left
+      unchanged; with doassert set, GMM_ASSERT1 fires in that case. */
+  template <typename DenseMatrix>
+  typename linalg_traits<DenseMatrix>::value_type
+  lu_inverse(const DenseMatrix& A_, bool doassert = true) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    DenseMatrix& A = const_cast<DenseMatrix&>(A_);
+    dense_matrix<T> factors(mat_nrows(A), mat_ncols(A));
+    std::vector<int> pivots(mat_nrows(A));
+    gmm::copy(A, factors);
+    size_type info = lu_factor(factors, pivots);
+    if (doassert) {
+      GMM_ASSERT1(!info, "Non invertible matrix, pivot = "<<info);
+    }
+    if (!info) {
+      lu_inverse(factors, pivots, A);
+    }
+    return lu_det(factors, pivots);
+  }
+
+  /** Compute the matrix determinant (via a LU factorization).
+      The result is the product of the diagonal of LU, with one sign flip
+      for every pivot entry that records a row exchange. */
+  template <typename DenseMatrixLU, typename Pvector>
+  typename linalg_traits<DenseMatrixLU>::value_type
+  lu_det(const DenseMatrixLU& LU, const Pvector &pvector) {
+    typedef typename linalg_traits<DenseMatrixLU>::value_type T;
+    T det(1);
+    const size_type diag_len = std::min(mat_nrows(LU), mat_ncols(LU));
+    for (size_type j = 0; j < diag_len; ++j) {
+      det *= LU(j,j);
+    }
+    for (size_type i = 0; i < pvector.size(); ++i) {
+      // pivots are stored in 1's offset
+      if (i != size_type(pvector[i]-1)) det = -det;
+    }
+    return det;
+  }
+
+  // Determinant of a dense matrix A: A is copied into a temporary, which is
+  // factored and handed to the pivoted lu_det. A itself is not modified and
+  // the status returned by lu_factor is ignored.
+  template <typename DenseMatrix>
+  typename linalg_traits<DenseMatrix>::value_type
+  lu_det(const DenseMatrix& A) {
+    typedef typename linalg_traits<DenseMatrix>::value_type T;
+    dense_matrix<T> factors(mat_nrows(A), mat_ncols(A));
+    std::vector<int> pivots(mat_nrows(A));
+    gmm::copy(A, factors);
+    lu_factor(factors, pivots);
+    return lu_det(factors, pivots);
+  }
+
+}
+
+#endif
+
--- a/gmm/gmm_dense_matrix_functions.h
+++ b/gmm/gmm_dense_matrix_functions.h
@ -0,0 +1,302 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2014-2017 Konstantinos Poulios
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_matrix_functions.h
+   @author  Konstantinos Poulios <poulios.konstantinos@gmail.com>
+   @date December 10, 2014.
+   @brief Common matrix functions for dense matrices.
+*/
+#ifndef GMM_DENSE_MATRIX_FUNCTIONS_H
+#define GMM_DENSE_MATRIX_FUNCTIONS_H
+
+
+namespace gmm {
+
+
+  /**
+     Matrix square root for upper triangular matrices (from GNU Octave).
+
+     A is overwritten by the result. Diagonal entries whose magnitude does
+     not exceed the tolerance (currently zero) are left as they are and a
+     warning is emitted at the end.
+  */
+  template <typename T>
+  void sqrtm_utri_inplace(dense_matrix<T>& A)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+    bool singular = false;
+
+    // The following code is equivalent to this triple loop:
+    //
+    //   n = rows (A);
+    //   for j = 1:n
+    //     A(j,j) = sqrt (A(j,j));
+    //     for i = j-1:-1:1
+    //       A(i,j) /= (A(i,i) + A(j,j));
+    //       k = 1:i-1;
+    //       A(k,j) -= A(k,i) * A(i,j);
+    //     endfor
+    //   endfor
+
+    // With a zero tolerance only exactly-zero diagonal entries are flagged.
+    R tol = R(0); // default_tol(R()) * gmm::mat_maxnorm(A);
+
+    const size_type n = mat_nrows(A);
+    for (int j=0; j < int(n); j++) {
+      // Start of column j in the underlying storage
+      // (assumes contiguous column-major layout with n rows -- TODO confirm).
+      typename dense_matrix<T>::iterator colj = A.begin() + j*n;
+      if (gmm::abs(colj[j]) > tol)
+        colj[j] = gmm::sqrt(colj[j]);
+      else
+        singular = true;
+
+      // Walk up column j, from the entry just above the diagonal to row 0.
+      for (int i=j-1; i >= 0; i--) {
+        typename dense_matrix<T>::const_iterator coli = A.begin() + i*n;
+        T colji = colj[i] = safe_divide(colj[i], (coli[i] + colj[j]));
+        // Propagate the freshly computed A(i,j) to the rows above i.
+        for (int k = 0; k < i; k++)
+          colj[k] -= coli[k] * colji;
+      }
+    }
+
+    if (singular)
+      GMM_WARNING1("Matrix is singular, may not have a square root");
+  }
+
+
+  /**
+     Matrix square root of a square complex dense matrix.
+
+     A is decomposed as A = Q * S * Q^H (Schur form, S upper triangular,
+     Q unitary), the square root of S is taken in place and the result is
+     transformed back: SQRTMA = Q * sqrt(S) * Q^H. SQRTMA is resized to the
+     dimensions of A. Requires a build with GMM_USES_LAPACK; otherwise
+     GMM_ASSERT1 fires.
+  */
+  template <typename T>
+  void sqrtm(const dense_matrix<std::complex<T> >& A,
+             dense_matrix<std::complex<T> >& SQRTMA)
+  {
+    GMM_ASSERT1(gmm::mat_nrows(A) == gmm::mat_ncols(A),
+                "Matrix square root requires a square matrix");
+    gmm::resize(SQRTMA, gmm::mat_nrows(A), gmm::mat_ncols(A));
+    dense_matrix<std::complex<T> > S(A), Q(A), TMP(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(TMP, S, Q);
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use sqrtm matrix function.");
+    #endif
+    sqrtm_utri_inplace(S);
+    gmm::mult(Q, S, TMP);
+    // Q is unitary, so its inverse is the conjugate transpose; a plain
+    // transpose is only correct when Q happens to be real-valued.
+    gmm::mult(TMP, gmm::conjugated(Q), SQRTMA);
+  }
+
+  /** Matrix square root of a real dense matrix, returned as a complex
+      matrix. A is copied into the real part of a complex matrix of the
+      same size, which is then handed to the complex overload. */
+  template <typename T>
+  void sqrtm(const dense_matrix<T>& A,
+             dense_matrix<std::complex<T> >& SQRTMA)
+  {
+    typedef dense_matrix<std::complex<T> > complex_matrix;
+    complex_matrix complexA(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, gmm::real_part(complexA));
+    sqrtm(complexA, SQRTMA);
+  }
+
+  /** Matrix square root of a real dense matrix, returned as a real matrix.
+      The computation is carried out on a complex copy of A; only the real
+      part of the complex result is written to SQRTMA, which is resized to
+      the dimensions of A. */
+  template <typename T>
+  void sqrtm(const dense_matrix<T>& A, dense_matrix<T>& SQRTMA)
+  {
+    typedef dense_matrix<std::complex<T> > complex_matrix;
+    complex_matrix complexA(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, gmm::real_part(complexA));
+    complex_matrix complexRoot(complexA);
+    sqrtm(complexA, complexRoot);
+    gmm::resize(SQRTMA, gmm::mat_nrows(A), gmm::mat_ncols(A));
+    // Any imaginary part of the complex result is discarded here.
+    gmm::copy(gmm::real_part(complexRoot), SQRTMA);
+  }
+
+
+  /**
+   Matrix logarithm for upper triangular matrices (from GNU/Octave)
+
+   S is overwritten by the result. The routine asserts that S is square,
+   that its first subdiagonal is negligible and that the inspected diagonal
+   entries are not negative reals. It then takes repeated square roots of S
+   until S - I is small in the 1-norm, evaluates a weighted sum over the
+   tabulated nodes, and rescales by the power of two accumulated while
+   taking square roots.
+  */
+  template <typename T>
+  void logm_utri_inplace(dense_matrix<T>& S)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = gmm::mat_nrows(S);
+    GMM_ASSERT1(n == gmm::mat_ncols(S),
+                "Matrix logarithm is not defined for non-square matrices");
+    // Only the first subdiagonal S(i+1,i) is inspected here.
+    // NOTE(review): n-1 presumably wraps around for n == 0 if size_type is
+    // unsigned -- confirm callers never pass an empty matrix.
+    for (size_type i=0; i < n-1; ++i)
+      if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+        GMM_ASSERT1(false, "An upper triangular matrix is expected");
+        break;
+      }
+    // NOTE(review): this loop stops at i == n-2, so the last diagonal entry
+    // S(n-1,n-1) is never tested -- confirm whether that is intended.
+    for (size_type i=0; i < n-1; ++i)
+      if (gmm::real(S(i,i)) <= -default_tol(R()) &&
+          gmm::abs(gmm::imag(S(i,i))) <= default_tol(R())) {
+        GMM_ASSERT1(false, "Principal matrix logarithm is not defined "
+                           "for matrices with negative eigenvalues");
+        break;
+      }
+
+    // Algorithm 11.9 in "Function of matrices", by N. Higham
+    // theta[j] is the threshold on norm1(S - I) compared against below;
+    // index j is also the value later assigned to m.
+    R theta[] = { R(0),R(0),R(1.61e-2),R(5.38e-2),R(1.13e-1),R(1.86e-1),R(2.6429608311114350e-1) };
+
+    // scaling doubles after every iteration that ends with a square root
+    // (the loop increment is skipped when the loop is left through break).
+    R scaling(1);
+    size_type p(0), m(6), opt_iters(100);
+    for (size_type k=0; k < opt_iters; ++k, scaling *= R(2)) {
+      dense_matrix<T> auxS(S);
+      for (size_type i = 0; i < n; ++i) auxS(i,i) -= R(1);
+      R tau = gmm::mat_norm1(auxS);
+      if (tau <= theta[6]) {
+        // p counts how many times tau has been found below theta[6].
+        ++p;
+        size_type j1(6), j2(6);
+        // j1: smallest index with tau <= theta[j] (6 if none below 6).
+        for (size_type j=0; j < 6; ++j)
+          if (tau <= theta[j]) { j1 = j; break; }
+        // j2: smallest index below j1 with tau <= 2*theta[j] (6 if none).
+        for (size_type j=0; j < j1; ++j)
+          if (tau <= 2*theta[j]) { j2 = j; break; }
+        if (j1 - j2 <= 1 || p == 2) { m = j1; break; }
+      }
+      sqrtm_utri_inplace(S);
+      if (k == opt_iters-1)
+        GMM_WARNING1 ("Maximum number of square roots exceeded; "
+                      "the calculated matrix logarithm may still be accurate");
+    }
+
+    // From here on S holds (root of the input) - I.
+    for (size_type i = 0; i < n; ++i) S(i,i) -= R(1);
+
+    // NOTE(review): because of this guard, "case 0" in the switch below is
+    // unreachable; for m == 0 the matrix S - I is only scaled.
+    if (m > 0) {
+
+      // Tabulated nodes and weights, m+1 of each (values look like
+      // Gauss-Legendre data on [0,1] -- presumably; verify against the
+      // cited reference).
+      std::vector<R> nodes, wts;
+      switch(m) {
+      case 0: {
+        R nodes_[] = { R(0.5) };
+        R wts_[] = { R(1) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 1: {
+        R nodes_[] = { R(0.211324865405187),R(0.788675134594813) };
+        R wts_[] = { R(0.5),R(0.5) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 2: {
+        R nodes_[] = { R(0.112701665379258),R(0.500000000000000),R(0.887298334620742) };
+        R wts_[] = { R(0.277777777777778),R(0.444444444444444),R(0.277777777777778) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 3: {
+        R nodes_[] = { R(0.0694318442029737),R(0.3300094782075718),R(0.6699905217924281),R(0.9305681557970263) };
+        R wts_[] = { R(0.173927422568727),R(0.326072577431273),R(0.326072577431273),R(0.173927422568727) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 4: {
+        R nodes_[] = { R(0.0469100770306681),R(0.2307653449471584),R(0.5000000000000000),R(0.7692346550528415),R(0.9530899229693319) };
+        R wts_[] = { R(0.118463442528095),R(0.239314335249683),R(0.284444444444444),R(0.239314335249683),R(0.118463442528094) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 5: {
+        R nodes_[] = { R(0.0337652428984240),R(0.1693953067668678),R(0.3806904069584015),R(0.6193095930415985),R(0.8306046932331322),R(0.9662347571015761) };
+        R wts_[] = { R(0.0856622461895853),R(0.1803807865240693),R(0.2339569672863452),R(0.2339569672863459),R(0.1803807865240693),R(0.0856622461895852) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      case 6: {
+        R nodes_[] = { R(0.0254460438286208),R(0.1292344072003028),R(0.2970774243113015),R(0.4999999999999999),R(0.7029225756886985),R(0.8707655927996973),R(0.9745539561713792) };
+        R wts_[] = { R(0.0647424830844348),R(0.1398526957446384),R(0.1909150252525594),R(0.2089795918367343),R(0.1909150252525595),R(0.1398526957446383),R(0.0647424830844349) };
+        nodes.assign(nodes_, nodes_+m+1);
+        wts.assign(wts_, wts_+m+1);
+        } break;
+      }
+
+      // auxS1 keeps S - I; auxS2 is rebuilt for every node as
+      // I + nodes[j] * auxS1; S accumulates the weighted sum.
+      dense_matrix<T> auxS1(S), auxS2(S);
+      std::vector<T> auxvec(n);
+      gmm::clear(S);
+      for (size_type j=0; j <= m; ++j) {
+        gmm::copy(gmm::scaled(auxS1, nodes[j]), auxS2);
+        gmm::add(gmm::identity_matrix(), auxS2);
+        // S += wts[j] * auxS1 * inv(auxS2)
+        // Done one row at a time: a row of auxS1 is solved against the
+        // transpose of auxS2 (the last argument is presumably the
+        // unit-diagonal flag -- TODO confirm).
+        for (size_type i=0; i < n; ++i) {
+          gmm::copy(gmm::mat_row(auxS1, i), auxvec);
+          gmm::lower_tri_solve(gmm::transposed(auxS2), auxvec, false);
+          gmm::add(gmm::scaled(auxvec, wts[j]), gmm::mat_row(S, i));
+        }
+      }
+    }
+    // Undo the repeated square roots: multiply by 2^(number of roots).
+    gmm::scale(S, scaling);
+  }
+
+  /**
+   Matrix logarithm (from GNU/Octave)
+
+   A must be square. A is brought to Schur form A = Q * S * Q^H, the
+   logarithm of the triangular factor is computed by logm_utri_inplace and
+   the result is transformed back into LOGMA (resized to n x n).
+
+   For a real T whose Schur factor still has non-negligible subdiagonal
+   entries, the computation is redone in complex arithmetic after
+   block2x2_reduction, and only the real part of the result is returned.
+
+   Requires a build with GMM_USES_LAPACK; otherwise GMM_ASSERT1 fires.
+  */
+  template <typename T>
+  void logm(const dense_matrix<T>& A, dense_matrix<T>& LOGMA)
+  {
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type n = gmm::mat_nrows(A);
+    GMM_ASSERT1(n == gmm::mat_ncols(A),
+                "Matrix logarithm is not defined for non-square matrices");
+    dense_matrix<T> S(A), Q(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(A, S, Q); // A = Q * S * Q^H
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use logm matrix function.");
+    #endif
+
+    // A real Schur factor with a non-zero subdiagonal entry cannot be
+    // handled as triangular; switch to complex arithmetic in that case.
+    // "i+1 < n" rather than "i < n-1" so that n == 0 does not wrap around.
+    bool convert_to_complex(false);
+    if (!is_complex(T()))
+      for (size_type i=0; i+1 < n; ++i)
+        if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+          convert_to_complex = true;
+          break;
+        }
+
+    gmm::resize(LOGMA, n, n);
+    if (convert_to_complex) {
+      dense_matrix<std::complex<R> > cS(n,n), cQ(n,n), auxmat(n,n);
+      gmm::copy(gmm::real_part(S), gmm::real_part(cS));
+      gmm::copy(gmm::real_part(Q), gmm::real_part(cQ));
+      block2x2_reduction(cS, cQ, default_tol(R())*R(3));
+      // Clear everything below the diagonal before the triangular routine.
+      for (size_type j=0; j+1 < n; ++j)
+        for (size_type i=j+1; i < n; ++i)
+          cS(i,j) = T(0);
+      logm_utri_inplace(cS);
+      gmm::mult(cQ, cS, auxmat);
+      // cQ is complex after the reduction, so the back-transformation
+      // needs the conjugate transpose rather than the plain transpose.
+      gmm::mult(auxmat, gmm::conjugated(cQ), cS);
+      // Remove small complex values which may have entered calculation
+      gmm::copy(gmm::real_part(cS), LOGMA);
+//      GMM_ASSERT1(gmm::mat_norm1(gmm::imag_part(cS)) < n*default_tol(T()),
+//                  "Internal error, imag part should be zero");
+    } else {
+      dense_matrix<T> auxmat(n,n);
+      logm_utri_inplace(S);
+      gmm::mult(Q, S, auxmat);
+      // Conjugate transpose: identical to the transpose for real T and the
+      // correct inverse of a unitary Q for complex T.
+      gmm::mult(auxmat, gmm::conjugated(Q), LOGMA);
+    }
+
+  }
+
+}
+
+#endif
+
--- a/gmm/gmm_dense_qr.h
+++ b/gmm/gmm_dense_qr.h
@ -0,0 +1,789 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_dense_qr.h
+   @author  Caroline Lecalvez, Caroline.Lecalvez@gmm.insa-tlse.fr, Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 12, 2003.
+   @brief Dense QR factorization.
+*/
+#ifndef GMM_DENSE_QR_H
+#define GMM_DENSE_QR_H
+
+#include "gmm_dense_Householder.h"
+
+namespace gmm {
+
+
+  /**
+     In-place QR factorization by Householder reflections (real or complex).
+     Requires at least as many rows as columns. After column j has been
+     processed, the entries of its Householder vector beyond the first are
+     stored below the diagonal of that column. A_ is written through a
+     const_cast.
+  */
+  template <typename MAT1>
+  void qr_factor(const MAT1 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    MAT1 &A = const_cast<MAT1 &>(A_);
+
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+
+    std::vector<value_type> W(m), V(m);
+
+    for (size_type j = 0; j < n; ++j) {
+      const size_type rows_left = m-j, cols_left = n-j;
+      sub_interval SUBI(j, rows_left), SUBJ(j, cols_left);
+      V.resize(rows_left);
+      W.resize(cols_left);
+
+      // Column j from the diagonal downwards becomes the reflector.
+      for (size_type k = 0; k < rows_left; ++k) V[k] = A(j+k, j);
+      house_vector(V);
+
+      row_house_update(sub_matrix(A, SUBI, SUBJ), V, W);
+      // Keep the reflector (minus its first entry) under the diagonal.
+      for (size_type k = 1; k < rows_left; ++k) A(j+k, j) = V[k];
+    }
+  }
+
+
+  // QR is the output of qr_factor(QR): the upper triangular part holds R
+  // and the part below the diagonal holds the Householder reflectors.
+  // Performs A <- AQ; A_ is written through a const_cast.
+  template <typename MAT1, typename MAT2>
+  void apply_house_right(const MAT1 &QR, const MAT2 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    MAT2 &A = const_cast<MAT2 &>(A_);
+    size_type m = mat_nrows(QR), n = mat_ncols(QR);
+    GMM_ASSERT2(m == mat_ncols(A), "dimensions mismatch");
+    if (m == 0) return;
+    const size_type nrowsA = mat_nrows(A);
+    std::vector<T> V(m), W(nrowsA);
+    V[0] = T(1);  // leading entry of every reflector; kept across resizes
+    for (size_type j = 0; j < n; ++j) {
+      const size_type len = m-j;
+      V.resize(len);
+      for (size_type k = 1; k < len; ++k) V[k] = QR(j+k, j);
+      sub_interval all_rows(0, nrowsA), trailing_cols(j, len);
+      col_house_update(sub_matrix(A, all_rows, trailing_cols), V, W);
+    }
+  }
+
+  // QR is the output of qr_factor(QR): the upper triangular part holds R
+  // and the part below the diagonal holds the Householder reflectors.
+  // Performs A <- Q*A; A_ is written through a const_cast.
+  template <typename MAT1, typename MAT2>
+  void apply_house_left(const MAT1 &QR, const MAT2 &A_) {
+    typedef typename linalg_traits<MAT1>::value_type T;
+    MAT2 &A = const_cast<MAT2 &>(A_);
+    size_type m = mat_nrows(QR), n = mat_ncols(QR);
+    GMM_ASSERT2(m == mat_nrows(A), "dimensions mismatch");
+    if (m == 0) return;
+    const size_type ncolsA = mat_ncols(A);
+    std::vector<T> V(m), W(ncolsA);
+    V[0] = T(1);  // leading entry of every reflector; kept across resizes
+    for (size_type j = 0; j < n; ++j) {
+      const size_type len = m-j;
+      V.resize(len);
+      for (size_type k = 1; k < len; ++k) V[k] = QR(j+k, j);
+      sub_interval trailing_rows(j, len), all_cols(0, ncolsA);
+      row_house_update(sub_matrix(A, trailing_rows, all_cols), V, W);
+    }
+  }
+
+  /** QR factorization with an explicitly assembled Q.
+      A (m rows, n columns, m >= n) is left untouched; QQ and RR are written
+      through const_casts. The reflectors are first applied to a copy of A
+      held in Q, the leading n x n block of that copy is stored in R, and Q
+      is then rebuilt from the identity by applying the stored reflectors in
+      reverse order. */
+  template <typename MAT1, typename MAT2, typename MAT3>
+    void qr_factor(const MAT1 &A, const MAT2 &QQ, const MAT3 &RR) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    MAT2 &Q = const_cast<MAT2 &>(QQ);
+    MAT3 &R = const_cast<MAT3 &>(RR);
+
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+    gmm::copy(A, Q);
+
+    std::vector<value_type> W(m);
+    dense_matrix<value_type> VV(m, n);   // column j stores reflector j
+
+    // Forward sweep: build each reflector and apply it to the working copy.
+    for (size_type j = 0; j < n; ++j) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+
+      for (size_type row = j; row < m; ++row) VV(row,j) = Q(row, j);
+      house_vector(sub_vector(mat_col(VV,j), SUBI));
+
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+
+    const sub_interval leading(0, n);
+    gmm::copy(sub_matrix(Q, leading, leading), R);
+    gmm::copy(identity_matrix(), Q);
+
+    // Backward sweep: columns n-1 down to 0.
+    for (size_type j = n; j-- > 0; ) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+  }
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // Eigenvalue extraction, real matrix entries (TA) into a real vector (TV).
+  // The two trailing unnamed parameters are only used to select this
+  // overload. A is scanned along its diagonal: where the subdiagonal entry
+  // A(i+1,i) is not negligible, rows/columns i and i+1 are treated as a
+  // 2x2 block and two values are produced at once; otherwise V[i] is the
+  // diagonal entry.
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, TV) {
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    // tol_i: threshold for the current subdiagonal entry;
+    // tol_cplx: largest threshold seen so far, used for the discriminant.
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i);
+      }
+      if ((i < n-1) && gmm::abs(A(i+1,i)) >= tol_i) {
+        // 2x2 block: roots of x^2 - tr*x + det.
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det;
+        if (delta < -tol_cplx) {
+          // Complex pair: a real vector cannot hold it, so warn and store
+          // the common real part in both slots.
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                      << std::complex<TA>(tr/TA(2), gmm::sqrt(-delta)/TA(2)));
+          V[i] = V[i+1] = tr / TA(2);
+        }
+        else {
+          // Slightly negative discriminants are clamped to zero.
+          delta = std::max(TA(0), delta);
+          V[i  ] = TA(tr + gmm::sqrt(delta))/ TA(2);
+          V[i+1] = TA(tr -  gmm::sqrt(delta))/ TA(2);
+        }
+        ++i;  // the block consumed two diagonal positions
+      }
+      else
+        V[i] = TV(A(i,i));
+    }
+  }
+
+  // Eigenvalue extraction, real matrix entries (TA) into a complex vector.
+  // The two trailing unnamed parameters are only used to select this
+  // overload. Diagonal entries are copied where the subdiagonal entry is
+  // negligible; otherwise the 2x2 block at (i, i+1) yields either a complex
+  // conjugate pair or two real values.
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, std::complex<TV>) {
+    size_type n = mat_nrows(A);
+    tol *= Ttol(2);
+    for (size_type i = 0; i < n; ++i)
+      if ((i == n-1) ||
+          gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol)
+        V[i] = std::complex<TV>(A(i,i));
+      else {
+        // 2x2 block: roots of x^2 - tr*x + det.
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det;
+        if (delta < TA(0)) {
+          // Negative discriminant: conjugate pair, positive imaginary
+          // part stored first.
+          V[i] = std::complex<TV>(tr / TA(2), gmm::sqrt(-delta) / TA(2));
+          V[i+1] = std::complex<TV>(tr / TA(2), -gmm::sqrt(-delta)/ TA(2));
+        }
+        else {
+          V[i  ] = TA(tr + gmm::sqrt(delta)) / TA(2);
+          V[i+1] = TA(tr -  gmm::sqrt(delta)) / TA(2);
+        }
+        ++i;  // the block consumed two diagonal positions
+      }
+  }
+
+  // Eigenvalue extraction, complex matrix entries into a real vector (TV).
+  // The two trailing unnamed parameters are only used to select this
+  // overload. Only real parts are stored; a warning is emitted whenever
+  // the imaginary part of a value exceeds the running tolerance tol_cplx.
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, std::complex<TA>, TV) {
+    typedef std::complex<TA> T;
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    // tol_i: threshold for the current subdiagonal entry;
+    // tol_cplx: largest threshold seen so far.
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i);
+      }
+      if ((i == n-1) || gmm::abs(A(i+1,i)) < tol_i) {
+        // Decoupled diagonal entry.
+        if (gmm::abs(std::imag(A(i,i))) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                      << T(A(i,i)) << " : "  << gmm::abs(std::imag(A(i,i)))
+                      / gmm::abs(std::real(A(i,i))) << " : " << tol_cplx);
+        V[i] = std::real(A(i,i));
+      }
+      else {
+        // 2x2 block: roots of x^2 - tr*x + det, in complex arithmetic.
+        T tr = A(i,i) + A(i+1, i+1);
+        T det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        T delta = tr*tr - TA(4) * det;
+        T a1 = (tr + gmm::sqrt(delta)) / TA(2);
+        T a2 = (tr - gmm::sqrt(delta)) / TA(2);
+        if (gmm::abs(std::imag(a1)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a1);
+        if (gmm::abs(std::imag(a2)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a2);
+
+        V[i] = std::real(a1); V[i+1] = std::real(a2);
+        ++i;  // the block consumed two diagonal positions
+      }
+    }
+  }
+
+  // Eigenvalue extraction, complex matrix entries into a complex vector.
+  // The two trailing unnamed parameters are only used to select this
+  // overload. Diagonal entries are copied where the subdiagonal entry is
+  // negligible; otherwise both roots of the 2x2 block at (i, i+1) are
+  // stored.
+  template <typename TA, typename TV, typename Ttol,
+            typename MAT, typename VECT>
+  void extract_eig(const MAT &A, VECT &V, Ttol tol,
+                   std::complex<TA>, std::complex<TV>) {
+    typedef std::complex<TA> cplx;
+    const size_type n = mat_nrows(A);
+    tol *= Ttol(2);
+    size_type i = 0;
+    while (i < n) {
+      const bool decoupled = (i == n-1) ||
+        gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+      if (decoupled) {
+        V[i] = std::complex<TV>(A(i,i));
+        ++i;
+        continue;
+      }
+      // 2x2 block: roots of x^2 - tr*x + det.
+      cplx tr = A(i,i) + A(i+1, i+1);
+      cplx det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+      cplx delta = tr*tr - TA(4) * det;
+      cplx root = gmm::sqrt(delta);
+      V[i] = (tr + root) / TA(2);
+      V[i+1] = (tr - root) / TA(2);
+      i += 2;  // the block consumed two diagonal positions
+    }
+  }
+
+  ///@endcond
+  /**
+     Compute eigenvalue vector.
+     Forwards to the overload selected by the scalar types of A and V;
+     V is written through a const_cast.
+  */
+  template <typename MAT, typename Ttol, typename VECT> inline
+  void extract_eig(const MAT &A, const VECT &V, Ttol tol) {
+    typedef typename linalg_traits<MAT>::value_type matrix_scalar;
+    typedef typename linalg_traits<VECT>::value_type vector_scalar;
+    VECT &out = const_cast<VECT&>(V);
+    extract_eig(A, out, tol, matrix_scalar(), vector_scalar());
+  }
+
+  /* ********************************************************************* */
+  /*    Stop criterion for QR algorithms                                   */
+  /* ********************************************************************* */
+
+  // Deflation test for the unsymmetric QR iteration.
+  // q is read on entry and updated; p is overwritten. Subdiagonal entries
+  // A(i,i-1) that are negligible (relative to their diagonal neighbours, or
+  // below rmin in absolute value) are set to exactly zero. q is then
+  // advanced while the trailing subdiagonal entries are zero and set to n
+  // once at most two rows remain; p is moved up from n-q-2 until a zero
+  // subdiagonal entry or row 0 is reached.
+  template <typename MAT, typename Ttol>
+  void qr_stop_criterion(MAT &A, size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    size_type n = mat_nrows(A);
+    if (n <= 2) { q = n; p = 0; }
+    else {
+      // Only the part not yet deflated (rows below n-q) is scanned.
+      for (size_type i = 1; i < n-q; ++i)
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+
+      // Two subdiagonal positions are examined per step, so a trailing
+      // block of size one or two can be split off.
+      while ((q < n-1 && A(n-1-q, n-2-q) == T(0)) ||
+             (q < n-2 && A(n-2-q, n-3-q) == T(0))) ++q;
+      if (q >= n-2) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p;
+    }
+  }
+
+  // Deflation test for the symmetric QR iteration, matrix form.
+  // q is read on entry and updated; p is overwritten; AA is modified
+  // through a const_cast. Negligible subdiagonal entries A(i,i-1) are set
+  // to exactly zero, q is advanced while the trailing subdiagonal entry is
+  // zero (and set to n once a single row remains), and p is moved up from
+  // n-q-2 until a zero subdiagonal entry or row 0 is reached.
+  template <typename MAT, typename Ttol> inline
+  void symmetric_qr_stop_criterion(const MAT &AA, size_type &p, size_type &q,
+                                Ttol tol) {
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    MAT& A = const_cast<MAT&>(AA);
+    size_type n = mat_nrows(A);
+    if (n <= 1) { q = n; p = 0; }
+    else {
+      // Only the part not yet deflated (rows below n-q) is scanned.
+      for (size_type i = 1; i < n-q; ++i)
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+
+      while (q < n-1 && A(n-1-q, n-2-q) == T(0)) ++q;
+      if (q >= n-1) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p;
+    }
+  }
+
+  // Deflation test for the symmetric QR iteration, vector form: diag holds
+  // the diagonal and sdiag_ the sub-diagonal (sdiag[i-1] plays the role of
+  // A(i,i-1) in the matrix overload). q is read on entry and updated; p is
+  // overwritten; sdiag_ is modified through a const_cast.
+  template <typename VECT1, typename VECT2, typename Ttol> inline
+  void symmetric_qr_stop_criterion(const VECT1 &diag, const VECT2 &sdiag_,
+                                   size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits<VECT2>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    VECT2 &sdiag = const_cast<VECT2 &>(sdiag_);
+    size_type n = vect_size(diag);
+    if (n <= 1) { q = n; p = 0; return; }
+    // Zero out negligible sub-diagonal entries in the active part.
+    for (size_type i = 1; i < n-q; ++i)
+      if (gmm::abs(sdiag[i-1]) < (gmm::abs(diag[i])+ gmm::abs(diag[i-1]))*tol
+          || gmm::abs(sdiag[i-1]) < rmin)
+        sdiag[i-1] = T(0);
+    while (q < n-1 && sdiag[n-2-q] == T(0)) ++q;
+    if (q >= n-1) q = n;
+    p = n-q; if (p) --p; if (p) --p;
+    while (p > 0 && sdiag[p-1] != T(0)) --p;
+  }
+
+  /* ********************************************************************* */
+  /*    2x2 blocks reduction for Schur vectors                             */
+  /* ********************************************************************* */
+
+  // Scans the diagonal of H for 2x2 blocks (non-negligible H(i+1,i)) and,
+  // when T is complex or the block's discriminant is non-negative, applies
+  // a two-element Householder vector to the block and to the matching two
+  // columns of Q. Blocks of a real H with a negative discriminant are left
+  // unchanged. Both H and Q are modified in place.
+  //
+  // NOTE(review): the H updates use sub_matrix(H, SUBI), which presumably
+  // selects only the SUBI x SUBI diagonal block; if so, the rest of rows
+  // and columns i, i+1 of H is not transformed -- confirm this is
+  // sufficient for every caller.
+  template <typename MATH, typename MATQ, typename Ttol>
+  void block2x2_reduction(MATH &H, MATQ &Q, Ttol tol) {
+    typedef typename linalg_traits<MATH>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+    if (n < 2) return;
+    sub_interval SUBQ(0, nq), SUBL(0, 2);
+    // v: Householder vector with fixed leading 1; w: scratch space.
+    std::vector<T> v(2), w(std::max(n, nq)); v[0] = T(1);
+    tol *= Ttol(2);
+    // tol_cplx is updated below but not read in this function.
+    Ttol tol_i = tol * gmm::abs(H(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n-1; ++i) {
+      tol_i = (gmm::abs(H(i,i))+gmm::abs(H(i+1,i+1)))*tol;
+      tol_cplx = std::max(tol_cplx, tol_i);
+
+      if (gmm::abs(H(i+1,i)) > tol_i) { // 2x2 block detected
+        // tr is half the difference of the diagonal entries (not a trace).
+        T tr = (H(i+1, i+1) - H(i,i)) / T(2);
+        T delta = tr*tr + H(i,i+1)*H(i+1, i);
+
+        if (is_complex(T()) || gmm::real(delta) >= R(0)) {
+          sub_interval SUBI(i, 2);
+          T theta = (tr - gmm::sqrt(delta)) / H(i+1,i);
+          R a = gmm::abs(theta);
+          // a == 0 is handled separately to avoid dividing by zero.
+          v[1] = (a == R(0)) ? T(-1)
+            : gmm::conj(theta) * (R(1) - gmm::sqrt(a*a + R(1)) / a);
+          row_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+        }
+        ++i;  // skip the second row of the block
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Basic qr algorithm.                                                */
+  /* ********************************************************************* */
+
+  // Helper macros for the default tolerance arguments of the QR routines.
+  // Both expand to an expression mentioning MAT1, so they can only be used
+  // where a template parameter of that name is in scope.
+  //  - tol_type_for_qr: magnitude type of MAT1's scalar type.
+  //  - default_tol_for_qr: three times gmm::default_tol for that type.
+  #define tol_type_for_qr typename number_traits<typename \
+                          linalg_traits<MAT1>::value_type>::magnitude_type
+  #define default_tol_for_qr \
+    (gmm::default_tol(tol_type_for_qr()) *  tol_type_for_qr(3))
+
+  // QR method for real or complex square matrices based on QR factorisation.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // Very slow method. Use implicit_qr_method instead.
+  //
+  // A is left untouched; eigval_ and eigvect_ are written through
+  // const_casts. When compvect is true, eigvect accumulates every
+  // transformation applied to the working copy of A, including the final
+  // 2x2 block reduction. Iteration stops when qr_stop_criterion reports
+  // q == n; GMM_ASSERT1 fires after n*1000 iterations.
+  template <typename MAT1, typename VECT, typename MAT2>
+    void rudimentary_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                                  const MAT2 &eigvect_,
+                                  tol_type_for_qr tol = default_tol_for_qr,
+                                  bool compvect = true) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+
+    size_type n = mat_nrows(A), p, q = 0, ite = 0;
+    dense_matrix<value_type> Q(n, n), R(n,n), A1(n,n);
+    gmm::copy(A, A1);
+
+    Hessenberg_reduction(A1, eigvect, compvect);
+    qr_stop_criterion(A1, p, q, tol);
+
+    while (q < n) {
+      // One unshifted QR step: A1 <- R*Q.
+      qr_factor(A1, Q, R);
+      gmm::mult(R, Q, A1);
+      // R is reused as scratch space for the product eigvect*Q.
+      if (compvect) { gmm::mult(eigvect, Q, R); gmm::copy(R, eigvect); }
+
+      qr_stop_criterion(A1, p, q, tol);
+      ++ite;
+      GMM_ASSERT1(ite < n*1000, "QR algorithm failed");
+    }
+    // The reduction must update the accumulated vectors. The local Q only
+    // holds the factor of the last step and is never read again, so
+    // passing it here would silently drop the transformation.
+    if (compvect) block2x2_reduction(A1, eigvect, tol);
+    extract_eig(A1, eigval, tol);
+  }
+
+  // Eigenvalues only: forwards to the five-argument overload with an empty
+  // eigenvector matrix and compvect set to false.
+  template <typename MAT1, typename VECT>
+    void rudimentary_qr_algorithm(const MAT1 &a, VECT &eigval,
+                                  tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    dense_matrix<value_type> no_vectors(0,0);
+    const bool compvect = false;
+    rudimentary_qr_algorithm(a, eigval, no_vectors, tol, compvect);
+  }
+
+  /* ********************************************************************* */
+  /*    Francis QR step.                                                   */
+  /* ********************************************************************* */
+
+  // One Francis QR step on H, updated in place through a const_cast.
+  // The shift data s and t are the trace and determinant of the trailing
+  // 2x2 block of H. A sequence of three-element Householder vectors
+  // (the last one has two elements) is applied to the rows and columns of
+  // H; when compute_Q is true the same column updates are applied to Q.
+  //
+  // NOTE(review): H(2,1) and SUBK(n-3,3) are used unconditionally, so this
+  // presumably requires n >= 3 -- confirm callers guarantee it.
+  template <typename MAT1, typename MAT2>
+    void Francis_qr_step(const MAT1& HH, const MAT2 &QQ, bool compute_Q) {
+    MAT1& H = const_cast<MAT1&>(HH); MAT2& Q = const_cast<MAT2&>(QQ);
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+
+    // v: current Householder vector; w: scratch space for the updates.
+    std::vector<value_type> v(3), w(std::max(n, nq));
+
+    value_type s = H(n-2, n-2) + H(n-1, n-1);
+    value_type t = H(n-2, n-2) * H(n-1, n-1) - H(n-2, n-1) * H(n-1, n-2);
+    // (x, y, z): the three leading entries used for the first reflector.
+    value_type x = H(0, 0) * H(0, 0) + H(0,1) * H(1, 0) - s * H(0,0) + t;
+    value_type y = H(1, 0) * (H(0,0) + H(1,1) - s);
+    value_type z = H(1, 0) * H(2, 1);
+
+    sub_interval SUBQ(0, nq);
+
+    for (size_type k = 0; k < n - 2; ++k) {
+      v[0] = x; v[1] = y; v[2] = z;
+      house_vector(v);
+      // r: one past the last row touched by the column update;
+      // q: first column touched by the row update.
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK),  v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI),  v, sub_vector(w, SUBJ));
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI),  v, sub_vector(w, SUBQ));
+
+      // Entries of column k below the diagonal feed the next reflector.
+      x = H(k+1, k); y = H(k+2, k);
+      if (k < n-3) z = H(k+3, k);
+    }
+    // Final step with a two-element reflector on the last two rows.
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2);
+    v[0] = x; v[1] = y;
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Wilkinson Double shift QR step (from Lapack).                      */
+  /* ********************************************************************* */
+
+  // One double-shift QR step on H, updated in place through a const_cast.
+  // With exc set, the shift data come from the magnitudes of the last two
+  // subdiagonal entries (exceptional shift); otherwise they come from the
+  // trailing 2x2 block of H. A start row m is searched from the bottom,
+  // then Householder reflectors are applied from row m-1 downwards; when
+  // compute_Q is true the same column updates are applied to Q.
+  //
+  // NOTE(review): H(n-2, n-3) and SUBK(n-3,3) are used, so this presumably
+  // requires n >= 3 -- confirm callers guarantee it.
+  template <typename MAT1, typename MAT2, typename Ttol>
+  void Wilkinson_double_shift_qr_step(const MAT1& HH, const MAT2 &QQ,
+                                      Ttol tol, bool exc, bool compute_Q) {
+    MAT1& H = const_cast<MAT1&>(HH); MAT2& Q = const_cast<MAT2&>(QQ);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(H), nq = mat_nrows(Q), m;
+    // v: current Householder vector; w: scratch space for the updates.
+    std::vector<T> v(3), w(std::max(n, nq));
+    // Constants used only by the exceptional shift below.
+    const R dat1(0.75), dat2(-0.4375);
+    T h33, h44, h43h34, v1(0), v2(0), v3(0);
+
+    if (exc) {                    /* Exceptional shift.                    */
+      R s = gmm::abs(H(n-1, n-2)) + gmm::abs(H(n-2, n-3));
+      h33 = h44 = dat1 * s;
+      h43h34 = dat2*s*s;
+    }
+    else {                        /* Wilkinson double shift.               */
+      h44 = H(n-1,n-1); h33 = H(n-2, n-2);
+      h43h34 = H(n-1, n-2) * H(n-2, n-1);
+    }
+
+    /* Look for two consecutive small subdiagonal elements.                */
+    /* Determine the effect of starting the double-shift QR iteration at   */
+    /* row m, and see if this would make H(m-1, m-2) negligible.           */
+    for (m = n-2; m != 0; --m) {
+      T h11  = H(m-1, m-1), h22  = H(m, m);
+      T h21  = H(m, m-1),   h12  = H(m-1, m);
+      T h44s = h44 - h11,   h33s = h33 - h11;
+      v1 = (h33s*h44s-h43h34) / h21 + h12;
+      v2 = h22 - h11 - h33s - h44s;
+      v3 = H(m+1, m);
+      // Normalize (v1, v2, v3) by the sum of their magnitudes.
+      R s = gmm::abs(v1) + gmm::abs(v2) + gmm::abs(v3);
+      v1 /= s; v2 /= s; v3 /= s;
+      if (m == 1) break;
+      T h00 = H(m-2, m-2);
+      T h10 = H(m-1, m-2);
+      R tst1 = gmm::abs(v1)*(gmm::abs(h00)+gmm::abs(h11)+gmm::abs(h22));
+      if (gmm::abs(h10)*(gmm::abs(v2)+gmm::abs(v3)) <= tol * tst1) break;
+    }
+
+    /* Double shift QR step.                                               */
+    sub_interval SUBQ(0, nq);
+    for (size_type k = (m == 0) ? 0 : m-1; k < n-2; ++k) {
+      v[0] = v1; v[1] = v2; v[2] = v3;
+      house_vector(v);
+      // r: one past the last row touched by the column update;
+      // q: first column touched by the row update.
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK),  v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI),  v, sub_vector(w, SUBJ));
+      // After the first reflector, force the entries of column k-1 below
+      // the subdiagonal to exact zeros.
+      if (k > m-1) { H(k+1, k-1) = T(0); if (k < n-3) H(k+2, k-1) = T(0); }
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI),  v, sub_vector(w, SUBQ));
+
+      // Entries of column k below the diagonal feed the next reflector.
+      v1 = H(k+1, k); v2 = H(k+2, k);
+      if (k < n-3) v3 = H(k+3, k);
+    }
+    // Final step with a two-element reflector on the last two rows.
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2); v[0] = v1; v[1] = v2;
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm.                                             */
+  /* ********************************************************************* */
+
+  // QR method for real or complex square matrices based on an
+  // implicit QR factorisation. eigval has to be a complex vector
+  // if A has complex eigenvalues. Complexity about 10n^3, 25n^3 if
+  // eigenvectors are computed
+  // Outline: A is copied into a dense work matrix H, H is passed through
+  // Hessenberg_reduction, then Wilkinson_double_shift_qr_step is applied
+  // repeatedly to the block sub_interval(p, n-p-q) of H until
+  // qr_stop_criterion yields q == n. Eigenvalues are finally read from H
+  // with extract_eig.
+  // eigval_ and Q_ are const references that are cast back to non-const
+  // and written to. A itself is not modified.
+  template <typename MAT1, typename VECT, typename MAT2>
+    void implicit_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                               const MAT2 &Q_,
+                               tol_type_for_qr tol = default_tol_for_qr,
+                               bool compvect = true) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &Q = const_cast<MAT2 &>(Q_);
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+
+    // ite counts every sweep; its counts sweeps since q last changed.
+    size_type n(mat_nrows(A)), q(0), q_old, p(0), ite(0), its(0);
+    dense_matrix<value_type> H(n,n);
+    // Column range of Q handed to the QR step; stays empty unless compvect.
+    sub_interval SUBK(0,0);
+
+    gmm::copy(A, H);
+    Hessenberg_reduction(H, Q, compvect);
+    qr_stop_criterion(H, p, q, tol);
+
+    while (q < n) {
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(Q));
+      if (compvect) SUBK = SUBI;
+//       Francis_qr_step(sub_matrix(H, SUBI),
+//                       sub_matrix(Q, SUBJ, SUBK), compvect);
+      // An exceptional shift is requested when its reaches 10 or 20.
+      Wilkinson_double_shift_qr_step(sub_matrix(H, SUBI),
+                                     sub_matrix(Q, SUBJ, SUBK),
+                                     tol, (its == 10 || its == 20), compvect);
+      q_old = q;
+      // Note: the in-loop stopping test uses twice the initial tolerance.
+      qr_stop_criterion(H, p, q, tol*2);
+      if (q != q_old) its = 0;
+      ++its; ++ite;
+      // Give up once the total number of sweeps reaches n*100.
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed");
+    }
+    if (compvect) block2x2_reduction(H, Q, tol);
+    extract_eig(H, eigval, tol);
+  }
+
+
+  // Eigenvalues-only overload: forwards to the five-argument version with
+  // eigenvector computation switched off. A 1x1 dense matrix is supplied
+  // in place of the Q argument.
+  template <typename MAT1, typename VECT>
+  void implicit_qr_algorithm(const MAT1 &a, VECT &eigval,
+                             tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    const bool compvect = false;
+    dense_matrix<value_type> placeholder_q(1, 1);
+    implicit_qr_algorithm(a, eigval, placeholder_q, tol, compvect);
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit symmetric QR step with Wilkinson Shift.                   */
+  /* ********************************************************************* */
+
+  // One implicit QR step with a shift computed from the trailing 2x2 block,
+  // applied in place to the full matrix MM (and to the columns of ZZ when
+  // compute_z is true). Both arguments are const references that are cast
+  // back to non-const and modified.
+  // NOTE(review): M(n-2, n-2) and M(1, 0) are read unconditionally, so the
+  // function presumably expects n >= 2 - confirm against the callers.
+  template <typename MAT1, typename MAT2>
+    void symmetric_Wilkinson_qr_step(const MAT1& MM, const MAT2 &ZZ,
+                                     bool compute_z) {
+    MAT1& M = const_cast<MAT1&>(MM); MAT2& Z = const_cast<MAT2&>(ZZ);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type n = mat_nrows(M);
+
+    // Re-symmetrise the tridiagonal part: force a real diagonal and replace
+    // each sub/super-diagonal pair by the average of M(i,i-1) and
+    // conj(M(i-1,i)).
+    for (size_type i = 0; i < n; ++i) {
+      M(i, i) = T(gmm::real(M(i, i)));
+      if (i > 0) {
+        T a = (M(i, i-1) + gmm::conj(M(i-1, i)))/R(2);
+        M(i, i-1) = a; M(i-1, i) = gmm::conj(a);
+      }
+    }
+
+    // Shift mu computed from the last two diagonal entries and the last
+    // subdiagonal entry. When nu is exactly zero the subdiagonal entry is
+    // set to zero and the step ends.
+    R d = gmm::real(M(n-2, n-2) - M(n-1, n-1)) / R(2);
+    R e = gmm::abs_sqr(M(n-1, n-2));
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    if (nu == R(0)) { M(n-1, n-2) = T(0); return; }
+    R mu = gmm::real(M(n-1, n-1)) - e / nu;
+    T x = M(0,0) - T(mu), z = M(1, 0), c, s;
+
+    // Sweep of Givens rotations acting on rows/columns (k-1, k). Only the
+    // entries in columns/rows k-2 .. k+1 are updated, with guards at the
+    // matrix borders.
+    for (size_type k = 1; k < n; ++k) {
+      Givens_rotation(x, z, c, s);
+
+      if (k > 1) Apply_Givens_rotation_left(M(k-1,k-2), M(k,k-2), c, s);
+      Apply_Givens_rotation_left(M(k-1,k-1), M(k,k-1), c, s);
+      Apply_Givens_rotation_left(M(k-1,k  ), M(k,k  ), c, s);
+      if (k < n-1) Apply_Givens_rotation_left(M(k-1,k+1), M(k,k+1), c, s);
+      if (k > 1) Apply_Givens_rotation_right(M(k-2,k-1), M(k-2,k), c, s);
+      Apply_Givens_rotation_right(M(k-1,k-1), M(k-1,k), c, s);
+      Apply_Givens_rotation_right(M(k  ,k-1), M(k,k)  , c, s);
+      if (k < n-1) Apply_Givens_rotation_right(M(k+1,k-1), M(k+1,k), c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+      // Inputs of the next rotation are read from column k-1.
+      if (k < n-1) { x = M(k, k-1); z = M(k+1, k-1); }
+    }
+
+  }
+
+  // Same QR step as the matrix overload, but working on a tridiagonal
+  // matrix stored as two vectors: diag_ (diagonal) and sdiag_ (subdiagonal).
+  // diag_, sdiag_ and ZZ are const references that are cast back to
+  // non-const and modified. ZZ is only touched when compute_z is true.
+  // NOTE(review): diag[n-2], diag[1] and sdiag[1] are read unconditionally,
+  // so n >= 2 and at least two readable sdiag entries are presumably
+  // required - confirm against the callers.
+  template <typename VECT1, typename VECT2, typename MAT>
+  void symmetric_Wilkinson_qr_step(const VECT1& diag_, const VECT2& sdiag_,
+                                   const MAT &ZZ, bool compute_z) {
+    VECT1& diag = const_cast<VECT1&>(diag_);
+    VECT2& sdiag = const_cast<VECT2&>(sdiag_);
+    MAT& Z = const_cast<MAT&>(ZZ);
+    typedef typename linalg_traits<VECT2>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    // Shift mu from the last two diagonal entries and the last subdiagonal
+    // entry. When nu is exactly zero that subdiagonal entry is zeroed and
+    // the step ends.
+    size_type n = vect_size(diag);
+    R d = (diag[n-2] - diag[n-1]) / R(2);
+    R e = gmm::abs_sqr(sdiag[n-2]);
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    if (nu == R(0)) { sdiag[n-2] = T(0); return; }
+    R mu = diag[n-1] - e / nu;
+    T x = diag[0] - T(mu), z = sdiag[0], c, s;
+
+    // Local copies of the matrix entries touched by one rotation. They are
+    // initialised from diag[0..1] and sdiag[0..1] (super-diagonal entries
+    // are the conjugates of the subdiagonal ones) and shifted down by one
+    // row/column at the end of every iteration.
+    T a01(0), a02(0);
+    T a10(0), a11(diag[0]), a12(gmm::conj(sdiag[0])), a13(0);
+    T a20(0), a21(sdiag[0]), a22(diag[1]), a23(gmm::conj(sdiag[1]));
+    T a31(0), a32(sdiag[1]);
+
+    for (size_type k = 1; k < n; ++k) {
+      Givens_rotation(x, z, c, s);
+
+      if (k > 1) Apply_Givens_rotation_left(a10, a20, c, s);
+      Apply_Givens_rotation_left(a11, a21, c, s);
+      Apply_Givens_rotation_left(a12, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_left(a13, a23, c, s);
+
+      if (k > 1) Apply_Givens_rotation_right(a01, a02, c, s);
+      Apply_Givens_rotation_right(a11, a12, c, s);
+      Apply_Givens_rotation_right(a21, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_right(a31, a32, c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+
+      // Write the rotated entries back. Diagonal values keep only their
+      // real part; each subdiagonal value is the average of the lower
+      // entry and the conjugate of the matching upper entry.
+      diag[k-1] = gmm::real(a11);
+      diag[k] = gmm::real(a22);
+      if (k > 1) sdiag[k-2] = (gmm::conj(a01) + a10) / R(2);
+      sdiag[k-1] = (gmm::conj(a12) + a21) / R(2);
+
+      // Inputs of the next rotation.
+      x = sdiag[k-1]; z = (gmm::conj(a13) + a31) / R(2);
+
+      // Slide the local window one step down the diagonal.
+      a01 = a12; a02 = a13;
+      a10 = a21; a11 = a22; a12 = a23; a13 = T(0);
+      a20 = a31; a21 = a32; a31 = T(0);
+
+      if (k < n-1) {
+        sdiag[k] = (gmm::conj(a23) + a32) / R(2);
+        a22 = T(diag[k+1]); a32 = sdiag[k+1]; a23 = gmm::conj(a32);
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm for symmetric or hermitian matrices.         */
+  /* ********************************************************************* */
+
+  // implicit QR method for real square symmetric matrices or complex
+  // hermitian matrices.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // complexity about 4n^3/3, 9n^3 if eigenvectors are computed
+  // Variant that keeps the whole tridiagonalised matrix in a dense n x n
+  // work matrix and runs the matrix overload of symmetric_Wilkinson_qr_step
+  // on sub-blocks of it. eigval_ and eigvect_ are const references that are
+  // cast back to non-const and written to; A is only read.
+  // When compvect is false, eigvect is not initialised and an empty column
+  // range of it is passed to the QR step.
+  template <typename MAT1, typename VECT, typename MAT2>
+  void symmetric_qr_algorithm_old(const MAT1 &A, const VECT &eigval_,
+                              const MAT2 &eigvect_,
+                              tol_type_for_qr tol = default_tol_for_qr,
+                              bool compvect = true) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+    size_type n = mat_nrows(A), q = 0, p, ite = 0;
+    dense_matrix<T> Tri(n, n);
+    gmm::copy(A, Tri);
+
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+
+    symmetric_qr_stop_criterion(Tri, p, q, tol);
+
+    while (q < n) {
+
+      // SUBI selects the block of Tri handed to the QR step; SUBK is the
+      // matching column range of eigvect (emptied when compvect is false).
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q);
+      if (!compvect) SUBK = sub_interval(0,0);
+      symmetric_Wilkinson_qr_step(sub_matrix(Tri, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      // Note: the in-loop stopping test uses twice the initial tolerance.
+      symmetric_qr_stop_criterion(Tri, p, q, tol*R(2));
+      ++ite;
+      // Give up once the number of sweeps reaches n*100.
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed. Probably, your matrix"
+                  " is not real symmetric or complex hermitian");
+    }
+
+    extract_eig(Tri, eigval, tol);
+  }
+
+  // Eigenvalue computation for a matrix A via tridiagonalisation followed
+  // by repeated QR steps on the diagonal/subdiagonal vectors.
+  //  - eigval_  : receives the n values of the final diagonal (written
+  //               through a const_cast).
+  //  - eigvect_ : set to the identity first and then updated by the
+  //               tridiagonalisation and QR steps when compvect is true
+  //               (written through a const_cast).
+  //  - tol      : tolerance forwarded to symmetric_qr_stop_criterion.
+  // n == 0 returns immediately; n == 1 stores real(A(0,0)) in eigval[0].
+  template <typename MAT1, typename VECT, typename MAT2>
+  void symmetric_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                              const MAT2 &eigvect_,
+                              tol_type_for_qr tol = default_tol_for_qr,
+                              bool compvect = true) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT2 &eigvect = const_cast<MAT2 &>(eigvect_);
+    typedef typename linalg_traits<MAT1>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A), q = 0, p, ite = 0;
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+    if (n == 0) return;
+    if (n == 1) { eigval[0]=gmm::real(A(0,0)); return; }
+    dense_matrix<T> Tri(n, n);
+    gmm::copy(A, Tri);
+
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+
+    // Extract the real diagonal and the subdiagonal of Tri. sdiag has n
+    // entries but only the first n-1 are filled here.
+    std::vector<R> diag(n);
+    std::vector<T> sdiag(n);
+    for (size_type i = 0; i < n; ++i)
+      { diag[i] = gmm::real(Tri(i, i)); if (i+1 < n) sdiag[i] = Tri(i+1, i); }
+
+    symmetric_qr_stop_criterion(diag, sdiag, p, q, tol);
+
+    while (q < n) {
+      // SUBI selects the range of diag/sdiag handed to the QR step; SUBK is
+      // the matching column range of eigvect (emptied when compvect is
+      // false).
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q);
+      if (!compvect) SUBK = sub_interval(0,0);
+
+      symmetric_Wilkinson_qr_step(sub_vector(diag, SUBI),
+                                  sub_vector(sdiag, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      // Note: the in-loop stopping test uses three times the initial
+      // tolerance.
+      symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3));
+      ++ite;
+      // Give up once the number of sweeps reaches n*100.
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed.");
+    }
+
+    gmm::copy(diag, eigval);
+  }
+
+
+  // Eigenvalues-only overload: forwards to the five-argument version with
+  // eigenvector computation switched off. An empty (0x0) dense matrix is
+  // supplied in place of the eigenvector argument.
+  template <typename MAT1, typename VECT>
+  void symmetric_qr_algorithm(const MAT1 &a, VECT &eigval,
+                              tol_type_for_qr tol = default_tol_for_qr) {
+    typedef typename linalg_traits<MAT1>::value_type value_type;
+    const bool compvect = false;
+    dense_matrix<value_type> no_eigvect(0, 0);
+    symmetric_qr_algorithm(a, eigval, no_eigvect, tol, compvect);
+  }
+
+
+}
+
+#endif
+
--- a/gmm/gmm_dense_sylvester.h
+++ b/gmm/gmm_dense_sylvester.h
@ -0,0 +1,174 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_dense_sylvester.h
+    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+    @date June 5, 2003.
+    @brief Sylvester equation solver.
+*/
+#ifndef GMM_DENSE_SYLVESTER_H
+#define GMM_DENSE_SYLVESTER_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*   Kronecker system matrix.                                            */
+  /* ********************************************************************* */
+  // Kronecker product: writes (init == true) or accumulates (init == false)
+  // m1 (x) m2 into m3_.
+  //  - m1  : m x n matrix.
+  //  - m2  : l x k matrix.
+  //  - m3_ : (m*l) x (n*k) destination; taken by const reference and cast
+  //          back to non-const so that temporaries can be passed. Block
+  //          (i,j) of size l x k receives m1(i,j) * m2.
+  // The dimension check is a GMM_ASSERT2.
+  template <typename MAT1, typename MAT2, typename MAT3>
+  void kron(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3_,
+            bool init = true) {
+    MAT3 &m3 = const_cast<MAT3 &>(m3_);
+    const size_type m = mat_nrows(m1), n = mat_ncols(m1);
+    const size_type l = mat_nrows(m2), k = mat_ncols(m2);
+
+    GMM_ASSERT2(mat_nrows(m3) == m*l && mat_ncols(m3) == n*k,
+                "dimensions mismatch");
+
+    for (size_type i = 0; i < m; ++i) {
+      // The column index runs over the n columns of m1. The previous bound
+      // (m) was only correct for square m1.
+      for (size_type j = 0; j < n; ++j) {
+        if (init)
+          gmm::copy(gmm::scaled(m2, m1(i,j)),
+                    gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                    sub_interval(k*j, k)));
+        else
+          gmm::add(gmm::scaled(m2, m1(i,j)),
+                   gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                   sub_interval(k*j, k)));
+      }
+    }
+  }
+	
+
+  /* ********************************************************************* */
+  /*   Copy a matrix into a vector.                                        */
+  /* ********************************************************************* */
+
+  // Stacks the columns of the m x n matrix A into the vector v of size m*n,
+  // so that A(i,j) ends up at v[j*m + i]. The third (tag) argument selects
+  // the traversal matching the storage orientation of A.
+
+  // Column-oriented storage: copy one column at a time into a contiguous
+  // slice of v.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(mat_col(A, i), sub_vector(v, sub_interval(i*m, m)));
+  }
+
+  // Both orientations available: use the column traversal.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_and_col)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  // Both orientations available: use the column traversal.
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_and_row)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  // Row-oriented storage: row i is scattered to the strided positions
+  // i, i+m, i+2m, ... of v (n entries, step m).
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(mat_row(A, i), sub_vector(v, sub_slice(i, n, m)));
+  }
+
+  // Entry point: dispatches on the sub_orientation of MAT. v_ is taken by
+  // const reference and cast back to non-const; it IS written to.
+  template <typename MAT, typename VECT> inline
+  void colmatrix_to_vector(const MAT &A, const VECT &v_) {
+    VECT &v = const_cast<VECT &>(v_);
+    colmatrix_to_vector(A, v, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+
+  /* ********************************************************************* */
+  /*   Copy a vector into a matrix.                                        */
+  /* ********************************************************************* */
+
+  // Inverse of the column-stacking operation: fills the m x n matrix A from
+  // the vector v of size m*n, with A(i,j) = v[j*m + i]. The third (tag)
+  // argument selects the traversal matching the storage orientation of A.
+
+  // Column-oriented storage: each column is a contiguous slice of v.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(sub_vector(v, sub_interval(i*m, m)), mat_col(A, i));
+  }
+
+  // Both orientations available: use the column traversal.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_and_col)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  // Both orientations available: use the column traversal.
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_and_row)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  // Row-oriented storage: row i is gathered from the strided positions
+  // i, i+m, i+2m, ... of v (n entries, step m).
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(sub_vector(v, sub_slice(i, n, m)), mat_row(A, i));
+  }
+
+  // Entry point: dispatches on the sub_orientation of MAT. A_ is taken by
+  // const reference and cast back to non-const; it IS written to.
+  template <typename MAT, typename VECT> inline
+  void vector_to_colmatrix(const VECT &v, const MAT &A_) {
+    MAT &A = const_cast<MAT &>(A_);
+    vector_to_colmatrix(v, A, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+  /* ********************************************************************* */
+  /*   Solve sylvester equation.                                           */
+  /* ********************************************************************* */
+
+  // very prohibitive solver, to be replaced ... 
+  // Solves the Sylvester equation  m1 * X + X * m2 = m3  for X and stores
+  // the solution in m4_.
+  //  - m1  : m x m matrix.
+  //  - m2  : l x l matrix.
+  //  - m3  : m x l right-hand side.
+  //  - m4_ : m x l result; taken by const reference and cast back to
+  //          non-const so that temporaries can be passed.
+  // The equation is rewritten as the dense (m*l) x (m*l) linear system
+  //   (I_l (x) m1 + transposed(m2) (x) I_m) vec(X) = vec(m3)
+  // and solved with lu_solve, which makes the cost grow very quickly with
+  // m*l.
+  // NOTE(review): lu_solve is not declared by "gmm_kernel.h" as far as this
+  // header shows; presumably the dense LU header must be included by the
+  // user before this one - confirm.
+  template <typename MAT1, typename MAT2, typename MAT3, typename MAT4 >
+  void sylvester(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3,
+                 const MAT4 &m4_) {
+    // Scalar type of the work matrices (the original referred to an
+    // undeclared type name here).
+    typedef typename linalg_traits<MAT1>::value_type T;
+
+    // The reference type must match the cast target, MAT4.
+    MAT4 &m4 = const_cast<MAT4 &>(m4_);
+    size_type m = mat_nrows(m1), n = mat_ncols(m1);
+    size_type l = mat_nrows(m2), k = mat_ncols(m2);
+
+    GMM_ASSERT2(m == n && l == k && m == mat_nrows(m3) &&
+                l == mat_ncols(m3) && m == mat_nrows(m4) && l == mat_ncols(m4),
+                "dimensions mismatch");
+
+    gmm::dense_matrix<T> akronb(m*l, m*l);
+    gmm::dense_matrix<T> idm(m, m), idl(l,l);
+    gmm::copy(identity_matrix(), idm);
+    gmm::copy(identity_matrix(), idl);
+    std::vector<T> x(m*l), c(m*l);
+
+    // akronb = I_l (x) m1, then akronb += transposed(m2) (x) I_m.
+    kron(idl, m1, akronb);
+    kron(gmm::transposed(m2), idm, akronb, false);
+
+    colmatrix_to_vector(m3, c);
+    lu_solve(akronb, c, x);
+    vector_to_colmatrix(x, m4);
+
+  }
+}
+
+#endif
+
--- a/gmm/gmm_domain_decomp.h
+++ b/gmm/gmm_domain_decomp.h
@ -0,0 +1,165 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_domain_decomp.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date May 21, 2004.
+    @brief Domain decomposition.
+*/
+#ifndef GMM_DOMAIN_DECOMP_H__
+#define GMM_DOMAIN_DECOMP_H__
+
+#include "gmm_kernel.h"
+#include <map>
+
+
+namespace gmm {
+
+  /** This function separates into small boxes of size msize with a ratio
+   * of overlap (in [0,1[) a set of points. The result is given into a
+   * vector of sparse matrices vB.
+   */
+  // Parameters:
+  //  - pts     : the points; pts[i][k] is coordinate k of point i. The
+  //              dimension is taken from pts[0].size().
+  //  - msize   : edge length of the regular boxes.
+  //              NOTE(review): used as a divisor, so presumably must be
+  //              > 0 - confirm.
+  //  - overlap : overlap ratio between neighbouring boxes.
+  //  - vB      : output, one matrix per non-empty sub-domain. Matrix i has
+  //              pts.size() rows and one column per point of sub-domain i,
+  //              with a single 1 in each column at the row of that point.
+  // vB is emptied when pts is empty or the points have dimension 0.
+  template <typename Matrix, typename Point>
+  void rudimentary_regular_decomposition(std::vector<Point> pts,
+                                         double msize,
+                                         double overlap,
+                                         std::vector<Matrix> &vB) {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef abstract_null_type void_type;
+    typedef std::map<size_type, void_type> map_type;
+
+    size_type nbpts = pts.size();
+    if (!nbpts || pts[0].size() == 0) { vB.resize(0); return; }
+    int dim = int(pts[0].size());
+
+    // computation of the global box and the number of sub-domains
+    Point pmin = pts[0], pmax = pts[0];
+    for (size_type i = 1; i < nbpts; ++i)
+      for (int k = 0; k < dim; ++k) {
+        pmin[k] = std::min(pmin[k], pts[i][k]);
+        pmax[k] = std::max(pmax[k], pts[i][k]);
+      }
+
+    // nbsub[k]: number of boxes along direction k; mult[k]: stride of
+    // direction k in the flattened sub-domain index.
+    std::vector<size_type> nbsub(dim), mult(dim);
+    std::vector<int> pts1(dim), pts2(dim);
+    size_type nbtotsub = 1;
+    for (int k = 0; k < dim; ++k) {
+      nbsub[k] = size_type((pmax[k] - pmin[k]) / msize)+1;
+      mult[k] = nbtotsub; nbtotsub *= nbsub[k];
+    }
+
+    std::vector<map_type> subs(nbtotsub);
+    // distribution of the points among the sub-domains: for every point,
+    // the 3^dim boxes around its own box are tested (ns starts one box
+    // below and na counts the offset 0..2 in each direction).
+    std::vector<size_type> ns(dim), na(dim), nu(dim);
+    for (size_type i = 0; i < nbpts; ++i) {
+      for (int k = 0; k < dim; ++k) {
+        double a = (pts[i][k] - pmin[k]) / msize;
+        // size_type(a) - 1 wraps around when a < 1; such candidates are
+        // rejected by the ns[k] >= nbsub[k] test below.
+        ns[k] = size_type(a) - 1; na[k] = 0;
+        pts1[k] = int(a + overlap); pts2[k] = int(ceil(a-1.0-overlap));
+      }
+      size_type sum = 0;
+      do {
+        bool ok = true;
+        for (int k = 0; k < dim; ++k)
+          if ((ns[k] >= nbsub[k]) || (pts1[k] < int(ns[k]))
+              || (pts2[k] > int(ns[k]))) { ok = false; break; }
+        if (ok) {
+          size_type ind = ns[0];
+          for (int k=1; k < dim; ++k) ind += ns[k]*mult[k];
+          subs[ind][i] = void_type();
+        }
+        // odometer-style increment of the offsets; sum is the total of
+        // na[] and returns to 0 once every combination has been visited.
+        for (int k = 0; k < dim; ++k) {
+          if (na[k] < 2) { na[k]++; ns[k]++; ++sum; break; }
+          na[k] = 0; ns[k] -= 2; sum -= 2;
+        }
+      } while (sum);
+    }
+    // delete too small domains.
+    size_type nbmaxinsub = 0;
+    for (size_type i = 0; i < nbtotsub; ++i)
+      nbmaxinsub = std::max(nbmaxinsub, subs[i].size());
+
+    // ns is now the multi-index of sub-domain i, advanced together with i.
+    std::fill(ns.begin(), ns.end(), size_type(0));
+    for (size_type i = 0; i < nbtotsub; ++i) {
+      if (subs[i].size() > 0 && subs[i].size() < nbmaxinsub / 10) {
+
+        for (int k = 0; k < dim; ++k) nu[k] = ns[k];
+        size_type nbmax = 0, imax = 0;
+
+        // Look for the most populated neighbour sharing a face with
+        // sub-domain i. The neighbour index must be built from nu (the
+        // original used ns, so it only ever inspected sub-domain i itself
+        // and never merged anything), and nu[l] must be restored before
+        // moving to the next direction.
+        for (int l = 0; l < dim; ++l) {
+          for (int m = 0; m < 2; ++m) {
+            // m == 0: neighbour at ns[l]-1, m == 1: neighbour at ns[l]+1.
+            // For ns[l] == 0 the subtraction wraps around and the result
+            // is rejected by the range test.
+            nu[l] = (m == 0) ? ns[l] - 1 : ns[l] + 1;
+            if (nu[l] >= nbsub[l]) continue;
+            size_type ind = nu[0];
+            for (int k=1; k < dim; ++k) ind += nu[k]*mult[k];
+            if (subs[ind].size() > nbmax)
+              { nbmax = subs[ind].size(); imax = ind; }
+          }
+          nu[l] = ns[l];
+        }
+
+        // Merge into the neighbour only when it holds more points.
+        if (nbmax > subs[i].size()) {
+          for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it)
+            subs[imax][it->first] = void_type();
+          subs[i].clear();
+        }
+      }
+      for (int k = 0; k < dim; ++k)
+        { ns[k]++; if (ns[k] < nbsub[k]) break; ns[k] = 0; }
+    }
+
+    // delete empty domains.
+    size_type effnb = 0;
+    for (size_type i = 0; i < nbtotsub; ++i) {
+      if (subs[i].size() > 0)
+        { if (i != effnb) std::swap(subs[i], subs[effnb]); ++effnb; }
+    }
+
+    // build matrices
+    subs.resize(effnb);
+    vB.resize(effnb);
+    for (size_type i = 0; i < effnb; ++i) {
+      clear(vB[i]); resize(vB[i], nbpts, subs[i].size());
+      size_type j = 0;
+      for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it, ++j)
+        vB[i](it->first, j) = value_type(1);
+    }
+  }
+  
+
+}
+
+
+#endif
--- a/gmm/gmm_except.h
+++ b/gmm/gmm_except.h
@ -0,0 +1,328 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/** @file gmm_except.h 
+    @author Yves Renard <Yves.Renard@insa-lyon.fr>
+    @author Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+    @date September 01, 2002.
+    @brief Definition of basic exceptions.
+*/
+
+#ifndef GMM_EXCEPT_H__
+#define GMM_EXCEPT_H__
+
+#include "gmm_std.h"
+
+//provides external implementation of gmm_exception and logging.
+#ifndef EXTERNAL_EXCEPT_
+
+namespace gmm {
+
+/* *********************************************************************** */
+/*	GetFEM++ generic errors.                     			   */
+/* *********************************************************************** */
+
+  // Exception type used by short_error_throw and by the GMM_ASSERT*
+  // macros; it adds nothing to std::logic_error beyond a distinct type.
+  class gmm_error : public std::logic_error {
+  public:
+    gmm_error(const std::string &what_arg)
+      : std::logic_error(what_arg)
+    {}
+  };
+
+#ifdef GETFEM_HAVE_PRETTY_FUNCTION
+#  define GMM_PRETTY_FUNCTION __PRETTY_FUNCTION__
+#else 
+#  define GMM_PRETTY_FUNCTION ""
+#endif
+
+  // Errors : GMM_THROW should not be used on its own.
+  //          GMM_ASSERT1 : Non-maskable errors. Typically for input/output
+  //               and when the test does not significantly reduce the
+  //               performance.
+  //          GMM_ASSERT2 : All tests which are potentially performance
+  //               consuming. Not hidden by default. Hidden when NDEBUG is
+  //               defined.
+  //          GMM_ASSERT3 : For internal checks. Hidden when NDEBUG or
+  //               GMM_FULL_NDEBUG is defined.
+  // __EXCEPTIONS is defined by gcc, _CPPUNWIND is defined by visual c++
+#if defined(__EXCEPTIONS) || defined(_CPPUNWIND)
+  // Exceptions-enabled variant: formats
+  //   "Error in <file>, line <line> <func>: \n<errormsg>"
+  // (followed by the terminator written by std::ends) and throws it as a
+  // gmm::gmm_error.
+  inline void short_error_throw(const char *file, int line, const char *func,
+                                const char *errormsg) {
+    std::stringstream text;
+    text << "Error in " << file << ", line " << line << " " << func;
+    text << ": \n" << errormsg << std::ends;
+    throw gmm::gmm_error(text.str());
+  }
+# define GMM_THROW_(type, errormsg) {					\
+    std::stringstream msg__;						\
+    msg__ << "Error in " << __FILE__ << ", line "                       \
+	  << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n"		\
+	  << errormsg << std::ends;				       	\
+    throw (type)(msg__.str());						\
+  }
+#else
+#ifndef _MSC_VER
+# define abort_no_return() ::abort()
+#else
+// apparently ::abort() on windows is not declared with __declspec(noreturn) so the compiler spits a lot of warnings when abort is used.
+# define abort_no_return() { assert("GMM ABORT"==0); throw "GMM ABORT"; }
+#endif
+
+  // Variant used when exceptions are disabled: formats
+  //   "Error in <file>, line <line> <func>: \n<errormsg>"
+  // (followed by the terminator written by std::ends), prints it on
+  // std::cerr and then invokes abort_no_return().
+  inline void short_error_throw(const char *file, int line, const char *func,
+                                const char *errormsg) {
+    std::stringstream text;
+    text << "Error in " << file << ", line " << line << " " << func;
+    text << ": \n" << errormsg << std::ends;
+    std::cerr << text.str() << std::endl;
+    abort_no_return();
+  }
+
+# define GMM_THROW_(type, errormsg) {					\
+    std::stringstream msg__;						\
+    msg__ << "Error in " << __FILE__ << ", line "	       		\
+	  << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n"		\
+	  << errormsg;					        	\
+    std::cerr << msg__.str() << std::endl;                              \
+    abort_no_return();							\
+  }
+#endif
+
+# define GMM_ASSERT1(test, errormsg)		        		\
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+
+  inline void GMM_THROW() {}
+#define GMM_THROW(a, b) { GMM_THROW_(a,b); gmm::GMM_THROW(); }
+
+#if defined(NDEBUG)
+# define GMM_ASSERT2(test, errormsg) {}
+# define GMM_ASSERT3(test, errormsg) {}
+#elif !defined(GMM_FULL_NDEBUG)
+# define GMM_ASSERT2(test, errormsg)				        \
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+# define GMM_ASSERT3(test, errormsg)				        \
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+#else
+# define GMM_ASSERT2(test, errormsg)          				\
+  { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); }
+# define GMM_ASSERT3(test, errormsg)
+#endif
+
+/* *********************************************************************** */
+/*	GetFEM++ warnings.                         			   */
+/* *********************************************************************** */
+
+  // This allows to dynamically hide warnings
+  // Holder of the run-time warning level (initially 3).
+  // level(l) stores l and returns it, except for the sentinel l == -2
+  // (the default argument), which only returns the current value.
+  struct warning_level {
+    static int level(int l = -2) {
+      static int level_ = 3;
+      if (l != -2) level_ = l;
+      return level_;
+    }
+  };
+
+  // Sets the run-time warning level; negative values are clamped to 0.
+  inline void set_warning_level(int l) {
+    const int clamped = std::max(0, l);
+    warning_level::level(clamped);
+  }
+  // Returns the current run-time warning level without changing it
+  // (-2 is the "query only" sentinel of warning_level::level).
+  inline int get_warning_level() {
+    const int query_only = -2;
+    return warning_level::level(query_only);
+  }
+
+  // This allows not to compile some Warnings
+#ifndef GMM_WARNING_LEVEL
+# define GMM_WARNING_LEVEL 4
+#endif
+
+  // Warning levels : 0 always printed
+  //                  1 very important : specify a possible error in the code.
+  //                  2 important : specify a default of optimization for inst.
+  //                  3 remark
+  //                  4 ignored by default.
+
+#define GMM_WARNING_MSG(level_, thestr)  {			             \
+      std::stringstream msg__;                                               \
+      msg__ << "Level " << level_ << " Warning in " << __FILE__ << ", line " \
+            << __LINE__ << ": " << thestr;		                     \
+       std::cerr << msg__.str() << std::endl;                                \
+    }
+
+  // GMM_WARNINGn(thestr) emits a level-n warning through GMM_WARNING_MSG.
+  // Level 0 is unconditional. For n = 1..4 the macro expands to an empty
+  // block unless GMM_WARNING_LEVEL > n-1 at compile time; when compiled in,
+  // the message is printed only if n <= gmm::warning_level::level() at
+  // run time.
+#define GMM_WARNING0(thestr) GMM_WARNING_MSG(0, thestr)
+
+  // Level 1: compiled in when GMM_WARNING_LEVEL > 0.
+#if GMM_WARNING_LEVEL > 0
+# define GMM_WARNING1(thestr)                                           \
+  { if (1 <= gmm::warning_level::level()) GMM_WARNING_MSG(1, thestr) }
+#else
+# define GMM_WARNING1(thestr) {}
+#endif
+
+  // Level 2: compiled in when GMM_WARNING_LEVEL > 1.
+#if GMM_WARNING_LEVEL > 1
+# define GMM_WARNING2(thestr)                                           \
+  { if (2 <= gmm::warning_level::level()) GMM_WARNING_MSG(2, thestr) } 
+#else
+# define GMM_WARNING2(thestr) {}
+#endif
+
+  // Level 3: compiled in when GMM_WARNING_LEVEL > 2.
+#if GMM_WARNING_LEVEL > 2
+# define GMM_WARNING3(thestr)                                           \
+  { if (3 <= gmm::warning_level::level()) GMM_WARNING_MSG(3, thestr) } 
+#else
+# define GMM_WARNING3(thestr) {}
+#endif
+
+  // Level 4: compiled in when GMM_WARNING_LEVEL > 3.
+#if GMM_WARNING_LEVEL > 3
+# define GMM_WARNING4(thestr)                                           \
+  { if (4 <= gmm::warning_level::level()) GMM_WARNING_MSG(4, thestr) } 
+#else
+# define GMM_WARNING4(thestr) {}
+#endif
+
+/* *********************************************************************** */
+/*	GetFEM++ traces.                         			   */
+/* *********************************************************************** */
+
+  // This allows to dynamically hide traces
+  /** Holder for the run-time trace threshold, kept in a function-local
+      static inside level(). */
+  struct traces_level {
+    /** Reads or updates the stored threshold.
+        @param l new threshold; the default -2 is a sentinel meaning
+               "leave the stored value unchanged".
+        @return the threshold in effect after the call (3 until changed). */
+    static int level(int l = -2) {
+      static int level_ = 3;
+      if (l != -2) level_ = l;
+      return level_;
+    }
+  };
+
+  /** Stores l as the run-time trace threshold; a negative l is raised
+      to 0 before being stored. */
+  inline void set_traces_level(int l) {
+    const int clamped = (l < 0) ? 0 : l;
+    traces_level::level(clamped);
+  }
+
+  // This allows some traces to be left out at compile time
+#ifndef GMM_TRACES_LEVEL
+# define GMM_TRACES_LEVEL 4
+#endif
+
+  // Traces levels : 0 always printed
+  //                 1 Susceptible to occur once in a program.
+  //                 2 Susceptible to occur occasionally in a program (10).
+  //                 3 Susceptible to occur often (100).
+  //                 4 Susceptible to occur very often (>1000).
+
+  // GMM_TRACE_MSG_MPI is defined empty here, so in GMM_TRACE_MSG it leaves
+  // the inner braces as a plain block (its trailing comment says it is a
+  // hook for the parallelized version).
+  // GMM_TRACE_MSG builds "Trace <level_> in <file>, line <line>: <thestr>"
+  // in a temporary std::stringstream and writes it to std::cout followed by
+  // std::endl. thestr is pasted after a "<<", so it may be a "<<" chain.
+#define GMM_TRACE_MSG_MPI     // for Parallelized version
+#define GMM_TRACE_MSG(level_, thestr)  {			       \
+    GMM_TRACE_MSG_MPI {						       \
+      std::stringstream msg__;                                         \
+      msg__ << "Trace " << level_ << " in " << __FILE__ << ", line "   \
+            << __LINE__ << ": " << thestr;        		       \
+      std::cout << msg__.str() << std::endl;			       \
+    }                                                                  \
+  }        
+
+  // GMM_TRACEn(thestr) emits a level-n trace through GMM_TRACE_MSG.
+  // Level 0 is unconditional. For n = 1..4 the macro expands to an empty
+  // block unless GMM_TRACES_LEVEL > n-1 at compile time; when compiled in,
+  // the message is printed only if n <= gmm::traces_level::level() at
+  // run time.
+#define GMM_TRACE0(thestr) GMM_TRACE_MSG(0, thestr)
+
+  // Level 1: compiled in when GMM_TRACES_LEVEL > 0.
+#if GMM_TRACES_LEVEL > 0
+# define GMM_TRACE1(thestr)						\
+  { if (1 <= gmm::traces_level::level()) GMM_TRACE_MSG(1, thestr) }
+#else
+# define GMM_TRACE1(thestr) {}
+#endif
+  
+  // Level 2: compiled in when GMM_TRACES_LEVEL > 1.
+#if GMM_TRACES_LEVEL > 1
+# define GMM_TRACE2(thestr)						\
+  { if (2 <= gmm::traces_level::level()) GMM_TRACE_MSG(2, thestr) } 
+#else
+# define GMM_TRACE2(thestr) {}
+#endif
+  
+  // Level 3: compiled in when GMM_TRACES_LEVEL > 2.
+#if GMM_TRACES_LEVEL > 2
+# define GMM_TRACE3(thestr)						\
+  { if (3 <= gmm::traces_level::level()) GMM_TRACE_MSG(3, thestr) } 
+#else
+# define GMM_TRACE3(thestr) {}
+#endif
+  
+  // Level 4: compiled in when GMM_TRACES_LEVEL > 3.
+#if GMM_TRACES_LEVEL > 3
+# define GMM_TRACE4(thestr)						\
+  { if (4 <= gmm::traces_level::level()) GMM_TRACE_MSG(4, thestr) } 
+#else
+# define GMM_TRACE4(thestr) {}
+#endif
+  
+  
+  /* ********************************************************************* */
+  /*    Definitions for compatibility with old versions.        	   */
+  /* ********************************************************************* */ 
+  
+  // Chain of catch clauses meant to be written directly after a try block.
+  // Every handler prints a banner to std::cerr (followed by e.what() for
+  // std::logic_error and std::runtime_error) and then calls exit(1).
+  // std::logic_error is caught by const reference, like all the other
+  // handlers: catching it by value would copy only the base sub-object of
+  // a derived exception, so an overridden what() would never be reported.
+#define GMM_STANDARD_CATCH_ERROR   catch(const std::logic_error &e)	\
+    {									\
+      std::cerr << "============================================\n";	\
+      std::cerr << "|      An error has been detected !!!      |\n";	\
+      std::cerr << "============================================\n";	\
+      std::cerr << e.what() << std::endl << std::endl;			\
+      exit(1);								\
+    }									\
+  catch(const std::runtime_error &e)					\
+    {									\
+      std::cerr << "============================================\n";	\
+      std::cerr << "|      An error has been detected !!!      |\n";	\
+      std::cerr << "============================================\n";	\
+      std::cerr << e.what() << std::endl << std::endl;			\
+      exit(1);								\
+    }									\
+  catch(const std::bad_alloc &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad allocation has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_typeid &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad typeid     has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_exception &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  A bad exception  has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(const std::bad_cast &) {					\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|    A bad cast  has been detected !!!     |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }									\
+  catch(...) {								\
+    std::cerr << "============================================\n";	\
+    std::cerr << "|  An unknown error has been detected !!!  |\n";	\
+    std::cerr << "============================================\n";	\
+    exit(1);								\
+  }
+  //   catch(ios_base::failure) { 
+  //     std::cerr << "============================================\n";
+  //     std::cerr << "| A ios_base::failure has been detected !!!|\n";
+  //     std::cerr << "============================================\n";
+  //     exit(1);
+  //   } 
+
+  // GMM_SET_EXCEPTION_DEBUG: when compiling with GCC major version 4 or
+  // later, expands to a statement installing
+  // __gnu_cxx::__verbose_terminate_handler through std::set_terminate;
+  // with any other compiler it expands to nothing.
+#if defined(__GNUC__) && (__GNUC__ > 3)
+# define GMM_SET_EXCEPTION_DEBUG				\
+  std::set_terminate(__gnu_cxx::__verbose_terminate_handler);
+#else
+# define GMM_SET_EXCEPTION_DEBUG
+#endif
+
+}
+#else
+#include <external_except.h>
+#endif /* EXTERNAL_EXCEPT_*/
+#endif /* GMM_EXCEPT_H__ */
--- a/gmm/gmm_inoutput.h
+++ b/gmm/gmm_inoutput.h
--- a/gmm/gmm_interface.h
+++ b/gmm/gmm_interface.h
--- a/gmm/gmm_interface_bgeot.h
+++ b/gmm/gmm_interface_bgeot.h
@ -0,0 +1,83 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_interface_bgeot.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief interface for bgeot::small_vector
+*/
+#ifndef GMM_INTERFACE_BGEOT_H__
+#define GMM_INTERFACE_BGEOT_H__
+
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		                                         	 	   */
+  /*		Traits for bgeot objects                     		   */
+  /*		                                         		   */
+  /* ********************************************************************* */
+
+  /** Traits specialization declaring bgeot::small_vector<T> to gmm as a
+      non-reference, dense abstract vector whose iterators are the
+      container's own. */
+  template <typename T> struct linalg_traits<bgeot::small_vector<T> > {
+    /* Type description. */
+    typedef bgeot::small_vector<T> this_type;
+    typedef this_type origin_type;
+    typedef linalg_false is_reference;
+    typedef abstract_vector linalg_type;
+    typedef T value_type;
+    typedef T& reference;
+    typedef typename this_type::iterator iterator;
+    typedef typename this_type::const_iterator const_iterator;
+    typedef abstract_dense storage_type;
+    typedef linalg_true index_sorted;
+
+    /* Size and iteration: forwarded to the container. */
+    static size_type size(const this_type &v)
+    { return v.size(); }
+    static iterator begin(this_type &v)
+    { return v.begin(); }
+    static const_iterator begin(const this_type &v)
+    { return v.begin(); }
+    static iterator end(this_type &v)
+    { return v.end(); }
+    static const_iterator end(const this_type &v)
+    { return v.end(); }
+
+    /* The vector is its own origin. */
+    static origin_type* origin(this_type &v)
+    { return &v; }
+    static const origin_type* origin(const this_type &v)
+    { return &v; }
+
+    /* Zero-fill of a range; the origin pointer is not needed. */
+    static void clear(origin_type*, const iterator &it, const iterator &ite)
+    { std::fill(it, ite, value_type(0)); }
+    /* Zero-fill of the whole vector. */
+    static void do_clear(this_type &v)
+    { std::fill(v.begin(), v.end(), value_type(0)); }
+
+    /* Element access: index i relative to the begin iterator. */
+    static value_type access(const origin_type *, const const_iterator &it,
+                             const const_iterator &, size_type i)
+    { return it[i]; }
+    static reference access(origin_type *, const iterator &it,
+                            const iterator &, size_type i)
+    { return it[i]; }
+
+    static void resize(this_type &v, size_type n)
+    { v.resize(n); }
+  };
+
+}
+
+
+#endif //  GMM_INTERFACE_BGEOT_H__
--- a/gmm/gmm_iter.h
+++ b/gmm/gmm_iter.h
@ -0,0 +1,162 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_iter.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date February 10, 2003.
+   @brief Iteration object.
+*/
+
+#ifndef GMM_ITER_H__
+#define GMM_ITER_H__
+
+#include "gmm_kernel.h"
+#include <iomanip>
+
+namespace gmm {
+
+  /**  The Iteration object calculates whether the solution has reached the
+       desired accuracy, or whether the maximum number of iterations has
+       been reached.
+
+       The method finished() checks the convergence.  The first()
+       method is used to determine the first iteration of the loop.
+
+       Convergence means: the last residual is not NaN and is at most
+       rhsn * resmax. Divergence means: the residual is NaN, or the
+       iteration count has reached maxiter, or (after more than 4
+       iterations) the residual is at least rhsn * diverged_res.
+  */
+  class iteration {
+  protected :
+    double rhsn;       /* Right hand side norm.                            */
+    size_type maxiter; /* Max. number of iterations.                       */
+    int noise;         /* if noise > 0 iterations are printed.             */
+    double resmax;     /* maximum residual (relative to rhsn).             */
+    /* resminreach: smallest residual passed to converged(nr)/diverged(nr)
+       since init(). resadd: sum of res accumulated at each increment.     */
+    double resminreach, resadd;
+    double diverged_res; /* Threshold beyond which the iterative           */
+                       /* is considered to diverge.                        */
+    size_type nit;     /* iteration number.                                */
+    double res;        /* last computed residual.                          */
+    std::string name;  /* optional name of the method (printed by          */
+                       /* finished()).                                     */
+    bool written;      /* true once the current iteration was printed.     */
+    /* Optional user hook invoked at the start of every finished() call.   */
+    void (*callback)(const gmm::iteration&);
+  public :
+
+    /** Resets the per-run state: iteration count, last residual, printed
+        flag, minimum residual, residual sum and callback. The tolerances,
+        rhsn, maxiter, noise and name are left untouched. */
+    void init(void) { 
+      nit = 0; res = 0.0; written = false; 
+      resminreach = 1E200; resadd = 0.0; 
+      callback = 0;
+    }
+
+    /** @param r       maximum residual (stored in resmax).
+        @param noi     verbosity (stored in noise).
+        @param mit     maximum number of iterations (stored in maxiter).
+        @param div_res divergence threshold (stored in diverged_res).
+        rhsn starts at 1.0; the remaining state is set by init(). */
+    iteration(double r = 1.0E-8, int noi = 0, size_type mit = size_type(-1),
+              double div_res = 1E200)
+      : rhsn(1.0), maxiter(mit), noise(noi), resmax(r), diverged_res(div_res)
+    { init(); }
+
+    /* Advance to the next iteration: bump the counter, allow printing
+       again and add the last residual to resadd. Both forms return void. */
+    void  operator ++(int) {  nit++; written = false; resadd += res; }
+    void  operator ++() { (*this)++; }
+
+    /** True while no iteration has been counted yet. */
+    bool first(void) { return nit == 0; }
+
+    /* get/set the "noisyness" (verbosity) of the solvers */
+    int get_noisy(void) const { return noise; }
+    void set_noisy(int n) { noise = n; }
+    void reduce_noisy(void) { if (noise > 0) noise--; }
+
+    double get_resmax(void) const { return resmax; }
+    void set_resmax(double r) { resmax = r; }
+
+    double get_res() const { return res; }
+    /** Overwrites the stored residual: 0 when c is true, otherwise a value
+        just above the convergence bound rhsn * resmax. */
+    void enforce_converged(bool c = true)
+    { if (c) res = double(0); else res = rhsn * resmax + double(1); }
+
+    /* change the user-definable callback, called after each iteration */
+    void set_callback(void (*t)(const gmm::iteration&)) {
+      callback = t;
+    }
+
+    double get_diverged_residual(void) const { return diverged_res; }
+    void set_diverged_residual(double r) { diverged_res = r; }
+
+    size_type get_iteration(void) const { return nit; }
+    void set_iteration(size_type i) { nit = i; }
+    
+    size_type get_maxiter(void) const { return maxiter; }
+    void set_maxiter(size_type i) { maxiter = i; }
+
+    double get_rhsnorm(void) const { return rhsn; }
+    void set_rhsnorm(double r) { rhsn = r; }
+    
+    /** Tests the stored residual: not NaN and at most rhsn * resmax. */
+    bool converged(void) {
+      return !isnan(res) && res <= rhsn * resmax;
+    }
+    /** Stores |nr| as the last residual, updates the minimum residual
+        reached, then tests convergence. */
+    bool converged(double nr) { 
+      res = gmm::abs(nr);
+      resminreach = std::min(resminreach, res);
+      return converged();
+    }
+    /** Same as converged(double), using the 2-norm of v as residual. */
+    template <typename VECT> bool converged(const VECT &v)
+    { return converged(gmm::vect_norm2(v)); }
+    /** Tests the stored residual and counter: NaN residual, or iteration
+        limit reached, or residual at least rhsn * diverged_res once more
+        than 4 iterations have been counted. */
+    bool diverged(void) {
+      return isnan(res) || (nit>=maxiter)
+                        || (res>=rhsn*diverged_res && nit > 4);
+    }
+    /** Stores |nr| as the last residual, updates the minimum residual
+        reached, then tests divergence. */
+    bool diverged(double nr) {
+      res = gmm::abs(nr);
+      resminreach = std::min(resminreach, res);
+      return diverged();
+    }
+
+    /** Stopping test for residual nr.
+        The callback, if any, runs first, i.e. before res is updated from
+        nr. When noise > 0 and this iteration was not printed yet, one line
+        "<name> iter <nit> residual <|nr| / rhsn>" is written (the divisor
+        is 1 when rhsn is 0).
+        @return true when the iteration has converged or diverged. */
+    bool finished(double nr) {
+      if (callback) callback(*this);
+      if (noise > 0 && !written) {
+        double a = (rhsn == 0) ? 1.0 : rhsn;
+        converged(nr);
+        cout << name << " iter " << std::setw(3) << nit << " residual "
+             << std::setw(12) << gmm::abs(nr) / a;
+//         if (nit % 100 == 0 && nit > 0) {
+//           cout << " (residual min " << resminreach / a << " mean val "
+//                << resadd / (100.0 * a) << " )";
+//           resadd = 0.0;
+//         }
+        cout <<  endl;
+        written = true;
+      }
+      return (converged(nr) || diverged(nr));
+    }
+    /** Same as finished(double), using the 2-norm of v as residual. */
+    template <typename VECT> bool finished_vect(const VECT &v)
+    { return finished(double(gmm::vect_norm2(v))); }
+
+
+    void set_name(const std::string &n) { name = n; }
+    const std::string &get_name(void) const { return name; }
+
+  };
+
+}
+
+#endif /* GMM_ITER_H__ */
--- a/gmm/gmm_iter_solvers.h
+++ b/gmm/gmm_iter_solvers.h
@ -0,0 +1,111 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_iter_solvers.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Include standard gmm iterative solvers (cg, gmres, ...)
+*/
+#ifndef GMM_ITER_SOLVERS_H__
+#define GMM_ITER_SOLVERS_H__
+
+#include "gmm_iter.h"
+
+
+namespace gmm {
+
+  /** Mixed method to find a zero of a real function G, a priori
+   * between a and b.
+   *
+   * While G(a) and G(b) have the same sign, secant iterations are applied
+   * to move the pair (a, b). Once the sign changes, the bracket is refined
+   * by alternating one regula falsi step and one bisection ("dichotomie")
+   * step until its width is at most tol, or until the midpoint can no
+   * longer be distinguished from an end point.
+   *
+   * @param G   function object, called as G(x) with x of type T.
+   * @param a   first end point of the initial guess interval.
+   * @param b   second end point; a > b is accepted.
+   * @param tol requested width of the final bracket.
+   * @return the midpoint of the final interval.
+   */
+  template <typename FUNC, typename T>
+  T find_root(const FUNC &G, T a = T(0), T b = T(1),
+	      T tol = gmm::default_tol(T())) {
+    T c, Ga = G(a), Gb = G(b), Gc, d;
+    d = gmm::abs(b - a);
+#if 0
+    for (int i = 0; i < 4; i++) { /* secant iterations.                   */
+      if (d < tol) return (b + a) / 2.0;
+      c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c);
+      a = b; b = c; Ga = Gb; Gb = Gc;
+      d = gmm::abs(b - a);
+    }
+#endif
+    while (Ga * Gb > 0.0) { /* secant iterations.                         */
+      if (d < tol) return (b + a) / 2.0;
+      c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c);
+      a = b; b = c; Ga = Gb; Gb = Gc;
+      d = gmm::abs(b - a);
+    }
+    
+    /* Order the bracket so that a <= b. The function values have to be   */
+    /* exchanged together with the end points: otherwise Ga and Gb would  */
+    /* describe the wrong ends and the sign tests below could discard the */
+    /* half of the bracket that contains the root.                        */
+    if (a > b) { c = a; a = b; b = c; Gc = Ga; Ga = Gb; Gb = Gc; }
+    while (d > tol) {
+      c = b - (b - a) * (Gb / (Gb - Ga)); /* regula falsi.     */
+      if (c > b) c = b;
+      if (c < a) c = a; 
+      Gc = G(c);
+      if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; }
+      c = (b + a) / 2.0 ; Gc = G(c); /* Dichotomie.                       */
+      if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; }
+      /* Stop as well when the midpoint coincides with an end point.      */
+      d = gmm::abs(b - a); c = (b + a) / 2.0; if ((c == a) || (c == b)) d = 0.0;
+    }
+    return (b + a) / 2.0;
+  }
+  
+}
+
+#include "gmm_precond_diagonal.h"
+#include "gmm_precond_ildlt.h"
+#include "gmm_precond_ildltt.h"
+#include "gmm_precond_mr_approx_inverse.h"
+#include "gmm_precond_ilu.h"
+#include "gmm_precond_ilut.h"
+#include "gmm_precond_ilutp.h"
+
+
+
+#include "gmm_solver_cg.h"
+#include "gmm_solver_bicgstab.h"
+#include "gmm_solver_qmr.h"
+#include "gmm_solver_constrained_cg.h"
+#include "gmm_solver_Schwarz_additive.h"
+#include "gmm_modified_gram_schmidt.h"
+#include "gmm_tri_solve.h"
+#include "gmm_solver_gmres.h"
+#include "gmm_solver_bfgs.h"
+#include "gmm_least_squares_cg.h"
+
+// #include "gmm_solver_idgmres.h"
+
+
+
+#endif //  GMM_ITER_SOLVERS_H__
--- a/gmm/gmm_kernel.h
+++ b/gmm/gmm_kernel.h
@ -0,0 +1,55 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_kernel.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 15, 2003.
+   @brief Include the base gmm files.
+ */
+
+#ifndef GMM_KERNEL_H__
+#define GMM_KERNEL_H__
+
+#include "gmm_def.h"
+#include "gmm_blas.h"
+#include "gmm_real_part.h"
+#include "gmm_interface.h"
+#include "gmm_sub_vector.h"
+#include "gmm_sub_matrix.h"
+#include "gmm_vector_to_matrix.h"
+#include "gmm_vector.h"
+#include "gmm_matrix.h"
+#include "gmm_tri_solve.h"
+#include "gmm_blas_interface.h"
+#include "gmm_lapack_interface.h"
+
+
+#endif //  GMM_KERNEL_H__
--- a/gmm/gmm_lapack_interface.h
+++ b/gmm/gmm_lapack_interface.h
@ -0,0 +1,470 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_lapack_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 7, 2003.
+   @brief gmm interface for LAPACK
+*/
+
+#ifndef GMM_LAPACK_INTERFACE_H
+#define GMM_LAPACK_INTERFACE_H
+
+#include "gmm_blas_interface.h"
+#include "gmm_dense_lu.h"
+#include "gmm_dense_qr.h"
+
+
+#if defined(GMM_USES_LAPACK)
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Operations interfaced for T = float, double, std::complex<float>      */
+  /*    or std::complex<double> :                                          */
+  /*                                                                       */
+  /* lu_factor(dense_matrix<T>, std::vector<int>)                          */
+  /* lu_solve(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
+  /* lu_solve(dense_matrix<T>, std::vector<int>, std::vector<T>,           */
+  /*          std::vector<T>)                                              */
+  /* lu_solve_transposed(dense_matrix<T>, std::vector<int>, std::vector<T>,*/
+  /*          std::vector<T>)                                              */
+  /* lu_inverse(dense_matrix<T>)                                           */
+  /* lu_inverse(dense_matrix<T>, std::vector<int>, dense_matrix<T>)        */
+  /*                                                                       */
+  /* qr_factor(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)          */
+  /*                                                                       */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>)                */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>,                */
+  /*                       dense_matrix<T>)                                */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >) */
+  /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >, */
+  /*                       dense_matrix<T>)                                */
+  /*                                                                       */
+  /* geev_interface_right                                                  */
+  /* geev_interface_left                                                   */
+  /*                                                                       */
+  /* schur(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)              */
+  /*                                                                       */
+  /* svd(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>, std::vector<T>)*/
+  /* svd(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>,                */
+  /*     std::vector<std::complex<T> >)                                    */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /* LAPACK functions used.                                                */
+  /* ********************************************************************* */
+
+  // LAPACK entry points used by the interface macros below, one row per
+  // routine and one column per scalar prefix (s, d, c, z -- instantiated
+  // further down with BLAS_S, BLAS_D, BLAS_C and BLAS_Z respectively).
+  // Every routine is declared variadic, so the compiler checks neither the
+  // number nor the types of the arguments at the call sites in this file;
+  // each call must be matched against the LAPACK documentation by hand.
+  extern "C" {
+    void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...);
+    void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...);
+    void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...);
+    void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...);
+    void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...);
+    void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...);
+    void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...);
+    void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...);
+    void sgeesx_(...); void dgeesx_(...); void cgeesx_(...); void zgeesx_(...);
+    void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...);
+  }
+
+  /* ********************************************************************* */
+  /* LU decomposition.                                                     */
+  /* ********************************************************************* */
+
+  // Generates lu_factor(A, ipvt) for one scalar type on top of ?getrf.
+  // A is factorised in place and the pivot indices are written through
+  // &ipvt[0]; ipvt is not resized here, so the caller has to size it.
+  // The LAPACK info value is returned converted to size_type. When A has
+  // no rows or no columns LAPACK is not called and 0 is returned.
+# define getrf_interface(lapack_name, base_type) inline                    \
+  size_type lu_factor(dense_matrix<base_type > &A, std::vector<int> &ipvt){\
+    GMMLAPACK_TRACE("getrf_interface");                                    \
+    int nrows = int(mat_nrows(A));                                         \
+    int ncols = int(mat_ncols(A));                                         \
+    int lda = nrows, info = 0;                                             \
+    if (nrows != 0 && ncols != 0)                                          \
+      lapack_name(&nrows, &ncols, &A(0,0), &lda, &ipvt[0], &info);         \
+    return size_type(info);                                                \
+  }
+
+  getrf_interface(sgetrf_, BLAS_S)
+  getrf_interface(dgetrf_, BLAS_D)
+  getrf_interface(cgetrf_, BLAS_C)
+  getrf_interface(zgetrf_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* LU solve.                                                             */
+  /* ********************************************************************* */
+
+  // Generates f_name(A, ipvt, x, b) on top of ?getrs: b is copied into x
+  // and x is then overwritten by LAPACK using the already factorised A and
+  // its pivots. `trans1` expands to the declaration of the transpose flag
+  // `t` ('N' for lu_solve, 'T' for lu_solve_transposed). The info value
+  // returned by LAPACK is not inspected. Nothing but the copy happens
+  // when A has no rows.
+# define getrs_interface(f_name, trans1, lapack_name, base_type) inline    \
+  void f_name(const dense_matrix<base_type > &A,                           \
+              const std::vector<int> &ipvt, std::vector<base_type > &x,    \
+              const std::vector<base_type > &b) {                          \
+    GMMLAPACK_TRACE("getrs_interface");                                    \
+    trans1;                                                                \
+    int order = int(mat_nrows(A));                                         \
+    int nrhs = 1, info;                                                    \
+    gmm::copy(b, x);                                                       \
+    if (order != 0)                                                        \
+      lapack_name(&t, &order, &nrhs, &(A(0,0)), &order, &ipvt[0],          \
+                  &x[0], &order, &info);                                   \
+  }
+  
+# define getrs_trans_n const char t = 'N'
+# define getrs_trans_t const char t = 'T'
+
+  getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
+  getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
+  getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
+  getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
+  getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* LU inverse.                                                           */
+  /* ********************************************************************* */
+
+  // Generates lu_inverse(LU, ipvt, A_) on top of ?getri.
+  //   LU    : factorisation produced by lu_factor (left untouched).
+  //   ipvt  : pivot indices belonging to LU.
+  //   A_    : receives the inverse; it is taken by const reference and
+  //           written through a const_cast, so it must already have the
+  //           same storage size as LU.
+  // The workspace is sized by a LAPACK query (lwork = -1) instead of a
+  // fixed 10000-element stack array: the fixed buffer was too small for
+  // n > 10000 (LAPACK then rejects the call and A_ silently keeps the LU
+  // copy) and put up to 160 KB on the stack for complex<double>.
+  // As before, the final info value is not turned into an error.
+  // Nothing is done when A_ has no rows.
+# define getri_interface(lapack_name, base_type) inline                    \
+  void lu_inverse(const dense_matrix<base_type > &LU,                      \
+       std::vector<int> &ipvt, const dense_matrix<base_type > &A_) {       \
+    GMMLAPACK_TRACE("getri_interface");                                    \
+    dense_matrix<base_type> &A                                             \
+      = const_cast<dense_matrix<base_type > &>(A_);                        \
+    int n = int(mat_nrows(A)), info(0), lwork(-1);                         \
+    base_type work1(0);                                                    \
+    if (n) {                                                               \
+      std::copy(LU.begin(), LU.end(), A.begin());                          \
+      /* Workspace query: optimal size comes back in work1. */             \
+      lapack_name(&n, &A(0,0), &n, &ipvt[0], &work1, &lwork, &info);       \
+      lwork = int(gmm::real(work1));                                       \
+      /* ?getri needs at least n elements; guard a failed query. */        \
+      if (lwork < n) lwork = n;                                            \
+      std::vector<base_type > work(lwork);                                 \
+      lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info);     \
+    }                                                                      \
+  }
+
+  getri_interface(sgetri_, BLAS_S)
+  getri_interface(dgetri_, BLAS_D)
+  getri_interface(cgetri_, BLAS_C)
+  getri_interface(zgetri_, BLAS_Z)
+
+
+  /* ********************************************************************* */
+  /* QR factorization.                                                     */
+  /* ********************************************************************* */
+
+  // Generates the in-place qr_factor(A) on top of ?geqrf. LAPACK is
+  // called twice: first with lwork = -1 so that the workspace size comes
+  // back in `wsize`, then with a workspace of that size. A non-zero info
+  // from the second call raises through GMM_ASSERT1. The tau vector is
+  // local and discarded. Matrices with no rows or no columns are left
+  // untouched.
+# define geqrf_interface(lapack_name1, base_type) inline                   \
+  void qr_factor(dense_matrix<base_type > &A){                             \
+    GMMLAPACK_TRACE("geqrf_interface");                                    \
+    int nrows = int(mat_nrows(A)), ncols = int(mat_ncols(A));              \
+    int info, lwork = -1;                                                  \
+    base_type wsize;                                                       \
+    if (nrows == 0 || ncols == 0) return;                                  \
+    std::vector<base_type > tau(ncols);                                    \
+    lapack_name1(&nrows, &ncols, &A(0,0), &nrows, &tau[0], &wsize,         \
+                 &lwork, &info);                                           \
+    lwork = int(gmm::real(wsize));                                         \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name1(&nrows, &ncols, &A(0,0), &nrows, &tau[0], &work[0],       \
+                 &lwork, &info);                                           \
+    GMM_ASSERT1(!info, "QR factorization failed");                         \
+  }
+    
+  geqrf_interface(sgeqrf_, BLAS_S)
+  geqrf_interface(dgeqrf_, BLAS_D)
+    // For complex values, Householder vectors are not the same as in
+    // gmm::lu_factor. Impossible to interface for the moment.
+    //  geqrf_interface(cgeqrf_, BLAS_C)
+    //  geqrf_interface(zgeqrf_, BLAS_Z)
+
+  // Generates qr_factor(A, Q, R): A is copied element-wise into Q, Q is
+  // factorised in place by lapack_name1 (?geqrf, workspace sized by an
+  // lwork = -1 query), the leading n x n upper triangle of the result is
+  // copied into R (entries below the diagonal are set to zero), and
+  // lapack_name2 (?orgqr / ?ungqr) then overwrites Q using the same tau
+  // and workspace. Q and R are not resized here.
+  // The copy into R advances q by m-n after each column and the last
+  // call passes n as the reflector count, so this presumably expects
+  // m >= n -- NOTE(review): confirm against callers.
+  // The info value of the final lapack_name2 call is not checked.
+  // When A has no rows or no columns, Q is cleared and R is untouched.
+# define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline    \
+  void qr_factor(const dense_matrix<base_type > &A,                        \
+       dense_matrix<base_type > &Q, dense_matrix<base_type > &R) {         \
+    GMMLAPACK_TRACE("geqrf_interface2");                                   \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1);     \
+    base_type work1;                                                       \
+    if (m && n) {							   \
+      std::copy(A.begin(), A.end(), Q.begin());				   \
+      std::vector<base_type > tau(n);                                      \
+      lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1  , &lwork, &info); \
+      lwork = int(gmm::real(work1));                                       \
+      std::vector<base_type > work(lwork);                                 \
+      lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \
+      GMM_ASSERT1(!info, "QR factorization failed");                       \
+      base_type *p = &R(0,0), *q = &Q(0,0);                                \
+      for (int j = 0; j < n; ++j, q += m-n)                                \
+        for (int i = 0; i < n; ++i, ++p, ++q)                              \
+          *p = (j < i) ? base_type(0) : *q;                                \
+      lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \
+    }                                                                      \
+    else gmm::clear(Q);                                                    \
+  }
+
+  geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S)
+  geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
+  geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
+  geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
+  
+  /* ********************************************************************* */
+  /* QR algorithm for eigenvalues search.                                  */
+  /* ********************************************************************* */
+
+  // Generates implicit_qr_algorithm(A, eigval_, Q, tol, compvect) for the
+  // real scalar types on top of ?gees.
+  //   A        : input matrix; it is copied into the local H, so A itself
+  //              is not modified.
+  //   eigval_  : output; taken by const reference and filled through a
+  //              const_cast by extract_eig(H, eigval_, tol).
+  //   Q        : passed to LAPACK as the vector output with leading
+  //              dimension n; it is not resized here.
+  //   compvect : selects jobvs = 'V' (true) or 'N' (false).
+  // No ordering is requested (sort = 'N') and the selection callback is a
+  // null pointer. LAPACK is called twice: once with lwork = -1 so that
+  // the workspace size comes back in work1, then with a workspace of that
+  // size. eigv1/eigv2 receive LAPACK's eigenvalue output but are not read
+  // afterwards, and rwork is what is handed over in the bwork slot; all
+  // three are std::vector<double> whatever base_type is.
+  // Returns immediately when A has no rows; raises through GMM_ASSERT1
+  // when the second call reports a non-zero info.
+# define gees_interface(lapack_name, base_type)                            \
+  template <typename VECT> inline void implicit_qr_algorithm(              \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q,                                      \
+         double tol=gmm::default_tol(base_type()), bool compvect = true) { \
+    GMMLAPACK_TRACE("gees_interface");                                     \
+    typedef bool (*L_fp)(...);  L_fp p = 0;                                \
+    int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;     \
+    if (!n) return;                                                        \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
+    char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
+    std::vector<double> rwork(n), eigv1(n), eigv2(n);                      \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
+                &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
+    lwork = int(gmm::real(work1));                                         \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
+                &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    extract_eig(H, const_cast<VECT &>(eigval_), tol);                      \
+  }
+
+  // Complex counterpart of gees_interface: same parameters and the same
+  // two-call (query, then compute) sequence, but with the complex ?gees
+  // argument list -- a single eigenvalue buffer (eigvv, 2*n doubles) and
+  // rwork passed in both of the last two work-array slots. The
+  // eigenvalues handed back to the caller are again extracted from H by
+  // extract_eig; eigvv is not read afterwards.
+# define gees_interface2(lapack_name, base_type)                           \
+  template <typename VECT> inline void implicit_qr_algorithm(              \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q,                                      \
+         double tol=gmm::default_tol(base_type()), bool compvect = true) { \
+    GMMLAPACK_TRACE("gees_interface2");                                    \
+    typedef bool (*L_fp)(...);  L_fp p = 0;                                \
+    int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;     \
+    if (!n) return;                                                        \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
+    char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
+    std::vector<double> rwork(n), eigvv(n*2);                              \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
+                &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \
+    lwork = int(gmm::real(work1));                                         \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
+                &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    extract_eig(H, const_cast<VECT &>(eigval_), tol);                      \
+  }
+
+  gees_interface(sgees_, BLAS_S)
+  gees_interface(dgees_, BLAS_D)
+  gees_interface2(cgees_, BLAS_C)
+  gees_interface2(zgees_, BLAS_Z)
+
+
+  // Job flags pasted into the geev wrappers through `jobv_ ## side`:
+  // `right` asks LAPACK for jobvr = 'V' only, `left` for jobvl = 'V' only.
+# define jobv_right char jobvl = 'N', jobvr = 'V';
+# define jobv_left char jobvl = 'V', jobvr = 'N';
+
+  // Generates geev_interface_<side>(A, eigval_, Q) for the real scalar
+  // types on top of ?geev.
+  //   A       : input matrix; copied into the local H, so A is unchanged.
+  //   eigval_ : output; taken by const reference and written through a
+  //             const_cast -- eigvr goes to its real part and eigvi to
+  //             its imaginary part.
+  //   Q       : the same buffer &Q(0,0) is passed for both vector
+  //             outputs; the job flags enable only one of them. Q is not
+  //             resized here.
+  // LAPACK is called twice: once with lwork = -1 so that the workspace
+  // size comes back in work1, then with a workspace of that size.
+  // Returns immediately when A has no rows; raises through GMM_ASSERT1
+  // (with the message "QR algorithm failed") on a non-zero info.
+# define geev_interface(lapack_name, base_type, side)                      \
+  template <typename VECT> inline void geev_interface_ ## side(             \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q) {                                    \
+    GMMLAPACK_TRACE("geev_interface");                                     \
+    int n = int(mat_nrows(A)), info, lwork(-1); base_type work1;           \
+    if (!n) return;                                                        \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
+    jobv_ ## side                                                          \
+    std::vector<base_type > eigvr(n), eigvi(n);                            \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
+                &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info);          \
+    lwork = int(gmm::real(work1));                                         \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
+                &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info);        \
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_)));         \
+    gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_)));         \
+  }
+
+  // Complex counterpart of geev_interface: a single eigenvalue buffer
+  // (eigv) is copied as a whole into eigval_, and an extra rwork array of
+  // 2*n elements of base_type::value_type is passed to LAPACK. The trace
+  // label is "geev_interface", the same as for the real variant.
+# define geev_interface2(lapack_name, base_type, side)                     \
+  template <typename VECT> inline void geev_interface_ ## side(            \
+         const dense_matrix<base_type > &A,  const VECT &eigval_,          \
+         dense_matrix<base_type > &Q) {                                    \
+    GMMLAPACK_TRACE("geev_interface");                                     \
+    int n = int(mat_nrows(A)), info, lwork(-1); base_type work1;           \
+    if (!n) return;                                                        \
+    dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
+    jobv_ ## side                                                          \
+    std::vector<base_type::value_type> rwork(2*n);                         \
+    std::vector<base_type> eigv(n);                                        \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
+                &Q(0,0), &n, &work1, &lwork, &rwork[0], &info);            \
+    lwork = int(gmm::real(work1));                                         \
+    std::vector<base_type > work(lwork);                                   \
+    lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
+                &Q(0,0), &n, &work[0], &lwork,  &rwork[0],  &info);        \
+    GMM_ASSERT1(!info, "QR algorithm failed");                             \
+    gmm::copy(eigv, const_cast<VECT &>(eigval_));                          \
+  }
+
+  geev_interface(sgeev_, BLAS_S, right)
+  geev_interface(dgeev_, BLAS_D, right)
+  geev_interface2(cgeev_, BLAS_C, right)
+  geev_interface2(zgeev_, BLAS_Z, right)
+
+  geev_interface(sgeev_, BLAS_S, left)
+  geev_interface(dgeev_, BLAS_D, left)
+  geev_interface2(cgeev_, BLAS_C, left)
+  geev_interface2(zgeev_, BLAS_Z, left) 
+    
+
+  /* ********************************************************************* */
+  /* SCHUR algorithm:                                                      */
+  /*  A = Q*S*(Q^T), with Q orthogonal and S upper quasi-triangular        */
+  /* ********************************************************************* */
+
+  // Generates schur(A, S, Q) for the real scalar types on top of ?geesx.
+  //   A : input; must be square (checked with GMM_ASSERT1). It is copied
+  //       into S and not modified by this function.
+  //   S : resized to n x n, receives the copy of A and is then
+  //       overwritten in place by LAPACK.
+  //   Q : resized to n x n and passed as the vector output (jobvs = 'V').
+  // No ordering and no condition estimates are requested (sort = 'N',
+  // sense = 'N'); the address of a local bool is passed in the
+  // selection-callback position. Unlike the other wrappers in this file
+  // the workspace is not queried: a fixed 8*n elements are allocated.
+  // The eigenvalue arrays wr/wi are local and discarded.
+  // NOTE(review): there is no n == 0 early return, so &S(0,0), &work[0],
+  // &wr[0] ... are evaluated on empty containers for an empty matrix.
+  // Raises through GMM_ASSERT1 when LAPACK reports a non-zero info.
+# define geesx_interface(lapack_name, base_type) inline                 \
+  void schur(dense_matrix<base_type> &A,                                \
+             dense_matrix<base_type> &S,                                \
+             dense_matrix<base_type> &Q) {                              \
+    GMMLAPACK_TRACE("geesx_interface");                                 \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A));                   \
+    GMM_ASSERT1(m == n, "Schur decomposition requires square matrix");  \
+    char jobvs = 'V', sort = 'N', sense = 'N';                          \
+    bool select = false;                                                \
+    int lwork = 8*n, sdim = 0, liwork = 1;                              \
+    std::vector<base_type> work(lwork), wr(n), wi(n);                   \
+    std::vector<int> iwork(liwork);                                     \
+    std::vector<int> bwork(1);                                          \
+    resize(S, n, n); copy(A, S);                                        \
+    resize(Q, n, n);                                                    \
+    base_type rconde(0), rcondv(0);                                     \
+    int info = -1;                                                      \
+    lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n,        \
+                &sdim, &wr[0], &wi[0], &Q(0,0), &n, &rconde, &rcondv,   \
+                &work[0], &lwork, &iwork[0], &liwork, &bwork[0], &info);\
+    GMM_ASSERT1(!info, "SCHUR algorithm failed");                       \
+  }
+
+  // Complex counterpart of geesx_interface: a single eigenvalue array w
+  // replaces wr/wi, and an rwork array of lwork elements of
+  // base_type::value_type replaces the integer workspace. rconde and
+  // rcondv are declared as base_type here as well. The same fixed 8*n
+  // workspace and the same absence of an n == 0 guard apply.
+# define geesx_interface2(lapack_name, base_type) inline                \
+  void schur(dense_matrix<base_type> &A,                                \
+             dense_matrix<base_type> &S,                                \
+             dense_matrix<base_type> &Q) {                              \
+    GMMLAPACK_TRACE("geesx_interface");                                 \
+    int m = int(mat_nrows(A)), n = int(mat_ncols(A));                   \
+    GMM_ASSERT1(m == n, "Schur decomposition requires square matrix");  \
+    char jobvs = 'V', sort = 'N', sense = 'N';                          \
+    bool select = false;                                                \
+    int lwork = 8*n, sdim = 0;                                          \
+    std::vector<base_type::value_type> rwork(lwork);                    \
+    std::vector<base_type> work(lwork), w(n);                           \
+    std::vector<int> bwork(1);                                          \
+    resize(S, n, n); copy(A, S);                                        \
+    resize(Q, n, n);                                                    \
+    base_type rconde(0), rcondv(0);                                     \
+    int info = -1;                                                      \
+    lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n,        \
+                &sdim, &w[0], &Q(0,0), &n, &rconde, &rcondv,            \
+                &work[0], &lwork, &rwork[0], &bwork[0], &info);         \
+    GMM_ASSERT1(!info, "SCHUR algorithm failed");                       \
+  }
+
+  geesx_interface(sgeesx_, BLAS_S)
+  geesx_interface(dgeesx_, BLAS_D)
+  geesx_interface2(cgeesx_, BLAS_C)
+  geesx_interface2(zgeesx_, BLAS_Z)
+
+  /** Schur decomposition of a read-only matrix: the input is duplicated
+      into a local, modifiable matrix which is then handed to schur()
+      together with the two output matrices S and Q. */
+  template <typename MAT>
+  void schur(const MAT &A_, MAT &S, MAT &Q) {
+    MAT writable_copy(A_);
+    schur(writable_copy, S, Q);
+  }
+
+
+  /* ********************************************************************* */
+  /* Interface to SVD. Does not correspond to a Gmm++ functionality.       */
+  /* Author : Sebastian Nowozin <sebastian.nowozin@tuebingen.mpg.de>       */
+  /* ********************************************************************* */
+    
+  // Generates svd(X, U, Vtransposed, sigma) for the real scalar types on
+  // top of ?gesvd with both job flags set to 'A'.
+  //   X           : m x n input; passed to LAPACK as the in/out matrix
+  //                 argument, so it must be treated as overwritten.
+  //   U           : resized to m x m, receives the first vector output.
+  //   Vtransposed : resized to n x n, receives the second vector output.
+  //   sigma       : resized to min(m, n), receives the singular values.
+  // The workspace is sized by a LAPACK query (lwork = -1) rather than the
+  // former fixed 15*min(m,n), which is smaller than the documented
+  // minimum 3*min(m,n)+max(m,n) for strongly rectangular matrices.
+  // Both LAPACK calls are checked: a non-zero info raises through
+  // GMM_ASSERT1 instead of silently leaving the outputs undefined.
+  // For an empty X the outputs are only resized and LAPACK is not called.
+# define gesvd_interface(lapack_name, base_type) inline                 \
+  void svd(dense_matrix<base_type> &X,                                  \
+           dense_matrix<base_type> &U,                                  \
+           dense_matrix<base_type> &Vtransposed,                        \
+           std::vector<base_type> &sigma) {                             \
+    GMMLAPACK_TRACE("gesvd_interface");                                 \
+    int m = int(mat_nrows(X)), n = int(mat_ncols(X));                   \
+    int mn_min = m < n ? m : n;                                         \
+    sigma.resize(mn_min);                                               \
+    resize(U, m, m);                                                    \
+    resize(Vtransposed, n, n);                                          \
+    if (!mn_min) return;                                                \
+    char job = 'A';                                                     \
+    int info = -1, lwork = -1;                                          \
+    base_type work1(0);                                                 \
+    /* Workspace query: optimal size comes back in work1. */            \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work1, &lwork, &info);      \
+    GMM_ASSERT1(!info, "SVD workspace query failed");                   \
+    lwork = int(gmm::real(work1));                                      \
+    if (lwork < 1) lwork = 1;                                           \
+    std::vector<base_type> work(lwork);                                 \
+    info = -1;                                                          \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work[0], &lwork, &info);    \
+    GMM_ASSERT1(!info, "SVD algorithm failed");                         \
+  }
+
+  // Complex counterpart of gesvd_interface: the matrices hold base_type
+  // while sigma and the extra rwork array (5*min(m,n) elements) hold the
+  // real type base_type2. Workspace query, info checks and empty-input
+  // handling are the same as in the real variant.
+# define cgesvd_interface(lapack_name, base_type, base_type2) inline    \
+  void svd(dense_matrix<base_type> &X,                                  \
+           dense_matrix<base_type> &U,                                  \
+           dense_matrix<base_type> &Vtransposed,                        \
+           std::vector<base_type2> &sigma) {                            \
+    GMMLAPACK_TRACE("gesvd_interface");                                 \
+    int m = int(mat_nrows(X)), n = int(mat_ncols(X));                   \
+    int mn_min = m < n ? m : n;                                         \
+    sigma.resize(mn_min);                                               \
+    resize(U, m, m);                                                    \
+    resize(Vtransposed, n, n);                                          \
+    if (!mn_min) return;                                                \
+    std::vector<base_type2> rwork(5 * mn_min);                          \
+    char job = 'A';                                                     \
+    int info = -1, lwork = -1;                                          \
+    base_type work1(0);                                                 \
+    /* Workspace query: optimal size comes back in work1. */            \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work1, &lwork,              \
+                &rwork[0], &info);                                      \
+    GMM_ASSERT1(!info, "SVD workspace query failed");                   \
+    lwork = int(gmm::real(work1));                                      \
+    if (lwork < 1) lwork = 1;                                           \
+    std::vector<base_type> work(lwork);                                 \
+    info = -1;                                                          \
+    lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0),    \
+                &m, &Vtransposed(0,0), &n, &work[0], &lwork,            \
+                &rwork[0], &info);                                      \
+    GMM_ASSERT1(!info, "SVD algorithm failed");                         \
+  }
+  
+  gesvd_interface(sgesvd_, BLAS_S)
+  gesvd_interface(dgesvd_, BLAS_D)
+  cgesvd_interface(cgesvd_, BLAS_C, BLAS_S)
+  cgesvd_interface(zgesvd_, BLAS_Z, BLAS_D)
+
+  /** SVD of a read-only matrix: the input is duplicated into a local,
+      modifiable matrix which is then handed to svd() together with the
+      output arguments U, Vtransposed and sigma. */
+  template <typename MAT, typename VEC>
+  void svd(const MAT &X_, MAT &U, MAT &Vtransposed, VEC &sigma) {
+    MAT writable_copy(X_);
+    svd(writable_copy, U, Vtransposed, sigma);
+  }
+    
+
+
+
+}
+
+#else
+
+namespace gmm {
+
+/** Placeholder used when GMM_USES_LAPACK is not defined: every call fails
+    through GMM_ASSERT1 with a message saying that a LAPACK-enabled build
+    is required. None of the arguments is read or written. */
+template <typename MAT>
+void schur(const MAT &A_, MAT &S, MAT &Q) {
+  GMM_ASSERT1(false, "Use of function schur(A,S,Q) requires GetFEM++ "
+              "to be built with Lapack");
+}
+
+} // namespace gmm
+
+#endif // GMM_USES_LAPACK
+
+#endif // GMM_LAPACK_INTERFACE_H
--- a/gmm/gmm_least_squares_cg.h
+++ b/gmm/gmm_least_squares_cg.h
@ -0,0 +1,96 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard, Benjamin Schleimer
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_least_squares_cg.h
+   @author Benjamin Schleimer <bensch128  (at) yahoo (dot) com>
+   @date January 23, 2007.
+   @brief Conjugate gradient least squares algorithm. 
+   Algorithm taken from http://www.stat.washington.edu/wxs/Stat538-w05/Notes/conjugate-gradients.pdf page 6
+*/
+#ifndef GMM_LEAST_SQUARES_CG_H__
+#define GMM_LEAST_SQUARES_CG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_conjugated.h"
+
+namespace gmm {
+
+  /** Conjugate-gradient least-squares iteration.
+      @param C    matrix; applied both directly and through conjugated(C).
+      @param x    starting guess on entry, updated in place on every
+                  iteration; cleared when the right-hand side has zero
+                  norm.
+      @param y    right-hand side.
+      @param iter iteration controller: receives the norm of y, decides
+                  termination from the current gradient and is
+                  incremented once per pass.
+  */
+  template <typename Matrix, typename Vector1, typename Vector2>
+  void least_squares_cg(const Matrix& C, Vector1& x, const Vector2& y,
+                        iteration &iter) {
+
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    temp_vector direction(vect_size(x)), image(vect_size(y));
+    temp_vector gradient(vect_size(x)), residual(vect_size(y));
+
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(y, y))));
+
+    // Zero right-hand side: the answer is the zero vector.
+    if (iter.get_rhsnorm() == 0.0) {
+      clear(x);
+      return;
+    }
+
+    // residual = y - C*x, gradient = conjugated(C) * residual
+    mult(C, scaled(x, T(-1)), y, residual);
+    mult(conjugated(C), residual, gradient);
+
+    T grad_sq = vect_hp(gradient, gradient);
+    T grad_sq_prev(0);
+    copy(gradient, direction);
+
+    while (!iter.finished_vect(gradient)) {
+
+      // From the second pass on, refresh the search direction.
+      if (!iter.first()) {
+        grad_sq = vect_hp(gradient, gradient);
+        add(gradient, scaled(direction, grad_sq / grad_sq_prev), direction);
+      }
+
+      mult(C, direction, image);
+
+      T step = grad_sq / vect_hp(image, image);
+      add(scaled(direction, step), x);
+      add(scaled(image, -step), residual);
+      // NOTE: how do we minimize the impact to the transpose?
+      mult(conjugated(C), residual, gradient);
+      grad_sq_prev = grad_sq;
+
+      ++iter;
+    }
+  }
+
+  /** Overload taking the solution vector by const reference: the constness
+      is removed with linalg_const_cast and the call is forwarded to
+      least_squares_cg again (presumably resolving to the overload with a
+      modifiable x).
+
+      Precond appears in no function parameter and therefore can never be
+      deduced; without a default this overload could only be reached by
+      spelling the template arguments out. It now defaults to void, so
+      ordinary calls can select the overload while calls that pass the
+      template arguments explicitly keep compiling unchanged.
+  */
+  template <typename Matrix, typename Precond = void,
+            typename Vector1, typename Vector2> inline
+  void least_squares_cg(const Matrix& C, const Vector1& x, const Vector2& y,
+                        iteration &iter)
+  { least_squares_cg(C, linalg_const_cast(x), y, iter); }
+}
+
+
+#endif //  GMM_LEAST_SQUARES_CG_H__
--- a/gmm/gmm_matrix.h
+++ b/gmm/gmm_matrix.h
--- a/gmm/gmm_modified_gram_schmidt.h
+++ b/gmm/gmm_modified_gram_schmidt.h
@ -0,0 +1,127 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_modified_gram_schmidt.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>, Lie-Quan Lee     <llee@osl.iu.edu>
+   @date October 13, 2002.
+   @brief Modified Gram-Schmidt orthogonalization
+*/
+
+#ifndef GMM_MODIFIED_GRAM_SCHMIDT_H
+#define GMM_MODIFIED_GRAM_SCHMIDT_H
+
+#include "gmm_kernel.h"
+
+namespace gmm {
+
+  /** Column storage used by the modified Gram-Schmidt routines of this file.
+      The vectors are the columns of a dense matrix and are reached through
+      operator[].
+  */
+  template <typename T>
+  class modified_gram_schmidt {
+  protected:
+    typedef dense_matrix<T> MAT;
+    MAT M;  // s rows, restart+1 columns (see the constructor)
+
+  public:
+
+    /** Allocates a matrix with s rows and restart+1 columns. */
+    modified_gram_schmidt(int restart, size_t s) : M(s, restart+1) {}
+
+    /** Read-only access to column i. */
+    typename linalg_traits<MAT>::const_sub_col_type operator[](size_t i) const
+    { return mat_const_col(M, i); }
+
+    /** Writable access to column i. */
+    typename linalg_traits<MAT>::sub_col_type operator[](size_t i)
+    { return mat_col(M, i); }
+
+    size_type nrows() const { return M.nrows(); }
+    size_type ncols() const { return M.ncols(); }
+
+    /** Direct access to the underlying matrix. */
+    MAT &mat() { return M; }
+    const MAT &mat() const { return M; }
+
+  };
+
+  /** Orthogonalizes column i+1 of V against columns 0..i, one column after
+      the other. The coefficient computed for column k is stored in Hi_[k].
+      Hi_ is taken by const reference but written to (constness is cast
+      away).
+  */
+  template <typename T, typename VecHi> inline
+  void orthogonalize(modified_gram_schmidt<T>& V, const VecHi& Hi_, size_t i) {
+    VecHi& coeffs = const_cast<VecHi&>(Hi_);
+    const size_t target = i + 1;
+
+    size_t k = 0;
+    while (k <= i) {
+      coeffs[k] = gmm::vect_hp(V[target], V[k]);
+      gmm::add(gmm::scaled(V[k], -coeffs[k]), V[target]);
+      ++k;
+    }
+  }
+
+  /** Runs orthogonalize() and then applies one correction step to column
+      i+1 of V using the block made of columns 0..i. The correction
+      coefficients are added to the first i+1 entries of Hi_, which is
+      written to despite being passed by const reference.
+  */
+  template <typename T, typename VecHi>
+  void orthogonalize_with_refinment(modified_gram_schmidt<T>& V,
+                                    const VecHi& Hi_, size_t i) {
+    VecHi& coeffs = const_cast<VecHi&>(Hi_);
+    orthogonalize(V, Hi_, i);
+
+    const size_t target = i + 1;
+    sub_interval all_rows(0, V.nrows()), basis_cols(0, target);
+    std::vector<T> correction(target);
+
+    // correction <- conjugated(block) * V[i+1]
+    gmm::mult(conjugated(sub_matrix(V.mat(), all_rows, basis_cols)),
+              V[target], correction);
+    // V[i+1] <- block * (-correction) + V[i+1]
+    gmm::mult(sub_matrix(V.mat(), all_rows, basis_cols),
+              scaled(correction, T(-1)), V[target], V[target]);
+    gmm::add(correction, sub_vector(coeffs, basis_cols));
+  }
+  
+  /** Adds to x the combination of the first i columns of V weighted by the
+      entries of s: x += s[0] V[0] + ... + s[i-1] V[i-1].
+  */
+  template <typename T, typename VecS, typename VecX>
+  void combine(modified_gram_schmidt<T>& V, const VecS& s, VecX& x, size_t i) {
+    size_t col = 0;
+    while (col < i) {
+      gmm::add(gmm::scaled(V[col], s[col]), x);
+      ++col;
+    }
+  }
+}
+
+#endif
--- a/gmm/gmm_opt.h
+++ b/gmm/gmm_opt.h
@ -0,0 +1,128 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_opt.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date July 9, 2003.
+   @brief Optimization for some small cases (inversion of 2x2 matrices etc.)
+*/
+#ifndef GMM_OPT_H__
+#define GMM_OPT_H__
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*    Optimized determinant and inverse for small matrices (2x2 and 3x3) */
+  /*    with dense_matrix<T>.                                              */
+  /* ********************************************************************* */
+
+  /** Determinant of a dense matrix, with closed forms for the 1x1 and 2x2
+      cases and an LU factorisation of a copy of A otherwise.
+      Returns T(1) for a matrix with no rows.
+      The size test only looks at the number of rows of A.
+  */
+  template <typename T>  T lu_det(const dense_matrix<T> &A) {
+    const size_type n = mat_nrows(A);
+    if (n == 0) return T(1);
+
+    const T *a = &(A(0,0));
+    if (n == 1) return a[0];
+    if (n == 2) return a[0] * a[3] - a[1] * a[2];
+
+    // No closed form is used for 3x3 matrices: the cofactor expansion is
+    // not stable for nearly singular matrices.
+    dense_matrix<T> LU(mat_nrows(A), mat_ncols(A));
+    std::vector<size_type> pivots(mat_nrows(A));
+    gmm::copy(A, LU);
+    lu_factor(LU, pivots);
+    return lu_det(LU, pivots);
+  }
+
+
+  /** In-place inverse of a dense matrix; returns its determinant.
+
+      A_ is taken by const reference but is overwritten with its inverse
+      (constness is cast away). 1x1 and 2x2 matrices use closed forms, larger
+      ones an LU factorisation of a copy. T(1) is returned for a matrix with
+      no rows.
+
+      @param A_       matrix to invert, replaced by its inverse on success.
+      @param doassert when true, a non invertible matrix triggers
+                      GMM_ASSERT1. When false, a non invertible matrix is
+                      left untouched and only the determinant is returned;
+                      this holds for every size, including the LU branch.
+  */
+  template <typename T> T lu_inverse(const dense_matrix<T> &A_, bool doassert = true) {
+    dense_matrix<T>& A = const_cast<dense_matrix<T> &>(A_);
+    size_type N = mat_nrows(A);
+    T det(1);
+    if (N) {
+      T *p = &(A(0,0));
+      if (N <= 2) {
+        switch (N) {
+          case 1 : {
+            det = *p;
+            if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix");
+            if (det == T(0)) break;
+            *p = T(1) / det;
+          } break;
+          case 2 : {
+            det = (*p) * (*(p+3)) - (*(p+1)) * (*(p+2));
+            if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix");
+            if (det == T(0)) break;
+            // Swap the two diagonal entries, then divide by +det on the
+            // diagonal and by -det off the diagonal.
+            std::swap(*p, *(p+3));
+            *p++ /= det; *p++ /= -det; *p++ /= -det; *p++ /= det;
+          } break;
+          // No closed form for 3x3 matrices: the cofactor formula is not
+          // stable for nearly singular matrices.
+        }
+      }
+      else {
+        dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
+        std::vector<int> ipvt(mat_nrows(A));
+        gmm::copy(A, B);
+        size_type info = lu_factor(B, ipvt);
+        // Honour doassert here as in the small-size branches, and never
+        // invert from a factorisation that reported a failure.
+        if (doassert) GMM_ASSERT1(!info, "non invertible matrix");
+        if (!info) lu_inverse(B, ipvt, A);
+        return lu_det(B, ipvt);
+      }
+    }
+    return det;
+  }
+
+  
+}
+
+#endif //  GMM_OPT_H__
--- a/gmm/gmm_precond.h
+++ b/gmm/gmm_precond.h
@ -0,0 +1,65 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+#ifndef GMM_PRECOND_H
+#define GMM_PRECOND_H
+
+#include "gmm_kernel.h"
+
+/** @file gmm_precond.h
+    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+    @date March 29, 2004.
+    @brief gmm preconditioners.
+ */
+
+/* Preconditioner concept :                                                */
+/*                                                                         */
+/* A is the matrix, P the preconditioner, with PA well conditioned.        */
+/* PRECOND preconditioner type.                                            */
+/* mult(P, v, w) :  w <- P v                                               */
+/* transposed_mult(P, v, w)       : w <- transposed(P) v                   */
+/* left_mult(P, v, w)             : see qmr solver                         */
+/* right_mult(P, v, w)            : see qmr solver                         */
+/* transposed_left_mult(P, v, w)  : see qmr solver                         */
+/* transposed_right_mult(P, v, w) : see qmr solver                         */
+/*                                                                         */
+/* PRECOND P() : empty preconditioner.                                     */
+/* PRECOND P(A, ...) : preconditioner for the matrix A, with optional      */
+/*                     parameters                                          */
+/* PRECOND(...)  : empty preconditioner with parameters set.               */
+/* P.build_with(A) : build a preconditioner for A.                         */
+/*                                                                         */
+/* *********************************************************************** */
+
+
+
+
+#endif 
+
--- a/gmm/gmm_precond_diagonal.h
+++ b/gmm/gmm_precond_diagonal.h
@ -0,0 +1,132 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_diagonal.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Diagonal matrix preconditioner.
+*/
+
+#ifndef GMM_PRECOND_DIAGONAL_H
+#define GMM_PRECOND_DIAGONAL_H
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Diagonal preconditioner.
+
+      Stores, for each row i of the matrix, the value 1 / |M(i, i)|. A zero
+      diagonal entry is replaced by 1 (with a warning) so that the stored
+      factor is always finite.
+  */
+  template<typename Matrix> struct diagonal_precond {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+
+    std::vector<magnitude_type> diag;
+
+    /** (Re)builds the stored factors from the diagonal of M. */
+    void build_with(const Matrix &M) {
+      diag.resize(mat_nrows(M));
+      for (size_type i = 0; i < mat_nrows(M); ++i) {
+        magnitude_type x = gmm::abs(M(i, i));
+        if (x == magnitude_type(0)) {
+          x = magnitude_type(1);
+          GMM_WARNING2("The matrix has a zero on its diagonal");
+        }
+        diag[i] = magnitude_type(1) / x;
+      }
+    }
+    /** Memory footprint in bytes: the object itself plus the stored
+        factors. The factors are of type magnitude_type, which is what is
+        counted here (value_type may be larger, e.g. for complex matrices).
+    */
+    size_type memsize() const
+    { return sizeof(*this) + diag.size() * sizeof(magnitude_type); }
+    diagonal_precond(const Matrix &M) { build_with(M); }
+    diagonal_precond(void) {}
+  };
+
+  /** Sparse storage: scales every stored entry of v2 by the factor of P
+      matching the entry's index.
+  */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P, V2 &v2, abstract_sparse){
+    typedef typename linalg_traits<V2>::iterator entry_iterator;
+    entry_iterator cur = vect_begin(v2);
+    entry_iterator last = vect_end(v2);
+    while (cur != last) {
+      *cur *= P.diag[cur.index()];
+      ++cur;
+    }
+  }
+
+  /** Skyline storage: handled exactly like the sparse case. */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P,V2 &v2, abstract_skyline)
+  {
+    mult_diag_p(P, v2, abstract_sparse());
+  }
+
+  /** Dense storage: scales entry k of v2 by factor k of P, for every
+      factor stored in P.
+  */
+  template <typename Matrix, typename V2> inline
+  void mult_diag_p(const diagonal_precond<Matrix>& P, V2 &v2, abstract_dense){
+    const size_type count = P.diag.size();
+    for (size_type k = 0; k != count; ++k)
+      v2[k] *= P.diag[k];
+  }
+
+  /** v2 <- P v1: copies v1 into v2, then scales v2 entry by entry with the
+      routine matching the storage type of v2. Only the size of v2 is
+      checked against the preconditioner.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    typedef typename linalg_traits<V2>::storage_type storage_tag;
+    GMM_ASSERT2(P.diag.size() == vect_size(v2),"dimensions mismatch");
+    copy(v1, v2);
+    mult_diag_p(P, v2, storage_tag());
+  }
+
+  /** Transposed product: forwards to mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const diagonal_precond<Matrix>& P,const V1 &v1,V2 &v2)
+  { mult(P, v1, v2); }
+  
+  // # define DIAG_LEFT_MULT_SQRT
+  
+  /** Left part of the preconditioner: copies v1 into v2 and scales each
+      entry. With DIAG_LEFT_MULT_SQRT defined the square root of the stored
+      factor is used, otherwise the factor itself.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch");
+    copy(v1, v2);
+    const size_type count = P.diag.size();
+    for (size_type k = 0; k != count; ++k) {
+#   ifdef DIAG_LEFT_MULT_SQRT
+      v2[k] *= gmm::sqrt(P.diag[k]);
+#   else
+      v2[k] *= P.diag[k];
+#   endif
+    }
+  }
+
+  /** Transposed left part: forwards to left_mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const diagonal_precond<Matrix>& P,
+                            const V1 &v1, V2 &v2) {
+    left_mult(P, v1, v2);
+  }
+
+  /** Right part of the preconditioner: copies v1 into v2. The entries are
+      scaled by the square root of the stored factors only when
+      DIAG_LEFT_MULT_SQRT is defined; otherwise this is a plain copy.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const diagonal_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch");
+    copy(v1, v2);
+#   ifdef DIAG_LEFT_MULT_SQRT
+    const size_type count = P.diag.size();
+    for (size_type k = 0; k != count; ++k)
+      v2[k] *= gmm::sqrt(P.diag[k]);
+#   endif
+  }
+
+  /** Transposed right part: forwards to right_mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const diagonal_precond<Matrix>& P,
+                             const V1 &v1, V2 &v2) {
+    right_mult(P, v1, v2);
+  }
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_ildlt.h
+++ b/gmm/gmm_precond_ildlt.h
@ -0,0 +1,241 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of cholesky.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+#ifndef GMM_PRECOND_ILDLT_H
+#define GMM_PRECOND_ILDLT_H
+
+/**@file gmm_precond_ildlt.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author Yves Renard <yves.renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Incomplete Level 0 ILDLT Preconditioner.
+*/
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Incomplete Level 0 LDLT Preconditioner.
+      
+  For use with symmetric real or hermitian complex sparse matrices.
+
+  Notes: The idea under a concrete Preconditioner such as Incomplete
+  Cholesky is to create a Preconditioner object to use in iterative
+  methods.
+
+
+  Y. Renard : Transformed into LDLT for stability reasons.
+  
+  U=LT is stored in csr format. D is stored on the diagonal of U.
+  */
+  template <typename Matrix>
+  class ildlt_precond {
+
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    typedef csr_matrix_ref<value_type *, size_type *, size_type *, 0> tm_type;
+
+    // Factor seen as a csr matrix; do_ildlt() builds it from pointers to the
+    // three arrays below.
+    // NOTE(review): a copied preconditioner presumably keeps referring to
+    // the arrays of the object it was copied from - confirm before copying.
+    tm_type U;
+
+  protected :
+    std::vector<value_type> Tri_val;          // stored values
+    std::vector<size_type> Tri_ind, Tri_ptr;  // column indices, row starts
+
+    // Factorisation from a row-oriented view of the matrix.
+    template<typename M> void do_ildlt(const M& A, row_major);
+    // Column-oriented matrices are handled through their conjugate.
+    void do_ildlt(const Matrix& A, col_major);
+
+  public:
+
+    size_type nrows() const { return mat_nrows(U); }
+    size_type ncols() const { return mat_ncols(U); }
+
+    /** Diagonal term of row i (first stored entry of that row). */
+    value_type &D(size_type i) { return Tri_val[Tri_ptr[i]]; }
+    const value_type &D(size_type i) const { return Tri_val[Tri_ptr[i]]; }
+
+    /** Empty preconditioner; call build_with() before using it. */
+    ildlt_precond() {}
+
+    /** Builds the preconditioner for A, selecting the factorisation routine
+        from the principal orientation of the matrix type.
+    */
+    void build_with(const Matrix& A) {
+      typedef typename principal_orientation_type<
+        typename linalg_traits<Matrix>::sub_orientation>::potype orientation;
+      Tri_ptr.resize(mat_nrows(A)+1);
+      do_ildlt(A, orientation());
+    }
+    ildlt_precond(const Matrix& A)  { build_with(A); }
+
+    /** Memory footprint in bytes: the object plus its three arrays. */
+    size_type memsize() const {
+      const size_type value_bytes = Tri_val.size() * sizeof(value_type);
+      const size_type index_bytes
+        = (Tri_ind.size()+Tri_ptr.size()) * sizeof(size_type);
+      return sizeof(*this) + value_bytes + index_bytes;
+    }
+  };
+
+  /** Incomplete factorisation from a row-oriented view of the matrix.
+
+      The diagonal and strictly upper entries of A are first copied into the
+      Tri_val / Tri_ind / Tri_ptr arrays, each row starting with its
+      diagonal entry. The factorisation is then carried out in place on
+      these arrays, without creating entries outside the copied pattern,
+      and U is finally set up on top of them.
+
+      Pivots whose magnitude does not exceed the running threshold
+      max_pivot are replaced by 1, with a warning reporting the rejected
+      value.
+
+      Tri_ptr must already hold mat_nrows(A)+1 entries (see build_with()).
+      Nothing is done for a matrix with no rows.
+  */
+  template <typename Matrix> template<typename M>
+  void ildlt_precond<Matrix>::do_ildlt(const M& A, row_major) {
+    typedef typename linalg_traits<Matrix>::storage_type store_type;
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type Tri_loc = 0, n = mat_nrows(A), d, g, h, i, j, k;
+    if (n == 0) return;
+    T z, zz;
+    Tri_ptr[0] = 0;
+    R prec = default_tol(R());
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+
+    // Two sweeps over the rows: the first one (count == 0) only counts the
+    // entries to keep, the second one fills the arrays once they are sized.
+    for (int count = 0; count < 2; ++count) {
+      if (count) { Tri_val.resize(Tri_loc); Tri_ind.resize(Tri_loc); }
+      for (Tri_loc = 0, i = 0; i < n; ++i) {
+        typedef typename linalg_traits<M>::const_sub_row_type row_type;
+        row_type row = mat_const_row(A, i);
+        typename linalg_traits<typename org_type<row_type>::t>::const_iterator
+          it = vect_const_begin(row), ite = vect_const_end(row);
+
+        // The diagonal slot is always reserved first, initialised to zero
+        // in case the row holds no diagonal entry.
+        if (count) { Tri_val[Tri_loc] = T(0); Tri_ind[Tri_loc] = i; }
+        ++Tri_loc; // diagonal element
+
+        for (k = 0; it != ite; ++it, ++k) {
+          j = index_of_it(it, k, store_type());
+          if (i == j) {
+            if (count) Tri_val[Tri_loc-1] = *it;
+          }
+          else if (j > i) {
+            if (count) { Tri_val[Tri_loc] = *it; Tri_ind[Tri_loc]=j; }
+            ++Tri_loc;
+          }
+        }
+        Tri_ptr[i+1] = Tri_loc;
+      }
+    }
+
+    if (A(0,0) == T(0)) {
+      Tri_val[Tri_ptr[0]] = T(1);
+      GMM_WARNING2("pivot 0 is too small");
+    }
+
+    for (k = 0; k < n; k++) {
+      d = Tri_ptr[k];
+      // Only the real part of the pivot is kept.
+      z = T(gmm::real(Tri_val[d])); Tri_val[d] = z;
+      if (gmm::abs(z) <= max_pivot) {
+        // Emit the warning before resetting z, so that the message shows
+        // the rejected pivot and not its replacement value.
+        GMM_WARNING2("pivot " << k << " is too small [" << gmm::abs(z) << "]");
+        Tri_val[d] = z = T(1);
+      }
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(z) * prec, R(1)));
+
+      // Scale the off-diagonal part of row k by the pivot, then update the
+      // rows referenced by that part on their shared column indices.
+      for (i = d + 1; i < Tri_ptr[k+1]; ++i) Tri_val[i] /= z;
+      for (i = d + 1; i < Tri_ptr[k+1]; ++i) {
+        zz = gmm::conj(Tri_val[i] * z);
+        h = Tri_ind[i];
+        g = i;
+
+        for (j = Tri_ptr[h] ; j < Tri_ptr[h+1]; ++j)
+          for ( ; g < Tri_ptr[k+1] && Tri_ind[g] <= Tri_ind[j]; ++g)
+            if (Tri_ind[g] == Tri_ind[j])
+              Tri_val[j] -= zz * Tri_val[g];
+      }
+    }
+    U = tm_type(&(Tri_val[0]), &(Tri_ind[0]), &(Tri_ptr[0]),
+                n, mat_ncols(A));
+  }
+  
+  /** Column-oriented matrices: run the row-oriented factorisation on the
+      conjugated view of A.
+  */
+  template <typename Matrix>
+  void ildlt_precond<Matrix>::do_ildlt(const Matrix& A, col_major) {
+    do_ildlt(gmm::conjugated(A), row_major());
+  }
+
+  /** v2 <- P v1: copies v1 into v2, then applies in place a lower
+      triangular solve with conjugated(P.U), a division by the diagonal
+      terms and an upper triangular solve with P.U.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    for (size_type row = 0; row < mat_nrows(P.U); ++row)
+      v2[row] /= P.D(row);
+    gmm::upper_tri_solve(P.U, v2, true);
+  }
+
+  /** Transposed product: forwards to mult(). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ildlt_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    mult(P, v1, v2);
+  }
+
+  /** Left part: copies v1 into v2, solves in place with conjugated(P.U)
+      (lower triangular) and divides by the diagonal terms.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    for (size_type row = 0; row < mat_nrows(P.U); ++row)
+      v2[row] /= P.D(row);
+  }
+
+  /** Right part: copies v1 into v2 and solves in place with P.U (upper
+      triangular).
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ildlt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    gmm::upper_tri_solve(P.U, v2, true);
+  }
+
+  /** Transposed left part: copies v1 into v2, solves in place with P.U
+      (upper triangular) and divides by the diagonal terms.
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ildlt_precond<Matrix>& P, const V1 &v1,
+                            V2 &v2) {
+    copy(v1, v2);
+    gmm::upper_tri_solve(P.U, v2, true);
+    for (size_type row = 0; row < mat_nrows(P.U); ++row)
+      v2[row] /= P.D(row);
+  }
+
+  /** Transposed right part: copies v1 into v2 and solves in place with
+      conjugated(P.U) (lower triangular).
+  */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ildlt_precond<Matrix>& P, const V1 &v1,
+                             V2 &v2) {
+    copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+  }
+
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_ildltt.h
+++ b/gmm/gmm_precond_ildltt.h
@ -0,0 +1,174 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_ildltt.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 30, 2003.
+   @brief incomplete LDL^t (cholesky) preconditioner with fill-in and threshold.
+*/
+
+#ifndef GMM_PRECOND_ILDLTT_H
+#define GMM_PRECOND_ILDLTT_H
+
+// Store U = L^T, and the inverse of the diagonal D in indiag. On each line,
+// the fill-in is the number of non-zero elements on the line of the original
+// matrix plus K, except if the matrix is dense. In this case the fill-in is
+// K on each line.
+
+#include "gmm_precond_ilut.h"
+
+namespace gmm {
+  /** Incomplete LDL^t (Cholesky) preconditioner with fill-in and
+      threshold.
+
+      U receives the upper factor and indiag the reciprocal of each pivot
+      (both filled by do_ildltt). K bounds the number of entries kept per
+      row of U and eps is the relative dropping threshold. */
+  template <typename Matrix>
+  class ildltt_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+
+    typedef rsvector<value_type> svector;
+
+    row_matrix<svector> U;
+    std::vector<magnitude_type> indiag;
+
+  protected:
+    size_type K;
+    double eps;
+
+    template<typename M> void do_ildltt(const M&, row_major);
+    void do_ildltt(const Matrix&, col_major);
+
+  public:
+    /** (Re)build the factorization for A. A negative k_ or eps_ keeps the
+        value of K or eps currently stored in the object. */
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) {
+      typedef typename principal_orientation_type<typename
+	linalg_traits<Matrix>::sub_orientation>::potype orientation;
+
+      if (!(k_ < 0)) K = k_;
+      if (eps_ >= double(0)) eps = eps_;
+
+      const size_type nr = mat_nrows(A), nc = mat_ncols(A);
+      gmm::resize(U, nr, nc);
+      indiag.resize(std::min(nr, nc));
+      do_ildltt(A, orientation());
+    }
+
+    /** Build immediately from A with fill-in k_ and threshold eps_. */
+    ildltt_precond(const Matrix& A, int k_, double eps_)
+      : U(mat_nrows(A),mat_ncols(A)), K(k_), eps(eps_)
+    { build_with(A); }
+
+    /** Defaults: K = 10, eps = 1E-7; nothing is factorized yet. */
+    ildltt_precond(void) : K(10), eps(1E-7) {}
+
+    /** Store the parameters only; call build_with() later. */
+    ildltt_precond(size_type k_, double eps_) :  K(k_), eps(eps_) {}
+
+    /** Size of the object plus the stored entries of U and indiag. */
+    size_type memsize() const {
+      size_type total = sizeof(*this);
+      total += nnz(U) * sizeof(value_type);
+      total += indiag.size() * sizeof(magnitude_type);
+      return total;
+    }
+  };
+
+  /** Row-major incomplete LDL^t factorization with threshold and fill-in.
+
+      Fills U (entries strictly right of the diagonal only) and indiag
+      (reciprocal of the real part of each pivot) from the rows of A.
+      Returns immediately when A has no rows.
+
+      @param A matrix to factorize, read row by row. */
+  template<typename Matrix> template<typename M> 
+  void ildltt_precond<Matrix>::do_ildltt(const M& A,row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    svector w(n);   // sparse work copy of the current row
+    T tmp;
+    // max_pivot is the running threshold below which a pivot is rejected.
+    R prec = default_tol(R()), max_pivot = gmm::abs(A(0,0)) * prec;
+
+    gmm::clear(U);
+    for (size_type i = 0; i < n; ++i) {
+      gmm::copy(mat_const_row(A, i), w);
+      double norm_row = gmm::vect_norm2(w);
+
+      // Walk the stored entries of w whose column k is left of the diagonal.
+      // The iterator is re-derived from the position krow on every pass
+      // because w is modified inside the loop body.
+      for (size_type krow = 0, k; krow < w.nb_stored(); ++krow) {
+	typename svector::iterator wk = w.begin() + krow;
+	if ((k = wk->c) >= i) break;
+ 	if (gmm::is_complex(wk->e)) {
+ 	  tmp = gmm::conj(U(k, i))/indiag[k]; // not completely satisfactory ..
+ 	  gmm::add(scaled(mat_row(U, k), -tmp), w);
+ 	}
+ 	else {
+	  tmp = wk->e;
+	  // Below the relative threshold: w.sup(k) is called (presumably it
+	  // removes entry k -- confirm in rsvector) and krow is stepped back
+	  // so the loop's ++krow lands on the same position again.
+	  if (gmm::abs(tmp) < eps * norm_row) { w.sup(k); --krow; } 
+	  // NOTE(review): the entry is increased by tmp before row k of U,
+	  // scaled by -tmp, is added to w; intent not evident from here.
+	  else { wk->e += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); }
+	}
+      }
+      tmp = w[i];   // candidate pivot of row i
+
+      // A pivot not above the running threshold is replaced by 1.
+      if (gmm::abs(gmm::real(tmp)) <= max_pivot)
+	{ GMM_WARNING2("pivot " << i << " is too small"); tmp = T(1); }
+
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1)));
+      indiag[i] = R(1) / gmm::real(tmp);
+      gmm::clean(w, eps * norm_row);
+      gmm::scale(w, T(indiag[i]));
+      // elt_rsvector_value_less_ orders by decreasing magnitude, so the
+      // largest entries come first and are the ones kept below.
+      std::sort(w.begin(), w.end(), elt_rsvector_value_less_<T>());
+      typename svector::const_iterator wit = w.begin(), wite = w.end();
+      // Keep at most K entries with column > i in row i of U.
+      for (size_type nnu = 0; wit != wite; ++wit)  // copy to be optimized ...
+	if (wit->c > i) { if (nnu < K) { U(i, wit->c) = wit->e; ++nnu; } }
+    }
+  }
+
+  /** Column-major overload: hands gmm::conjugated(A) to the row-major
+      implementation of do_ildltt. */
+  template<typename Matrix>
+  void ildltt_precond<Matrix>::do_ildltt(const Matrix& A, col_major) {
+    do_ildltt(gmm::conjugated(A), row_major());
+  }
+
+  /** Apply the ildltt preconditioner to v1, result in v2: v1 is copied
+      into v2, v2 is solved in place against gmm::conjugated(P.U) (lower
+      triangular solve), entry i is multiplied by P.indiag[i], and v2 is
+      finally solved in place against P.U (upper triangular solve). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    const size_type count = P.indiag.size();
+    for (size_type row = 0; row != count; ++row)
+      v2[row] *= P.indiag[row];
+    gmm::upper_tri_solve(P.U, v2, true);
+  }
+
+  /** Transposed application of the ildltt preconditioner; delegates
+      unchanged to mult(P, v1, v2). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ildltt_precond<Matrix>& P,const V1 &v1, V2 &v2) {
+    mult(P, v1, v2);
+  }
+
+  /** left_mult for ildltt: copies v1 into v2, solves v2 in place against
+      gmm::conjugated(P.U) (lower triangular solve), then multiplies entry
+      i of v2 by P.indiag[i]. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+    const size_type count = P.indiag.size();
+    for (size_type row = 0; row != count; ++row)
+      v2[row] *= P.indiag[row];
+  }
+
+  /** right_mult for ildltt: copies v1 into v2 and solves v2 in place
+      against P.U (upper triangular solve). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ildltt_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    gmm::upper_tri_solve(P.U, v2, true);
+  }
+
+  /** transposed_left_mult for ildltt: copies v1 into v2, solves v2 in
+      place against P.U (upper triangular solve), then multiplies entry i
+      of v2 by P.indiag[i]. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ildltt_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2);
+    gmm::upper_tri_solve(P.U, v2, true);
+    const size_type count = P.indiag.size();
+    for (size_type row = 0; row != count; ++row)
+      v2[row] *= P.indiag[row];
+  }
+
+  /** transposed_right_mult for ildltt: copies v1 into v2 and solves v2 in
+      place against gmm::conjugated(P.U) (lower triangular solve). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ildltt_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    copy(v1, v2);
+    gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true);
+  }
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_ilu.h
+++ b/gmm/gmm_precond_ilu.h
@ -0,0 +1,280 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of ilu.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_precond_ilu.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author Yves Renard <yves.renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Incomplete LU without fill-in Preconditioner.
+*/
+
+#ifndef GMM_PRECOND_ILU_H
+#define GMM_PRECOND_ILU_H
+
+//
+// Notes: The idea behind a concrete preconditioner such
+//        as Incomplete LU is to create a preconditioner
+//        object to be used in iterative methods.
+//
+
+#include "gmm_precond.h"
+
+namespace gmm {
+  /** Incomplete LU without fill-in Preconditioner.
+
+      L and U are CSR views bound by do_ilu onto the protected
+      value/index/pointer vectors of this object. `invert` records that
+      the factorization was computed on the transposed matrix (column-major
+      input); the mult functions use it to pick the matching solves.
+
+      NOTE(review): because L and U refer to the vectors held by the
+      object that ran do_ilu, a copied preconditioner presumably keeps
+      referring to the original's storage -- confirm before copying. */
+  template <typename Matrix>
+  class ilu_precond {
+
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef csr_matrix_ref<value_type *, size_type *, size_type *, 0> tm_type;
+
+    tm_type U, L;
+    bool invert;
+  protected :
+    std::vector<value_type> L_val, U_val;
+    std::vector<size_type> L_ind, U_ind, L_ptr, U_ptr;
+ 
+    template<typename M> void do_ilu(const M& A, row_major);
+    void do_ilu(const Matrix& A, col_major);
+
+  public:
+    
+    size_type nrows(void) const { return mat_nrows(L); }
+    size_type ncols(void) const { return mat_ncols(U); }
+    
+    /** (Re)build the factorization for A. */
+    void build_with(const Matrix& A) {
+      invert = false;
+       L_ptr.resize(mat_nrows(A)+1);
+       U_ptr.resize(mat_nrows(A)+1);
+       do_ilu(A, typename principal_orientation_type<typename
+	      linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    /** Build immediately from A. */
+    ilu_precond(const Matrix& A) : invert(false) { build_with(A); }
+    /** Empty preconditioner; `invert` is given a defined value so that it
+        is never read uninitialized before build_with() is called. */
+    ilu_precond(void) : invert(false) {}
+    /** Size of the object plus the storage of the factor arrays. */
+    size_type memsize() const { 
+      return sizeof(*this) + 
+	(L_val.size()+U_val.size()) * sizeof(value_type) + 
+	(L_ind.size()+L_ptr.size()) * sizeof(size_type) +
+	(U_ind.size()+U_ptr.size()) * sizeof(size_type); 
+    }
+  };
+
+  /** Row-major incomplete LU factorization without fill-in.
+
+      The counting loop runs twice: pass 0 only measures how many entries
+      go to the strictly lower part (L) and to the diagonal-plus-upper part
+      (U); pass 1 resizes the value/index arrays and copies the entries of
+      A into them. The elimination loop then updates those stored entries
+      in place and never creates a new position. Finally L and U are bound
+      to the filled arrays.
+
+      At most the first 1000 stored entries of each row are read. Pivot 0
+      is replaced by 1 when A(0,0) is exactly zero; a later pivot is
+      replaced by 1 when its magnitude is not above the running max_pivot
+      threshold. Returns immediately when A has no rows.
+
+      @param A matrix to factorize, read row by row. */
+  template <typename Matrix> template <typename M>
+  void ilu_precond<Matrix>::do_ilu(const M& A, row_major) {
+    typedef typename linalg_traits<Matrix>::storage_type store_type;
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type L_loc = 0, U_loc = 0, n = mat_nrows(A), i, j, k;
+    if (n == 0) return;
+    L_ptr[0] = 0; U_ptr[0] = 0;
+    R prec = default_tol(R());
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+
+
+    for (int count = 0; count < 2; ++count) {
+      if (count) { 
+        L_val.resize(L_loc); L_ind.resize(L_loc);
+        U_val.resize(U_loc); U_ind.resize(U_loc);
+      }
+      L_loc = U_loc = 0;
+      for (i = 0; i < n; ++i) {
+        typedef typename linalg_traits<M>::const_sub_row_type row_type;
+        row_type row = mat_const_row(A, i);
+        typename linalg_traits<typename org_type<row_type>::t>::const_iterator
+          it = vect_const_begin(row), ite = vect_const_end(row);
+
+        // The diagonal always gets a slot, first in its row of U.
+        if (count) { U_val[U_loc] = T(0); U_ind[U_loc] = i; }
+        ++U_loc; // diagonal element
+
+        for (k = 0; it != ite && k < 1000; ++it, ++k) {
+          // If a plain (dense) row is present, only its first 1000
+          // elements are retained. ---> a sort should be done.
+          j = index_of_it(it, k, store_type());
+          if (j < i) {
+            if (count) { L_val[L_loc] = *it; L_ind[L_loc] = j; }
+            L_loc++;
+          }
+          else if (i == j) {
+            if (count) U_val[U_loc-1] = *it;
+          }
+          else {
+            if (count) { U_val[U_loc] = *it; U_ind[U_loc] = j; }
+            U_loc++;
+          }
+        }
+        L_ptr[i+1] = L_loc; U_ptr[i+1] = U_loc;
+      }
+    }
+    
+    if (A(0,0) == T(0)) {
+      U_val[U_ptr[0]] = T(1);
+      GMM_WARNING2("pivot 0 is too small");
+    }
+
+    size_type qn, pn, rn;
+    for (i = 1; i < n; i++) {
+
+      pn = U_ptr[i];
+      if (gmm::abs(U_val[pn]) <= max_pivot) {
+        U_val[pn] = T(1);
+        GMM_WARNING2("pivot " << i << " is too small");
+      }
+      max_pivot = std::max(max_pivot,
+                           std::min(gmm::abs(U_val[pn]) * prec, R(1)));
+
+      for (j = L_ptr[i]; j < L_ptr[i+1]; j++) {
+        pn = U_ptr[L_ind[j]];
+
+        T multiplier = (L_val[j] /= U_val[pn]);
+
+        qn = j + 1;
+        rn = U_ptr[i];
+
+        // Entries of the pivot row left of column i update row i of L ...
+        for (pn++; pn < U_ptr[L_ind[j]+1] && U_ind[pn] < i; pn++) {
+          while (qn < L_ptr[i+1] && L_ind[qn] < U_ind[pn])
+            qn++;
+          if (qn < L_ptr[i+1] && U_ind[pn] == L_ind[qn])
+            L_val[qn] -= multiplier * U_val[pn];
+        }
+        // ... and the remaining ones update row i of U.
+        for (; pn < U_ptr[L_ind[j]+1]; pn++) {
+          while (rn < U_ptr[i+1] && U_ind[rn] < U_ind[pn])
+            rn++;
+          if (rn < U_ptr[i+1] && U_ind[pn] == U_ind[rn])
+            U_val[rn] -= multiplier * U_val[pn];
+        }
+      }
+    }
+
+    // data() rather than &v[0]: L_val and L_ind are empty when A has no
+    // strictly lower entries, and indexing an empty vector is undefined.
+    L = tm_type(L_val.data(), L_ind.data(), L_ptr.data(), n, mat_ncols(A));
+    U = tm_type(U_val.data(), U_ind.data(), U_ptr.data(), n, mat_ncols(A));
+  }
+  
+  /** Column-major overload: factorizes gmm::transposed(A) with the
+      row-major implementation, then sets `invert` so the mult functions
+      use the transposed factors. */
+  template <typename Matrix>
+  void ilu_precond<Matrix>::do_ilu(const Matrix& A, col_major)
+  {
+    do_ilu(gmm::transposed(A), row_major());
+    invert = true;
+  }
+
+  /** Apply the ilu preconditioner to v1, result in v2. Without `invert`
+      the solves use P.L then P.U; with `invert` they use the transposed
+      P.U then the transposed P.L. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+      return;
+    }
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+  }
+
+  /** Transposed application of the ilu preconditioner: the mirror image
+      of mult(), i.e. with `invert` the solves use P.L then P.U, and
+      without it the transposed P.U then the transposed P.L. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ilu_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    gmm::copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    gmm::lower_tri_solve(P.L, v2, true);
+    gmm::upper_tri_solve(P.U, v2, false);
+  }
+
+  /** left_mult for ilu: copies v1 into v2 and performs the lower
+      triangular solve only (P.L, or the transposed P.U when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(P.L, v2, true);
+    } else {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    }
+  }
+
+  /** right_mult for ilu: copies v1 into v2 and performs the upper
+      triangular solve only (P.U, or the transposed P.L when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ilu_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::upper_tri_solve(P.U, v2, false);
+    } else {
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+  }
+
+  /** transposed_left_mult for ilu: copies v1 into v2 and performs one
+      upper triangular solve (the transposed P.L, or P.U when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ilu_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    } else {
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+  }
+
+  /** transposed_right_mult for ilu: copies v1 into v2 and performs one
+      lower triangular solve (the transposed P.U, or P.L when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ilu_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    } else {
+      gmm::lower_tri_solve(P.L, v2, true);
+    }
+  }
+
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_ilut.h
+++ b/gmm/gmm_precond_ilut.h
@ -0,0 +1,263 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of ilut.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+#ifndef GMM_PRECOND_ILUT_H
+#define GMM_PRECOND_ILUT_H
+
+/**@file gmm_precond_ilut.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>, Lie-Quan Lee <llee@osl.iu.edu>
+   @date June 5, 2003.
+   @brief ILUT:  Incomplete LU with threshold and K fill-in Preconditioner.
+*/
+
+/*
+  Performance comparison of SSOR, ILU and ILUT on the sherman5 matrix of
+  the Harwell-Boeing collection, on a Sun Ultra 30 UPA/PCI (UltraSPARC-II 296MHz)
+  Preconditioner & Factorization time  &  Number of Iterations \\ \hline
+  SSOR        &   0.010577  & 41 \\
+  ILU         &   0.019336  & 32 \\
+  ILUT with 0 fill-in and threshold of 1.0e-6 & 0.343612 &  23 \\
+  ILUT with 5 fill-in and threshold of 1.0e-6 & 0.343612 &  18 \\ \hline
+*/
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Ordering functor for rsvector elements: an element comes first when
+      the magnitude of its value is strictly larger, so sorting with it
+      yields decreasing magnitudes. */
+  template<typename T> struct elt_rsvector_value_less_ {
+    inline bool operator()(const elt_rsvector_<T>& lhs,
+			   const elt_rsvector_<T>& rhs) const {
+      return gmm::abs(rhs.e) < gmm::abs(lhs.e);
+    }
+  };
+
+  /** Incomplete LU with threshold and K fill-in Preconditioner.
+
+  The algorithm of ILUT(A, 0, 1.0e-6) is slower than ILU(A). If no
+  fill-in is allowed, you can use ILU instead of ILUT.
+
+  Notes: The idea behind a concrete preconditioner such as ilut is to
+  create a preconditioner object to be used in iterative methods.
+
+  `invert` records that the factorization was computed on the transposed
+  matrix (column-major input); the mult functions use it to pick the
+  matching triangular solves. K bounds the number of entries kept per
+  row of L and of U, and eps is the relative dropping threshold.
+  */
+  template <typename Matrix>
+  class ilut_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef wsvector<value_type> _wsvector;
+    typedef rsvector<value_type> _rsvector;
+    typedef row_matrix<_rsvector> LU_Matrix;
+
+    bool invert;
+    LU_Matrix L, U;
+
+  protected:
+    size_type K;
+    double eps;    
+
+    template<typename M> void do_ilut(const M&, row_major);
+    void do_ilut(const Matrix&, col_major);
+
+  public:
+    /** (Re)build the factorization for A. A negative k_ or eps_ keeps the
+        value of K or eps currently stored in the object. */
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) {
+      if (k_ >= 0) K = k_;
+      if (eps_ >= double(0)) eps = eps_;
+      invert = false;
+      gmm::resize(L, mat_nrows(A), mat_ncols(A));
+      gmm::resize(U, mat_nrows(A), mat_ncols(A));
+      do_ilut(A, typename principal_orientation_type<typename
+	      linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    /** Build immediately from A with fill-in k_ and threshold eps_. */
+    ilut_precond(const Matrix& A, int k_, double eps_) 
+      : invert(false),
+	L(mat_nrows(A), mat_ncols(A)), U(mat_nrows(A), mat_ncols(A)),
+	K(k_), eps(eps_) { build_with(A); }
+    /** Store the parameters only; `invert` gets a defined value so it is
+        never read uninitialized before build_with() is called. */
+    ilut_precond(size_type k_, double eps_)
+      : invert(false), K(k_), eps(eps_) {}
+    /** Defaults: K = 10, eps = 1E-7; nothing is factorized yet. */
+    ilut_precond(void) : invert(false), K(10), eps(1E-7) {}
+    /** Size of the object plus the stored values of L and U. */
+    size_type memsize() const { 
+      return sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type);
+    }
+  };
+
+  /** Row-major incomplete LU factorization with threshold and fill-in.
+
+      For each row i of A the row is copied into a work vector, combined
+      with the already computed rows of U for every stored column k < i,
+      cleaned of small entries, sorted by decreasing magnitude, and split
+      into row i of L (columns < i) and row i of U (columns >= i).
+      Returns immediately when A has no rows.
+
+      @param A matrix to factorize, read row by row. */
+  template<typename Matrix> template<typename M> 
+  void ilut_precond<Matrix>::do_ilut(const M& A, row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    std::vector<T> indiag(n);   // reciprocal of the pivot of each row
+    _wsvector w(mat_ncols(A));  // work copy of the current row
+    _rsvector ww(mat_ncols(A)), wL(mat_ncols(A)), wU(mat_ncols(A));
+    T tmp;
+    gmm::clear(U); gmm::clear(L);
+    R prec = default_tol(R()); 
+    // max_pivot is the running threshold below which a pivot is rejected.
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+
+    for (size_type i = 0; i < n; ++i) {
+      gmm::copy(mat_const_row(A, i), w);
+      double norm_row = gmm::vect_norm2(w);
+
+      // Visit the stored entries of w with column k < i. w is modified
+      // inside the loop (erase / add), so wk is re-derived from wkold on
+      // every pass instead of being incremented directly.
+      // NOTE(review): wkold appears to track the last entry that was
+      // kept -- confirm against wsvector's iterator guarantees.
+      typename _wsvector::iterator wkold = w.end();
+      for (typename _wsvector::iterator wk = w.begin();
+	   wk != w.end() && wk->first < i; ) {
+	size_type k = wk->first;
+	tmp = (wk->second) * indiag[k];
+	// Below the relative threshold the entry is erased; otherwise row
+	// k of U, scaled by -tmp, is added to the work row.
+	if (gmm::abs(tmp) < eps * norm_row) w.erase(k);
+	else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); }
+	if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; }
+	if (wk != w.end() && wk->first == k)
+	  { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; }
+      }
+      tmp = w[i];   // candidate pivot of row i
+
+      // A pivot not above the running threshold is replaced by 1, both in
+      // tmp and in the work row.
+      if (gmm::abs(tmp) <= max_pivot) {
+	GMM_WARNING2("pivot " << i << " too small. try with ilutp ?");
+	w[i] = tmp = T(1);
+      }
+
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1)));
+      indiag[i] = T(1) / tmp;
+      gmm::clean(w, eps * norm_row);
+      gmm::copy(w, ww);
+      // elt_rsvector_value_less_ orders by decreasing magnitude, so the
+      // largest entries are the ones kept by the selection below.
+      std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_<T>());
+      typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end();
+
+      size_type nnl = 0, nnu = 0;    
+      // Room for K lower entries, and K upper entries plus the diagonal.
+      wL.base_resize(K); wU.base_resize(K+1);
+      typename _rsvector::iterator witL = wL.begin(), witU = wU.begin();
+      // Columns < i go to wL while fewer than K were taken; columns >= i
+      // go to wU while fewer than K were taken, and the diagonal entry is
+      // taken regardless of that count.
+      for (; wit != wite; ++wit) 
+	if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } }
+	else { if (nnu < K  || wit->c == i) { *witU++ = *wit; ++nnu; } }
+      wL.base_resize(nnl); wU.base_resize(nnu);
+      // Re-sort with the elements' default ordering (presumably by column
+      // index -- confirm in elt_rsvector_) before storing the rows.
+      std::sort(wL.begin(), wL.end());
+      std::sort(wU.begin(), wU.end());
+      gmm::copy(wL, L.row(i));
+      gmm::copy(wU, U.row(i));
+    }
+
+  }
+
+  /** Column-major overload: factorizes gmm::transposed(A) with the
+      row-major implementation, then sets `invert` so the mult functions
+      use the transposed factors. */
+  template<typename Matrix>
+  void ilut_precond<Matrix>::do_ilut(const Matrix& A, col_major)
+  {
+    do_ilut(gmm::transposed(A), row_major());
+    invert = true;
+  }
+
+  /** Apply the ilut preconditioner to v1, result in v2. Without `invert`
+      the solves use P.L then P.U; with `invert` they use the transposed
+      P.U then the transposed P.L. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    gmm::copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(P.L, v2, true);
+      gmm::upper_tri_solve(P.U, v2, false);
+      return;
+    }
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+  }
+
+  /** Transposed application of the ilut preconditioner: the mirror image
+      of mult(), i.e. with `invert` the solves use P.L then P.U, and
+      without it the transposed P.U then the transposed P.L. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ilut_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    gmm::copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    gmm::lower_tri_solve(P.L, v2, true);
+    gmm::upper_tri_solve(P.U, v2, false);
+  }
+
+  /** left_mult for ilut: copies v1 into v2 and performs the lower
+      triangular solve only (P.L, or the transposed P.U when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(P.L, v2, true);
+    } else {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    }
+  }
+
+  /** right_mult for ilut: copies v1 into v2 and performs the upper
+      triangular solve only (P.U, or the transposed P.L when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ilut_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::upper_tri_solve(P.U, v2, false);
+    } else {
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    }
+  }
+
+  /** transposed_left_mult for ilut: copies v1 into v2 and performs one
+      upper triangular solve (the transposed P.L, or P.U when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ilut_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+    } else {
+      gmm::upper_tri_solve(P.U, v2, false);
+    }
+  }
+
+  /** transposed_right_mult for ilut: copies v1 into v2 and performs one
+      lower triangular solve (the transposed P.U, or P.L when `invert`). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ilut_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    copy(v1, v2);
+    if (!P.invert) {
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    } else {
+      gmm::lower_tri_solve(P.L, v2, true);
+    }
+  }
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_ilutp.h
+++ b/gmm/gmm_precond_ilutp.h
@ -0,0 +1,284 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_precond_ilutp.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 14, 2004.
+   @brief ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and
+   column pivoting.
+
+   
+*/
+#ifndef GMM_PRECOND_ILUTP_H
+#define GMM_PRECOND_ILUTP_H
+
+#include "gmm_precond_ilut.h"
+
+namespace gmm {
+
+  /**
+     ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and
+     column pivoting.
+   
+     See Yousef Saad, Iterative Methods for
+     sparse linear systems, PWS Publishing Company, section 10.4.4
+
+     The incomplete factors are kept in the public members L and U, the
+     column permutation in indperm and its inverse in indperminv. The object
+     can be default-constructed (or constructed from the parameters only) and
+     filled later with build_with().
+
+      TODO : store the permutation by cycles to avoid the temporary vector
+  */
+  template <typename Matrix>
+  class ilutp_precond  {
+  public :
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef wsvector<value_type> _wsvector;
+    typedef rsvector<value_type> _rsvector;
+    typedef row_matrix<_rsvector> LU_Matrix;
+    typedef col_matrix<_wsvector> CLU_Matrix;
+
+    // false after a row-major factorization, true when the factors were
+    // computed from gmm::transposed(A) (column-major matrices). The free
+    // mult functions branch on it.
+    bool invert;
+    LU_Matrix L, U;
+    gmm::unsorted_sub_index indperm;      // column permutation
+    gmm::unsorted_sub_index indperminv;   // inverse of indperm
+    // Scratch vector used by the mult functions; mutable because they only
+    // receive a const reference to the preconditioner.
+    mutable std::vector<value_type> temporary;
+
+  protected:
+    size_type K;   // cap on the entries kept per row of L and per row of U
+    double eps;    // drop tolerance, relative to the 2-norm of the row
+
+    template<typename M> void do_ilutp(const M&, row_major);
+    void do_ilutp(const Matrix&, col_major);
+
+  public:
+    /** (Re)builds the factorization of A.
+        @param k_   new value for K; a negative value keeps the current one.
+        @param eps_ new value for eps; a negative value keeps the current one.
+    */
+    void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) {
+      if (k_ >= 0) K = k_;
+      if (eps_ >= double(0)) eps = eps_;
+      invert = false;
+      gmm::resize(L, mat_nrows(A), mat_ncols(A));
+      gmm::resize(U, mat_nrows(A), mat_ncols(A));
+      do_ilutp(A, typename principal_orientation_type<typename
+	      linalg_traits<Matrix>::sub_orientation>::potype());
+    }
+    // Every constructor initializes `invert`: it used to be left
+    // indeterminate until build_with() ran, which made copying or applying a
+    // not-yet-built preconditioner read an uninitialized bool.
+    ilutp_precond(const Matrix& A, size_type k_, double eps_) 
+      : invert(false),
+	L(mat_nrows(A), mat_ncols(A)), U(mat_nrows(A), mat_ncols(A)),
+	K(k_), eps(eps_) { build_with(A); }
+    ilutp_precond(int k_, double eps_) : invert(false), K(k_), eps(eps_) {}
+    ilutp_precond(void) : invert(false), K(10), eps(1E-7) {}
+    /** Size of the object plus nnz(U)+nnz(L) stored values, in bytes. */
+    size_type memsize() const { 
+      return sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type);
+    }
+  };
+
+
+  /** Row-oriented ILUTP factorization of A.
+      Fills the members L and U row by row and records the column pivoting in
+      indperm / indperminv. For every row i: the (permuted) row is eliminated
+      against the previous rows of U, small entries are dropped relative to
+      eps * ||row i||_2, a pivot is chosen among the entries of column >= i,
+      and at most K entries are kept in L[i] and in U[i] (the entry in column
+      i is always kept in U[i]). Returns immediately for an empty matrix.
+  */
+  template<typename Matrix> template<typename M> 
+  void ilutp_precond<Matrix>::do_ilutp(const M& A, row_major) {
+    typedef value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type n = mat_nrows(A);
+    // Column-wise record of the entries written to U; used below to find the
+    // rows of U affected by a column swap.
+    CLU_Matrix CU(n,n);
+    if (n == 0) return;
+    std::vector<T> indiag(n);   // indiag[i] = 1 / pivot selected for row i
+    temporary.resize(n);
+    // Both permutations start as the identity.
+    std::vector<size_type> ipvt(n), ipvtinv(n);
+    for (size_type i = 0; i < n; ++i) ipvt[i] = ipvtinv[i] = i;
+    indperm = unsorted_sub_index(ipvt);
+    indperminv = unsorted_sub_index(ipvtinv);
+    _wsvector w(mat_ncols(A));   // working row
+    _rsvector ww(mat_ncols(A));  // copy of w that gets sorted for selection
+    
+    T tmp = T(0);
+    gmm::clear(L); gmm::clear(U);
+    R prec = default_tol(R()); 
+    // Pivots whose magnitude does not exceed max_pivot are rejected.
+    R max_pivot = gmm::abs(A(0,0)) * prec;
+
+    for (size_type i = 0; i < n; ++i) {
+
+      // Row i of A with its columns reordered by the current permutation.
+      copy(sub_vector(mat_const_row(A, i), indperm), w);
+      double norm_row = gmm::vect_norm2(mat_const_row(A, i)); 
+
+      // Elimination over the entries of w whose column index is below i.
+      // wkold tracks the last entry already processed (w.end() = none yet).
+      typename _wsvector::iterator wkold = w.end();
+      for (typename _wsvector::iterator wk = w.begin();
+	   wk != w.end() && wk->first < i; )  {
+	size_type k = wk->first;
+	tmp = (wk->second) * indiag[k];
+	// Drop the multiplier if it is small relative to the row norm;
+	// otherwise subtract tmp times row k of U from w.
+	// NOTE(review): the preceding "+= tmp" presumably leaves the
+	// multiplier itself at position k once the diagonal term of U row k
+	// cancels the original value -- confirm.
+	if (gmm::abs(tmp) < eps * norm_row) w.erase(k); 
+	else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); }
+	// w was modified above, so the iterator is re-derived from wkold
+	// instead of being incremented directly.
+	if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; }
+	// If entry k is still present, step past it and remember it.
+	if (wk != w.end() && wk->first == k)
+	  { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; }
+      }
+
+      // Remove the entries below the drop tolerance.
+      gmm::clean(w, eps * norm_row);
+      gmm::copy(w, ww);
+
+      // Order the entries with elt_rsvector_value_less_ (presumably by
+      // decreasing magnitude -- confirm in gmm_vector.h), so that the first
+      // entry found with a column index >= i becomes the pivot.
+      std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_<T>());
+      typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end();
+      size_type ip = size_type(-1);
+
+      for (; wit != wite; ++wit)
+	if (wit->c >= i) { ip = wit->c; tmp = wit->e; break; }
+      // No candidate, or candidate too small: warn and use 1 on the diagonal.
+      if (ip == size_type(-1) || gmm::abs(tmp) <= max_pivot)
+	{ GMM_WARNING2("pivot " << i << " too small"); ip=i; ww[i]=tmp=T(1); }
+      max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1)));
+      indiag[i] = T(1) / tmp;
+      wit = ww.begin();
+
+      // Split ww into L[i] (columns < i, at most K entries) and U[i]
+      // (columns >= i, at most K entries; column i is kept regardless).
+      // Each entry stored in U is mirrored in CU.
+      size_type nnl = 0, nnu = 0;
+      L[i].base_resize(K); U[i].base_resize(K+1);
+      typename _rsvector::iterator witL = L[i].begin(), witU = U[i].begin();
+      for (; wit != wite; ++wit) {
+	if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } }
+	else if (nnu < K || wit->c == i)
+	  { CU(i, wit->c) = wit->e; *witU++ = *wit; ++nnu; }
+      }
+      // Shrink to the number of entries actually stored, then re-sort the
+      // two rows with the elements' default ordering.
+      L[i].base_resize(nnl); U[i].base_resize(nnu);
+      std::sort(L[i].begin(), L[i].end());
+      std::sort(U[i].begin(), U[i].end());
+
+      // Column pivoting: exchange columns i and ip in every row of U that
+      // has an entry in either column. The two CU columns are walked in
+      // parallel so that a row present in both is swapped only once.
+      if (ip != i) {
+	typename _wsvector::const_iterator iti = CU.col(i).begin();
+	typename _wsvector::const_iterator itie = CU.col(i).end();
+	typename _wsvector::const_iterator itp = CU.col(ip).begin();
+	typename _wsvector::const_iterator itpe = CU.col(ip).end();
+	
+	while (iti != itie && itp != itpe) {
+	  if (iti->first < itp->first)
+	    { U.row(iti->first).swap_indices(i, ip); ++iti; }
+	  else if (iti->first > itp->first)
+	    { U.row(itp->first).swap_indices(i,ip);++itp; }
+	  else
+	    { U.row(iti->first).swap_indices(i, ip); ++iti; ++itp; }
+	}
+	
+	for( ; iti != itie; ++iti) U.row(iti->first).swap_indices(i, ip);
+	for( ; itp != itpe; ++itp) U.row(itp->first).swap_indices(i, ip);
+
+	CU.swap_col(i, ip);
+	
+	// Keep the permutation, its inverse and the bookkeeping vectors in
+	// step with the swap.
+	indperm.swap(i, ip);
+	indperminv.swap(ipvt[i], ipvt[ip]);
+	std::swap(ipvtinv[ipvt[i]], ipvtinv[ipvt[ip]]);
+	std::swap(ipvt[i], ipvt[ip]);
+      }
+    }
+  }
+
+  /** Column-major entry point: factorizes the transposed matrix with the
+      row-major routine, then sets `invert` so that the mult functions use the
+      factors accordingly. */
+  template<typename Matrix> 
+  void ilutp_precond<Matrix>::do_ilutp(const Matrix& A, col_major) {
+    this->do_ilutp(gmm::transposed(A), row_major());
+    this->invert = true;
+  }
+
+  /** Applies the ILUTP preconditioner P to v1 and stores the result in v2.
+      When P.invert is set, v1 is first permuted with P.indperm and the solves
+      use the transposed factors in place on v2. Otherwise the solves run on
+      P.temporary with L then U, and the result is permuted with P.indperminv
+      while being copied to v2. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (!P.invert) {
+      gmm::copy(v1, P.temporary);
+      gmm::lower_tri_solve(P.L, P.temporary, true);
+      gmm::upper_tri_solve(P.U, P.temporary, false);
+      gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+      return;
+    }
+    gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+    gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+  }
+
+  /** Transposed application of the ILUTP preconditioner P to v1, result in
+      v2. It runs the same two code paths as mult(), selected by the opposite
+      value of P.invert. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const ilutp_precond<Matrix>& P,const V1 &v1,V2 &v2) {
+    if (!P.invert) {
+      gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    gmm::copy(v1, P.temporary);
+    gmm::lower_tri_solve(P.L, P.temporary, true);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+
+  /** Left part of the ILUTP preconditioner, result in v2.
+      When P.invert is set, v1 is permuted with P.indperm and solved in place
+      with transposed(P.U); otherwise v1 is copied as is and solved with
+      P.L. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void left_mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (!P.invert) {
+      copy(v1, v2);
+      gmm::lower_tri_solve(P.L, v2, true);
+      return;
+    }
+    gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+    gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+  }
+
+  /** Right part of the ILUTP preconditioner, result in v2.
+      When P.invert is set, v1 is copied to v2 and solved in place with
+      transposed(P.L); otherwise the solve with P.U runs on P.temporary and
+      the result is permuted with P.indperminv while being copied to v2. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void right_mult(const ilutp_precond<Matrix>& P, const V1 &v1, V2 &v2) {
+    if (!P.invert) {
+      copy(v1, P.temporary);
+      gmm::upper_tri_solve(P.U, P.temporary, false);
+      gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+      return;
+    }
+    copy(v1, v2);
+    gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+  }
+
+  /** Transposed counterpart of left_mult for the ILUTP preconditioner.
+      When P.invert is set, the solve with P.U runs on P.temporary and the
+      result is permuted with P.indperminv while being copied to v2; otherwise
+      v1 is copied to v2 and solved in place with transposed(P.L). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_left_mult(const ilutp_precond<Matrix>& P, const V1 &v1,
+			    V2 &v2) {
+    if (!P.invert) {
+      copy(v1, v2);
+      gmm::upper_tri_solve(gmm::transposed(P.L), v2, true);
+      return;
+    }
+    copy(v1, P.temporary);
+    gmm::upper_tri_solve(P.U, P.temporary, false);
+    gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2);
+  }
+  
+  /** Transposed counterpart of right_mult for the ILUTP preconditioner.
+      When P.invert is set, v1 is copied to v2 and solved in place with P.L;
+      otherwise v1 is permuted with P.indperm and solved in place with
+      transposed(P.U). */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_right_mult(const ilutp_precond<Matrix>& P, const V1 &v1,
+			     V2 &v2) {
+    if (!P.invert) {
+      gmm::copy(gmm::sub_vector(v1, P.indperm), v2);
+      gmm::lower_tri_solve(gmm::transposed(P.U), v2, false);
+      return;
+    }
+    copy(v1, v2);
+    gmm::lower_tri_solve(P.L, v2, true);
+  }
+
+}
+
+#endif 
+
--- a/gmm/gmm_precond_mr_approx_inverse.h
+++ b/gmm/gmm_precond_mr_approx_inverse.h
@ -0,0 +1,149 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+
+// This file is a modified version of approximate_inverse.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_precond_mr_approx_inverse.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date June 5, 2003.
+   @brief Approximate inverse via MR iteration.
+*/
+
+#ifndef GMM_PRECOND_MR_APPROX_INVERSE_H
+#define GMM_PRECOND_MR_APPROX_INVERSE_H
+
+
+#include "gmm_precond.h"
+
+namespace gmm {
+
+  /** Approximate inverse via MR iteration (see P301 of Saad book).
+      The approximate inverse is stored column by column in M. nb_it is the
+      number of MR iterations run per column and threshold the relative
+      tolerance used to clean each column (see build_with).
+   */
+  template <typename Matrix>
+  struct mr_approx_inverse_precond {
+
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type magnitude_type;
+    typedef typename principal_orientation_type<typename
+      linalg_traits<Matrix>::sub_orientation>::potype sub_orientation;
+    typedef wsvector<value_type> VVector;
+    typedef col_matrix<VVector> MMatrix;
+
+    MMatrix M;
+    size_type nb_it;
+    magnitude_type threshold;
+
+    /// Computes M from A (defined out of line).
+    void build_with(const Matrix& A);
+
+    /// Default parameters: 5 iterations, threshold 1E-7; M is left empty.
+    mr_approx_inverse_precond(void)
+      : nb_it(5), threshold(magnitude_type(1E-7)) {}
+
+    /// Stores the parameters only; call build_with() afterwards.
+    mr_approx_inverse_precond(size_type nb_it_, magnitude_type threshold_)
+      : nb_it(nb_it_), threshold(threshold_) {}
+
+    /// Stores the parameters and builds the approximate inverse of A.
+    mr_approx_inverse_precond(const Matrix& A, size_type nb_it_,
+			      magnitude_type threshold_)
+      : M(mat_nrows(A), mat_ncols(A)), nb_it(nb_it_), threshold(threshold_)
+    { build_with(A); }
+
+    const MMatrix &approx_inverse(void) const { return M; }
+  };
+
+  /** Applies the preconditioner: v2 = P.M * v1. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void mult(const mr_approx_inverse_precond<Matrix>& P, const V1 &v1, V2 &v2)
+  {
+    mult(P.M, v1, v2);
+  }
+
+  /** Transposed application: multiplies v1 by gmm::conjugated(P.M) and stores
+      the result in v2. */
+  template <typename Matrix, typename V1, typename V2> inline
+  void transposed_mult(const mr_approx_inverse_precond<Matrix>& P,
+		       const V1 &v1,V2 &v2)
+  {
+    mult(gmm::conjugated(P.M), v1, v2);
+  }
+
+  /** Builds the approximate inverse M of A column by column.
+      Column i starts from alpha * e_i, with alpha = trace(A) / ||A||_F^2
+      (1 when that ratio is zero or when A is entirely zero), and is refined
+      by nb_it minimal residual steps:
+        r = e_i - A m,   m += (<r, A r> / <A r, A r>) r,
+      with entries below threshold * ||m||_2 removed after each step. A column
+      that ends up empty is reset to alpha * e_i.
+      The loop runs over mat_nrows(A) with work vectors sized by
+      mat_ncols(A), i.e. A is expected to be square.
+  */
+  template <typename Matrix>
+  void mr_approx_inverse_precond<Matrix>::build_with(const Matrix& A) {
+    gmm::resize(M, mat_nrows(A), mat_ncols(A));
+    typedef value_type T;
+    typedef magnitude_type R;
+    VVector m(mat_ncols(A)),r(mat_ncols(A)),ei(mat_ncols(A)),Ar(mat_ncols(A));
+
+    // Initial scaling. Guard the division: for an all-zero matrix the ratio
+    // would be 0/0 = NaN, which the "alpha == 0" fallback does not catch.
+    const R norm_sqr = mat_euclidean_norm_sqr(A);
+    T alpha = T(1);
+    if (norm_sqr != R(0)) alpha = mat_trace(A) / norm_sqr;
+    if (alpha == T(0)) alpha = T(1);
+
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      gmm::clear(m); gmm::clear(ei);
+      m[i] = alpha;
+      ei[i] = T(1);
+
+      for (size_type j = 0; j < nb_it; ++j) {
+	// r = e_i - A m
+	gmm::mult(A, gmm::scaled(m, T(-1)), r);
+	gmm::add(ei, r);
+	gmm::mult(A, r, Ar);
+	T nAr = vect_sp(Ar,Ar);
+	if (gmm::abs(nAr) > R(0)) {
+	  // Reuse nAr instead of evaluating vect_sp(Ar, Ar) a second time.
+	  gmm::add(gmm::scaled(r, gmm::safe_divide(vect_sp(r, Ar), nAr)), m);
+	  gmm::clean(m, threshold * gmm::vect_norm2(m));
+	} else gmm::clear(m);
+      }
+      // Never store an empty column: fall back to the initial guess.
+      if (gmm::vect_norm2(m) == R(0)) m[i] = alpha;
+      gmm::copy(m, M.col(i));
+    }
+  }
+}
+
+#endif 
+
--- a/gmm/gmm_range_basis.h
+++ b/gmm/gmm_range_basis.h
@ -0,0 +1,499 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2009-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_range_basis.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date March 10, 2009.
+   @brief Extract a basis of the range of a (large sparse) matrix from the
+          columns of this matrix.
+*/
+#ifndef GMM_RANGE_BASIS_H
+#define GMM_RANGE_BASIS_H
+#include "gmm_dense_qr.h"
+#include "gmm_dense_lu.h"
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include <set>
+#include <list>
+
+
+namespace gmm {
+
+
+  /** QR iteration on a tridiagonal matrix given by its diagonal `diag` and
+      its off-diagonal `sdiag`.
+      Both are taken by value and serve as working storage. On return eigval_
+      receives a copy of the final diagonal and, if compvect is true,
+      eigvect_ has been initialized to the identity and passed through the
+      QR steps.
+      eigval_ and eigvect_ are declared const but are written to through
+      const_cast. Asserts if the number of QR steps reaches 100 * n.
+  */
+  template <typename T, typename VECT, typename MAT1>
+  void tridiag_qr_algorithm
+  (std::vector<typename number_traits<T>::magnitude_type> diag,
+   std::vector<T> sdiag, const VECT &eigval_, const MAT1 &eigvect_,
+   bool compvect, tol_type_for_qr tol = default_tol_for_qr) {
+    VECT &eigval = const_cast<VECT &>(eigval_);
+    MAT1 &eigvect = const_cast<MAT1 &>(eigvect_);
+    typedef typename number_traits<T>::magnitude_type R;
+
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+
+    size_type n = diag.size(), q = 0, p, ite = 0;
+    // Trivial sizes: nothing to do, or the single diagonal entry is the
+    // eigenvalue.
+    if (n == 0) return;
+    if (n == 1) { eigval[0] = gmm::real(diag[0]); return; }
+
+    // The stop criterion updates p and q; the loop below works on the
+    // index range [p, n-q) and ends once q reaches n.
+    symmetric_qr_stop_criterion(diag, sdiag, p, q, tol);
+
+    while (q < n) {
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q);
+      // Without eigenvectors the sub-matrix passed to the QR step is empty.
+      if (!compvect) SUBK = sub_interval(0,0);
+
+      symmetric_Wilkinson_qr_step(sub_vector(diag, SUBI),
+                                  sub_vector(sdiag, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      // Inside the loop the criterion is evaluated with 3 * tol.
+      symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3));
+      ++ite;
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed.");
+    }
+
+    gmm::copy(diag, eigval);
+  }
+
+  // Range basis with a restarted Lanczos method.
+  // `columns` holds the indices of the candidate columns of BB and is modified
+  // in place: as long as the restarted Lanczos iteration on rho*I - B^* B
+  // (B = the selected columns, each normalized) finds an eigenvalue close to
+  // rho, i.e. a near-zero eigenvalue of B^* B, the column carrying the
+  // largest component of the corresponding vector is erased. The loop stops
+  // when no such eigenvalue is found or when no column remains.
+  template <typename Mat>
+  void range_basis_eff_Lanczos(const Mat &BB, std::set<size_type> &columns,
+                       double EPS=1E-12) {
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = columns.size(), k;
+    col_matrix< rsvector<T> > B(mat_nrows(BB), mat_ncols(BB));
+
+    // B = selected columns of BB scaled to unit 2-norm (other columns stay
+    // empty).
+    k = 0;
+    for (TAB::iterator it = columns.begin(); it!=columns.end(); ++it, ++k){
+      gmm::copy(scaled(mat_col(BB, *it), T(1)/vect_norm2(mat_col(BB, *it))),
+                mat_col(B, *it));
+    }
+    std::vector<T> w(mat_nrows(B));
+    size_type restart = 120;   // Lanczos steps per restart
+    std::vector<T> sdiag(restart);
+    std::vector<R> eigval(restart), diag(restart);
+    dense_matrix<T> eigvect(restart, restart);
+
+    // rho < 0 means "not estimated yet".
+    R rho = R(-1), rho2;
+    while (nc_r) {
+
+      // Vectors indexed by position in `columns` (k-th selected column).
+      std::vector<T> v(nc_r), v0(nc_r), wl(nc_r);
+      dense_matrix<T> lv(nc_r, restart);   // Lanczos basis, one column per step
+
+      if (rho < R(0)) { // Estimate of the spectral radius of B^* B
+        // 100 power iterations v <- B^* (B v) from a random start; the
+        // resulting quotient is doubled before use.
+        gmm::fill_random(v);
+        for (size_type i = 0; i < 100; ++i) {
+          gmm::scale(v, T(1)/vect_norm2(v));
+          gmm::copy(v, v0);
+          k = 0; gmm::clear(w);
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            add(scaled(mat_col(B, *it), v[k]), w);
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            v[k] = vect_hp(w, mat_col(B, *it));
+          rho = gmm::abs(vect_hp(v, v0) / vect_hp(v0, v0));
+        }
+        rho *= R(2);
+      }
+
+      // Computing vectors of the null space of B^* B with restarted Lanczos
+      rho2 = 0;
+      gmm::fill_random(v);
+      size_type iter = 0;
+      for(;;++iter) {
+        R rho_old = rho2;
+        R beta = R(0), alpha;
+        gmm::scale(v, T(1)/vect_norm2(v));
+        size_type eff_restart = restart;
+    // A previous pass may have shrunk the work arrays (early breakdown);
+    // restore them to the full restart size.
+    if (sdiag.size() != restart) {
+      sdiag.resize(restart); eigval.resize(restart); diag.resize(restart); gmm::resize(eigvect, restart, restart);
+      gmm::resize(lv, nc_r, restart);
+    }
+
+        for (size_type i = 0; i < restart; ++i) { // Lanczos iterations
+          gmm::copy(v, mat_col(lv, i));
+          // w = B v
+          gmm::clear(w);
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            add(scaled(mat_col(B, *it), v[k]), w);
+
+          // wl = (rho*I - B^* B) v - beta * v0
+          k = 0;
+          for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k)
+            wl[k] = v[k]*rho - vect_hp(w, mat_col(B, *it)) - beta*v0[k];
+          alpha = gmm::real(vect_hp(wl, v));
+          diag[i] = alpha;
+          gmm::add(gmm::scaled(v, -alpha), wl);
+          sdiag[i] = beta = vect_norm2(wl);
+          gmm::copy(v, v0);
+      // Breakdown: the new direction is numerically zero, so only the first
+      // i+1 steps are used.
+      if (beta < EPS) { eff_restart = i+1; break; }
+      gmm::copy(gmm::scaled(wl, T(1) / beta), v);
+    }
+    if (eff_restart != restart) {
+      sdiag.resize(eff_restart); eigval.resize(eff_restart); diag.resize(eff_restart);
+      gmm::resize(eigvect, eff_restart, eff_restart); gmm::resize(lv, nc_r, eff_restart);
+    }
+        tridiag_qr_algorithm(diag, sdiag, eigval, eigvect, true);
+
+        // Eigenvalue of largest magnitude of the tridiagonal matrix.
+        size_type num = size_type(-1);
+        rho2 = R(0);
+        for (size_type j = 0; j < eff_restart; ++j)
+          { R nvp=gmm::abs(eigval[j]); if (nvp > rho2) { rho2=nvp; num=j; }}
+
+        GMM_ASSERT1(num != size_type(-1), "Internal error");
+
+        // Restart vector: v = lv * (selected eigenvector).
+        gmm::mult(lv, mat_col(eigvect, num), v);
+
+        // Stop when rho2 no longer changes or is already close to rho.
+        if (gmm::abs(rho2-rho_old) < rho_old*R(EPS)) break;
+        // if (gmm::abs(rho-rho2) < rho*R(gmm::sqrt(EPS))) break;
+        if (gmm::abs(rho-rho2) < rho*R(EPS)*R(100)) break;
+      }
+
+      // rho2 close to rho: drop the column with the largest component in v
+      // and start over; otherwise the remaining columns are kept.
+      if (gmm::abs(rho-rho2) < rho*R(EPS*10.)) {
+        size_type j_max = size_type(-1), j = 0;
+        R val_max = R(0);
+        for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++j)
+          if (gmm::abs(v[j]) > val_max)
+            { val_max = gmm::abs(v[j]); j_max = *it; }
+        columns.erase(j_max); nc_r = columns.size();
+      }
+      else break;
+    }
+  }
+
+  // Range basis with LU decomposition. Not stable from a numerical viewpoint.
+  // Complex version not verified
+  // `columns` is the set of candidate column indices of B; c_ortho flags
+  // columns that are never removed here. Flagged and unflagged columns are
+  // scaled separately, a matrix M is formed from the unflagged ones with the
+  // contribution of the flagged ones subtracted, and M is LU-factorized.
+  // An unflagged column is erased from `columns` when its diagonal entry in
+  // the factorized M is at most EPS times the largest diagonal entry.
+  template <typename Mat>
+  void range_basis_eff_lu(const Mat &B, std::set<size_type> &columns,
+                          std::vector<bool> &c_ortho, double EPS) {
+
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = 0, nc_o = 0, nc = mat_ncols(B), nr = mat_nrows(B), i, j;
+
+    // nc_r: selected columns not flagged in c_ortho; nc_o: flagged ones.
+    for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it)
+      if (!(c_ortho[*it])) ++nc_r; else nc_o++;
+
+    if (nc_r > 0) {
+
+      // Hr / Ho pick and scale the unflagged / flagged columns:
+      // BBr = B * Hr and BBo = B * Ho.
+      gmm::row_matrix< gmm::rsvector<T> > Hr(nc, nc_r), Ho(nc, nc_o);
+      gmm::row_matrix< gmm::rsvector<T> > BBr(nr, nc_r), BBo(nr, nc_o);
+
+      // Unflagged columns are scaled by their inf-norm, flagged ones by
+      // their 2-norm.
+      i = j = 0;
+      for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it)
+        if (!(c_ortho[*it]))
+          { Hr(*it, i) = T(1)/ vect_norminf(mat_col(B, *it)); ++i; }
+        else
+          { Ho(*it, j) = T(1)/ vect_norm2(mat_col(B, *it)); ++j; }
+
+      gmm::mult(B, Hr, BBr);
+      gmm::mult(B, Ho, BBo);
+      // M = BBr^* BBr - BBB BBB^*  with  BBB = BBr^* BBo
+      gmm::dense_matrix<T> M(nc_r, nc_r), BBB(nc_r, nc_o), MM(nc_r, nc_r);
+      gmm::mult(gmm::conjugated(BBr), BBr, M);
+      gmm::mult(gmm::conjugated(BBr), BBo, BBB);
+      gmm::mult(BBB, gmm::conjugated(BBB), MM);
+      gmm::add(gmm::scaled(MM, T(-1)), M);
+
+      std::vector<int> ipvt(nc_r);
+      gmm::lu_factor(M, ipvt);
+
+      // Largest diagonal magnitude of the factorized matrix.
+      R emax = R(0);
+      for (i = 0; i < nc_r; ++i) emax = std::max(emax, gmm::abs(M(i,i)));
+
+      // Iterate over a copy so that `columns` can be modified; i counts the
+      // unflagged columns in the same order used to fill Hr.
+      i = 0;
+      std::set<size_type> c = columns;
+      for (TAB::iterator it = c.begin(); it != c.end(); ++it)
+        if (!(c_ortho[*it])) {
+          if (gmm::abs(M(i,i)) <= R(EPS)*emax) columns.erase(*it);
+          ++i;
+        }
+    }
+  }
+
+
+  // Range basis with Gram-Schmidt orthogonalization (sparse version)
+  // The sparse version is better when the sparsity is high and less efficient
+  // than the dense version for high degree elements (P3, P4 ...)
+  // Complex version not verified
+  // `columns` is modified in place: candidate columns whose remaining norm
+  // falls below EPS during the orthogonalization are erased. Columns flagged
+  // in c_ortho are only used to orthogonalize the others and are never
+  // erased.
+  template <typename Mat>
+  void range_basis_eff_Gram_Schmidt_sparse(const Mat &BB,
+                                           std::set<size_type> &columns,
+                                           std::vector<bool> &c_ortho,
+                                           double EPS) {
+
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc = mat_ncols(BB), nr = mat_nrows(BB);
+    // c: fixed snapshot used for iteration; rc: columns still to be treated.
+    std::set<size_type> c = columns, rc = columns;
+
+    // Working copy of the selected columns, each scaled to unit 2-norm.
+    gmm::col_matrix< rsvector<T> > B(nr, nc);
+    for (std::set<size_type>::iterator it = columns.begin();
+         it != columns.end(); ++it) {
+      gmm::copy(mat_col(BB, *it), mat_col(B, *it));
+      gmm::scale(mat_col(B, *it), T(1)/vect_norm2(mat_col(B, *it)));
+    }
+
+    // First pass: remove from every unflagged column its component along
+    // each flagged column, then take the flagged column out of rc.
+    for (std::set<size_type>::iterator it = c.begin(); it != c.end(); ++it)
+      if (c_ortho[*it]) {
+        for (std::set<size_type>::iterator it2 = rc.begin();
+             it2 != rc.end(); ++it2)
+          if (!(c_ortho[*it2])) {
+            T r = -vect_hp(mat_col(B, *it2), mat_col(B, *it));
+            if (r != T(0)) add(scaled(mat_col(B, *it), r), mat_col(B, *it2));
+          }
+        rc.erase(*it);
+      }
+
+    while (rc.size()) {
+      // Find the remaining column of largest norm and drop those below EPS.
+      // The successor iterator is saved first because the current element
+      // may be erased from rc.
+      R nmax = R(0); size_type cmax = size_type(-1);
+      for (std::set<size_type>::iterator it=rc.begin(); it != rc.end();) {
+        TAB::iterator itnext = it; ++itnext;
+        R n = vect_norm2(mat_col(B, *it));
+        if (nmax < n) { nmax = n; cmax = *it; }
+        if (n < R(EPS)) { columns.erase(*it); rc.erase(*it); }
+        it = itnext;
+      }
+
+      if (nmax < R(EPS)) break;
+
+      // Normalize the pivot column, retire it from rc and orthogonalize the
+      // remaining columns against it.
+      gmm::scale(mat_col(B, cmax), T(1)/vect_norm2(mat_col(B, cmax)));
+      rc.erase(cmax);
+      for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it) {
+        T r = -vect_hp(mat_col(B, *it), mat_col(B, cmax));
+        if (r != T(0)) add(scaled(mat_col(B, cmax), r), mat_col(B, *it));
+      }
+    }
+    // Anything still in rc at this point is discarded.
+    for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it)
+      columns.erase(*it);
+  }
+
+
+  // Range basis with Gram-Schmidt orthogonalization (dense version)
+  // Works on the dense matrix M = BB^* BB, where BB holds the candidate
+  // columns of B scaled by the inverse of their 2-norm. `columns` is modified
+  // in place: candidates whose diagonal entry of M drops below EPS are
+  // erased. Columns flagged in c_ortho are processed first and never erased.
+  template <typename Mat>
+  void range_basis_eff_Gram_Schmidt_dense(const Mat &B,
+                                          std::set<size_type> &columns,
+                                          std::vector<bool> &c_ortho,
+                                          double EPS) {
+
+    typedef std::set<size_type> TAB;
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc_r = columns.size(), nc = mat_ncols(B), nr = mat_nrows(B), i;
+    // rc holds local indices (0..nc_r-1) of the columns still to be treated.
+    std::set<size_type> rc;
+
+    // H selects and scales the candidate columns: BB = B * H.
+    row_matrix< gmm::rsvector<T> > H(nc, nc_r), BB(nr, nc_r);
+    std::vector<T> v(nc_r);
+    std::vector<size_type> ind(nc_r);   // local index -> column index in B
+
+    i = 0;
+    for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i)
+      H(*it, i) = T(1) / vect_norm2(mat_col(B, *it));
+
+    mult(B, H, BB);
+    dense_matrix<T> M(nc_r, nc_r);
+    mult(gmm::conjugated(BB), BB, M);
+
+    // Flagged columns: apply a rank-one update built from row i of M and
+    // reset the diagonal entry to 1. Unflagged columns are queued in rc.
+    i = 0;
+    for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i)
+      if (c_ortho[*it]) {
+        gmm::copy(mat_row(M, i), v);
+        rank_one_update(M, scaled(v, T(-1)), v);
+        M(i, i) = T(1);
+      }
+      else { rc.insert(i); ind[i] = *it; }
+
+    while (rc.size() > 0) {
+
+      // Next pivot
+      // (largest diagonal magnitude; entries below EPS are dropped. The
+      // successor iterator is saved first because the current element may
+      // be erased from rc.)
+      R nmax = R(0); size_type imax = size_type(-1);
+      for (TAB::iterator it = rc.begin(); it != rc.end();) {
+        TAB::iterator itnext = it; ++itnext;
+        R a = gmm::abs(M(*it, *it));
+        if (a > nmax) { nmax = a; imax = *it; }
+        if (a < R(EPS)) { columns.erase(ind[*it]); rc.erase(*it); }
+        it = itnext;
+      }
+
+      if (nmax < R(EPS)) break;
+
+      // Normalization
+      gmm::scale(mat_row(M, imax), T(1) / sqrt(nmax));
+      gmm::scale(mat_col(M, imax), T(1) / sqrt(nmax));
+
+      // orthogonalization
+      copy(mat_row(M, imax), v);
+      rank_one_update(M, scaled(v, T(-1)), v);
+      M(imax, imax) = T(1);
+
+      rc.erase(imax);
+    }
+    // Anything still in rc at this point is discarded.
+    for (std::set<size_type>::iterator it=rc.begin(); it!=rc.end(); ++it)
+      columns.erase(ind[*it]);
+  }
+
+  /** Counts the stored entries of l whose magnitude is at least eps. */
+  template <typename L> size_type nnz_eps(const L& l, double eps) {
+    typedef typename linalg_traits<L>::const_iterator const_iter;
+    size_type count = 0;
+    const_iter last = vect_const_end(l);
+    for (const_iter cur = vect_const_begin(l); cur != last; ++cur) {
+      if (gmm::abs(*cur) >= eps) ++count;
+    }
+    return count;
+  }
+
+  /** Tries to book the indices of the significant entries of l.
+      An entry is significant when its magnitude is at least eps. If any
+      significant entry sits on an index already marked in b, nothing is
+      changed and false is returned; otherwise all those indices are marked
+      in b and true is returned. */
+  template <typename L>
+  bool reserve__rb(const L& l, std::vector<bool> &b, double eps) {
+    typedef typename linalg_traits<L>::const_iterator const_iter;
+    const_iter last = vect_const_end(l);
+
+    // Check phase: any clash with an already booked index refuses the vector.
+    for (const_iter cur = vect_const_begin(l); cur != last; ++cur) {
+      if (gmm::abs(*cur) >= eps && b[cur.index()]) return false;
+    }
+
+    // Booking phase: mark every significant index.
+    for (const_iter cur = vect_const_begin(l); cur != last; ++cur) {
+      if (gmm::abs(*cur) >= eps) b[cur.index()] = true;
+    }
+    return true;
+  }
+
+  /** Column-major implementation of range_basis.
+
+      On return `columns` holds the indices of the selected columns of B.
+      Unless skip_init is true, the candidate set is first rebuilt: columns
+      whose infinity norm does not exceed EPS times the largest column norm
+      are discarded, and candidates whose significant entries only occupy
+      rows not yet booked by a previously examined candidate are flagged in
+      c_ortho.  The candidates are then filtered by blocks of increasing size
+      with range_basis_eff_Gram_Schmidt_dense, and a last pass uses either
+      range_basis_eff_Lanczos or the dense Gram-Schmidt routine depending on
+      how many candidates remain.
+
+      @param B          matrix whose columns are examined.
+      @param columns    output set of selected column indices; it is also the
+                        input candidate set when skip_init is true.
+      @param EPS        relative threshold used by the tests above and
+                        forwarded to the elimination routines.
+      @param skip_init  when true, `columns` is used as given and no column
+                        is flagged in c_ortho.
+  */
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                       double EPS, col_major, bool skip_init=false) {
+
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    size_type nc = mat_ncols(B), nr = mat_nrows(B);
+
+    std::vector<R> norms(nc);
+    std::vector<bool> c_ortho(nc), booked(nr);
+    // Buckets indexed by the number of significant entries of a column.
+    // That count ranges over 0..nr inclusive, hence nr + 1 buckets: with only
+    // nr of them, a column without any negligible entry would be inserted
+    // one past the end of the vector.
+    std::vector< std::set<size_type> > nnzs(nr + 1);
+
+    if (!skip_init) {
+
+      R norm_max = R(0);
+      for (size_type i = 0; i < nc; ++i) {
+        norms[i] = vect_norminf(mat_col(B, i));
+        norm_max = std::max(norm_max, norms[i]);
+      }
+
+      // Keep the columns that are not negligible with respect to the largest
+      // one and sort them by number of significant entries.
+      columns.clear();
+      for (size_type i = 0; i < nc; ++i)
+        if (norms[i] > norm_max*R(EPS)) {
+          columns.insert(i);
+          nnzs[nnz_eps(mat_col(B, i), R(EPS) * norms[i])].insert(i);
+        }
+
+      // Sparsest columns first: a column whose significant rows are all free
+      // books them and is flagged in c_ortho.  As before, the bucket of
+      // completely full columns (index nr) is not visited here.
+      for (size_type i = 1; i < nr; ++i)
+        for (std::set<size_type>::iterator it = nnzs[i].begin();
+             it != nnzs[i].end(); ++it)
+          if (reserve__rb(mat_col(B, *it), booked, R(EPS) * norms[*it]))
+            c_ortho[*it] = true;
+    }
+
+    // Local elimination by blocks of increasing size.
+    size_type sizesm[7] = {125, 200, 350, 550, 800, 1100, 1500}, actsize;
+    for (int k = 0; k < 7; ++k) {
+      size_type nc_r = columns.size();
+      std::set<size_type> c1, cres;
+      actsize = sizesm[k];
+      for (std::set<size_type>::iterator it = columns.begin();
+           it != columns.end(); ++it) {
+        c1.insert(*it);
+        if (c1.size() >= actsize) {
+          range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS);
+          for (std::set<size_type>::iterator it2=c1.begin(); it2 != c1.end();
+               ++it2) cres.insert(*it2);
+          c1.clear();
+        }
+      }
+      // Last, incomplete block (nothing to eliminate in a single column).
+      if (c1.size() > 1)
+        range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS);
+      for (std::set<size_type>::iterator it = c1.begin(); it != c1.end(); ++it)
+        cres.insert(*it);
+      columns = cres;
+      // All candidates fitted in one block: the result is final.
+      if (nc_r <= actsize) return;
+      // Stop the block passes when nothing, or (from block size 350 on)
+      // less than 5% of the candidates, was removed by this pass.
+      if (columns.size() == nc_r) break;
+      if (sizesm[k] >= 350 && columns.size() > (nc_r*19)/20) break;
+    }
+    // Global pass on what is left.
+    if (columns.size() > std::max(size_type(10), actsize))
+      range_basis_eff_Lanczos(B, columns, EPS);
+    else
+      range_basis_eff_Gram_Schmidt_dense(B, columns, c_ortho, EPS);
+  }
+
+
+  /** Row-major overload: B is copied into a column matrix of rsvector (the
+      copy is reported through GMM_WARNING3) and the three-argument
+      range_basis is then called on that copy. */
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                   double EPS, row_major) {
+    typedef typename linalg_traits<Mat>::value_type value_t;
+    typedef gmm::col_matrix< rsvector<value_t> > col_copy_type;
+    col_copy_type Bcol(mat_nrows(B), mat_ncols(B));
+    GMM_WARNING3("A copy of a row matrix is done into a column matrix "
+                 "for range basis algorithm.");
+    gmm::copy(B, Bcol);
+    range_basis(Bcol, columns, EPS);
+  }
+
+  /** Range Basis :
+    Extract a basis of the range of a (large sparse) matrix by selecting some
+    column vectors of this matrix. This is in particular useful to select
+    an independent set of linear constraints.
+
+    The algorithm is optimized for two cases :
+       - when the (non trivial) kernel is small. An iterative algorithm
+         based on the Lanczos method is applied.
+       - when the (non trivial) kernel is large and most of the dependencies
+         can be detected locally. A block Gram-Schmidt is applied first, then
+         a restarted Lanczos method once the remaining kernel is much
+         smaller.
+    The restarted Lanczos method could be improved or replaced by a block
+    Lanczos method, a block Wiedemann method (in order to be parallelized for
+    instance) or could simply compute more than one vector of the null
+    space at each iteration.
+    The LU decomposition has been tested for local elimination but gives bad
+    results : the algorithm is unstable and does not give the right number
+    of vectors at the end of the process. Moreover, the number of final
+    vectors depends greatly on the number of vectors in a block of the local
+    analysis.
+
+    This entry point only dispatches on the principal orientation of Mat
+    to the row_major / col_major overloads.
+  */
+  template <typename Mat>
+  void range_basis(const Mat &B, std::set<size_type> &columns,
+                   double EPS=1E-12) {
+    typedef typename linalg_traits<Mat>::sub_orientation sub_orient;
+    typedef typename principal_orientation_type<sub_orient>::potype
+      orientation_tag;
+    range_basis(B, columns, EPS, orientation_tag());
+  }
+
+}
+
+#endif
--- a/gmm/gmm_real_part.h
+++ b/gmm/gmm_real_part.h
@ -0,0 +1,605 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_real_part.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date September 18, 2003.
+   @brief extract the real/imaginary part of vectors/matrices 
+*/
+#ifndef GMM_REAL_PART_H
+#define GMM_REAL_PART_H
+
+#include "gmm_def.h"
+#include "gmm_vector.h"
+
+namespace gmm {
+
+  /** Tag type selecting the real part in the overloads of this file. */
+  struct linalg_real_part {};
+  /** Tag type selecting the imaginary part in the overloads of this file. */
+  struct linalg_imag_part {};
+  /** Empty marker pairing a reference type R with a part tag PART; it is
+      used as the second template argument of ref_elt_vector. */
+  template <typename R, typename PART> struct which_part {};
+  
+  /** Real-part overload: returns gmm::real(x). */
+  template <typename C> typename number_traits<C>::magnitude_type 
+  real_or_imag_part(C x, linalg_real_part) { return gmm::real(x); }
+  /** Imaginary-part overload: returns gmm::imag(x). */
+  template <typename C> typename number_traits<C>::magnitude_type 
+  real_or_imag_part(C x, linalg_imag_part) { return gmm::imag(x); }
+  /** Build a complex from `y` whose real part is replaced by
+      op(real(y), x); the imaginary part of `y` is kept. */
+  template <typename T, typename C, typename OP> C
+  complex_from(T x, C y, OP op, linalg_real_part) {
+    const T new_re = op(std::real(y), x);
+    return std::complex<T>(new_re, std::imag(y));
+  }
+  /** Build a complex from `y` whose imaginary part is replaced by
+      op(imag(y), x); the real part of `y` is kept. */
+  template <typename T, typename C, typename OP> C
+  complex_from(T x, C y, OP op,linalg_imag_part) {
+    const T new_im = op(std::imag(y), x);
+    return std::complex<T>(std::real(y), new_im);
+  }
+  
+  /** Binary functor that ignores its first argument and returns the second. */
+  template<typename T> struct project2nd {
+    T operator()(T /* ignored */, T second) const {
+      return second;
+    }
+  };
+  
+  /** Proxy reference on the real or imaginary part (selected by PART) of a
+      complex element reached through `r`.  Reading converts the element to
+      std::complex<T> and extracts the part; every write rebuilds the whole
+      complex with complex_from and stores it back through `r`, leaving the
+      other part unchanged. */
+  template<typename T, typename R, typename PART> class ref_elt_vector<T, which_part<R, PART> > {
+
+    R r;
+    
+    public :
+
+    operator T() const { return real_or_imag_part(std::complex<T>(r), PART()); }
+    ref_elt_vector(R r_) : r(r_) {}
+    inline ref_elt_vector &operator =(T v)
+    { r = complex_from(v, std::complex<T>(r), gmm::project2nd<T>(), PART()); return *this; }
+    // Comparisons look at the selected part only.  Comparing `r` itself with
+    // `v` would compare the whole complex element against a real value, which
+    // also requires the other part to be zero.
+    inline bool operator ==(T v) const { return (T(*this) == v); }
+    inline bool operator !=(T v) const { return (T(*this) != v); }
+    inline ref_elt_vector &operator +=(T v)
+    { r = complex_from(v, std::complex<T>(r), std::plus<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator -=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::minus<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator /=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::divides<T>(), PART()); return *this; }
+    inline ref_elt_vector &operator *=(T v)
+      { r = complex_from(v, std::complex<T>(r), std::multiplies<T>(), PART()); return *this; }
+    // Assigning from another proxy copies the value of its part, not the
+    // reference it holds.
+    inline ref_elt_vector &operator =(const ref_elt_vector &re)
+      { *this = T(re); return *this; }
+    T operator +()    { return  T(*this);   } // necessary for unknown reason
+    T operator -()    { return -T(*this);   } // necessary for unknown reason
+    T operator +(T v) { return T(*this)+ v; } // necessary for unknown reason
+    T operator -(T v) { return T(*this)- v; } // necessary for unknown reason
+    T operator *(T v) { return T(*this)* v; } // necessary for unknown reason
+    T operator /(T v) { return T(*this)/ v; } // necessary for unknown reason
+  };
+
+  /** Helper used when a `reference` is built from an accessed element.
+      Primary template: `r` returns, as a W, the real or the imaginary part
+      of x depending on the tag argument.  The third argument is unnamed and
+      only carries the type W. */
+  template<typename reference> struct ref_or_value_type {
+    template <typename T, typename W>
+    static W r(const T &x, linalg_real_part, W) {
+      return gmm::real(x);
+    }
+    template <typename T, typename W>
+    static W r(const T &x, linalg_imag_part, W) {
+      return gmm::imag(x);
+    }
+  };
+  
+  /** Specialization for a reference type that is a ref_elt_vector over
+      which_part: every overload of `r` hands its first argument back
+      unchanged, ignoring both the part tag and W. */
+  template<typename U, typename R, typename PART> 
+  struct ref_or_value_type<ref_elt_vector<U, which_part<R, PART> > > {
+    // Overloads for const elements.
+    template<typename T , typename W> 
+    static const T &r(const T &x, linalg_real_part, W)
+    { return x; }
+    template<typename T, typename W> 
+    static const T &r(const T &x, linalg_imag_part, W) {
+      return x; 
+    }
+    // Overloads for modifiable elements.
+    template<typename T , typename W> 
+    static T &r(T &x, linalg_real_part, W)
+    { return x; }
+    template<typename T, typename W> 
+    static T &r(T &x, linalg_imag_part, W) {
+      return x; 
+    }
+  };
+
+  
+  /* ********************************************************************* */
+  /*	Reference to the real or imaginary part of (complex) vectors	   */
+  /* ********************************************************************* */
+
+  /** Iterator adaptor around an iterator IT: dereferencing yields a
+      ref_elt_vector proxy on the part selected by PART, while every move,
+      distance and comparison is delegated to the wrapped iterator `it`.
+      MIT is the iterator type accepted by the converting constructor. */
+  template <typename IT, typename MIT, typename PART>
+  struct part_vector_iterator {
+    typedef typename std::iterator_traits<IT>::value_type      vtype;
+    typedef typename gmm::number_traits<vtype>::magnitude_type value_type;
+    typedef value_type                                        *pointer;
+    typedef ref_elt_vector<value_type,
+              which_part<typename std::iterator_traits<IT>::reference,
+                         PART> >                               reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+      iterator_category;
+
+    /// Wrapped iterator.
+    IT it;
+
+    part_vector_iterator(void) {}
+    explicit part_vector_iterator(const IT &i) : it(i) {}
+    part_vector_iterator(const part_vector_iterator<MIT, MIT, PART> &i)
+      : it(i.it) {}
+
+    /// Index reported by the wrapped iterator.
+    size_type index(void) const { return it.index(); }
+
+    // Post-increment / post-decrement return the previous position.
+    part_vector_iterator operator ++(int) {
+      part_vector_iterator previous = *this;
+      ++it;
+      return previous;
+    }
+    part_vector_iterator operator --(int) {
+      part_vector_iterator previous = *this;
+      --it;
+      return previous;
+    }
+    part_vector_iterator &operator ++() { ++it; return *this; }
+    part_vector_iterator &operator --() { --it; return *this; }
+
+    // Random-access moves.
+    part_vector_iterator &operator +=(difference_type i) {
+      it += i;
+      return *this;
+    }
+    part_vector_iterator &operator -=(difference_type i) {
+      it -= i;
+      return *this;
+    }
+    part_vector_iterator operator +(difference_type i) const {
+      part_vector_iterator moved = *this;
+      moved += i;
+      return moved;
+    }
+    part_vector_iterator operator -(difference_type i) const {
+      part_vector_iterator moved = *this;
+      moved -= i;
+      return moved;
+    }
+    difference_type operator -(const part_vector_iterator &i) const {
+      return difference_type(it - i.it);
+    }
+
+    // Element access through the part proxy.
+    reference operator  *() const { return reference(*it); }
+    reference operator [](size_type ii) const { return reference(it[ii]); }
+
+    // Comparisons of the wrapped iterators.
+    bool operator ==(const part_vector_iterator &i) const {
+      return (i.it == it);
+    }
+    bool operator !=(const part_vector_iterator &i) const {
+      return (i.it != it);
+    }
+    bool operator < (const part_vector_iterator &i) const {
+      return (it < i.it);
+    }
+  };
+
+
+  /** View on the real or imaginary part (selected by PART) of a vector of
+      type V, V being the value type pointed to by PT.  The view stores the
+      begin/end iterators, the origin and the size of the underlying vector;
+      it does not copy its elements. */
+  template <typename PT, typename PART> struct part_vector {
+    typedef part_vector<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type size_;
+
+    size_type size(void) const { return size_; }
+   
+    // Access element i of the underlying vector and wrap (or extract) the
+    // selected part through ref_or_value_type.
+    reference operator[](size_type i) const { 
+      return reference(ref_or_value_type<reference>::r(
+	     linalg_traits<V>::access(origin, begin_, end_, i),
+	     PART(), value_type()));
+    }
+
+    part_vector(V &v)
+      : begin_(vect_begin(v)),  end_(vect_end(v)),
+	origin(linalg_origin(v)), size_(gmm::vect_size(v)) {}
+    // Constness of v is cast away to obtain the iterators and the origin.
+    part_vector(const V &v) 
+      : begin_(vect_begin(const_cast<V &>(v))),
+       end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), size_(gmm::vect_size(v)) {}
+    // The default constructor does not initialize any member explicitly.
+    part_vector() {}
+    // Construction from the view built on the non-const pointer type CPT.
+    part_vector(const part_vector<CPT, PART> &cr)
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), size_(cr.size_) {} 
+  };
+
+  /** set_to_begin for a part_vector_iterator (modifiable, non-const view
+      pointer): forwards to set_to_begin on the wrapped iterator, with the
+      pV and V_reference types taken from the traits of the view. */
+  template <typename IT, typename MIT, typename ORG, typename PT,
+            typename PART> inline
+  void set_to_begin(part_vector_iterator<IT, MIT, PART> &it,
+                    ORG o, part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    set_to_begin(it.it, o, typename view_traits::pV(),
+                 typename view_traits::V_reference());
+  }
+  /** Same forwarding, for a pointer to a const view. */
+  template <typename IT, typename MIT, typename ORG, typename PT,
+            typename PART> inline
+  void set_to_begin(part_vector_iterator<IT, MIT, PART> &it,
+                    ORG o, const part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    set_to_begin(it.it, o, typename view_traits::pV(),
+                 typename view_traits::V_reference());
+  }
+  /** set_to_end for a part_vector_iterator (modifiable, non-const view
+      pointer): forwards to set_to_end on the wrapped iterator, with the
+      pV and V_reference types taken from the traits of the view. */
+  template <typename IT, typename MIT, typename ORG, typename PT,
+            typename PART> inline
+  void set_to_end(part_vector_iterator<IT, MIT, PART> &it,
+                  ORG o, part_vector<PT, PART> *, linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    set_to_end(it.it, o, typename view_traits::pV(),
+               typename view_traits::V_reference());
+  }
+  /** Same forwarding, for a pointer to a const view. */
+  template <typename IT, typename MIT, typename ORG,
+            typename PT, typename PART> inline
+  void set_to_end(part_vector_iterator<IT, MIT, PART> &it,
+                  ORG o, const part_vector<PT, PART> *,
+                  linalg_modifiable) {
+    typedef linalg_traits<part_vector<PT, PART> > view_traits;
+    set_to_end(it.it, o, typename view_traits::pV(),
+               typename view_traits::V_reference());
+  }
+
+  /** Stream insertion for part_vector: delegates to gmm::write and returns
+      the stream. */
+  template <typename PT, typename PART> std::ostream &operator <<
+    (std::ostream &o, const part_vector<PT, PART>& m)
+  { gmm::write(o,m); return o; }
+
+
+  /* ********************************************************************* */
+  /*	Reference to the real or imaginary part of (complex) matrices      */
+  /* ********************************************************************* */
+
+
+  /** View on the real or imaginary part (selected by PART) of a matrix of
+      type M accessed by rows, M being the value type pointed to by PT.  The
+      view stores the row iterators, the origin and the dimensions of the
+      underlying matrix. */
+  template <typename PT, typename PART> struct  part_row_ref {
+    
+    typedef part_row_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_row_iterator, typename linalg_traits<this_type>
+            ::row_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    part_row_ref(ref_M m)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {}
+
+    // Construction from the view built on the non-const pointer type CPT.
+    part_row_ref(const part_row_ref<CPT, PART> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+
+    // Element (i, j): entry j of the row reached by begin_ + i.
+    reference operator()(size_type i, size_type j) const {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(begin_+i, j),
+					 PART(), value_type()));
+    }
+  };
+  
+  /** Stream insertion for part_row_ref: delegates to gmm::write and returns
+      the stream. */
+  template<typename PT, typename PART> std::ostream &operator <<
+    (std::ostream &o, const part_row_ref<PT, PART>& m)
+  { gmm::write(o,m); return o; }
+
+  /** View on the real or imaginary part (selected by PART) of a matrix of
+      type M accessed by columns, M being the value type pointed to by PT.
+      The view stores the column iterators, the origin and the dimensions of
+      the underlying matrix. */
+  template <typename PT, typename PART> struct  part_col_ref {
+    
+    typedef part_col_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_col_iterator, typename linalg_traits<this_type>
+            ::col_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    part_col_ref(ref_M m)
+      : begin_(mat_col_begin(m)), end_(mat_col_end(m)),
+	origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {}
+
+    // Construction from the view built on the non-const pointer type CPT.
+    part_col_ref(const part_col_ref<CPT, PART> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+
+    // Element (i, j): entry i of the column reached by begin_ + j.
+    reference operator()(size_type i, size_type j) const {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(begin_+j, i),
+					 PART(), value_type()));
+    }
+  };
+   
+
+  
+  /** Stream insertion for part_col_ref: delegates to gmm::write and returns
+      the stream. */
+  template<typename PT, typename PART> std::ostream &operator <<
+    (std::ostream &o, const part_col_ref<PT, PART>& m)
+  { gmm::write(o,m); return o; }
+
+  
+
+
+
+
+/** Matrix case of the return-type computation, keyed on an orientation tag.
+    The primary template yields abstract_null_type; row_major maps to
+    part_row_ref and col_major to part_col_ref.  select_return chooses
+    between the `const L *` and `L *` flavours according to PT (presumably
+    the const one when PT points to const -- defined outside this file
+    section). */
+template <typename TYPE, typename PART, typename PT>
+  struct part_return_ {
+    typedef abstract_null_type return_type;
+  };
+  template <typename PT, typename PART>
+  struct part_return_<row_major, PART, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_row_ref<const L *, PART>,
+		     part_row_ref< L *, PART>, PT>::return_type return_type;
+  };
+  template <typename PT, typename PART>
+  struct part_return_<col_major, PART, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_col_ref<const L *, PART>,
+		     part_col_ref<L *, PART>, PT>::return_type return_type;
+  };
+
+  /** Return-type computation keyed on the linalg type LT.  The primary
+      template yields abstract_null_type. */
+  template <typename PT, typename PART, typename LT> struct part_return__{
+    typedef abstract_null_type return_type;
+  };
+
+  /** Matrices: dispatch to part_return_ on the principal orientation of the
+      matrix type. */
+  template <typename PT, typename PART>
+  struct part_return__<PT, PART, abstract_matrix> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename part_return_<typename principal_orientation_type<
+      typename linalg_traits<L>::sub_orientation>::potype, PART,
+      PT>::return_type return_type;
+  };
+
+  /** Vectors: the result is a part_vector, its `const L *` or `L *` flavour
+      being chosen by select_return according to PT. */
+  template <typename PT, typename PART>
+  struct part_return__<PT, PART, abstract_vector> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<part_vector<const L *, PART>,
+      part_vector<L *, PART>, PT>::return_type return_type;
+  };
+
+  /** Type returned by real_part / imag_part for an object pointed to by PT:
+      dispatches to part_return__ on the linalg_type of the pointed-to
+      type. */
+  template <typename PT, typename PART> struct part_return {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename part_return__<PT, PART,
+      typename linalg_traits<L>::linalg_type>::return_type return_type;
+  };
+
+  /** View on the real part of `l`, overload for a const argument.  The view
+      type comes from part_return; constness is cast away only to call
+      linalg_cast. */
+  template <typename L> inline
+  typename part_return<const L *, linalg_real_part>::return_type
+  real_part(const L &l) {
+    typedef typename part_return<const L *, linalg_real_part>::return_type
+      view_type;
+    return view_type(linalg_cast(const_cast<L &>(l)));
+  }
+
+  /** View on the real part of `l`, overload for a modifiable argument. */
+  template <typename L> inline
+  typename part_return<L *, linalg_real_part>::return_type
+  real_part(L &l) {
+    typedef typename part_return<L *, linalg_real_part>::return_type
+      view_type;
+    return view_type(linalg_cast(l));
+  }
+
+  /** View on the imaginary part of `l`, overload for a const argument.  The
+      view type comes from part_return; constness is cast away only to call
+      linalg_cast. */
+  template <typename L> inline
+  typename part_return<const L *, linalg_imag_part>::return_type
+  imag_part(const L &l) {
+    typedef typename part_return<const L *, linalg_imag_part>::return_type
+      view_type;
+    return view_type(linalg_cast(const_cast<L &>(l)));
+  }
+
+  /** View on the imaginary part of `l`, overload for a modifiable
+      argument. */
+  template <typename L> inline
+  typename part_return<L *, linalg_imag_part>::return_type
+  imag_part(L &l) {
+    typedef typename part_return<L *, linalg_imag_part>::return_type
+      view_type;
+    return view_type(linalg_cast(l));
+  }
+
+
+  /** Traits of part_vector<PT, PART>: an abstract_vector whose value_type is
+      the magnitude type of the value type of the underlying vector V.
+      Storage type, index ordering and origin type are those of V. */
+  template <typename PT, typename PART>
+  struct linalg_traits<part_vector<PT, PART> > {
+    typedef part_vector<PT, PART> this_type;
+    typedef this_type * pthis_type;
+    typedef PT pV;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    // Either a plain value_type or a ref_elt_vector proxy on the selected
+    // part, as chosen by select_ref from PT.
+    typedef typename select_ref<value_type, ref_elt_vector<value_type,
+		     which_part<typename linalg_traits<V>::reference,
+				PART> >, PT>::ref_type reference;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+	    typename linalg_traits<V>::iterator, PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    part_vector_iterator<pre_iterator, pre_iterator, PART>,
+	    PT>::ref_type iterator;
+    typedef part_vector_iterator<typename linalg_traits<V>::const_iterator,
+				 pre_iterator, PART> const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    static size_type size(const this_type &v) { return v.size(); }
+    // begin()/end(): start from the iterators stored in the view; for a
+    // non-const reference on sparse storage the iterator is additionally
+    // passed to set_to_begin / set_to_end together with the origin.
+    static iterator begin(this_type &v) {
+      iterator it; it.it = v.begin_;
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {
+      const_iterator it(v.begin_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      return it;
+    }
+    static iterator end(this_type &v) {
+      iterator it(v.end_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator end(const this_type &v) {
+      const_iterator it(v.end_);
+      if (!is_const_reference(is_reference()) && is_sparse(storage_type()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+
+    // Sparse clear: the indices are collected first, then the selected part
+    // of each of them is set to zero, in iteration order (push_front then
+    // pop_back).  NOTE(review): collecting before writing presumably avoids
+    // iterating over a container that is being modified -- confirm.
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_sparse) {
+      std::deque<size_type> ind;
+      iterator it = begin_;
+      for (; it != end_; ++it) ind.push_front(it.index());
+      for (; !(ind.empty()); ind.pop_back())
+	access(o, begin_, end_, ind.back()) = value_type(0);
+    }
+    // Skyline storage is cleared with the sparse algorithm.
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_skyline) {
+      clear(o, begin_, end_, abstract_sparse());
+    }
+    // Dense clear: the selected part of every element is set to zero.
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_, abstract_dense) {
+      for (iterator it = begin_; it != end_; ++it) *it = value_type(0);
+    }
+
+   // Dispatch on the storage type of the underlying vector.
+   static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_) 
+    { clear(o, begin_, end_, storage_type()); }
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    // Read access: the selected part of element i, returned by value.
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i) { 
+      return  real_or_imag_part(linalg_traits<V>::access(o, it.it, ite.it,i),
+				PART());
+    }
+    // Write access: a `reference` built from element i of the underlying
+    // vector.
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)
+    { return reference(linalg_traits<V>::access(o, it.it, ite.it,i)); }
+  };
+
+  /** Traits of part_row_ref<PT, PART>: a row_major abstract_matrix whose
+      value_type is the magnitude type of the value type of the underlying
+      matrix M.  Rows are exposed as part_vector views on the rows of M; all
+      column-related types are abstract_null_type. */
+  template <typename PT, typename PART>
+  struct linalg_traits<part_row_ref<PT, PART> > {
+    typedef part_row_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_row_type>::t
+            pre_const_sub_row_type;
+    typedef typename org_type<typename linalg_traits<M>::sub_row_type>::t pre_sub_row_type;
+    typedef part_vector<const pre_const_sub_row_type *, PART>
+            const_sub_row_type;
+    typedef typename select_ref<abstract_null_type,
+	    part_vector<pre_sub_row_type *, PART>, PT>::ref_type sub_row_type;
+    // The row iterators are those of the underlying matrix.
+    typedef typename linalg_traits<M>::const_row_iterator const_row_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::row_iterator, PT>::ref_type row_iterator;
+    typedef typename select_ref<
+            typename linalg_traits<const_sub_row_type>::reference,
+	    typename linalg_traits<sub_row_type>::reference,
+				PT>::ref_type reference;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type ncols(const this_type &v) { return v.nc; }
+    static size_type nrows(const this_type &v) { return v.nr; }
+    // A row of the view is a part_vector built on the row of M.
+    static const_sub_row_type row(const const_row_iterator &it)
+    { return const_sub_row_type(linalg_traits<M>::row(it)); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(linalg_traits<M>::row(it)); }
+    static row_iterator row_begin(this_type &m) { return m.begin_; }
+    static row_iterator row_end(this_type &m) { return m.end_; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return m.begin_; }
+    static const_row_iterator row_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    // Defined out of class, after the traits of part_col_ref.
+    static void do_clear(this_type &v);
+    // Read access: the selected part of entry i of the row, by value.
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return real_or_imag_part(linalg_traits<M>::access(itrow, i), PART()); }
+    // Write access: a `reference` obtained through ref_or_value_type.
+    static reference access(const row_iterator &itrow, size_type i) {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(itrow, i),
+					 PART(), value_type()));
+    }
+  };
+
+  /** Traits of part_col_ref<PT, PART>: a col_major abstract_matrix whose
+      value_type is the magnitude type of the value type of the underlying
+      matrix M.  Columns are exposed as part_vector views on the columns of
+      M; all row-related types are abstract_null_type. */
+  template <typename PT, typename PART>
+  struct linalg_traits<part_col_ref<PT, PART> > {
+    typedef part_col_ref<PT, PART> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type vtype;
+    typedef typename number_traits<vtype>::magnitude_type value_type;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    typedef typename org_type<typename linalg_traits<M>::const_sub_col_type>::t
+            pre_const_sub_col_type;
+    typedef typename org_type<typename linalg_traits<M>::sub_col_type>::t pre_sub_col_type;
+    typedef part_vector<const pre_const_sub_col_type *, PART>
+            const_sub_col_type;
+    typedef typename select_ref<abstract_null_type,
+	    part_vector<pre_sub_col_type *, PART>, PT>::ref_type sub_col_type;
+    // The column iterators are those of the underlying matrix.
+    typedef typename linalg_traits<M>::const_col_iterator const_col_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::col_iterator, PT>::ref_type col_iterator;
+    typedef typename select_ref<
+            typename linalg_traits<const_sub_col_type>::reference,
+	    typename linalg_traits<sub_col_type>::reference,
+				PT>::ref_type reference;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type nrows(const this_type &v) { return v.nr; }
+    static size_type ncols(const this_type &v) { return v.nc; }
+    // A column of the view is a part_vector built on the column of M.
+    static const_sub_col_type col(const const_col_iterator &it)
+    { return const_sub_col_type(linalg_traits<M>::col(it)); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(linalg_traits<M>::col(it)); }
+    static col_iterator col_begin(this_type &m) { return m.begin_; }
+    static col_iterator col_end(this_type &m) { return m.end_; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return m.begin_; }
+    static const_col_iterator col_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    // Defined out of class, right after this specialization.
+    static void do_clear(this_type &v);
+    // Read access: the selected part of entry i of the column, by value.
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return real_or_imag_part(linalg_traits<M>::access(itcol, i), PART()); }
+    // Write access: a `reference` obtained through ref_or_value_type.
+    static reference access(const col_iterator &itcol, size_type i) {
+      return reference(ref_or_value_type<reference>::r(
+					 linalg_traits<M>::access(itcol, i),
+					 PART(), value_type()));
+    }
+  };
+
+  /** Clear a part_col_ref view by clearing each of its columns in turn. */
+  template <typename PT, typename PART>
+  void linalg_traits<part_col_ref<PT, PART> >::do_clear(this_type &v) {
+    col_iterator cur = mat_col_begin(v), last = mat_col_end(v);
+    while (cur != last) {
+      clear(col(cur));
+      ++cur;
+    }
+  }
+  
+  /** Clear a part_row_ref view by clearing each of its rows in turn. */
+  template <typename PT, typename PART>
+  void linalg_traits<part_row_ref<PT, PART> >::do_clear(this_type &v) {
+    row_iterator cur = mat_row_begin(v), last = mat_row_end(v);
+    while (cur != last) {
+      clear(row(cur));
+      ++cur;
+    }
+  }
+}
+
+#endif //  GMM_REAL_PART_H
--- a/gmm/gmm_ref.h
+++ b/gmm/gmm_ref.h
@ -0,0 +1,526 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2000-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+
+#ifndef GMM_REF_H__
+#define GMM_REF_H__
+
+/** @file gmm_ref.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date August 26, 2000.
+ *  @brief Provide some simple pseudo-containers.
+ *  
+ *  WARNING: modifying the container invalidates the references to it.
+ */
+
+
+#include <iterator>
+#include "gmm_except.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /* Simple reference.                                                     */
+  /* ********************************************************************* */
+
+  /** Non-owning view over the iterator range [begin_, end_).
+   *
+   *  Only the two iterators are stored; the elements are never copied, so
+   *  the view is usable only while the underlying range is.
+   *  size() and operator[] rely on iterator arithmetic (end_ - begin_,
+   *  begin_ + ii); front(), back() and pop_front() expect a non-empty view.
+   */
+  template<typename ITER> class tab_ref {
+
+    protected :
+
+      ITER begin_, end_;  // first element / one past the last element
+
+    public :
+
+      typedef typename std::iterator_traits<ITER>::value_type  value_type;
+      typedef typename std::iterator_traits<ITER>::pointer     pointer;
+      typedef typename std::iterator_traits<ITER>::pointer     const_pointer;
+      typedef typename std::iterator_traits<ITER>::reference   reference;
+      typedef typename std::iterator_traits<ITER>::reference   const_reference;
+      typedef typename std::iterator_traits<ITER>::difference_type
+                                                               difference_type;
+      typedef ITER                            iterator;
+      typedef ITER                            const_iterator;
+      typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+      typedef std::reverse_iterator<iterator> reverse_iterator;
+      typedef size_t size_type;
+
+      bool empty(void) const { return begin_ == end_; }
+      size_type size(void) const { return end_ - begin_; }
+
+      const iterator &begin(void) { return begin_; }
+      const const_iterator &begin(void) const { return begin_; }
+      const iterator &end(void) { return end_; }
+      const const_iterator &end(void) const { return end_; }
+      reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+      const_reverse_iterator rbegin(void) const
+      { return const_reverse_iterator(end()); }
+      reverse_iterator rend(void) { return reverse_iterator(begin()); }
+      const_reverse_iterator rend(void) const
+      { return const_reverse_iterator(begin()); }
+
+      reference front(void) { return *begin(); }
+      const_reference front(void) const { return *begin(); }
+      // end() returns a reference to const, which cannot be decremented in
+      // place; step back on a local copy of the end iterator instead.
+      reference back(void) { iterator last = end_; --last; return *last; }
+      const_reference back(void) const
+      { const_iterator last = end_; --last; return *last; }
+      void pop_front(void) { ++begin_; }
+
+      const_reference operator [](size_type ii) const { return *(begin_ + ii);}
+      reference operator [](size_type ii) { return *(begin_ + ii); }
+
+      // Value-initialize both iterators so that a default-constructed view
+      // over pointers is an empty range instead of holding indeterminate
+      // values.
+      tab_ref(void) : begin_(), end_() {}
+      tab_ref(const ITER &b, const ITER &e) : begin_(b), end_(e) {}
+  };
+
+
+  /* ********************************************************************* */
+  /* Reference with index.                                                 */
+  /* ********************************************************************* */
+
+//   template<typename ITER> struct tab_ref_index_iterator_
+//     : public dynamic_array<size_t>::const_iterator
+//   {
+//     typedef typename std::iterator_traits<ITER>::value_type  value_type;
+//     typedef typename std::iterator_traits<ITER>::pointer     pointer;
+//     typedef typename std::iterator_traits<ITER>::reference   reference;
+//     typedef typename std::iterator_traits<ITER>::difference_type  
+//     difference_type;
+//     typedef std::random_access_iterator_tag iterator_category;
+//     typedef size_t size_type;
+//     typedef dynamic_array<size_type>::const_iterator dnas_iterator_;
+//     typedef tab_ref_index_iterator_<ITER> iterator;
+    
+
+//     ITER piter;
+    
+//     iterator operator ++(int)
+//     { iterator tmp = *this; ++(*((dnas_iterator_ *)(this))); return tmp; }
+//     iterator operator --(int)
+//     { iterator tmp = *this; --(*((dnas_iterator_ *)(this))); return tmp; }
+//     iterator &operator ++()
+//     { ++(*((dnas_iterator_ *)(this))); return *this; }
+//     iterator &operator --()
+//     { --(*((dnas_iterator_ *)(this))); return *this; }
+//     iterator &operator +=(difference_type i)
+//     { (*((dnas_iterator_ *)(this))) += i; return *this; }
+//     iterator &operator -=(difference_type i)
+//     { (*((dnas_iterator_ *)(this))) -= i; return *this; }
+//     iterator operator +(difference_type i) const
+//     { iterator it = *this; return (it += i); }
+//     iterator operator -(difference_type i) const
+//     { iterator it = *this; return (it -= i); }
+//     difference_type operator -(const iterator &i) const
+//     { return *((dnas_iterator_ *)(this)) - *((dnas_iterator_ *)(&i)); }
+	
+//     reference operator *() const
+//     { return *(piter + *((*((dnas_iterator_ *)(this))))); }
+//     reference operator [](int ii)
+//     { return *(piter + *((*((dnas_iterator_ *)(this+ii))))); }
+    
+//     bool operator ==(const iterator &i) const
+//     { 
+//       return ((piter) == ((i.piter))
+//        && *((dnas_iterator_ *)(this)) == *((*((dnas_iterator_ *)(this)))));
+//     }
+//     bool operator !=(const iterator &i) const
+//     { return !(i == *this); }
+//     bool operator < (const iterator &i) const
+//     { 
+//       return ((piter) == ((i.piter))
+// 	 && *((dnas_iterator_ *)(this)) < *((*((dnas_iterator_ *)(this)))));
+//     }
+
+//     tab_ref_index_iterator_(void) {}
+//     tab_ref_index_iterator_(const ITER &iter, const dnas_iterator_ &dnas_iter)
+//       : dnas_iterator_(dnas_iter), piter(iter) {}
+//   };
+
+
+//   template<typename ITER> class tab_ref_index
+//   {
+//     public :
+
+//       typedef typename std::iterator_traits<ITER>::value_type value_type;
+//       typedef typename std::iterator_traits<ITER>::pointer    pointer;
+//       typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+//       typedef typename std::iterator_traits<ITER>::reference  reference;
+//       typedef typename std::iterator_traits<ITER>::reference  const_reference;
+//       typedef typename std::iterator_traits<ITER>::difference_type
+// 	                                                       difference_type;
+//       typedef size_t size_type; 
+//       typedef tab_ref_index_iterator_<ITER> iterator;
+//       typedef iterator                          const_iterator;
+//       typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+//       typedef std::reverse_iterator<iterator> reverse_iterator;
+    
+//     protected :
+
+//       ITER begin_;
+//       dynamic_array<size_type> index_;
+
+//     public :
+
+//       bool empty(void) const { return index_.empty(); }
+//       size_type size(void) const { return index_.size(); }
+
+
+//       iterator begin(void) { return iterator(begin_, index_.begin()); }
+//       const_iterator begin(void) const
+//       { return iterator(begin_, index_.begin()); }
+//       iterator end(void) { return iterator(begin_, index_.end()); }
+//       const_iterator end(void) const { return iterator(begin_, index_.end()); }
+//       reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+//       const_reverse_iterator rbegin(void) const
+//       { return const_reverse_iterator(end()); }
+//       reverse_iterator rend(void) { return reverse_iterator(begin()); }
+//       const_reverse_iterator rend(void) const
+//       { return const_reverse_iterator(begin()); }
+
+
+//       reference front(void) { return *(begin_ +index_[0]); }
+//       const_reference front(void) const { return *(begin_ +index_[0]); }
+//       reference back(void) { return *(--(end())); }
+//       const_reference back(void) const { return *(--(end())); }
+   
+//       tab_ref_index(void) {}
+//       tab_ref_index(const ITER &b, const dynamic_array<size_type> &ind)
+//       { begin_ = b; index_ = ind; }
+
+//     // to be changed in a const_reference ?
+//       value_type operator [](size_type ii) const
+//       { return *(begin_ + index_[ii]);}
+//       reference operator [](size_type ii) { return *(begin_ + index_[ii]); }
+
+//   };
+
+
+  /// Iterator used by gmm::tab_ref_index_ref<ITER,ITER_INDEX>: it walks a
+  /// sequence of indices and dereferences to the element of the base range
+  /// located at the current index, i.e. *(piter + *iter_index).
+  template<typename ITER, typename ITER_INDEX>
+    struct tab_ref_index_ref_iterator_
+    {
+      typedef typename std::iterator_traits<ITER>::value_type value_type;
+      typedef typename std::iterator_traits<ITER>::pointer    pointer;
+      typedef typename std::iterator_traits<ITER>::reference  reference;
+      typedef typename std::iterator_traits<ITER>::difference_type
+                                                              difference_type;
+      typedef std::random_access_iterator_tag iterator_category;
+      typedef tab_ref_index_ref_iterator_<ITER, ITER_INDEX> iterator;
+      typedef size_t size_type;
+
+      ITER piter;             // start of the base (indexed) range
+      ITER_INDEX iter_index;  // current position in the index sequence
+
+      tab_ref_index_ref_iterator_(void) {}
+      tab_ref_index_ref_iterator_(const ITER &base,
+                                  const ITER_INDEX &index_pos)
+        : piter(base), iter_index(index_pos) {}
+
+      // All movement happens in the index sequence; piter never changes.
+      iterator &operator ++() { ++iter_index; return *this; }
+      iterator &operator --() { --iter_index; return *this; }
+      iterator operator ++(int) {
+        iterator previous = *this;
+        ++iter_index;
+        return previous;
+      }
+      iterator operator --(int) {
+        iterator previous = *this;
+        --iter_index;
+        return previous;
+      }
+      iterator &operator +=(difference_type n) {
+        iter_index += n;
+        return *this;
+      }
+      iterator &operator -=(difference_type n) {
+        iter_index -= n;
+        return *this;
+      }
+      iterator operator +(difference_type n) const {
+        iterator shifted = *this;
+        shifted += n;
+        return shifted;
+      }
+      iterator operator -(difference_type n) const {
+        iterator shifted = *this;
+        shifted -= n;
+        return shifted;
+      }
+      /// Distance measured in the index sequence only.
+      difference_type operator -(const iterator &other) const
+      { return iter_index - other.iter_index; }
+
+      reference operator *() const { return *(piter + *iter_index); }
+      reference operator [](size_type ii) const
+      { return *(piter + *(iter_index + ii)); }
+
+      // Comparisons only hold between iterators sharing the same base.
+      bool operator ==(const iterator &other) const {
+        if (!(piter == other.piter)) return false;
+        return iter_index == other.iter_index;
+      }
+      bool operator !=(const iterator &other) const
+      { return !(other == *this); }
+      bool operator < (const iterator &other) const {
+        if (!(piter == other.piter)) return false;
+        return iter_index < other.iter_index;
+      }
+
+    };
+
+  /**
+      Convenience factory: builds a tab_ref_index_ref_iterator_ from a base
+      iterator and an index iterator, so that callers do not have to spell
+      out the (long) iterator type themselves.
+  */
+  template<typename ITER, typename ITER_INDEX>
+  tab_ref_index_ref_iterator_<ITER,ITER_INDEX>
+  index_ref_iterator(ITER it, ITER_INDEX it_i) {
+    typedef tab_ref_index_ref_iterator_<ITER,ITER_INDEX> result_type;
+    return result_type(it, it_i);
+  }
+
+  /** Indexed array reference: given a base range X (known by its start
+      iterator) and a sequence of indices I, this class provides a
+      pseudo-container Y such that
+      @code Y[i] = X[I[i]] @endcode
+      Only iterators are stored; neither elements nor indices are copied.
+  */
+  template<typename ITER, typename ITER_INDEX> class tab_ref_index_ref {
+  public :
+
+    typedef std::iterator_traits<ITER>            traits_type;
+    typedef typename traits_type::value_type      value_type;
+    typedef typename traits_type::pointer         pointer;
+    typedef typename traits_type::pointer         const_pointer;
+    typedef typename traits_type::reference       reference;
+    typedef typename traits_type::reference       const_reference;
+    typedef typename traits_type::difference_type difference_type;
+    typedef size_t                                size_type;
+    typedef tab_ref_index_ref_iterator_<ITER, ITER_INDEX>   iterator;
+    typedef iterator                              const_iterator;
+    typedef std::reverse_iterator<const_iterator>     const_reverse_iterator;
+    typedef std::reverse_iterator<iterator>           reverse_iterator;
+
+  protected :
+
+    ITER begin_;                          // start of the base range X
+    ITER_INDEX index_begin_, index_end_;  // index sequence [first, last)
+
+  public :
+
+    tab_ref_index_ref(void) {}
+    tab_ref_index_ref(const ITER &b, const ITER_INDEX &bi,
+                      const ITER_INDEX &ei)
+      : begin_(b), index_begin_(bi), index_end_(ei) {}
+
+    // Size and emptiness are those of the index sequence.
+    bool empty(void) const { return index_begin_ == index_end_; }
+    size_type size(void) const { return index_end_ - index_begin_; }
+
+    iterator begin(void) { return iterator(begin_, index_begin_); }
+    const_iterator begin(void) const
+    { return const_iterator(begin_, index_begin_); }
+    iterator end(void) { return iterator(begin_, index_end_); }
+    const_iterator end(void) const
+    { return const_iterator(begin_, index_end_); }
+    reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+    const_reverse_iterator rbegin(void) const
+    { return const_reverse_iterator(end()); }
+    reverse_iterator rend(void) { return reverse_iterator(begin()); }
+    const_reverse_iterator rend(void) const
+    { return const_reverse_iterator(begin()); }
+
+    reference front(void) { return *(begin_ + *index_begin_); }
+    const_reference front(void) const { return *(begin_ + *index_begin_); }
+    reference back(void) {
+      iterator last = end();
+      --last;
+      return *last;
+    }
+    const_reference back(void) const {
+      const_iterator last = end();
+      --last;
+      return *last;
+    }
+    /// Drops the first index; the base range itself is left untouched.
+    void pop_front(void) { ++index_begin_; }
+
+    const_reference operator [](size_type ii) const
+    { return *(begin_ + index_begin_[ii]); }
+    reference operator [](size_type ii)
+    { return *(begin_ + index_begin_[ii]); }
+
+  };
+
+
+  /* ********************************************************************* */
+  /* Reference on regularly spaced elements.                               */
+  /* ********************************************************************* */
+
+  /// Iterator over regularly spaced elements of a range: position i of the
+  /// iterator designates the element *(it + i*N), where N is the stride.
+  template<typename ITER> struct tab_ref_reg_spaced_iterator_ {
+
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+                                                            difference_type;
+    typedef typename std::iterator_traits<ITER>::iterator_category
+                                                            iterator_category;
+    typedef size_t size_type;
+    typedef tab_ref_reg_spaced_iterator_<ITER> iterator;
+
+    ITER it;         // base position in the underlying range
+    size_type N, i;  // stride, and number of strides taken from `it`
+
+    iterator operator ++(int) { iterator tmp = *this; i++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; i--; return tmp; }
+    iterator &operator ++()   { i++; return *this; }
+    iterator &operator --()   { i--; return *this; }
+    iterator &operator +=(difference_type ii) { i+=ii; return *this; }
+    iterator &operator -=(difference_type ii) { i-=ii; return *this; }
+    iterator operator +(difference_type ii) const
+    { iterator itt = *this; return (itt += ii); }
+    iterator operator -(difference_type ii) const
+    { iterator itt = *this; return (itt -= ii); }
+
+    /// Signed distance, in strides, between two iterators of equal stride.
+    /// The base iterators may differ (e.g. after tab_ref_reg_spaced
+    /// pop_front()); their distance is converted to a number of strides.
+    difference_type operator -(const iterator &ii) const {
+      // Divide in the signed difference_type: dividing by the unsigned N
+      // directly would first convert a negative base distance to a huge
+      // unsigned value and yield a wrong result for N > 1.
+      difference_type base_steps
+        = N ? (it - ii.it) / difference_type(N) : difference_type(0);
+      // i - ii.i is computed modulo 2^k in size_type, then converted.
+      return base_steps + difference_type(i - ii.i);
+    }
+
+    reference operator *() const { return *(it + i*N); }
+    reference operator [](size_type ii) const { return *(it + (i+ii)*N); }
+
+    // All comparisons are expressed through the signed distance above.
+    bool operator ==(const iterator &ii) const
+    { return (*this - ii) == difference_type(0); }
+    bool operator !=(const iterator &ii) const
+    { return  (*this - ii) != difference_type(0); }
+    bool operator < (const iterator &ii) const
+    { return (*this - ii) < difference_type(0); }
+
+    tab_ref_reg_spaced_iterator_(void) {}
+    tab_ref_reg_spaced_iterator_(const ITER &iter, size_type n, size_type ii)
+      : it(iter), N(n), i(ii) { }
+
+  };
+
+  /**
+      Convenience factory returning a strided iterator that designates the
+      element at `it` and advances by `stride` elements of the underlying
+      range per step, without having to specify its (long) typename.
+  */
+  template<typename ITER> tab_ref_reg_spaced_iterator_<ITER> 
+  reg_spaced_iterator(ITER it, size_t stride) {
+    // tab_ref_reg_spaced_iterator_ only has a default and a three-argument
+    // constructor (iterator, stride, offset); start at offset 0 so that the
+    // result dereferences to *it.
+    return tab_ref_reg_spaced_iterator_<ITER>(it, stride, 0);
+  }
+
+  /**
+     Provides a "strided" view of a container: element ii of the view is
+     *(begin_ + ii * N), for ii in [0, size_). Only the start iterator, the
+     stride N and the element count are stored.
+     front(), back() and pop_front() expect a non-empty view.
+  */
+  template<typename ITER> class tab_ref_reg_spaced {
+  public :
+
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::reference  const_reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+            difference_type;
+    typedef size_t size_type;
+    typedef tab_ref_reg_spaced_iterator_<ITER> iterator;
+    typedef iterator                          const_iterator;
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+
+  protected :
+
+    ITER begin_;         // first viewed element
+    size_type N, size_;  // stride between viewed elements, element count
+
+  public :
+
+    bool empty(void) const { return size_ == 0; }
+    size_type size(void) const { return size_; }
+
+    iterator begin(void) { return iterator(begin_, N, 0); }
+    const_iterator begin(void) const { return iterator(begin_, N, 0); }
+    iterator end(void) { return iterator(begin_, N, size_); }
+    const_iterator end(void) const { return iterator(begin_, N, size_); }
+    reverse_iterator rbegin(void) { return reverse_iterator(end()); }
+    const_reverse_iterator rbegin(void) const
+    { return const_reverse_iterator(end()); }
+    reverse_iterator rend(void) { return reverse_iterator(begin()); }
+    const_reverse_iterator rend(void) const
+    { return const_reverse_iterator(begin()); }
+
+    reference front(void) { return *begin_; }
+    const_reference front(void) const { return *begin_; }
+    reference back(void) { return *(begin_ + N * (size_-1)); }
+    const_reference back(void) const { return *(begin_ + N * (size_-1)); }
+    // Dropping the first element must also shrink the view; advancing
+    // begin_ alone would slide the whole window one stride past its
+    // original last element.
+    void pop_front(void) { begin_ += N; --size_; }
+
+    tab_ref_reg_spaced(void) {}
+    tab_ref_reg_spaced(const ITER &b, size_type n, size_type s)
+      : begin_(b), N(n), size_(s) {}
+
+
+    const_reference operator [](size_type ii) const
+    { return *(begin_ + ii * N);}
+    reference operator [](size_type ii) { return *(begin_ + ii * N); }
+
+  };
+
+  /// Forward iterator used by tab_ref_with_selection: it derives from ITER
+  /// and skips the positions for which the predicate `cond`, called with the
+  /// iterator itself, returns false.
+  template<typename ITER, typename COND> 
+  struct tab_ref_with_selection_iterator_ : public ITER {
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::difference_type
+                                                              difference_type;
+    typedef std::forward_iterator_tag iterator_category;
+    typedef tab_ref_with_selection_iterator_<ITER, COND> iterator;
+    const COND cond;  // selection predicate, evaluated on the iterator
+
+    tab_ref_with_selection_iterator_(void) {}
+    tab_ref_with_selection_iterator_(const ITER &iter, const COND c)
+      : ITER(iter), cond(c) {}
+
+    /// Advances the underlying iterator until `cond` accepts the current
+    /// position; does nothing when it is already accepted. No end bound is
+    /// checked here.
+    void forward(void) {
+      for (;;) {
+        if ((cond)(*this)) break;
+        ITER::operator ++();
+      }
+    }
+    iterator &operator ++() {
+      ITER::operator ++();
+      forward();
+      return *this;
+    }
+    iterator operator ++(int) {
+      iterator previous = *this;
+      ++(*this);
+      return previous;
+    }
+
+  };
+
+  /**
+     Given a range X = [begin_, end_) and a predicate P, provides a
+     pseudo-container Y made of the positions of X accepted by P. The
+     constructor and pop_front() re-position begin_ through begin(), i.e. on
+     the first position accepted by the predicate.
+  */
+  template<typename ITER, typename COND> class tab_ref_with_selection {
+
+  protected :
+
+    ITER begin_, end_;  // bounds of the underlying range
+    COND cond;          // selection predicate
+
+  public :
+
+    typedef typename std::iterator_traits<ITER>::value_type value_type;
+    typedef typename std::iterator_traits<ITER>::pointer    pointer;
+    typedef typename std::iterator_traits<ITER>::pointer    const_pointer;
+    typedef typename std::iterator_traits<ITER>::reference  reference;
+    typedef typename std::iterator_traits<ITER>::reference  const_reference;
+    typedef size_t  size_type;
+    typedef tab_ref_with_selection_iterator_<ITER, COND> iterator;
+    typedef iterator   const_iterator;
+
+    tab_ref_with_selection(void) {}
+    tab_ref_with_selection(const ITER &b, const ITER &e, const COND &c)
+      : begin_(b), end_(e), cond(c) {
+      begin_ = begin();
+    }
+
+    /// Iterator on the first accepted position at or after begin_.
+    iterator begin(void) const {
+      iterator first(begin_, cond);
+      first.forward();
+      return first;
+    }
+    /// End iterator; built directly on end_, without searching.
+    iterator end(void) const { return iterator(end_, cond); }
+    bool empty(void) const { return begin_ == end_; }
+
+    value_type front(void) const { return *begin(); }
+    /// Steps past the current first position, then moves on to the next
+    /// accepted one.
+    void pop_front(void) {
+      ++begin_;
+      begin_ = begin();
+    }
+
+    COND &condition(void) { return cond; }
+    const COND &condition(void) const { return cond; }
+
+  };
+
+}
+
+#endif /* GMM_REF_H__  */
--- a/gmm/gmm_scaled.h
+++ b/gmm/gmm_scaled.h
@ -0,0 +1,434 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_scaled.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 10, 2002.
+   @brief get a scaled view of a vector/matrix.
+*/
+#ifndef GMM_SCALED_H__
+#define GMM_SCALED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		Scaled references on vectors            		   */
+  /* ********************************************************************* */
+
+  /// Read-only iterator adaptor over IT whose dereferenced values are the
+  /// underlying values multiplied by the scalar r. The result type is the
+  /// strongest numeric type of the element type and S.
+  template <typename IT, typename S> struct scaled_const_iterator {
+    typedef typename strongest_numeric_type<typename std::iterator_traits<IT>::value_type,
+                                            S>::T value_type;
+
+    typedef typename std::iterator_traits<IT>::pointer         pointer;
+    typedef typename std::iterator_traits<IT>::reference       reference;
+    typedef typename std::iterator_traits<IT>::difference_type difference_type;
+    typedef typename std::iterator_traits<IT>::iterator_category
+    iterator_category;
+
+    IT it;  // wrapped iterator
+    S r;    // scaling factor
+
+    scaled_const_iterator(void) {}
+    scaled_const_iterator(const IT &i, S x) : it(i), r(x) {}
+
+    /// Forwards to the index() of the wrapped iterator.
+    inline size_type index(void) const { return it.index(); }
+    inline scaled_const_iterator operator ++(int)
+    { scaled_const_iterator tmp = *this; ++it; return tmp; }
+    inline scaled_const_iterator operator --(int) 
+    { scaled_const_iterator tmp = *this; --it; return tmp; }
+    inline scaled_const_iterator &operator ++() { ++it; return *this; }
+    inline scaled_const_iterator &operator --() { --it; return *this; }
+    inline scaled_const_iterator &operator +=(difference_type i)
+      { it += i; return *this; }
+    inline scaled_const_iterator &operator -=(difference_type i)
+      { it -= i; return *this; }
+    inline scaled_const_iterator operator +(difference_type i) const
+      { scaled_const_iterator itb = *this; return (itb += i); }
+    inline scaled_const_iterator operator -(difference_type i) const
+      { scaled_const_iterator itb = *this; return (itb -= i); }
+    inline difference_type operator -(const scaled_const_iterator &i) const
+      { return difference_type(it - i.it); }
+
+    inline value_type operator  *() const { return (*it) * value_type(r); }
+    // Convert r to value_type exactly as operator*() does, so that it[k]
+    // and *(it + k) are computed by the same expression (this also keeps
+    // element/scalar type mixes such as std::complex<T> with another
+    // scalar type on the path that operator*() already relies on).
+    inline value_type operator [](size_type ii) const
+    { return it[ii] * value_type(r); }
+
+    // Comparisons ignore the scaling factor and compare positions only.
+    inline bool operator ==(const scaled_const_iterator &i) const
+      { return (i.it == it); }
+    inline bool operator !=(const scaled_const_iterator &i) const
+      { return (i.it != it); }
+    inline bool operator < (const scaled_const_iterator &i) const
+      { return (it < i.it); }
+  };
+
+  /// Read-only view of a vector V whose entries are multiplied by the
+  /// scalar r. The view records V's const iterator range, origin and size
+  /// at construction time.
+  template <typename V, typename S> struct scaled_vector_const_ref {
+    typedef scaled_vector_const_ref<V,S> this_type;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<V>::const_iterator iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // const iterator range of the viewed vector
+    const origin_type *origin;  // value of linalg_origin(v)
+    size_type size_;            // value of vect_size(v)
+    S r;                        // scaling factor
+
+    scaled_vector_const_ref(const V &v, S rr)
+      : begin_(vect_const_begin(v)),
+        end_(vect_const_end(v)),
+        origin(linalg_origin(v)),
+        size_(vect_size(v)),
+        r(rr) {}
+
+    /// Entry i of the viewed vector, multiplied by value_type(r).
+    reference operator[](size_type i) const {
+      return value_type(r)
+        * linalg_traits<V>::access(origin, begin_, end_, i);
+    }
+  };
+
+
+   /// Writes a scaled vector view to a stream by delegating to gmm::write.
+   template<typename V, typename S>
+   std::ostream &operator <<(std::ostream &o,
+                             const scaled_vector_const_ref<V,S> &m) {
+     gmm::write(o, m);
+     return o;
+   }
+
+  /* ********************************************************************* */
+  /*		Scaled references on matrices            		   */
+  /* ********************************************************************* */
+
+  /// Row iterator of a scaled row matrix view: wraps the const row iterator
+  /// of M together with the scaling factor r. Dereferencing yields the
+  /// wrapped row iterator itself; r is only carried along.
+  template <typename M, typename S> struct scaled_row_const_iterator {
+    typedef scaled_row_const_iterator<M,S> iterator;
+    typedef typename linalg_traits<M>::const_row_iterator ITER;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it;  // wrapped row iterator
+    S r;      // scaling factor
+
+    scaled_row_const_iterator(void) {}
+    scaled_row_const_iterator(const ITER &i, S rr)
+      : it(i), r(rr) { }
+
+    inline iterator &operator ++()   { it++; return *this; }
+    inline iterator &operator --()   { it--; return *this; }
+    inline iterator operator ++(int) {
+      iterator previous = *this;
+      it++;
+      return previous;
+    }
+    inline iterator operator --(int) {
+      iterator previous = *this;
+      it--;
+      return previous;
+    }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const {
+      iterator shifted = *this;
+      return (shifted += i);
+    }
+    iterator operator -(difference_type i) const {
+      iterator shifted = *this;
+      return (shifted -= i);
+    }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+
+    inline ITER operator *() const { return it; }
+    inline ITER operator [](int i) { return it + i; }
+
+    // Comparisons look at the wrapped iterators only, never at r.
+    inline bool operator ==(const iterator &i) const { return (it == i.it); }
+    inline bool operator !=(const iterator &i) const { return !(i == *this); }
+    inline bool operator < (const iterator &i) const { return (it < i.it); }
+
+  };
+
+  /// Read-only view of a row-accessible matrix M whose entries are
+  /// multiplied by the scalar r. Row range, origin and dimensions are
+  /// captured at construction time.
+  template <typename M, typename S> struct  scaled_row_matrix_const_ref {
+
+    typedef scaled_row_matrix_const_ref<M,S> this_type;
+    typedef typename linalg_traits<M>::const_row_iterator iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;      // row iterator range of the viewed matrix
+    const origin_type *origin;  // value of linalg_origin(m)
+    S r;                        // scaling factor
+    size_type nr, nc;           // mat_nrows(m), mat_ncols(m)
+
+    scaled_row_matrix_const_ref(const M &m, S rr)
+      : begin_(mat_row_begin(m)),
+        end_(mat_row_end(m)),
+        origin(linalg_origin(m)),
+        r(rr),
+        nr(mat_nrows(m)),
+        nc(mat_ncols(m)) {}
+
+    /// Entry (i, j): r times entry j of row i of the viewed matrix.
+    value_type operator()(size_type i, size_type j) const {
+      return r * linalg_traits<M>::access(begin_+i, j);
+    }
+  };
+
+
+  /// Writes a scaled row matrix view to a stream by delegating to
+  /// gmm::write.
+  template<typename M, typename S>
+  std::ostream &operator <<(std::ostream &o,
+                            const scaled_row_matrix_const_ref<M,S> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+
+  /// Column iterator of a scaled column matrix view: wraps the const column
+  /// iterator of M together with the scaling factor r. Dereferencing yields
+  /// the wrapped column iterator itself; r is only carried along.
+  template <typename M, typename S> struct scaled_col_const_iterator {
+    typedef scaled_col_const_iterator<M,S> iterator;
+    typedef typename linalg_traits<M>::const_col_iterator ITER;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+
+    ITER it;  // wrapped column iterator
+    S r;      // scaling factor
+
+    scaled_col_const_iterator(void) {}
+    scaled_col_const_iterator(const ITER &i, S rr)
+      : it(i), r(rr) { }
+
+    iterator &operator ++()   { it++; return *this; }
+    iterator &operator --()   { it--; return *this; }
+    iterator operator ++(int) {
+      iterator previous = *this;
+      it++;
+      return previous;
+    }
+    iterator operator --(int) {
+      iterator previous = *this;
+      it--;
+      return previous;
+    }
+    iterator &operator +=(difference_type i) { it += i; return *this; }
+    iterator &operator -=(difference_type i) { it -= i; return *this; }
+    iterator operator +(difference_type i) const {
+      iterator shifted = *this;
+      return (shifted += i);
+    }
+    iterator operator -(difference_type i) const {
+      iterator shifted = *this;
+      return (shifted -= i);
+    }
+    difference_type operator -(const iterator &i) const
+    { return it - i.it; }
+
+    ITER operator *() const { return it; }
+    ITER operator [](int i) { return it + i; }
+
+    // Comparisons look at the wrapped iterators only, never at r.
+    bool operator ==(const iterator &i) const { return (it == i.it); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (it < i.it); }
+
+  };
+
+  /** Read-only view of a column-major matrix multiplied by a scalar.
+   *  The constructor records the column range, the origin pointer and the
+   *  dimensions of m together with the factor r; the product is formed
+   *  entry by entry when the view is read. */
+  template <typename M, typename S> struct scaled_col_matrix_const_ref {
+
+    typedef scaled_col_matrix_const_ref<M,S> this_type;
+    typedef typename linalg_traits<M>::const_col_iterator iterator;
+    typedef typename linalg_traits<this_type>::value_type value_type;
+    typedef typename linalg_traits<this_type>::origin_type origin_type;
+
+    iterator begin_, end_;        // column range of the viewed matrix
+    const origin_type *origin;    // origin of the viewed matrix
+    S r;                          // scaling factor
+    size_type nr, nc;             // number of rows / columns
+
+    scaled_col_matrix_const_ref(const M &m, S rr)
+      : begin_(mat_col_begin(m)),
+        end_(mat_col_end(m)),
+        origin(linalg_origin(m)),
+        r(rr),
+        nr(mat_nrows(m)),
+        nc(mat_ncols(m)) {}
+
+    /** Entry (i, j) of the scaled matrix: r times entry i of column j. */
+    value_type operator()(size_type i, size_type j) const {
+      return r * linalg_traits<M>::access(begin_ + j, i);
+    }
+  };
+
+
+
+  /** Stream insertion for a scaled column-matrix view: delegates the output
+   *  to gmm::write and returns the stream so insertions can be chained. */
+  template <typename M, typename S>
+  std::ostream &operator <<(std::ostream &o,
+                            const scaled_col_matrix_const_ref<M,S> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+
+  /** Maps a principal orientation tag R to the matching scaled matrix view:
+   *  row_major gives scaled_row_matrix_const_ref, col_major gives
+   *  scaled_col_matrix_const_ref, any other tag gives abstract_null_type. */
+  template <typename L, typename S, typename R>
+  struct scaled_return__ {
+    typedef abstract_null_type return_type;
+  };
+
+  template <typename L, typename S>
+  struct scaled_return__<L, S, row_major> {
+    typedef scaled_row_matrix_const_ref<L,S> return_type;
+  };
+
+  template <typename L, typename S>
+  struct scaled_return__<L, S, col_major> {
+    typedef scaled_col_matrix_const_ref<L,S> return_type;
+  };
+  
+
+  /** Maps a linalg_type tag LT to the matching scaled view: vectors give
+   *  scaled_vector_const_ref, matrices are resolved through scaled_return__
+   *  on their principal orientation, any other tag gives
+   *  abstract_null_type. */
+  template <typename L, typename S, typename LT>
+  struct scaled_return_ {
+    typedef abstract_null_type return_type;
+  };
+
+  template <typename L, typename S>
+  struct scaled_return_<L, S, abstract_vector> {
+    typedef scaled_vector_const_ref<L,S> return_type;
+  };
+
+  template <typename L, typename S>
+  struct scaled_return_<L, S, abstract_matrix> {
+    typedef typename scaled_return__<
+      L, S,
+      typename principal_orientation_type<
+        typename linalg_traits<L>::sub_orientation>::potype
+      >::return_type return_type;
+  };
+
+  /** Return type of scaled(l, x): resolved from linalg_traits<L>::linalg_type
+   *  through scaled_return_. */
+  template <typename L, typename S>
+  struct scaled_return {
+    typedef typename scaled_return_<
+      L, S, typename linalg_traits<L>::linalg_type>::return_type return_type;
+  };
+
+  /** Entry point: builds a read-only view of v scaled by x.  The call is
+   *  dispatched on linalg_traits<L>::linalg_type to the vector or matrix
+   *  overload. */
+  template <typename L, typename S>
+  inline typename scaled_return<L,S>::return_type scaled(const L &v, S x) {
+    typedef typename linalg_traits<L>::linalg_type kind_tag;
+    return scaled(v, x, kind_tag());
+  }
+
+  /** Vector case: wraps v and x in a scaled_vector_const_ref. */
+  template <typename V, typename S>
+  inline typename scaled_return<V,S>::return_type
+  scaled(const V &v, S x, abstract_vector) {
+    return scaled_vector_const_ref<V,S>(v, x);
+  }
+
+  /** Matrix case: dispatches once more on the principal orientation of M
+   *  (row_major or col_major overload). */
+  template <typename M, typename S>
+  inline typename scaled_return<M,S>::return_type
+  scaled(const M &m, S x, abstract_matrix) {
+    typedef typename principal_orientation_type<
+      typename linalg_traits<M>::sub_orientation>::potype orientation_tag;
+    return scaled(m, x, orientation_tag());
+  }
+
+  /** Row-major matrix case: wraps m and x in a
+   *  scaled_row_matrix_const_ref. */
+  template <typename M, typename S>
+  inline typename scaled_return<M,S>::return_type
+  scaled(const M &m, S x, row_major)
+  { return scaled_row_matrix_const_ref<M,S>(m, x); }
+
+  /** Column-major matrix case: wraps m and x in a
+   *  scaled_col_matrix_const_ref. */
+  template <typename M, typename S>
+  inline typename scaled_return<M,S>::return_type
+  scaled(const M &m, S x, col_major)
+  { return scaled_col_matrix_const_ref<M,S>(m, x); }
+
+
+  /* ******************************************************************** */
+  /*	matrix or vector scale                                	          */
+  /* ******************************************************************** */
+
+  /** In-place scaling l *= a.  Dispatches on linalg_traits<L>::linalg_type
+   *  to the vector or matrix implementation. */
+  template <typename L>
+  inline void scale(L &l, typename linalg_traits<L>::value_type a) {
+    typedef typename linalg_traits<L>::linalg_type kind_tag;
+    scale(l, a, kind_tag());
+  }
+
+  /** Overload taking l by const reference: the constness is removed with
+   *  linalg_const_cast and the call is forwarded to the mutable overload. */
+  template <typename L>
+  inline void scale(const L &l, typename linalg_traits<L>::value_type a) {
+    scale(linalg_const_cast(l), a);
+  }
+
+  /** Vector case: multiplies every element reached by the iterator range
+   *  [vect_begin(l), vect_end(l)) by a. */
+  template <typename L>
+  inline void scale(L &l, typename linalg_traits<L>::value_type a,
+                    abstract_vector) {
+    typedef typename linalg_traits<L>::iterator elt_iterator;
+    elt_iterator cur = vect_begin(l), last = vect_end(l);
+    while (cur != last) {
+      *cur *= a;
+      ++cur;
+    }
+  }
+
+  /** Matrix case: dispatches on the principal orientation of L to the
+   *  row-wise or column-wise implementation. */
+  template <typename L>
+  void scale(L &l, typename linalg_traits<L>::value_type a, abstract_matrix) {
+    typedef typename principal_orientation_type<
+      typename linalg_traits<L>::sub_orientation>::potype orientation_tag;
+    scale(l, a, orientation_tag());
+  }
+
+  /** Row-major matrix case: scales the matrix one row at a time. */
+  template <typename L>
+  void scale(L &l, typename linalg_traits<L>::value_type a, row_major) {
+    typedef typename linalg_traits<L>::row_iterator row_iter;
+    row_iter cur = mat_row_begin(l), last = mat_row_end(l);
+    while (cur != last) {
+      scale(linalg_traits<L>::row(cur), a);
+      ++cur;
+    }
+  }
+
+  /** Column-major matrix case: scales the matrix one column at a time. */
+  template <typename L>
+  void scale(L &l, typename linalg_traits<L>::value_type a, col_major) {
+    typedef typename linalg_traits<L>::col_iterator col_iter;
+    col_iter cur = mat_col_begin(l), last = mat_col_end(l);
+    while (cur != last) {
+      scale(linalg_traits<L>::col(cur), a);
+      ++cur;
+    }
+  }
+
+  /** Traits of the scaled vector view.  The view is read-only
+   *  (is_reference = linalg_const, iterator = abstract_null_type) and its
+   *  value type is the strongest numeric type of S and of the value type
+   *  of V.  Storage and sorting properties are inherited from V. */
+  template <typename V, typename S>
+  struct linalg_traits<scaled_vector_const_ref<V,S> > {
+    typedef scaled_vector_const_ref<V,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename strongest_numeric_type<
+      S, typename linalg_traits<V>::value_type>::T value_type;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef value_type reference;
+    typedef abstract_null_type iterator;
+    typedef scaled_const_iterator<
+      typename linalg_traits<V>::const_iterator, S> const_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+
+    static size_type size(const this_type &v) { return v.size_; }
+
+    static const_iterator begin(const this_type &v) {
+      return const_iterator(v.begin_, v.r);
+    }
+
+    static const_iterator end(const this_type &v) {
+      return const_iterator(v.end_, v.r);
+    }
+
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+
+    /** Element i: the factor stored in the iterator times the element of
+     *  the underlying vector. */
+    static value_type access(const origin_type *o, const const_iterator &it,
+                             const const_iterator &ite, size_type i) {
+      return it.r * (linalg_traits<V>::access(o, it.it, ite.it, i));
+    }
+
+  };
+
+
+  /** Traits of the scaled row-major matrix view.  The view is read-only
+   *  (is_reference = linalg_const), only row access is available (all
+   *  column types are abstract_null_type) and a row is itself a scaled
+   *  vector view over the corresponding row of M. */
+  template <typename M, typename S>
+  struct linalg_traits<scaled_row_matrix_const_ref<M,S> > {
+    typedef scaled_row_matrix_const_ref<M,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename strongest_numeric_type<
+      S, typename linalg_traits<M>::value_type>::T value_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<
+      typename linalg_traits<M>::const_sub_row_type>::t vector_type;
+
+    // Row access.
+    typedef scaled_vector_const_ref<vector_type,S> sub_row_type;
+    typedef scaled_vector_const_ref<vector_type,S> const_sub_row_type;
+    typedef scaled_row_const_iterator<M,S> row_iterator;
+    typedef scaled_row_const_iterator<M,S> const_row_iterator;
+
+    // No column access for a row-major view.
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef abstract_null_type col_iterator;
+
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+
+    static size_type nrows(const this_type &m) { return m.nr; }
+    static size_type ncols(const this_type &m) { return m.nc; }
+
+    static const_sub_row_type row(const const_row_iterator &it) {
+      return scaled(linalg_traits<M>::row(it.it), it.r);
+    }
+
+    static const_row_iterator row_begin(const this_type &m) {
+      return const_row_iterator(m.begin_, m.r);
+    }
+
+    static const_row_iterator row_end(const this_type &m) {
+      return const_row_iterator(m.end_, m.r);
+    }
+
+    static const origin_type* origin(const this_type &m) { return m.origin; }
+
+    /** Entry i of the row designated by it, multiplied by the factor
+     *  stored in the iterator. */
+    static value_type access(const const_row_iterator &it, size_type i) {
+      return it.r * (linalg_traits<M>::access(it.it, i));
+    }
+  };
+
+  /** Traits of the scaled column-major matrix view.  The view is read-only
+   *  (is_reference = linalg_const), only column access is available (all
+   *  row types are abstract_null_type) and a column is itself a scaled
+   *  vector view over the corresponding column of M. */
+  template <typename M, typename S>
+  struct linalg_traits<scaled_col_matrix_const_ref<M,S> > {
+    typedef scaled_col_matrix_const_ref<M,S> this_type;
+    typedef linalg_const is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename strongest_numeric_type<
+      S, typename linalg_traits<M>::value_type>::T value_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef value_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    typedef typename org_type<
+      typename linalg_traits<M>::const_sub_col_type>::t vector_type;
+
+    // Column access (const only).
+    typedef abstract_null_type sub_col_type;
+    typedef scaled_vector_const_ref<vector_type,S> const_sub_col_type;
+    typedef abstract_null_type  col_iterator;
+    typedef scaled_col_const_iterator<M,S> const_col_iterator;
+
+    // No row access for a column-major view.
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef abstract_null_type row_iterator;
+
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+
+    static size_type ncols(const this_type &m) { return m.nc; }
+    static size_type nrows(const this_type &m) { return m.nr; }
+
+    static const_sub_col_type col(const const_col_iterator &it) {
+      return scaled(linalg_traits<M>::col(it.it), it.r);
+    }
+
+    static const_col_iterator col_begin(const this_type &m) {
+      return const_col_iterator(m.begin_, m.r);
+    }
+
+    static const_col_iterator col_end(const this_type &m) {
+      return const_col_iterator(m.end_, m.r);
+    }
+
+    static const origin_type* origin(const this_type &m) { return m.origin; }
+
+    /** Entry i of the column designated by it, multiplied by the factor
+     *  stored in the iterator. */
+    static value_type access(const const_col_iterator &it, size_type i) {
+      return it.r * (linalg_traits<M>::access(it.it, i));
+    }
+  };
+
+
+}
+
+#endif //  GMM_SCALED_H__
--- a/gmm/gmm_solver_Schwarz_additive.h
+++ b/gmm/gmm_solver_Schwarz_additive.h
@ -0,0 +1,805 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_Schwarz_additive.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @author  Michel Fournie <fournie@mip.ups-tlse.fr>
+   @date October 13, 2002.
+*/
+
+#ifndef GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
+#define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__ 
+
+#include "gmm_kernel.h"
+#include "gmm_superlu_interface.h"
+#include "gmm_solver_cg.h"
+#include "gmm_solver_gmres.h"
+#include "gmm_solver_bicgstab.h"
+#include "gmm_solver_qmr.h"
+
+namespace gmm {
+      
+  /* ******************************************************************** */
+  /*		Additive Schwarz interfaced local solvers                 */
+  /* ******************************************************************** */
+
+  // Empty tag types.  An instance is passed as first argument to
+  // AS_local_solve / AS_global_solve so that overload resolution picks the
+  // wrapper forwarding to cg, gmres, bicgstab or qmr respectively.
+  struct using_cg {};
+  struct using_gmres {};
+  struct using_bicgstab {};
+  struct using_qmr {};
+
+  /** Preconditioner type effectively stored for each subdomain.  In this
+   *  primary template it is the user supplied type P and transform() hands
+   *  the given object back unchanged; specializations may substitute
+   *  another type for a given local_solver tag. */
+  template <typename P, typename local_solver, typename Matrix>
+  struct actual_precond {
+    typedef P APrecond;
+
+    static const APrecond &transform(const P &PP)
+    { return PP; }
+  };
+
+  /** Local (subdomain) solve selected by the using_cg tag: forwards all
+   *  arguments to cg. */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_cg, const Matrix1 &A, Vector &x, const Vector &b,
+                      const Precond &P, iteration &iter) {
+    cg(A, x, b, P, iter);
+  }
+
+  /** Local (subdomain) solve selected by the using_gmres tag: forwards to
+   *  gmres, passing the fixed value 100 between the preconditioner and
+   *  the iteration object (presumably the restart length -- confirm
+   *  against the gmres signature). */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_gmres, const Matrix1 &A, Vector &x,
+                      const Vector &b, const Precond &P, iteration &iter) {
+    gmres(A, x, b, P, 100, iter);
+  }
+  
+  /** Local (subdomain) solve selected by the using_bicgstab tag: forwards
+   *  all arguments to bicgstab. */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_bicgstab, const Matrix1 &A, Vector &x,
+                      const Vector &b, const Precond &P, iteration &iter) {
+    bicgstab(A, x, b, P, iter);
+  }
+
+  /** Local (subdomain) solve selected by the using_qmr tag: forwards all
+   *  arguments to qmr. */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_qmr, const Matrix1 &A, Vector &x,
+                      const Vector &b, const Precond &P, iteration &iter) {
+    qmr(A, x, b, P, iter);
+  }
+
+#if defined(GMM_USES_SUPERLU)
+  // Empty tag type selecting the SuperLU overloads below; it only exists
+  // when GMM_USES_SUPERLU is defined.
+  struct using_superlu {};
+
+  /** Specialization for the SuperLU local solver: the object stored per
+   *  subdomain is a SuperLU_factor instead of the user preconditioner.
+   *  transform() returns a SuperLU_factor unchanged when given one, and a
+   *  default-constructed SuperLU_factor for any other argument type. */
+  template <typename P, typename Matrix>
+  struct actual_precond<P, using_superlu, Matrix> {
+    typedef typename linalg_traits<Matrix>::value_type value_type;
+    typedef SuperLU_factor<value_type> APrecond;
+
+    // Any other preconditioner type is ignored.
+    template <typename PR>
+    static APrecond transform(const PR &)
+    { return APrecond(); }
+
+    // An existing factor is passed through.
+    static const APrecond &transform(const APrecond &PP)
+    { return PP; }
+  };
+
+  /** Local (subdomain) solve selected by the using_superlu tag.  The matrix
+   *  argument is unused: the solve is done by P.solve(x, b), then the
+   *  iteration counter of iter is set to 1. */
+  template <typename Matrix1, typename Precond, typename Vector>
+  void AS_local_solve(using_superlu, const Matrix1 &, Vector &x,
+                      const Vector &b, const Precond &P, iteration &iter) {
+    P.solve(x, b);
+    iter.set_iteration(1);
+  }
+#endif
+
+  /* ******************************************************************** */
+  /*		Additive Schwarz Linear system                            */
+  /* ******************************************************************** */
+
+  /** Operator object of the additive Schwarz method.
+   *  It keeps pointers to the global matrix and to the subdomain matrices
+   *  vB (neither is copied, so both must outlive this object), the local
+   *  matrices vAloc built by init(), one preconditioner per subdomain and
+   *  the work vectors used when the operator is applied with mult().
+   *  The members declared mutable are updated by mult() through a const
+   *  reference.
+   */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename local_solver>
+  struct add_schwarz_mat{
+    typedef typename linalg_traits<Matrix1>::value_type value_type;
+
+    const Matrix1 *A;                  // global matrix (not owned)
+    const std::vector<Matrix2> *vB;    // subdomain matrices (not owned)
+    std::vector<Matrix2> vAloc;        // local matrices, one per subdomain
+    mutable iteration iter;            // control of the local solves
+    double residual;                   // residual value given to init()
+    mutable size_type itebilan;        // accumulated local iteration count
+    mutable std::vector<std::vector<value_type> > gi, fi; // local work vectors
+    std::vector<typename actual_precond<Precond, local_solver,
+                                        Matrix1>::APrecond> precond1;
+
+    void init(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+              iteration iter_, const Precond &P, double residual_);
+
+    /** Builds an empty object.  The pointers and scalar members are zeroed
+     *  so that a default-constructed object never holds indeterminate
+     *  values; init() must still be called before any use. */
+    add_schwarz_mat(void) : A(0), vB(0), residual(0.0), itebilan(0) {}
+
+    /** Builds the object and initializes it at once, see init(). */
+    add_schwarz_mat(const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+                iteration iter_, const Precond &P, double residual_)
+    { init(A_, vB_, iter_, P, residual_); }
+  };
+
+  /** Prepares the operator for the system matrix A_ and the subdomain
+   *  matrices vB_.
+   *  Only the addresses of A_ and vB_ are stored, so both must stay alive
+   *  as long as this object is used.  For every subdomain i the local matrix
+   *  vAloc[i] = vB[i]^T * A * vB[i] is assembled, the preconditioner
+   *  precond1[i] (initialized from P through actual_precond::transform) is
+   *  built on it and the work vectors fi[i], gi[i] are sized to the number
+   *  of columns of vB[i].
+   *
+   *  With GMM_USES_MPI each rank only handles the subdomains of its slice
+   *  [rank*tranche, (rank+1)*tranche) and the CSR copy of A is passed
+   *  around the ranks (MPI_Sendrecv to `next`, from `previous`), the
+   *  products being accumulated in vAloc[i] at each of the `size` steps.
+   *  NOTE(review): tranche = nb_sub/size is an integer division and the
+   *  line extending the last slice is commented out, so the last
+   *  nb_sub % size subdomains are not handled by any rank -- confirm this
+   *  is intended.
+   *
+   *  @param A_        global matrix.
+   *  @param vB_       one matrix per subdomain.
+   *  @param iter_     iteration object copied into the member `iter`.
+   *  @param P         preconditioner used as model for every subdomain.
+   *  @param residual_ value stored in the member `residual`.
+   */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+	    typename local_solver>
+  void add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>::init(
+       const Matrix1 &A_, const std::vector<Matrix2> &vB_,
+       iteration iter_, const Precond &P, double residual_) {
+
+    vB = &vB_; A = &A_; iter = iter_;
+    residual = residual_;
+    
+    size_type nb_sub = vB->size();
+    vAloc.resize(nb_sub);
+    gi.resize(nb_sub); fi.resize(nb_sub);
+    precond1.resize(nb_sub);
+    // Every subdomain starts from the same preconditioner object.
+    std::fill(precond1.begin(), precond1.end(),
+	      actual_precond<Precond, local_solver, Matrix1>::transform(P));
+    itebilan = 0;
+    
+    if (iter.get_noisy()) cout << "Init pour sub dom ";
+#ifdef GMM_USES_MPI
+    int size,tranche,borne_sup,borne_inf,rank,tag1=11,tag2=12,tag3=13,sizepr = 0;
+    //    int tab[4];
+    double t_ref,t_final;
+    MPI_Status status;
+    t_ref=MPI_Wtime();
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    // Slice of subdomains treated by this rank: [borne_inf, borne_sup).
+    tranche=nb_sub/size;
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;
+    // if (rank==size-1) borne_sup = nb_sub;
+
+    cout << "Nombre de sous domaines " << borne_sup - borne_inf << endl;
+
+    int sizeA = mat_nrows(*A);
+    gmm::csr_matrix<value_type> Acsr(sizeA, sizeA), Acsrtemp(sizeA, sizeA);
+    gmm::copy(gmm::eff_matrix(*A), Acsr);
+    int next = (rank + 1) % size;
+    int previous = (rank + size - 1) % size;
+    //communication of local information on ring pattern
+    //Each process receive  Nproc-1 contributions 
+
+    // One pass per rank of the ring; Acsr holds the matrix received at the
+    // previous pass (the local one at the first pass).
+    for (int nproc = 0; nproc < size; ++nproc) {
+       for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) {
+// 	for (size_type i = 0; i < nb_sub/size; ++i) {
+// 	for (size_type i = 0; i < nb_sub; ++i) {
+	// size_type i=(rank+size*(j-1)+nb_sub)%nb_sub;
+
+	cout << "Sous domaines " << i << " : " << mat_ncols((*vB)[i]) << endl;
+#else
+	for (size_type i = 0; i < nb_sub; ++i) {
+#endif
+	  
+	  if (iter.get_noisy()) cout << i << " " << std::flush;
+	  // Maux receives the intermediate product vB[i]^T * A.
+	  Matrix2 Maux(mat_ncols((*vB)[i]), mat_nrows((*vB)[i]));
+	  
+#ifdef GMM_USES_MPI
+	  Matrix2 Maux2(mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	  // vAloc[i] is reset at the first pass only, then accumulated.
+	  if (nproc == 0) {
+	    gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	    gmm::clear(vAloc[i]);
+	  }
+	  gmm::mult(gmm::transposed((*vB)[i]), Acsr, Maux);
+	  gmm::mult(Maux, (*vB)[i], Maux2);
+	  gmm::add(Maux2, vAloc[i]);
+#else
+	  gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i]));
+	  gmm::mult(gmm::transposed((*vB)[i]), *A, Maux);
+	  gmm::mult(Maux, (*vB)[i], vAloc[i]);
+#endif
+
+#ifdef GMM_USES_MPI
+	  // The preconditioner is built once vAloc[i] is complete, i.e. at
+	  // the last pass of the ring.
+	  if (nproc == size - 1 ) {
+#endif
+	    precond1[i].build_with(vAloc[i]);
+	    gmm::resize(fi[i], mat_ncols((*vB)[i]));
+	    gmm::resize(gi[i], mat_ncols((*vB)[i]));
+#ifdef GMM_USES_MPI
+	  }
+#else
+	}
+#endif
+#ifdef GMM_USES_MPI
+     }
+      // Pass the current matrix to the next rank and receive the one of the
+      // previous rank: first the jc array, then ir and pr whose length is
+      // given by jc[sizeA].
+      if (nproc != size - 1) {
+        MPI_Sendrecv(&(Acsr.jc[0]), sizeA+1, MPI_INT, next, tag2,
+                     &(Acsrtemp.jc[0]), sizeA+1, MPI_INT, previous, tag2,
+                     MPI_COMM_WORLD, &status);
+        // Grow the receive buffers only when the incoming matrix is larger
+        // than anything received so far.
+        if (Acsrtemp.jc[sizeA] > size_type(sizepr)) {
+          sizepr = Acsrtemp.jc[sizeA];
+          gmm::resize(Acsrtemp.pr, sizepr);
+          gmm::resize(Acsrtemp.ir, sizepr);
+        }
+        MPI_Sendrecv(&(Acsr.ir[0]), Acsr.jc[sizeA], MPI_INT, next, tag1,
+                     &(Acsrtemp.ir[0]), Acsrtemp.jc[sizeA], MPI_INT, previous, tag1,
+                     MPI_COMM_WORLD, &status);
+        
+        MPI_Sendrecv(&(Acsr.pr[0]), Acsr.jc[sizeA], mpi_type(value_type()), next, tag3, 
+                     &(Acsrtemp.pr[0]), Acsrtemp.jc[sizeA], mpi_type(value_type()), previous, tag3,
+                     MPI_COMM_WORLD, &status);
+        gmm::copy(Acsrtemp, Acsr);
+      }
+    }
+      t_final=MPI_Wtime();
+    cout<<"temps boucle precond "<< t_final-t_ref<<endl;
+#endif
+    if (iter.get_noisy()) cout << "\n";
+  }
+  
+  /** Applies the additive Schwarz operator to p and writes the result in q.
+   *  Steps, as implemented below:
+   *    1. q = A p;
+   *    2. for each subdomain i, fi[i] = vB[i]^T q and a local solve with
+   *       the matrix vAloc[i] and right-hand side fi[i] fills gi[i];
+   *    3. q is cleared and rebuilt as the sum over i of vB[i] gi[i].
+   *  M is taken by const reference but its mutable members are updated:
+   *  iter is re-initialized before each local solve, the largest local
+   *  iteration count of this call is added to M.itebilan and the maximal
+   *  residual of M.iter is replaced by its mean with M.residual.
+   *
+   *  With GMM_USES_MPI each rank only treats the subdomains of its slice
+   *  and the partial sums are combined with MPI_Allreduce.  The two
+   *  reduction buffers are only allocated in that configuration.
+   *  NOTE(review): the slice width is nb_sub/size (integer division), so
+   *  the last nb_sub % size subdomains are not visited by any rank; the
+   *  reduction is also hard-wired to MPI_DOUBLE -- confirm both are
+   *  intended.
+   */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename Vector2, typename Vector3, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, Vector3 &q) {
+    size_type itebilan = 0;   // largest local iteration count of this call
+#ifdef GMM_USES_MPI
+    static double tmult_tot = 0.0;
+    double t_ref = MPI_Wtime();
+#endif
+    // cout << "tmult AS begin " << endl;
+    mult(*(M.A), p, q);
+#ifdef GMM_USES_MPI
+    tmult_tot += MPI_Wtime()-t_ref;
+    cout << "tmult_tot = " << tmult_tot << endl;
+    // Buffers of the MPI reduction: qbis accumulates the contribution of
+    // this rank, qter receives one subdomain contribution at a time.
+    std::vector<double> qbis(gmm::vect_size(q));
+    std::vector<double> qter(gmm::vect_size(q));
+    //    MPI_Status status;
+    //    MPI_Request request,request1;
+    //    int tag=111;
+    int size,tranche,borne_sup,borne_inf,rank;
+    size_type nb_sub=M.fi.size();
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    // Slice of subdomains treated by this rank: [borne_inf, borne_sup).
+    tranche=nb_sub/size;
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;
+    // if (rank==size-1) borne_sup=nb_sub;
+    //    int next = (rank + 1) % size;
+    //    int previous = (rank + size - 1) % size;
+    t_ref = MPI_Wtime();
+    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+//        for (size_type i = 0; i < nb_sub/size; ++i)
+      // for (size_type j = 0; j < nb_sub; ++j)
+#else
+    for (size_type i = 0; i < M.fi.size(); ++i)
+#endif
+      {
+#ifdef GMM_USES_MPI
+        // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+#endif
+        // Restrict q to subdomain i, then solve the local system.
+        gmm::mult(gmm::transposed((*(M.vB))[i]), q, M.fi[i]);
+        M.iter.init();
+        AS_local_solve(local_solver(), (M.vAloc)[i], (M.gi)[i],
+                       (M.fi)[i],(M.precond1)[i],M.iter);
+        itebilan = std::max(itebilan, M.iter.get_iteration());
+      }
+
+#ifdef GMM_USES_MPI
+    cout << "First  AS loop time " <<  MPI_Wtime() - t_ref << endl;
+#endif
+
+    gmm::clear(q);
+#ifdef GMM_USES_MPI
+    t_ref = MPI_Wtime();
+    // for (size_type j = 0; j < nb_sub; ++j)
+    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+
+#else
+    for (size_type i = 0; i < M.gi.size(); ++i)
+#endif
+      {
+
+#ifdef GMM_USES_MPI
+        // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+//        gmm::mult((*(M.vB))[i], M.gi[i], qbis,qbis);
+        gmm::mult((*(M.vB))[i], M.gi[i], qter);
+        add(qter,qbis,qbis);
+#else
+        // Prolong the local solution and accumulate it in q.
+        gmm::mult((*(M.vB))[i], M.gi[i], q, q);
+#endif
+      }
+#ifdef GMM_USES_MPI
+    // WARNING: the addition below is only needed with the explicit ring
+    // pattern communication (commented out further down).
+
+//      add(qbis,q,q);
+    cout << "Second AS loop time " <<  MPI_Wtime() - t_ref << endl;
+#endif
+
+
+#ifdef GMM_USES_MPI
+    //    int tag1=11;
+    static double t_tot = 0.0;
+    double t_final;
+    t_ref=MPI_Wtime();
+//     int next = (rank + 1) % size;
+//     int previous = (rank + size - 1) % size;
+    // Communication of local information on a ring pattern (disabled):
+    // each process would receive Nproc-1 contributions.
+
+//     if (size > 1) {
+//     for (int nproc = 0; nproc < size-1; ++nproc) 
+//       {
+
+// 	MPI_Sendrecv(&(qbis[0]), gmm::vect_size(q), MPI_DOUBLE, next, tag1,
+// 		   &(qter[0]), gmm::vect_size(q),MPI_DOUBLE,previous,tag1,
+// 		   MPI_COMM_WORLD,&status);
+// 	gmm::copy(qter, qbis);
+// 	add(qbis,q,q);
+//       }
+//     }
+    // Sum the contributions of all ranks into q.
+    MPI_Allreduce(&(qbis[0]), &(q[0]),gmm::vect_size(q), MPI_DOUBLE,
+                  MPI_SUM,MPI_COMM_WORLD);
+    t_final=MPI_Wtime();
+    t_tot += t_final-t_ref;
+    cout<<"["<< rank<<"] temps reduce Resol "<< t_final-t_ref << " t_tot = " << t_tot << endl;
+#endif 
+
+    if (M.iter.get_noisy() > 0) cout << "itebloc = " << itebilan << endl;
+    M.itebilan += itebilan;
+    M.iter.set_resmax((M.iter.get_resmax() + M.residual) * 0.5);
+  }
+
+  /** Overload accepting the result vector by const reference: the
+   *  constness is cast away and the call is forwarded to the main
+   *  overload. */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename Vector2, typename Vector3, typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &q)
+  { mult(M, p, const_cast<Vector3 &>(q)); }
+
+  /** Multiply-add form: q receives the operator applied to p, then p2 is
+   *  added to it. */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename Vector2, typename Vector3, typename Vector4,
+            typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &p2, Vector4 &q) {
+    mult(M, p, q);
+    add(p2, q);
+  }
+
+  /** Multiply-add form accepting the result vector by const reference: the
+   *  constness is cast away for the product, then p2 is added to q. */
+  template <typename Matrix1, typename Matrix2, typename Precond,
+            typename Vector2, typename Vector3, typename Vector4,
+            typename local_solver>
+  void mult(const add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &M,
+            const Vector2 &p, const Vector3 &p2, const Vector4 &q) {
+    mult(M, p, const_cast<Vector4 &>(q));
+    add(p2, q);
+  }
+
+  /* ******************************************************************** */
+  /*		Additive Schwarz interfaced global solvers                */
+  /* ******************************************************************** */
+
+  /** Global solve selected by the using_cg tag: forwards to cg with the
+   *  additive Schwarz operator, passing *(ASM.A) and an identity_matrix
+   *  as the two arguments placed between b and iter. */
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_cg, const ASM_type &ASM, Vect &x,
+                       const Vect &b, iteration &iter) {
+    cg(ASM, x, b, *(ASM.A), identity_matrix(), iter);
+  }
+
+  /** Global solve selected by the using_gmres tag: forwards to gmres with
+   *  an identity_matrix as preconditioner argument and the fixed value 100
+   *  (presumably the restart length -- confirm against the gmres
+   *  signature). */
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_gmres, const ASM_type &ASM, Vect &x,
+                       const Vect &b, iteration &iter) {
+    gmres(ASM, x, b, identity_matrix(), 100, iter);
+  }
+
+  /** Global solve selected by the using_bicgstab tag: forwards to bicgstab
+   *  with an identity_matrix as preconditioner argument. */
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_bicgstab, const ASM_type &ASM, Vect &x,
+                       const Vect &b, iteration &iter) {
+    bicgstab(ASM, x, b, identity_matrix(), iter);
+  }
+
+  /** Global solve selected by the using_qmr tag: forwards to qmr with an
+   *  identity_matrix as preconditioner argument. */
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_qmr, const ASM_type &ASM, Vect &x,
+                       const Vect &b, iteration &iter) {
+    qmr(ASM, x, b, identity_matrix(), iter);
+  }
+
+#if defined(GMM_USES_SUPERLU)
+  /** Global solve selected by the using_superlu tag.  This combination is
+   *  rejected: the function ignores its arguments and always triggers the
+   *  GMM_ASSERT1 failure below. */
+  template <typename ASM_type, typename Vect>
+  void AS_global_solve(using_superlu, const ASM_type &, Vect &,
+                       const Vect &, iteration &) {
+    GMM_ASSERT1(false, "You cannot use SuperLU as "
+                "global solver in additive Schwarz method");
+  }
+#endif
+  
+  /* ******************************************************************** */
+  /*	            Linear Additive Schwarz method                        */
+  /* ******************************************************************** */
+  /* ref : Domain decomposition algorithms for the p-version finite       */
+  /*       element method for elliptic problems, Luca F. Pavarino,        */
+  /*       PhD thesis, Courant Institute of Mathematical Sciences, 1992.  */
+  /* ******************************************************************** */
+
+  /** Function to call if the ASM matrix is precomputed for successive solves
+   *  with the same system.
+   *
+   *  The right-hand side of the preconditioned system is built first: for
+   *  each subdomain i, fi[i] = vB[i]^T f, a local solve fills gi[i], and
+   *  g accumulates vB[i] gi[i].  The global solver selected by the
+   *  global_solver tag is then run on (ASM, u, g).
+   *
+   *  With GMM_USES_MPI each rank treats its own slice of subdomains and
+   *  the partial sums (buffer gbis, only allocated in that configuration)
+   *  are combined into g with MPI_Allreduce.
+   *  NOTE(review): the slice width is nb_sub/size (integer division), so
+   *  the last nb_sub % size subdomains are not visited by any rank; the
+   *  reduction is also hard-wired to MPI_DOUBLE whatever value_type is --
+   *  confirm both are intended.
+   *
+   *  @param ASM   initialized additive Schwarz operator; its work vectors,
+   *               iteration object and itebilan counter are modified.
+   *  @param u     solution vector handed to the global solver.
+   *  @param f     right-hand side of the original system.
+   *  @param iter  iteration control of the global solve.
+   */
+  template <typename Matrix1, typename Matrix2,
+            typename Vector2, typename Vector3, typename Precond,
+            typename local_solver, typename global_solver>
+  void additive_schwarz(
+    add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver> &ASM, Vector3 &u,
+    const Vector2 &f, iteration &iter, const global_solver&) {
+
+    typedef typename linalg_traits<Matrix1>::value_type value_type;
+
+    size_type nb_sub = ASM.vB->size(), nb_dof = gmm::vect_size(f);
+    ASM.itebilan = 0;
+    std::vector<value_type> g(nb_dof);   // right-hand side of the global solve
+#ifdef GMM_USES_MPI
+    // Contribution of this rank, summed over all ranks into g below.
+    std::vector<value_type> gbis(nb_dof);
+    double t_init=MPI_Wtime();
+    int size,tranche,borne_sup,borne_inf,rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    // Slice of subdomains treated by this rank: [borne_inf, borne_sup).
+    tranche=nb_sub/size;
+    borne_inf=rank*tranche;
+    borne_sup=(rank+1)*tranche;
+    // if (rank==size-1) borne_sup=nb_sub*size;
+    for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i)
+//     for (size_type i = 0; i < nb_sub/size; ++i)
+      // for (size_type j = 0; j < nb_sub; ++j)
+      // for (size_type i = rank; i < nb_sub; i+=size)
+#else
+    for (size_type i = 0; i < nb_sub; ++i)
+#endif
+    {
+
+#ifdef GMM_USES_MPI
+      // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub;
+#endif
+      // Restrict f to subdomain i and solve the local system.
+      gmm::mult(gmm::transposed((*(ASM.vB))[i]), f, ASM.fi[i]);
+      ASM.iter.init();
+      AS_local_solve(local_solver(), ASM.vAloc[i], ASM.gi[i], ASM.fi[i],
+                     ASM.precond1[i], ASM.iter);
+      ASM.itebilan = std::max(ASM.itebilan, ASM.iter.get_iteration());
+      // Prolong the local solution and accumulate it.
+#ifdef GMM_USES_MPI
+      gmm::mult((*(ASM.vB))[i], ASM.gi[i], gbis,gbis);
+#else   
+      gmm::mult((*(ASM.vB))[i], ASM.gi[i], g, g);
+#endif
+    }
+#ifdef GMM_USES_MPI
+    cout<<"temps boucle init "<< MPI_Wtime()-t_init<<endl;
+    double t_ref,t_final;
+    t_ref=MPI_Wtime();
+    MPI_Allreduce(&(gbis[0]), &(g[0]),gmm::vect_size(g), MPI_DOUBLE,
+                  MPI_SUM,MPI_COMM_WORLD);
+    t_final=MPI_Wtime();
+    cout<<"temps reduce init "<< t_final-t_ref<<endl;
+#endif
+#ifdef GMM_USES_MPI
+    t_ref=MPI_Wtime();
+    cout<<"begin global AS"<<endl;
+#endif
+    AS_global_solve(global_solver(), ASM, u, g, iter);
+#ifdef GMM_USES_MPI
+    t_final=MPI_Wtime();
+    cout<<"temps AS Global Solve "<< t_final-t_ref<<endl;
+#endif
+    if (iter.get_noisy())
+      cout << "Total number of internal iterations : " << ASM.itebilan << endl;
+  }
+
+  /** Global function: builds the ASM operator, which represents the
+   *  preconditioned linear system, then calls the overload taking a
+   *  precomputed operator.
+   *
+   *  The norm of f is stored in iter; when it is zero, u is cleared and
+   *  nothing else is done.  The local solves are controlled by a copy of
+   *  iter with reduced noisiness and no limit on the iteration count
+   *  (maxiter = size_type(-1)); the maximal residual of iter is given to
+   *  the operator as its residual value.
+   */
+  template <typename Matrix1, typename Matrix2,
+            typename Vector2, typename Vector3, typename Precond,
+            typename local_solver, typename global_solver>
+  void additive_schwarz(const Matrix1 &A, Vector3 &u,
+                        const Vector2 &f, const Precond &P,
+                        const std::vector<Matrix2> &vB,
+                        iteration &iter, local_solver,
+                        global_solver) {
+    typedef add_schwarz_mat<Matrix1, Matrix2, Precond, local_solver>
+      operator_type;
+
+    iter.set_rhsnorm(vect_norm2(f));
+    if (iter.get_rhsnorm() == 0.0) {
+      gmm::clear(u);
+      return;
+    }
+
+    // Iteration control of the local (subdomain) solves.
+    iteration local_iter = iter;
+    local_iter.reduce_noisy();
+    local_iter.set_maxiter(size_type(-1));
+
+    operator_type ASM(A, vB, local_iter, P, iter.get_resmax());
+    additive_schwarz(ASM, u, f, iter, global_solver());
+  }
+
+  /* ******************************************************************** */
+  /*		Sequential Non-Linear Additive Schwarz method             */
+  /* ******************************************************************** */
+  /* ref : Nonlinearly Preconditioned Inexact Newton Algorithms,          */
+  /*       Xiao-Chuan Cai, David E. Keyes,                                */
+  /*       SIAM J. Sci. Comp. 24: p183-200.                               */
+  /* ******************************************************************** */
+
+  /** Abstract interface describing a non-linear problem to the Newton
+   *  additive Schwarz code.  Every operation is pure virtual; the comments
+   *  below state the contract suggested by the names and by the original
+   *  annotations -- confirm against the concrete implementations.
+   */
+  template <typename Matrixt, typename MatrixBi>
+  class NewtonAS_struct {
+
+  public :
+    typedef Matrixt tangent_matrix_type;
+    typedef MatrixBi B_matrix_type;
+    typedef typename linalg_traits<Matrixt>::value_type value_type;
+    typedef std::vector<value_type> Vector;
+
+    /// Size of the problem (presumably the number of unknowns).
+    virtual size_type size(void) = 0;
+
+    /// Subdomain matrices Bi.
+    virtual const std::vector<MatrixBi> &get_vB() = 0;
+
+    /// Presumably evaluates F at x into f.
+    virtual void compute_F(Vector &f, Vector &x) = 0;
+
+    /// Presumably evaluates the tangent matrix at x into M.
+    virtual void compute_tangent_matrix(Matrixt &M, Vector &x) = 0;
+
+    /// Computes Bi^T grad(F(X)) Bi for subdomain i.
+    virtual void compute_sub_tangent_matrix(Matrixt &Mloc, Vector &x,
+                                            size_type i) = 0;
+
+    /// Computes Bi^T F(X) for subdomain i.
+    virtual void compute_sub_F(Vector &fi, Vector &x, size_type i) = 0;
+
+    virtual ~NewtonAS_struct() {}
+  };
+
+  /** Operator built from one matrix vM[i] per subdomain and the subdomain
+   *  matrices vB (kept by reference, so they must outlive this object).
+   *  The constructor sizes every vM[i] as a square matrix whose order is
+   *  the number of rows of vB[i]; once the vM[i] have been filled, init()
+   *  computes the local matrices vMloc[i] = vB[i]^T * vM[i] * vB[i].
+   */
+  template <typename Matrixt, typename MatrixBi>
+  struct AS_exact_gradient {
+    const std::vector<MatrixBi> &vB;
+    std::vector<Matrixt> vM;
+    std::vector<Matrixt> vMloc;
+
+    /** (Re)computes all the local matrices vMloc[i] from vM[i]. */
+    void init(void) {
+      for (size_type k = 0; k < vB.size(); ++k) {
+        // Intermediate product vB[k]^T * vM[k].
+        Matrixt half_product(gmm::mat_ncols(vB[k]), gmm::mat_ncols(vM[k]));
+        gmm::resize(vMloc[k], gmm::mat_ncols(vB[k]), gmm::mat_ncols(vB[k]));
+        gmm::mult(gmm::transposed(vB[k]), vM[k], half_product);
+        gmm::mult(half_product, vB[k], vMloc[k]);
+      }
+    }
+
+    AS_exact_gradient(const std::vector<MatrixBi> &vB_) : vB(vB_) {
+      vM.resize(vB.size());
+      vMloc.resize(vB.size());
+      for (size_type k = 0; k < vB.size(); ++k)
+        gmm::resize(vM[k], gmm::mat_nrows(vB[k]), gmm::mat_nrows(vB[k]));
+    }
+  };
+
+  /** Applies the AS_exact_gradient operator: q is cleared, then for each
+   *  subdomain i the vector vB[i]^T (vM[i] p) is used as right-hand side of
+   *  a SuperLU solve with vMloc[i], and vB[i] times the solution is added
+   *  to q. */
+  template <typename Matrixt, typename MatrixBi,
+            typename Vector2, typename Vector3>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+            const Vector2 &p, Vector3 &q) {
+    typedef typename gmm::linalg_traits<Vector3>::value_type T;
+
+    gmm::clear(q);
+
+    std::vector<T> global_tmp(gmm::vect_size(p));  // vM[i] * p
+    std::vector<T> local_rhs;                      // restriction to subdomain
+    std::vector<T> local_sol;                      // local solution
+
+    for (size_type k = 0; k < M.vB.size(); ++k) {
+      local_rhs.resize(gmm::mat_ncols(M.vB[k]));
+      local_sol.resize(gmm::mat_ncols(M.vB[k]));
+
+      gmm::mult(M.vM[k], p, global_tmp);
+      gmm::mult(gmm::transposed(M.vB[k]), global_tmp, local_rhs);
+
+      double rcond;  // filled by SuperLU_solve, not used afterwards
+      SuperLU_solve(M.vMloc[k], local_sol, local_rhs, rcond);
+      // Iterative alternative kept for reference:
+      // gmm::iteration iter(1E-10, 0, 100000);
+      //gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter);
+
+      gmm::mult_add(M.vB[k], local_sol, q);
+    }
+  }
+
+  // Variant taking the destination by const reference: the constness of q
+  // is cast away and the call is forwarded to the mutable-destination
+  // overload, so q is written to.
+  template <typename Matrixt, typename MatrixBi,
+            typename Vector2, typename Vector3>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+            const Vector2 &p, const Vector3 &q) {
+    Vector3 &q_out = const_cast<Vector3 &>(q);
+    mult(M, p, q_out);
+  }
+
+  // q = M * p + p2: applies the operator into q, then adds p2 to q.
+  template <typename Matrixt, typename MatrixBi,
+            typename Vector2, typename Vector3, typename Vector4>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+            const Vector2 &p, const Vector3 &p2, Vector4 &q)
+  {
+    mult(M, p, q);
+    add(p2, q);
+  }
+
+  // q = M * p + p2 for a destination passed by const reference: constness
+  // is cast away for the product; the addition is then issued on the
+  // const-qualified q exactly as before.
+  template <typename Matrixt, typename MatrixBi,
+            typename Vector2, typename Vector3, typename Vector4>
+  void mult(const AS_exact_gradient<Matrixt, MatrixBi> &M,
+            const Vector2 &p, const Vector3 &p2, const Vector4 &q)
+  {
+    mult(M, p, const_cast<Vector4 &>(q));
+    add(p2, q);
+  }
+
+  // Step-length search driven by the caller in a do/while loop:
+  //   init_search(r0, it); do { a = next_try(); r = residual(a); }
+  //   while (!is_converged(r));
+  // after which converged_value() / converged_residual() give the retained
+  // step and its residual. Trial steps start at 1 and only shrink.
+  struct S_default_newton_line_search {
+    
+    // Step and residual retained as the result of the search.
+    double conv_alpha, conv_r;
+    // it: number of next_try() calls since init_search(); itmax: set to
+    // size_type(-1) in init_search() and not read in this struct;
+    // glob_it: iteration number passed to init_search().
+    size_t it, itmax, glob_it;
+
+    // alpha: next step to be tried; alpha_old: step returned by the last
+    // next_try(); alpha_mult: shrink factor once alpha < 0.4;
+    // first_res: residual passed to init_search().
+    double alpha, alpha_old, alpha_mult, first_res, alpha_max_ratio;
+    double alpha_min_ratio, alpha_min;
+    // count: bookkeeping for the "give up after a few tries" rule;
+    // count_pat: counts searches ending without noticeable progress. It
+    // persists across init_search() calls unless git <= 1.
+    size_type count, count_pat;
+    // Fallback step: first tried step whose residual fell below
+    // first_res * alpha_max_ratio (possibly refined once, see below).
+    bool max_ratio_reached;
+    double alpha_max_ratio_reached, r_max_ratio_reached;
+    size_type it_max_ratio_reached;
+
+    
+    double converged_value(void) { return conv_alpha; };
+    double converged_residual(void) { return conv_r; };
+
+    // Starts a new search from residual r at global iteration git.
+    // The third (unnamed) argument is ignored.
+    virtual void init_search(double r, size_t git, double = 0.0) {
+      alpha_min_ratio = 0.9;
+      alpha_min = 1e-10;
+      alpha_max_ratio = 10.0;
+      alpha_mult = 0.25;
+      itmax = size_type(-1);
+      glob_it = git; if (git <= 1) count_pat = 0;
+      conv_alpha = alpha = alpha_old = 1.;
+      conv_r = first_res = r; it = 0;
+      count = 0;
+      max_ratio_reached = false;
+    }
+    // Returns the step to evaluate now and prepares the following one:
+    // halved while alpha >= 0.4, multiplied by alpha_mult afterwards.
+    virtual double next_try(void) {
+      alpha_old = alpha;
+      if (alpha >= 0.4) alpha *= 0.5; else alpha *= alpha_mult; ++it;
+      return alpha_old;
+    }
+    // Reports residual r obtained with the step last returned by
+    // next_try(); returns true when the search must stop. The second
+    // (unnamed) argument is ignored.
+    virtual bool is_converged(double r, double = 0.0) {
+      // cout << "r = " << r << " alpha = " << alpha / alpha_mult << " count_pat = " << count_pat << endl;
+      // Remember the first step whose residual is below
+      // first_res * alpha_max_ratio ...
+      if (!max_ratio_reached && r < first_res * alpha_max_ratio) {
+	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
+	it_max_ratio_reached = it; max_ratio_reached = true; 
+      }
+      // ... and replace it by the immediately following try if that one
+      // halves the residual while still being above 1.1 * first_res.
+      if (max_ratio_reached && r < r_max_ratio_reached * 0.5
+	  && r > first_res * 1.1 && it <= it_max_ratio_reached+1) {
+	alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r;
+	it_max_ratio_reached = it;
+      }
+      // Track the best residual seen (always taken on the first call).
+      if (count == 0 || r < conv_r)
+	{ conv_r = r; conv_alpha = alpha_old; count = 1; }
+      // Once some decrease has been obtained, count the calls made.
+      if (conv_r < first_res) ++count;
+
+      // Sufficient decrease: accept immediately.
+      if (r < first_res *  alpha_min_ratio)
+	{ count_pat = 0; return true; }      
+      // Otherwise stop when count reaches 5, or when the step became
+      // smaller than alpha_min and a fallback step is available.
+      if (count >= 5 || (alpha < alpha_min && max_ratio_reached)) {
+	if (conv_r < first_res * 0.99) count_pat = 0;
+	// After 3 searches without progress, return the fallback step
+	// instead of the best one.
+	if (/*gmm::random() * 50. < -log(conv_alpha)-4.0 ||*/ count_pat >= 3)
+	  { conv_r=r_max_ratio_reached; conv_alpha=alpha_max_ratio_reached; }
+	if (conv_r >= first_res * 0.9999) count_pat++;
+	return true;
+      }
+      return false;
+    }
+    S_default_newton_line_search(void) { count_pat = 0; }
+  };
+
+
+  
+  /** Newton iteration with a non-linear additive Schwarz step.
+      Each outer iteration (1) runs a local Newton solve on every
+      sub-domain and accumulates the local corrections into rhs,
+      (2) solves a linear system for the global direction d, and
+      (3) performs a line search on the norm of F along d.
+
+      @param NS  problem description: size(), get_vB(), compute_F(),
+                 compute_tangent_matrix(), compute_sub_F(),
+                 compute_sub_tangent_matrix().
+      @param u_  initial guess; constness is cast away and it is overwritten
+                 with the current iterate after every outer iteration.
+      @param iter outer iteration control; its resmax is also the base
+                 tolerance from which the inner tolerances are derived.
+      @param P   preconditioner handed to actual_precond::transform (and to
+                 add_schwarz_mat in the disabled branch below).
+      The two unnamed tag parameters only select, by their type, the
+      AS_local_solve / AS_global_solve overloads.
+  */
+  template <typename Matrixt, typename MatrixBi, typename Vector,
+	    typename Precond, typename local_solver, typename global_solver>
+  void Newton_additive_Schwarz(NewtonAS_struct<Matrixt, MatrixBi> &NS,
+			       const Vector &u_,
+			       iteration &iter, const Precond &P,
+			       local_solver, global_solver) {
+    Vector &u = const_cast<Vector &>(u_);
+    typedef typename linalg_traits<Vector>::value_type value_type;
+    typedef typename number_traits<value_type>::magnitude_type mtype;
+    typedef actual_precond<Precond, local_solver, Matrixt> chgt_precond;
+    
+    double residual = iter.get_resmax();
+
+    // internal_ls: line search of the local Newton solves;
+    // external_ls: line search of the global Newton step.
+    S_default_newton_line_search internal_ls;
+    S_default_newton_line_search external_ls;
+
+    typename chgt_precond::APrecond PP = chgt_precond::transform(P);
+    iter.set_rhsnorm(mtype(1));
+    // Inner iteration controls, all copied from iter:
+    //   iternc - local Newton loop            (resmax = residual/100)
+    //   iter2  - global linear system         (resmax = residual/1000)
+    //   iter3  - local linear solves          (resmax = residual/10000)
+    //   iter4  - only used by the disabled add_schwarz_mat branch
+    iteration iternc(iter);
+    iternc.reduce_noisy(); iternc.set_maxiter(size_type(-1));
+    iteration iter2(iternc);
+    iteration iter3(iter2); iter3.reduce_noisy();
+    iteration iter4(iter3);
+    iternc.set_name("Local Newton");
+    iter2.set_name("Linear System for Global Newton");
+    iternc.set_resmax(residual/100.0);
+    iter3.set_resmax(residual/10000.0);
+    iter2.set_resmax(residual/1000.0);
+    iter4.set_resmax(residual/1000.0);
+    std::vector<value_type> rhs(NS.size()), x(NS.size()), d(NS.size());
+    std::vector<value_type> xi, xii, fi, di;
+
+    // vx[i]: global point reached by the local solve of sub-domain i;
+    // the tangent matrices of the exact-gradient operator are taken there.
+    std::vector< std::vector<value_type> > vx(NS.get_vB().size());
+    for (size_type i = 0; i < NS.get_vB().size(); ++i) // for exact gradient
+      vx[i].resize(NS.size()); // for exact gradient
+
+    Matrixt Mloc, M(NS.size(), NS.size());
+    NS.compute_F(rhs, u);
+    mtype act_res=gmm::vect_norm2(rhs), act_res_new(0), precond_res = act_res;
+    mtype alpha;
+    
+    // Stop as soon as either the true residual or the norm of the
+    // accumulated local corrections satisfies iter.
+    while(!iter.finished(std::min(act_res, precond_res))) {
+      // As written this loop body runs exactly once (SOR_step == 0), so
+      // the `if (SOR_step)` statements below are never executed.
+      for (int SOR_step = 0;  SOR_step >= 0; --SOR_step) {
+	gmm::clear(rhs);
+	for (size_type isd = 0; isd < NS.get_vB().size(); ++isd) {
+	  const MatrixBi &Bi = (NS.get_vB())[isd];
+	  size_type si = mat_ncols(Bi);
+	  gmm::resize(Mloc, si, si);
+	  xi.resize(si); xii.resize(si); fi.resize(si); di.resize(si);
+	  
+	  iternc.init();
+	  iternc.set_maxiter(30); // ?
+	  if (iternc.get_noisy())
+	    cout << "Non-linear local problem " << isd << endl;
+	  // Local unknown xi starts at 0; the trial point is x = u - Bi*xi.
+	  gmm::clear(xi);
+	  gmm::copy(u, x);
+	  NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+	  mtype r = gmm::vect_norm2(fi), r_t(r);
+	  if (r > value_type(0)) {
+	    iternc.set_rhsnorm(std::max(r, mtype(1)));
+	    while(!iternc.finished(r)) {
+	      NS.compute_sub_tangent_matrix(Mloc, x, isd);
+
+	      // Local linear solve Mloc * di = fi.
+	      PP.build_with(Mloc);
+	      iter3.init();
+	      AS_local_solve(local_solver(), Mloc, di, fi, PP, iter3);
+	      
+	      // Line search on the local residual along di.
+	      internal_ls.init_search(r, iternc.get_iteration());
+	      do {
+		alpha = internal_ls.next_try();
+		gmm::add(xi, gmm::scaled(di, -alpha), xii);
+		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
+		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+		r_t = gmm::vect_norm2(fi);
+	      } while (!internal_ls.is_converged(r_t));
+	      
+	      // The retained step may differ from the last one tried:
+	      // recompute xii, x, fi and r_t for it.
+	      if (alpha != internal_ls.converged_value()) {
+		alpha = internal_ls.converged_value();
+		gmm::add(xi, gmm::scaled(di, -alpha), xii);
+		gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x);
+		NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1));
+		r_t = gmm::vect_norm2(fi);
+	      }
+	      gmm::copy(x, vx[isd]); // for exact gradient
+
+	      if (iternc.get_noisy()) cout << "(step=" << alpha << ")\t";
+	      ++iternc; r = r_t; gmm::copy(xii, xi); 
+	    }
+	    if (SOR_step) gmm::mult(Bi, gmm::scaled(xii, -1.0), u, u);
+	    // Accumulate the local correction: rhs -= Bi * xii.
+	    gmm::mult(Bi, gmm::scaled(xii, -1.0), rhs, rhs);
+	  }
+	}
+	precond_res = gmm::vect_norm2(rhs);
+	if (SOR_step) cout << "SOR step residual = " << precond_res << endl;
+	if (precond_res < residual) break;
+	// NOTE(review): printed regardless of the noisy level of iter.
+	cout << "Precond residual = " << precond_res << endl;
+      }
+
+      iter2.init();
+      // solving linear system for the global Newton method
+      if (0) {
+	// Disabled alternative: linear additive Schwarz operator built
+	// on the tangent matrix at u.
+	NS.compute_tangent_matrix(M, u);
+	add_schwarz_mat<Matrixt, MatrixBi, Precond, local_solver>
+	  ASM(M, NS.get_vB(), iter4, P, iter.get_resmax());
+	AS_global_solve(global_solver(), ASM, d, rhs, iter2);
+      }
+      else {  // for exact gradient
+	// Tangent matrices are evaluated at the points vx[i] reached by
+	// the local solves, then the operator is inverted with GMRES
+	// (restart 50, no preconditioner) to get the direction d.
+	AS_exact_gradient<Matrixt, MatrixBi> eg(NS.get_vB());
+	for (size_type i = 0; i < NS.get_vB().size(); ++i) {
+	  NS.compute_tangent_matrix(eg.vM[i], vx[i]);
+	}
+	eg.init();
+	gmres(eg, d, rhs, gmm::identity_matrix(), 50, iter2);
+      }
+
+      //      gmm::add(gmm::scaled(rhs, 0.1), u); ++iter;
+      // Global line search on ||F(u + alpha d)||; rhs is reused to hold F.
+      external_ls.init_search(act_res, iter.get_iteration());
+      do {
+	alpha = external_ls.next_try();
+	gmm::add(gmm::scaled(d, alpha), u, x);
+	NS.compute_F(rhs, x);
+	act_res_new = gmm::vect_norm2(rhs);
+      } while (!external_ls.is_converged(act_res_new));
+      
+      // Re-evaluate at the retained step if it is not the last one tried.
+      if (alpha != external_ls.converged_value()) {
+	alpha = external_ls.converged_value();
+	gmm::add(gmm::scaled(d, alpha), u, x);
+	NS.compute_F(rhs, x);
+	act_res_new = gmm::vect_norm2(rhs);
+      }
+
+      if (iter.get_noisy() > 1) cout << endl;
+      act_res = act_res_new; 
+      if (iter.get_noisy()) cout << "(step=" << alpha << ")\t unprecond res = " << act_res << " ";
+      
+      
+      // Accept the new iterate.
+      ++iter; gmm::copy(x, u);
+    }
+  }
+
+}
+
+
+#endif //  GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
--- a/gmm/gmm_solver_bfgs.h
+++ b/gmm/gmm_solver_bfgs.h
@ -0,0 +1,210 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2004-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_bfgs.h 
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 14 2004.
+   @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
+ */
+#ifndef GMM_BFGS_H
+#define GMM_BFGS_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
+  // Quasi Newton method for optimization problems.
+  // with Wolfe Line search.
+
+
+  // delta[k] = x[k+1] - x[k]
+  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
+  // H[0] = I
+  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
+  // DFP  : zeta[k] = H[k] gamma[k]
+  // tau[k] = gamma[k]^T zeta[k]
+  // rho[k] = 1 / gamma[k]^T delta[k]
+  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
+  //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
+  // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T 
+  //                 - (1/tau[k])zeta[k] zeta[k]^T 
+
+  // Object representing the inverse of the Hessian.
+  // H[0] = I and every stored (delta, gamma, zeta, tau, rho) entry adds one
+  // rank-two correction, following the formulas listed above.
+  // version == 0 selects the BFGS update, version == 1 the DFP update.
+  template <typename VECTOR> struct bfgs_invhessian {
+
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    std::vector<VECTOR> delta, gamma, zeta;
+    std::vector<T> tau, rho;
+    int version;
+
+    bfgs_invhessian(int v = 0) : version(v) {}
+
+    // Y = H * X: starts from Y = X and adds the contribution of every
+    // stored update in the order they were recorded. Any other value of
+    // `version` than 0 or 1 leaves Y = X.
+    template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
+      copy(X, Y);
+      for (size_type j = 0; j < delta.size(); ++j) {
+        T xdelta = vect_sp(X, delta[j]);
+        T xzeta = vect_sp(X, zeta[j]);
+        if (version == 0) { // BFGS
+          add(scaled(zeta[j], rho[j]*xdelta), Y);
+          add(scaled(delta[j], rho[j]*(xzeta-rho[j]*tau[j]*xdelta)), Y);
+        }
+        else if (version == 1) { // DFP
+          add(scaled(delta[j], rho[j]*xdelta), Y);
+          add(scaled(zeta[j], -xzeta/tau[j]), Y);
+        }
+      }
+    }
+
+    // Forgets every stored update, i.e. goes back to H = I.
+    void restart(void) {
+      delta.clear(); gamma.clear(); zeta.clear();
+      tau.clear(); rho.clear();
+    }
+
+    // Records the pair (deltak, gammak). The pair is silently skipped when
+    // their scalar product is exactly zero (rho would not be defined).
+    template<typename VECT1, typename VECT2>
+    void update(const VECT1 &deltak, const VECT2 &gammak) {
+      T delta_dot_gamma = vect_sp(deltak, gammak);
+      if (delta_dot_gamma == T(0)) return;
+
+      size_type dim = vect_size(deltak);
+      size_type slot = delta.size();
+
+      // H[k] * gammak must be computed before the history grows.
+      VECTOR Hgamma(dim);
+      hmult(gammak, Hgamma);
+
+      delta.resize(slot+1); gamma.resize(slot+1); zeta.resize(slot+1);
+      tau.resize(slot+1); rho.resize(slot+1);
+      resize(delta[slot], dim);
+      resize(gamma[slot], dim);
+      resize(zeta[slot], dim);
+
+      gmm::copy(deltak, delta[slot]);
+      gmm::copy(gammak, gamma[slot]);
+      rho[slot] = R(1) / delta_dot_gamma;
+      if (version == 0)
+        add(delta[slot], scaled(Hgamma, -1), zeta[slot]); // delta - H gamma
+      else
+        gmm::copy(Hgamma, zeta[slot]);                    // H gamma
+      tau[slot] = vect_sp(gammak,  zeta[slot]);
+    }
+  };
+
+
+  /** Quasi-Newton minimisation with a Wolfe line search.
+      @param f       objective, called as f(x) and returning the value.
+      @param grad    gradient, called as grad(x, g) and writing into g.
+      @param x       starting point on entry, current iterate on exit.
+      @param restart the update history is discarded every time the
+                     iteration count is a multiple of this value.
+                     NOTE(review): used as the right operand of `%`, so a
+                     value of 0 is not handled here - confirm callers
+                     never pass 0.
+      @param iter    iteration control, tested on the gradient vector.
+      @param version forwarded to bfgs_invhessian (0: BFGS, 1: DFP).
+      @param lambda_init initial step length, also the value the step is
+                     reset to when the line search is declared blocked.
+      @param print_norm printed objective values are divided by this.
+  */
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR> 
+  void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+	    int restart, iteration& iter, int version = 0,
+	    double lambda_init=0.001, double print_norm=1.0) {
+
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    
+    bfgs_invhessian<VECTOR> invhessian(version);
+    // r: gradient at x, d: search direction, y: trial point,
+    // r2: gradient at y.
+    VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
+    grad(x, r);
+    R lambda = lambda_init, valx = f(x), valy;
+    int nb_restart(0);
+    
+    if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
+    while (! iter.finished_vect(r)) {
+
+      // d = -H r
+      invhessian.hmult(r, d); gmm::scale(d, T(-1));
+      
+      // Wolfe Line search
+      // lambda is carried over from the previous outer iteration.
+      // [lambda_min, lambda_max] brackets the step once an upper bound
+      // has been found (unbounded == false).
+      R derivative = gmm::vect_sp(r, d);    
+      R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
+      bool unbounded = true, blocked = false, grad_computed = false;
+      
+      for(;;) {
+	add(x, scaled(d, lambda), y);
+	valy = f(y);
+	if (iter.get_noisy() >= 2) {
+	  cout.precision(15);
+	  cout << "Wolfe line search, lambda = " << lambda 
+ 	       << " value = " << valy /print_norm << endl;
+// 	       << " derivative = " << derivative
+// 	       << " lambda min = " << lambda_min << " lambda max = "
+// 	       << lambda_max << endl; getchar();
+	}
+	// Sufficient decrease test (coefficient m1) ...
+	if (valy <= valx + m1 * lambda * derivative) {
+	  grad(y, r2); grad_computed = true;
+	  T derivative2 = gmm::vect_sp(r2, d);
+	  // ... followed by the slope test (coefficient m2): both hold,
+	  // the step is accepted.
+	  if (derivative2 >= m2*derivative) break;
+	  lambda_min = lambda;
+	}
+	else {
+	  lambda_max = lambda;
+	  unbounded = false;
+	}
+	// Grow by 10 while no upper bound is known, bisect afterwards.
+	if (unbounded) lambda *= R(10);
+	else  lambda = (lambda_max + lambda_min) / R(2);
+	// The bracket can no longer be refined in floating point.
+	if (lambda == lambda_max || lambda == lambda_min) break;
+	// valy <= R(2)*valx replaced by
+	// valy <= valx + gmm::abs(derivative)*lambda_init
+	// for compatibility with negative values (08.24.07).
+	if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
+	    (lambda < R(lambda_init*1E-8) ||
+	     (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
+	{ blocked = true; lambda = lambda_init; break; }
+      }
+
+      // Rank two update
+      ++iter;
+      // r2 may be stale if the last trial failed the decrease test.
+      if (!grad_computed) grad(y, r2);
+      // r <- r - r2, so that scaled(r,-1) below is the gradient change.
+      gmm::add(scaled(r2, -1), r);
+      if ((iter.get_iteration() % restart) == 0 || blocked) { 
+	if (iter.get_noisy() >= 1) cout << "Restart\n";
+	invhessian.restart();
+	// More than 10 consecutive restarts: give up without updating x.
+	if (++nb_restart > 10) {
+	  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
+	  return;
+	}
+      }
+      else {
+	invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
+	nb_restart = 0;
+      }
+      // Move to the trial point.
+      copy(r2, r); copy(y, x); valx = valy;
+      if (iter.get_noisy() >= 1)
+	cout << "BFGS value " << valx/print_norm << "\t";
+    }
+
+  }
+
+
+  // DFP entry point: same driver as bfgs(), with `version` defaulting to 1.
+  // The step-length and print-scaling arguments of bfgs() keep their
+  // defaults.
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
+  inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+                  int restart, iteration& iter, int version = 1)
+  {
+    bfgs(f, grad, x, restart, iter, version);
+  }
+
+
+}
+
+#endif 
+
--- a/gmm/gmm_solver_bicgstab.h
+++ b/gmm/gmm_solver_bicgstab.h
@ -0,0 +1,160 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of bicgstab.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_bicgstab.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief BiCGStab iterative solver.
+*/
+
+#ifndef GMM_SOLVER_BICGSTAB_H__
+#define GMM_SOLVER_BICGSTAB_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		BiConjugate Gradient Stabilized               		  */
+  /* (preconditioned, with parametrizable scalar product)                 */
+  /* ******************************************************************** */
+
+  /** Preconditioned BiCGStab for A x = b.
+      @param A    system operator, applied through gmm::mult.
+      @param x    initial guess on entry, current approximation on exit.
+      @param b    right-hand side.
+      @param M    preconditioner, applied through gmm::mult(M, v, out).
+      @param iter iteration control; its rhsnorm is set to ||b||.
+      On breakdown (rho_1 == 0 or omega == 0) the routine asserts when
+      iter has no iteration limit (maxiter == size_type(-1)) and otherwise
+      emits a warning and returns.
+  */
+  template <typename Matrix, typename Vector, typename VectorB,
+	    typename Preconditioner>
+  void bicgstab(const Matrix& A, Vector& x, const VectorB& b,
+	       const Preconditioner& M, iteration &iter) {
+
+    typedef typename linalg_traits<Vector>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename temporary_dense_vector<Vector>::vector_type temp_vector;
+    
+    T rho_1, rho_2(0), alpha(0), beta, omega(0);
+    temp_vector p(vect_size(x)), phat(vect_size(x)), s(vect_size(x)),
+      shat(vect_size(x)), 
+      t(vect_size(x)), v(vect_size(x)), r(vect_size(x)), rtilde(vect_size(x));
+    
+    // r = b - A x; rtilde keeps a copy of this initial residual.
+    gmm::mult(A, gmm::scaled(x, -T(1)), b, r);	  
+    gmm::copy(r, rtilde);
+    R norm_r = gmm::vect_norm2(r);
+    iter.set_rhsnorm(gmm::vect_norm2(b));
+
+    // Zero right-hand side: the solution is set to zero.
+    if (iter.get_rhsnorm() == 0.0) { clear(x); return; }
+    
+    while (!iter.finished(norm_r)) {
+      
+      rho_1 = gmm::vect_sp(rtilde, r);
+      if (rho_1 == T(0)) {
+	if (iter.get_maxiter() == size_type(-1)) 
+	  { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+	else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+      }
+      
+      if (iter.first())
+	gmm::copy(r, p);
+      else {
+	if (omega == T(0)) {
+	  if (iter.get_maxiter() == size_type(-1))
+	    { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+	  else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+	}
+	
+	beta = (rho_1 / rho_2) * (alpha / omega);
+	
+	// p = r + beta * (p - omega * v)
+	gmm::add(gmm::scaled(v, -omega), p);
+	gmm::add(r, gmm::scaled(p, beta), p);      
+      }
+      gmm::mult(M, p, phat);
+      gmm::mult(A, phat, v);	
+      alpha = rho_1 / gmm::vect_sp(v, rtilde);
+      // s = r - alpha * v
+      gmm::add(r, gmm::scaled(v, -alpha), s);
+      
+      // s already satisfies iter: apply the first half of the update
+      // only and stop.
+      if (iter.finished_vect(s)) 
+	{ gmm::add(gmm::scaled(phat, alpha), x); break; }
+      
+      gmm::mult(M, s, shat);	
+      gmm::mult(A, shat, t);
+      omega = gmm::vect_sp(t, s) / gmm::vect_norm2_sqr(t);
+      
+      // x += alpha * phat + omega * shat;  r = s - omega * t
+      gmm::add(gmm::scaled(phat, alpha), x); 
+      gmm::add(gmm::scaled(shat, omega), x);
+      gmm::add(s, gmm::scaled(t, -omega), r); 
+      norm_r = gmm::vect_norm2(r);
+      rho_2 = rho_1;
+      
+      ++iter;
+    }
+  }
+  
+  // Variant taking x by const reference: x is passed through
+  // linalg_const_cast and the call forwarded to the overload above, so x
+  // is written to.
+  template <typename Matrix, typename Vector, typename VectorB,
+            typename Preconditioner>
+  void bicgstab(const Matrix& A, const Vector& x, const VectorB& b,
+                const Preconditioner& M, iteration &iter)
+  {
+    bicgstab(A, linalg_const_cast(x), b, M, iter);
+  }
+  
+}
+
+
+#endif //  GMM_SOLVER_BICGSTAB_H__
--- a/gmm/gmm_solver_cg.h
+++ b/gmm/gmm_solver_cg.h
@ -0,0 +1,180 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of cg.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_cg.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>
+   @author  Lie-Quan Lee <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Conjugate gradient iterative solver. 
+*/
+#ifndef GMM_SOLVER_CG_H__
+#define GMM_SOLVER_CG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		conjugate gradient                           		  */
+  /* (preconditioned, with parametrizable additional scalar product)      */
+  /* ******************************************************************** */
+
+  /** Preconditioned conjugate gradient for A x = b.
+      @param A    system operator, applied through mult.
+      @param x    initial guess on entry, current approximation on exit.
+      @param b    right-hand side.
+      @param PS   matrix defining the scalar product used in vect_hp.
+      @param P    preconditioner, applied through mult(P, r, z).
+      @param iter iteration control, tested on the residual vector r; its
+                  rhsnorm is set to sqrt(|vect_hp(PS, b, b)|).
+  */
+  template <typename Matrix, typename Matps, typename Precond, 
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+	  const Precond &P, iteration &iter) {
+
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x)),
+      z(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+
+    // Zero right-hand side: the solution is set to zero.
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      // r = b - A x,  z = P r,  p = z
+      mult(A, scaled(x, T(-1)), b, r);
+      mult(P, r, z);
+      rho = vect_hp(PS, z, r);
+      copy(z, p);
+
+      while (!iter.finished_vect(r)) {
+
+	// From the second pass on, refresh z and rho and update the
+	// direction: p = z + (rho / rho_1) * p.
+	if (!iter.first()) { 
+	  mult(P, r, z);
+	  rho = vect_hp(PS, z, r);
+	  add(z, scaled(p, rho / rho_1), p);
+	}
+	mult(A, p, q);
+
+	// x += a p,  r -= a q
+	a = rho / vect_hp(PS, q, p);	
+	add(scaled(p, a), x);
+	add(scaled(q, -a), r);
+	rho_1 = rho;
+
+	++iter;
+      }
+    }
+  }
+
+  /** Conjugate gradient for A x = b without preconditioning: same
+      iteration as the preconditioned version with z replaced by r, so no
+      z vector and no preconditioner product are needed.
+      The identity_matrix argument is unnamed and unused.
+      NOTE(review): `Precond` is listed as a template parameter but occurs
+      in none of the function parameters, so it cannot be deduced from a
+      call - confirm whether this overload is ever selected without
+      explicit template arguments.
+  */
+  template <typename Matrix, typename Matps, typename Precond, 
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+	  const gmm::identity_matrix &, iteration &iter) {
+
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+
+    // Zero right-hand side: the solution is set to zero.
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      // r = b - A x,  p = r
+      mult(A, scaled(x, T(-1)), b, r);
+      rho = vect_hp(PS, r, r);
+      copy(r, p);
+
+      while (!iter.finished_vect(r)) {
+
+	// From the second pass on: p = r + (rho / rho_1) * p.
+	if (!iter.first()) { 
+	  rho = vect_hp(PS, r, r);
+	  add(r, scaled(p, rho / rho_1), p);
+	}	
+	mult(A, p, q);
+	// x += a p,  r -= a q
+	a = rho / vect_hp(PS, q, p);	
+	add(scaled(p, a), x);
+	add(scaled(q, -a), r);
+	rho_1 = rho;
+	++iter;
+      }
+    }
+  }
+
+  // Variant taking x by const reference: x is passed through
+  // linalg_const_cast and the call forwarded to a mutable-x overload, so
+  // x is written to.
+  template <typename Matrix, typename Matps, typename Precond,
+            typename Vector1, typename Vector2>
+  inline void cg(const Matrix& A, const Vector1& x, const Vector2& b,
+                 const Matps& PS, const Precond &P, iteration &iter)
+  {
+    cg(A, linalg_const_cast(x), b, PS, P, iter);
+  }
+
+  // Five-argument form: no scalar-product matrix is given, so an
+  // identity_matrix() is passed in its place.
+  template <typename Matrix, typename Precond,
+            typename Vector1, typename Vector2>
+  inline void cg(const Matrix& A, Vector1& x, const Vector2& b,
+                 const Precond &P, iteration &iter)
+  {
+    cg(A, x , b, identity_matrix(), P, iter);
+  }
+
+  // Five-argument form with x taken by const reference: an
+  // identity_matrix() is passed as the scalar-product matrix and x is
+  // forwarded unchanged (still const) to the six-argument overloads.
+  template <typename Matrix, typename Precond,
+            typename Vector1, typename Vector2>
+  inline void cg(const Matrix& A, const Vector1& x, const Vector2& b,
+                 const Precond &P, iteration &iter)
+  {
+    cg(A, x , b , identity_matrix(), P , iter);
+  }
+
+}
+
+
+#endif //  GMM_SOLVER_CG_H__
--- a/gmm/gmm_solver_constrained_cg.h
+++ b/gmm/gmm_solver_constrained_cg.h
@ -0,0 +1,165 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_constrained_cg.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Constrained conjugate gradient. */
+//  preconditioning does not work
+
+#ifndef GMM_SOLVER_CCG_H__
+#define GMM_SOLVER_CCG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /** Build CINV row by row from C using an unpreconditioned conjugate
+      gradient on C * trans(C).
+
+      @param C    input matrix (nr rows, nc columns).
+      @param CINV output matrix; cleared on entry, then filled one row per
+                  row of C.
+      The third argument (scalar product matrix) and the fourth (a vector
+      used only to select the temporary vector type VectorX) are not read.
+  */
+  template <typename CMatrix, typename CINVMatrix, typename Matps,
+	    typename VectorX>
+  void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV,
+		      const Matps& /* PS */, VectorX&) {
+    // compute the pseudo inverse of the non-square matrix C such
+    // CINV = inv(C * trans(C)) * C.
+    // based on a conjugate gradient method.
+    
+    // could be optimised: copy the row, precompute C * trans(C).
+    
+    typedef VectorX TmpVec;
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+    
+    size_type nr = mat_nrows(C), nc = mat_ncols(C);
+    
+    TmpVec d(nr), e(nr), l(nc), p(nr), q(nr), r(nr);
+    value_type rho, rho_1, alpha;
+    clear(d);
+    clear(CINV);
+    
+    for (size_type i = 0; i < nr; ++i) {
+      // d is the i-th unit vector for the duration of this pass
+      // (entry i is reset to 0 at the end of the loop body).
+      d[i] = 1.0; rho = 1.0;
+      clear(e);
+      copy(d, r);
+      copy(d, p);
+      
+      while (rho >= 1E-38) { /* conjugate gradient to compute e             */
+	                     /* which is the i-th row of inv(C * trans(C))  */
+	// q = C * trans(C) * p, computed as two products through l.
+	mult(gmm::transposed(C), p, l);
+	mult(C, l, q);	  
+	alpha = rho / vect_sp(p, q);
+	add(scaled(p, alpha), e);  
+	add(scaled(q, -alpha), r); 
+	rho_1 = rho;
+	rho = vect_sp(r, r);
+	add(r, scaled(p, rho / rho_1), p);
+      }
+      
+      mult(transposed(C), e, l); /* l is the i-th row of CINV     */
+      // cout << "l = " << l << endl;
+      clean(l, 1E-15);
+      copy(l, mat_row(CINV, i));
+      
+      d[i] = 0.0;
+    }
+  }
+  
+  /** Compute the minimum of @f$ 1/2((Ax).x) - bx @f$ under the constraint @f$ Cx <= f @f$
+
+      @param A    system matrix.
+      @param C    constraint matrix; one constraint per row.
+      @param x    initial guess on entry, updated in place.
+      @param b    right hand side.
+      @param f    constraint bounds, indexed like the rows of C.
+      @param PS   matrix defining the scalar product passed to vect_sp.
+      @param M    preconditioner, applied to the residual with mult(M, r, z).
+      @param iter iteration controller; the loop ends when
+                  iter.finished(rho) is true.
+  */
+  template < typename Matrix,  typename CMatrix, typename Matps,
+	     typename VectorX, typename VectorB, typename VectorF,
+	     typename Preconditioner >
+  void constrained_cg(const Matrix& A, const CMatrix& C, VectorX& x,
+		      const VectorB& b, const VectorF& f,const Matps& PS,
+		      const Preconditioner& M, iteration &iter) {
+    typedef typename temporary_dense_vector<VectorX>::vector_type TmpVec;
+    typedef typename temporary_vector<CMatrix>::vector_type TmpCVec;
+    typedef row_matrix<TmpCVec> TmpCmat;
+    
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+    value_type rho = 1.0, rho_1, lambda, gamma;
+    TmpVec p(vect_size(x)), q(vect_size(x)), q2(vect_size(x)),
+      r(vect_size(x)), old_z(vect_size(x)), z(vect_size(x)),
+      memox(vect_size(x));
+    // satured[i] records whether constraint i was treated as active
+    // on the previous pass.
+    std::vector<bool> satured(mat_nrows(C));
+    clear(p);
+    iter.set_rhsnorm(sqrt(vect_sp(PS, b, b)));
+    // A zero right hand side norm is replaced by 1.
+    if (iter.get_rhsnorm() == 0.0) iter.set_rhsnorm(1.0);
+   
+    TmpCmat CINV(mat_nrows(C), mat_ncols(C));
+    pseudo_inverse(C, CINV, PS, x);
+    
+    while(true) {
+      // computation of the residual
+      copy(z, old_z);
+      copy(x, memox);
+      mult(A, scaled(x, -1.0), b, r);
+      mult(M, r, z); // preconditioner not coherent
+      bool transition = false;
+      for (size_type i = 0; i < mat_nrows(C); ++i) {
+	value_type al = vect_sp(mat_row(C, i), x) - f[i];
+	if (al >= -1.0E-15) {
+	  // Constraint i is active (within tolerance): flag a transition if
+	  // it was not active before, and remove from z the component given
+	  // by row i of CINV when that component is positive.
+	  if (!satured[i]) { satured[i] = true; transition = true; }
+	  value_type bb = vect_sp(mat_row(CINV, i), z);
+	  if (bb > 0.0) add(scaled(mat_row(C, i), -bb), z);
+	}
+	else
+	  satured[i] = false;
+      }
+    
+      // descent direction
+      rho_1 = rho; rho = vect_sp(PS, r, z); // ...
+      
+      if (iter.finished(rho)) break;
+      
+      if (iter.get_noisy() > 0 && transition) std::cout << "transition\n";
+      // The direction is restarted (gamma = 0) on the first pass and
+      // whenever the set of active constraints has grown.
+      if (transition || iter.first()) gamma = 0.0;
+      else gamma = std::max(0.0, (rho - vect_sp(PS, old_z, z) ) / rho_1);
+      // std::cout << "gamma = " << gamma << endl;
+      // itl::add(r, itl::scaled(p, gamma), p);
+      add(z, scaled(p, gamma), p); // ...
+      
+      ++iter;
+      // one-dimensional optimization
+      mult(A, p, q);
+      lambda = rho / vect_sp(PS, q, p);
+      // Shorten the step so that inactive constraints are not crossed.
+      // NOTE(review): bb subtracts f[i] from C_i.p before being used as
+      // the denominator of the step bound -- confirm this is intended.
+      for (size_type i = 0; i < mat_nrows(C); ++i)
+	if (!satured[i]) {
+	  value_type bb = vect_sp(mat_row(C, i), p) - f[i];
+	  if (bb > 0.0)
+	    lambda = std::min(lambda, (f[i]-vect_sp(mat_row(C, i), x)) / bb);
+	}
+      add(x, scaled(p, lambda), x);
+      // memox becomes (previous x) - (new x); it is overwritten by
+      // copy(x, memox) at the top of the next pass without being read.
+      add(memox, scaled(x, -1.0), memox);
+      
+    }
+  }
+  
+}
+
+#endif //  GMM_SOLVER_CCG_H__
--- a/gmm/gmm_solver_gmres.h
+++ b/gmm/gmm_solver_gmres.h
@ -0,0 +1,173 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of gmres.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_gmres.h
+   @author  Andrew Lumsdaine <lums@osl.iu.edu>
+   @author  Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief GMRES (Generalized Minimum Residual) iterative solver.
+*/
+#ifndef GMM_KRYLOV_GMRES_H
+#define GMM_KRYLOV_GMRES_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_modified_gram_schmidt.h"
+
+namespace gmm {
+
+  /** Generalized Minimum Residual
+   
+      This solves the unsymmetric linear system Ax = b using restarted GMRES.
+      
+      See: Y. Saad and M. Schultz. GMRES: A generalized minimum residual
+      algorithm for solving nonsymmetric linear systems, SIAM
+      J. Sci. Statist. Comp.  7(1986), pp, 856-869
+
+      @param A       system matrix.
+      @param x       initial guess on entry, updated in place.
+      @param b       right hand side.
+      @param M       preconditioner, applied with mult(M, ., .).
+      @param restart maximum number of inner iterations between restarts.
+      @param outer   iteration controller for the whole solve.
+      @param KS      Krylov basis storage, indexed as KS[i] and passed to
+                     orthogonalize() and combine().
+  */
+  template <typename Mat, typename Vec, typename VecB, typename Precond,
+	    typename Basis >
+  void gmres(const Mat &A, Vec &x, const VecB &b, const Precond &M,
+	     int restart, iteration &outer, Basis& KS) {
+
+    typedef typename linalg_traits<Vec>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    std::vector<T> w(vect_size(x)), r(vect_size(x)), u(vect_size(x));
+    std::vector<T> c_rot(restart+1), s_rot(restart+1), s(restart+1);
+    gmm::dense_matrix<T> H(restart+1, restart);
+#ifdef GMM_USES_MPI
+      double t_ref, t_prec = MPI_Wtime(), t_tot = 0;
+      static double tmult_tot = 0.0;
+t_ref = MPI_Wtime();
+    cout << "GMRES " << endl;
+#endif
+    // The reference norm is that of the preconditioned right hand side.
+    mult(M,b,r);
+    outer.set_rhsnorm(gmm::vect_norm2(r));
+    if (outer.get_rhsnorm() == 0.0) { clear(x); return; }
+    
+    // Preconditioned initial residual r = M (b - A x).
+    mult(A, scaled(x, T(-1)), b, w);
+    mult(M, w, r);
+    R beta = gmm::vect_norm2(r), beta_old = beta;
+    // Number of consecutive restarts judged unproductive (see below).
+    int blocked = 0;
+
+    iteration inner = outer;
+    inner.reduce_noisy();
+    inner.set_maxiter(restart);
+    inner.set_name("GMRes inner");
+
+    while (! outer.finished(beta)) {
+      
+      gmm::copy(gmm::scaled(r, R(1)/beta), KS[0]);
+      gmm::clear(s);
+      s[0] = beta;
+      
+      size_type i = 0; inner.init();
+      
+      do {
+	// New basis vector KS[i+1] = M A KS[i], orthogonalized against the
+	// previous ones; the coefficients go to column i of H.
+	mult(A, KS[i], u);
+	mult(M, u, KS[i+1]);
+	orthogonalize(KS, mat_col(H, i), i);
+	R a = gmm::vect_norm2(KS[i+1]);
+	H(i+1, i) = T(a);
+	gmm::scale(KS[i+1], T(1) / a);
+	// Apply the previously computed rotations to the new column.
+	for (size_type k = 0; k < i; ++k)
+	  Apply_Givens_rotation_left(H(k,i), H(k+1,i), c_rot[k], s_rot[k]);
+	
+	// Compute the new rotation and apply it to H and to s.
+	Givens_rotation(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	Apply_Givens_rotation_left(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	Apply_Givens_rotation_left(s[i], s[i+1], c_rot[i], s_rot[i]);
+	
+	++inner, ++outer, ++i;
+      } while (! inner.finished(gmm::abs(s[i])));
+
+      // Solve the triangular system and update x from the basis, then
+      // recompute the preconditioned residual.
+      upper_tri_solve(H, s, i, false);
+      combine(KS, s, x, i);
+      mult(A, gmm::scaled(x, T(-1)), b, w);
+      mult(M, w, r);
+      beta_old = std::min(beta, beta_old); beta = gmm::vect_norm2(r);
+      // A restart counts as blocked when the inner loop stopped before
+      // restart-1 iterations or the residual norm did not drop below the
+      // best value so far; more than 10 in a row aborts the solve.
+      if (int(inner.get_iteration()) < restart -1 || beta_old <= beta)
+	++blocked; else blocked = 0;
+      if (blocked > 10) {
+	if (outer.get_noisy()) cout << "Gmres is blocked, exiting\n";
+	break;
+      }
+#ifdef GMM_USES_MPI
+	t_tot = MPI_Wtime() - t_ref;
+	cout << "temps GMRES : " << t_tot << endl; 
+#endif
+    }
+  }
+
+
+  /** Convenience overload of gmres: allocates a modified_gram_schmidt
+      basis sized (restart, vect_size(x)) for the value type of Vec and
+      forwards to the overload that takes the basis explicitly. */
+  template <typename Mat, typename Vec, typename VecB, typename Precond >
+  void gmres(const Mat &A, Vec &x, const VecB &b,
+	     const Precond &M, int restart, iteration& outer) {
+    modified_gram_schmidt<typename linalg_traits<Vec>::value_type>
+      krylov_basis(restart, vect_size(x));
+    gmres(A, x, b, M, restart, outer, krylov_basis);
+  }
+
+}
+
+#endif
--- a/gmm/gmm_solver_idgmres.h
+++ b/gmm/gmm_solver_idgmres.h
@ -0,0 +1,805 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_idgmres.h
+   @author  Caroline Lecalvez <Caroline.Lecalvez@gmm.insa-tlse.fr>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 6, 2003.
+   @brief Implicitly restarted and deflated Generalized Minimum Residual.
+*/
+#ifndef GMM_IDGMRES_H
+#define GMM_IDGMRES_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+#include "gmm_dense_sylvester.h"
+
+namespace gmm {
+
+  /** Comparison functor for (value, index) pairs: a is ordered before b
+      when the modulus of a.first is strictly greater than the modulus of
+      b.first. The index (second member) is ignored. */
+  template <typename T> struct compare_vp {
+    bool operator()(const std::pair<T, size_type> &a,
+		    const std::pair<T, size_type> &b) const
+    { return (gmm::abs(a.first) > gmm::abs(b.first)); }
+  };
+
+  /** Bookkeeping counters shared by the stages of idgmres.
+
+      The constructor stores the three sizes it is given in m, p and k,
+      sets tb_deb to 1, ok to false and every other counter to 0.
+      (Presumably m, p and k carry the meaning documented on idgmres --
+      confirm against the caller.)
+  */
+  struct idgmres_state {
+    size_type m, tb_deb, tb_def, p, k, nb_want, nb_unwant;
+    size_type nb_nolong, tb_deftot, tb_defwant, conv, nb_un, fin;
+    bool ok;
+
+    idgmres_state(size_type mm, size_type pp, size_type kk)
+      : m(mm), tb_deb(1), tb_def(0), p(pp), k(kk), nb_want(0),
+	nb_unwant(0), nb_nolong(0), tb_deftot(0), tb_defwant(0),
+	conv(0), nb_un(0), fin(0), ok(false) {}
+  };
+  
+
+  /** Permute a container in place so that, on return, cont[l] holds the
+      value that was stored at cont[ind[l]] on entry.
+
+      The permutation is applied cycle by cycle, with one temporary per
+      cycle. Every position of a cycle, including the last one, is marked
+      as handled so that no cycle is applied a second time when the outer
+      loop reaches one of its later positions.
+
+      @param cont container to permute; must provide value_type and
+                  operator[].
+      @param ind  permutation of 0 .. m-1, where m = ind.end() - ind.begin().
+  */
+  template <typename CONT, typename IND>
+  void apply_permutation(CONT &cont, const IND &ind) {
+    typedef typename CONT::value_type value_type;
+    size_type m = ind.end() - ind.begin();
+    std::vector<bool> sorted(m, false);
+    
+    for (size_type l = 0; l < m; ++l)
+      if (!sorted[l] && ind[l] != l) {
+
+	// Save the head of the cycle, then shift each element into place.
+	value_type aux = cont[l];
+	size_type k = ind[l];
+	cont[l] = cont[k];
+	sorted[l] = true;
+	
+	for (size_type k2 = ind[k]; k2 != l; k2 = ind[k]) {
+	  cont[k] = cont[k2];
+	  sorted[k] = true;
+	  k = k2;
+	}
+	// k is now the position whose source is the head of the cycle.
+	cont[k] = aux;
+	sorted[k] = true;
+      }
+  }
+
+
+  /** Implicitly restarted and deflated Generalized Minimum Residual
+
+      See: C. Le Calvez, B. Molina, Implicitly restarted and deflated
+      FOM and GMRES, numerical applied mathematics,
+      (30) 2-3 (1999) pp191-212.
+      
+      @param A Real or complex unsymmetric matrix.
+      @param x initial guess vector and final result.
+      @param b right hand side
+      @param M preconditioner
+      @param m size of the subspace between two restarts
+      @param p number of converged ritz values sought
+      @param k size of the remaining Krylov subspace when the p ritz values
+      have not yet converged 0 <= p <= k < m.
+      @param tol_vp : tolerance on the ritz values.
+      @param outer iteration controller for the whole solve.
+      @param KS Krylov basis storage, accessed as KS[i] and KS.mat().
+
+      NOTE(review): this function looks unfinished. Within its body
+      `eval_sort` and `ok` are used without a visible declaration,
+      `complex<R>` is used unqualified next to `std::complex<R>`, `tol_vp`
+      is never read, and `y` is read by mult(Hess, scaled(y, ...), ...)
+      on a path where it has not been assigned -- confirm before use.
+  */
+  template < typename Mat, typename Vec, typename VecB, typename Precond,
+	     typename Basis >
+  void idgmres(const Mat &A, Vec &x, const VecB &b, const Precond &M,
+	     size_type m, size_type p, size_type k, double tol_vp,
+	     iteration &outer, Basis& KS) {
+
+    typedef typename linalg_traits<Mat>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    
+    R a, beta;
+    idgmres_state st(m, p, k);
+
+    std::vector<T> w(vect_size(x)), r(vect_size(x)), u(vect_size(x));
+    std::vector<T> c_rot(m+1), s_rot(m+1), s(m+1);
+    std::vector<T> y(m+1), ztest(m+1), gam(m+1);
+    std::vector<T> gamma(m+1);
+    gmm::dense_matrix<T> H(m+1, m), Hess(m+1, m),
+      Hobl(m+1, m), W(vect_size(x), m+1);
+
+    gmm::clear(H);
+
+    outer.set_rhsnorm(gmm::vect_norm2(b));
+    if (outer.get_rhsnorm() == 0.0) { clear(x); return; }
+    
+    // Preconditioned initial residual r = M (b - A x).
+    mult(A, scaled(x, -1.0), b, w);
+    mult(M, w, r);
+    beta = gmm::vect_norm2(r);
+
+    iteration inner = outer;
+    inner.reduce_noisy();
+    inner.set_maxiter(m);
+    inner.set_name("GMRes inner iter");
+    
+    while (! outer.finished(beta)) {
+      
+      gmm::copy(gmm::scaled(r, 1.0/beta), KS[0]);
+      gmm::clear(s);
+      s[0] = beta;
+      gmm::copy(s, gamma);
+
+      inner.set_maxiter(m - st.tb_deb + 1);
+      size_type i = st.tb_deb - 1; inner.init();
+      
+      do {
+	mult(A, KS[i], u);
+	mult(M, u, KS[i+1]);
+	orthogonalize_with_refinment(KS, mat_col(H, i), i);
+	H(i+1, i) = a = gmm::vect_norm2(KS[i+1]);
+	gmm::scale(KS[i+1], R(1) / a);
+
+	// Keep two copies of the new column before H is rotated.
+	gmm::copy(mat_col(H, i), mat_col(Hess, i));
+	gmm::copy(mat_col(H, i), mat_col(Hobl, i));
+	
+
+	for (size_type l = 0; l < i; ++l)
+	  Apply_Givens_rotation_left(H(l,i), H(l+1,i), c_rot[l], s_rot[l]);
+	
+	Givens_rotation(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	Apply_Givens_rotation_left(H(i,i), H(i+1,i), c_rot[i], s_rot[i]);
+	H(i+1, i) = T(0); 
+	Apply_Givens_rotation_left(s[i], s[i+1], c_rot[i], s_rot[i]);
+	
+	++inner, ++outer, ++i;
+      } while (! inner.finished(gmm::abs(s[i])));
+
+      if (inner.converged()) {
+	gmm::copy(s, y);
+	upper_tri_solve(H, y, i, false);
+	combine(KS, y, x, i);
+	mult(A, gmm::scaled(x, T(-1)), b, w);
+	mult(M, w, r);
+	beta = gmm::vect_norm2(r); // + check on beta ... still to be done
+	break;
+      }
+
+      gmm::clear(gam); gam[m] = s[i];
+      for (size_type l = m; l > 0; --l)
+	Apply_Givens_rotation_left(gam[l-1], gam[l], gmm::conj(c_rot[l-1]),
+				   -s_rot[l-1]);
+
+      mult(KS.mat(), gam, r);
+      beta = gmm::vect_norm2(r);
+      
+      mult(Hess, scaled(y, T(-1)), gamma, ztest);
+      // In fact, according to Caroline, who knows the subject, ztest and
+      // gam should coincide.
+      // Once this has been checked to work, gam should be used in place
+      // of ztest.
+      if (st.tb_def < p) {
+        T nss = H(m,m-1) / ztest[m];
+	nss /= gmm::abs(nss); // ns also to be computed later
+	gmm::copy(KS.mat(), W); gmm::copy(scaled(r, nss /beta), mat_col(W, m));
+	
+	// Computation of the oblique matrix
+	sub_interval SUBI(0, m);
+	add(scaled(sub_vector(ztest, SUBI), -Hobl(m, m-1) / ztest[m]),
+	    sub_vector(mat_col(Hobl, m-1), SUBI));
+	Hobl(m, m-1) *= nss * beta / ztest[m]; 
+
+	/* **************************************************************** */
+	/*  Locking                                                         */
+	/* **************************************************************** */
+
+	// Computation of the Ritz eigenpairs.
+	std::vector<std::complex<R> > eval(m);
+	dense_matrix<T> YB(m-st.tb_def, m-st.tb_def);
+	std::vector<char> pure(m-st.tb_def, 0);
+	gmm::clear(YB);
+
+	select_eval(Hobl, eval, YB, pure, st);
+
+	if (st.conv != 0) {
+	  // DEFLATION using the QR Factorization of YB
+	  
+	  T alpha = Lock(W, Hobl,
+			 sub_matrix(YB,  sub_interval(0, m-st.tb_def)),
+			 sub_interval(st.tb_def, m-st.tb_def), 
+			 (st.tb_defwant < p)); 
+	  // ns *= alpha; // to be computed later ??
+	  //  V(:,m+1) = alpha*V(:, m+1); this was meant to be used for something ...
+
+
+	  //       Clean the portions below the diagonal corresponding
+	  //       to the lock Schur vectors
+
+	  for (size_type j = st.tb_def; j < st.tb_deftot; ++j) {
+	    if ( pure[j-st.tb_def] == 0)
+	      gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j)));
+	    else if (pure[j-st.tb_def] == 1) {
+	      gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1),
+				    sub_interval(j, 2))); 
+	      ++j;
+	    }
+	    else GMM_ASSERT3(false, "internal error");
+	  }
+	  
+	  if (!st.ok) {
+
+	    // beware of the case m = 0;
+	    size_type mm = std::min(k+st.nb_unwant+st.nb_nolong, m-1);
+
+	    if (eval_sort[m-mm-1].second != R(0)
+		&& eval_sort[m-mm-1].second == -eval_sort[m-mm].second) ++mm;
+
+	    std::vector<complex<R> > shifts(m-mm);
+	    for (size_type i = 0; i < m-mm; ++i)
+	      shifts[i] = eval_sort[i].second;
+
+	    apply_shift_to_Arnoldi_factorization(W, Hobl, shifts, mm,
+						 m-mm, true);
+
+	    st.fin = mm;
+	  }
+	  else
+	    st.fin = st.tb_deftot;
+
+
+	  /* ************************************************************** */
+	  /*  Purge                                                         */
+	  /* ************************************************************** */
+
+	  if (st.nb_nolong + st.nb_unwant > 0) {
+
+	    std::vector<std::complex<R> > eval(m);
+	    dense_matrix<T> YB(st.fin, st.tb_deftot);
+	    std::vector<char> pure(st.tb_deftot, 0);
+	    gmm::clear(YB);
+	    st.nb_un = st.nb_nolong + st.nb_unwant;
+	    
+	    select_eval_for_purging(Hobl, eval, YB, pure, st);
+	    
+	    T alpha = Lock(W, Hobl, YB, sub_interval(0, st.fin), ok);
+
+	    //       Clean the portions below the diagonal corresponding
+	    //       to the unwanted lock Schur vectors
+	    
+	    for (size_type j = 0; j < st.tb_deftot; ++j) {
+	      if ( pure[j] == 0)
+		gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j)));
+	      else if (pure[j] == 1) {
+		gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1),
+				      sub_interval(j, 2))); 
+		++j;
+	      }
+	      else GMM_ASSERT3(false, "internal error");
+	    }
+
+	    gmm::dense_matrix<T> z(st.nb_un, st.fin - st.nb_un);
+	    sub_interval SUBI(0, st.nb_un), SUBJ(st.nb_un, st.fin - st.nb_un);
+	    sylvester(sub_matrix(Hobl, SUBI),
+		      sub_matrix(Hobl, SUBJ),
+		      sub_matrix(gmm::scaled(Hobl, -T(1)), SUBI, SUBJ), z);
+	    
+	  }
+
+	}
+	
+      }
+    }
+  }
+  
+
+  /** Short overload of idgmres taking only the subspace size m.
+
+      Allocates a modified_gram_schmidt basis sized (m, vect_size(x)) for
+      the value type of Mat and delegates to gmres with m as the restart
+      parameter. */
+  template < typename Mat, typename Vec, typename VecB, typename Precond >
+    void idgmres(const Mat &A, Vec &x, const VecB &b,
+		 const Precond &M, size_type m, iteration& outer) {
+    modified_gram_schmidt<typename linalg_traits<Mat>::value_type>
+      krylov_basis(m, vect_size(x));
+    gmres(A, x, b, M, m, outer, krylov_basis);
+  }
+
+
+  // Lock stage of an implicit restarted Arnoldi process.
+  // 1- QR factorization of YB through Householder matrices
+  //    Q(Rl) = YB
+  //     (0 )
+  // 2- Update of the Arnoldi factorization.
+  //    H <- Q*HQ,  W <- WQ
+  // 3- Restore the Hessenberg form of H.
+  //
+  // Parameters: W and H are updated in place; YB is the matrix that gets
+  // QR-factorized; SUB is the index range of H and W the reflectors are
+  // applied to; restore enables step 3; ns is not read in the body.
+  //
+  // NOTE(review): this function looks unfinished. It is declared void but
+  // ends with `return alpha;`; `end` is introduced after a `;` instead of
+  // a `,`; `n_rows`, `gmmm::copy` and `submatrix` look like misspellings
+  // of `nrows`, `gmm::copy` and `sub_matrix`; `st`, `w`, `SUBI` and `fin`
+  // have no visible declaration; `gmm::dense_matrix tab_p` lacks a
+  // template argument; and in the last loop H(st.tb_deftot+j,
+  // st.tb_deftot+j-1) is read with j == 0 -- confirm before use.
+
+  template <typename T, typename MATYB>
+    void Lock(gmm::dense_matrix<T> &W, gmm::dense_matrix<T> &H,
+	      const MATYB &YB, const sub_interval SUB,
+	      bool restore, T &ns) {
+
+    size_type n = mat_nrows(W), m = mat_ncols(W) - 1;
+    size_type ncols = mat_ncols(YB), nrows = mat_nrows(YB);
+    size_type begin = min(SUB); end = max(SUB) - 1;
+    sub_interval SUBR(0, nrows), SUBC(0, ncols);
+    T alpha(1);
+
+    GMM_ASSERT2(((end-begin) == ncols) && (m == mat_nrows(H)) 
+		&& (m+1 == mat_ncols(H)), "dimensions mismatch");
+    
+    // DEFLATION using the QR Factorization of YB
+	  
+    dense_matrix<T> QR(n_rows, n_rows);
+    gmmm::copy(YB, sub_matrix(QR, SUBR, SUBC));
+    gmm::clear(submatrix(QR, SUBR, sub_interval(ncols, nrows-ncols)));
+    qr_factor(QR); 
+
+
+    // Apply the Householder reflectors stored in QR to H (both sides)
+    // and to W (right side).
+    apply_house_left(QR, sub_matrix(H, SUB));
+    apply_house_right(QR, sub_matrix(H, SUBR, SUB));
+    apply_house_right(QR, sub_matrix(W, sub_interval(0, n), SUB));
+    
+    //       Restore to the initial block hessenberg form
+    
+    if (restore) {
+      
+      // to be checked when m = 0 ...
+      gmm::dense_matrix tab_p(end - st.tb_deftot, end - st.tb_deftot);
+      gmm::copy(identity_matrix(), tab_p);
+      
+      for (size_type j = end-1; j >= st.tb_deftot+2; --j) {
+	
+	size_type jm = j-1;
+	std::vector<T> v(jm - st.tb_deftot);
+	sub_interval SUBtot(st.tb_deftot, jm - st.tb_deftot);
+	sub_interval SUBtot2(st.tb_deftot, end - st.tb_deftot);
+	gmm::copy(sub_vector(mat_row(H, j), SUBtot), v);
+	house_vector_last(v);
+	w.resize(end);
+	col_house_update(sub_matrix(H, SUBI, SUBtot), v, w);
+	w.resize(end - st.tb_deftot);
+	row_house_update(sub_matrix(H, SUBtot, SUBtot2), v, w);
+	gmm::clear(sub_vector(mat_row(H, j),
+			      sub_interval(st.tb_deftot, j-1-st.tb_deftot)));
+	w.resize(end - st.tb_deftot);
+	col_house_update(sub_matrix(tab_p, sub_interval(0, end-st.tb_deftot),
+				    sub_interval(0, jm-st.tb_deftot)), v, w);
+	w.resize(n);
+	col_house_update(sub_matrix(W, sub_interval(0, n), SUBtot), v, w);
+      }
+      
+      //       restore positive subdiagonal elements
+      
+      std::vector<T> d(fin-st.tb_deftot); d[0] = T(1);
+      
+      // We compute d[i+1] in order 
+      // (d[i+1] * H(st.tb_deftot+i+1,st.tb_deftoti)) / d[i] 
+      // be equal to |H(st.tb_deftot+i+1,st.tb_deftot+i))|.
+      for (size_type j = 0; j+1 < end-st.tb_deftot; ++j) {
+	T e = H(st.tb_deftot+j, st.tb_deftot+j-1);
+	d[j+1] = (e == T(0)) ? T(1) :  d[j] * gmm::abs(e) / e;
+	scale(sub_vector(mat_row(H, st.tb_deftot+j+1),
+			 sub_interval(st.tb_deftot, m-st.tb_deftot)), d[j+1]);
+	scale(mat_col(H, st.tb_deftot+j+1), T(1) / d[j+1]);
+	scale(mat_col(W, st.tb_deftot+j+1), T(1) / d[j+1]);
+      }
+
+      // alpha is normalised to unit modulus and applied to column m of W.
+      alpha = tab_p(end-st.tb_deftot-1, end-st.tb_deftot-1) / d[end-st.tb_deftot-1];
+      alpha /= gmm::abs(alpha);
+      scale(mat_col(W, m), alpha);
+	    
+    }
+	 
+    return alpha;
+  }
+
+
+
+
+
+
+
+
+  // Apply p implicit shifts to the Arnoldi factorization
+  // AV = VH+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}*
+  // and produces the following new Arnoldi factorization
+  // A(VQ) = (VQ)(Q*HQ)+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}* Q
+  // where only the first k columns are relevant.
+  //
+  // Dan Sorensen and Richard J. Radke, 11/95
+  //
+  // Parameters: V and H are the factors of the factorization, Lambda holds
+  // the shifts, k is read on entry and overwritten with kend - num on
+  // exit, p is the number of shifts, true_shift selects how column k of V
+  // is rebuilt at the end.
+  //
+  // NOTE(review): this function looks unfinished. It has no return type;
+  // V and H are taken by value, so the updates made here do not reach the
+  // caller's matrices; `h(...)` (lower case), `i` and `v` are used without
+  // a visible declaration (hv is filled but v is passed to the house
+  // routines); a comma is missing after both
+  // `sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2))` arguments of
+  // col_rot; `jj == p` cannot hold inside `for (jj < p)`; and the "real
+  // shift" branch is selected by testing the real part of Lambda[jj]
+  // against zero -- confirm before use.
+  template<typename T, typename C>
+    apply_shift_to_Arnoldi_factorization(dense_matrix<T> V, dense_matrix<T> H,
+					 std::vector<C> Lambda, size_type &k,
+					 size_type p, bool true_shift = false) {
+
+
+    size_type k1 = 0, num = 0, kend = k+p, kp1 = k + 1;
+    bool mark = false;
+    T c, s, x, y, z;
+
+    // q accumulates the last row of the product of the applied
+    // transformations, starting from the unit row vector e_{kend}'.
+    dense_matrix<T> q(1, kend);
+    gmm::clear(q); q(0,kend-1) = T(1);
+    std::vector<T> hv(3), w(std::max(kend, mat_nrows(V)));
+
+    for(size_type jj = 0; jj < p; ++jj) {
+      //     compute and apply a bulge chase sweep initiated by the
+      //     implicit shift held in w(jj)
+   
+      if (abs(Lambda[jj].real()) == 0.0) {
+	//       apply a real shift using 2 by 2 Givens rotations
+
+	for (size_type k1 = 0, k2 = 0; k2 != kend-1; k1 = k2+1) {
+	  k2 = k1;
+	  while (h(k2+1, k2) != T(0) && k2 < kend-1) ++k2;
+
+	  Givens_rotation(H(k1, k1) - Lambda[jj], H(k1+1, k1), c, s);
+	  
+	  for (i = k1; i <= k2; ++i) {
+            if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s);
+            
+	    // Do not forget to clean H(i+1,i-1) (set it to zero).
+	    // Check that in the end H(i+1,i) is indeed a positive real.
+
+            // apply rotation from left to rows of H
+	    row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)),
+		    c, s, 0, 0);
+	    
+	    // apply rotation from right to columns of H
+            size_type ip2 = std::min(i+2, kend);
+            col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2))
+		    c, s, 0, 0);
+            
+            // apply rotation from right to columns of V
+	    col_rot(V, c, s, i, i+1);
+            
+            // accumulate e'  Q so residual can be updated k+p
+	    Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s);
+	    // maybe we are using G instead of G* and
+	    // going too far in k2.
+	  }
+	}
+	
+	num = num + 1;
+      }
+      else {
+      
+	// Apply a double complex shift using 3 by 3 Householder 
+	// transformations
+      
+	if (jj == p || mark)
+	  mark = false;     // skip application of conjugate shift
+	else {
+	  num = num + 2;    // mark that a complex conjugate
+	  mark = true;      // pair has been applied
+
+	  // Loop end indices to be watched... closely!
+	  for (size_type k1 = 0, k3 = 0; k3 != kend-2; k1 = k3+1) {
+	    k3 = k1;
+	    while (h(k3+1, k3) != T(0) && k3 < kend-2) ++k3;
+	    size_type k2 = k1+1;
+
+
+            x = H(k1,k1) * H(k1,k1) + H(k1,k2) * H(k2,k1)
+	      - 2.0*Lambda[jj].real() * H(k1,k1) + gmm::abs_sqr(Lambda[jj]);
+	    y = H(k2,k1) * (H(k1,k1) + H(k2,k2) - 2.0*Lambda[jj].real());
+	    z = H(k2+1,k2) * H(k2,k1);
+
+	    for (size_type i = k1; i <= k3; ++i) {
+	      if (i > k1) {
+		x = H(i, i-1);
+		y = H(i+1, i-1);
+		z = H(i+2, i-1);
+		// Do not forget to clean H(i+1,i-1) and H(i+2,i-1) 
+		// (set them to zero).
+	      }
+
+	      hv[0] = x; hv[1] = y; hv[2] = z;
+	      house_vector(v);
+
+	      // Check that in the end H(i+1,i) is indeed a positive real
+
+	      // apply transformation from left to rows of H
+	      w.resize(kend-i);
+	      row_house_update(sub_matrix(H, sub_interval(i, 2),
+					  sub_interval(i, kend-i)), v, w);
+               
+	      // apply transformation from right to columns of H
+               
+	      size_type ip3 = std::min(kend, i + 3);
+	      w.resize(ip3);
+              col_house_update(sub_matrix(H, sub_interval(0, ip3),
+					  sub_interval(i, 2)), v, w);
+               
+	      // apply transformation from right to columns of V
+	      
+	      w.resize(mat_nrows(V));
+	      col_house_update(sub_matrix(V, sub_interval(0, mat_nrows(V)),
+					  sub_interval(i, 2)), v, w);
+               
+	      // accumulate e' Q so residual can be updated  k+p
+
+	      w.resize(1);
+	      col_house_update(sub_matrix(q, sub_interval(0,1),
+					  sub_interval(i,2)), v, w);
+               
+	    }
+	  }
+         
+	  //           clean up step with Givens rotation
+
+	  i = kend-2;
+	  c = x; s = y;
+	  if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s);
+            
+	  // Do not forget to clean H(i+1,i-1) (set it to zero).
+	  // Check that in the end H(i+1,i) is indeed a positive real.
+
+	  // apply rotation from left to rows of H
+	  row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)),
+		    c, s, 0, 0);
+	    
+	  // apply rotation from right to columns of H
+	  size_type ip2 = std::min(i+2, kend);
+	  col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2))
+		  c, s, 0, 0);
+            
+	  // apply rotation from right to columns of V
+	  col_rot(V, c, s, i, i+1);
+            
+	  // accumulate e'  Q so residual can be updated k+p
+	  Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s);
+
+	}
+      }
+    }
+
+    //  update residual and store in the k+1 -st column of v
+
+    k = kend - num;
+    scale(mat_col(V, kend), q(0, k));
+    
+    if (k < mat_nrows(H)) {
+      if (true_shift)
+	gmm::copy(mat_col(V, kend), mat_col(V, k));
+      else
+	   //   v(:,k+1) = v(:,kend+1) + v(:,k+1)*h(k+1,k);
+	   //   v(:,k+1) = v(:,kend+1) ;
+	gmm::add(scaled(mat_col(V, kend), H(kend, kend-1)), 
+		 scaled(mat_col(V, k), H(k, k-1)), mat_col(V, k));
+    }
+
+    // The norm of column k of V becomes the new subdiagonal entry.
+    H(k, k-1) = vect_norm2(mat_col(V, k));
+    scale(mat_col(V, kend), T(1) / H(k, k-1));
+  }
+
+
+
+  /**
+   * Computes the Ritz eigenpairs of the Hessenberg matrix Hobl and sorts
+   * them into wanted / unwanted / no-longer-wanted groups for deflation.
+   *
+   * @param Hobl  Hessenberg matrix; the blocks [0, tb_def) and [tb_def, m)
+   *              are diagonalised separately.
+   * @param eval  receives the eigenvalues of both blocks, then is permuted
+   *              into the order given by compare_vp.
+   * @param YB    receives, column by column, the real and imaginary parts
+   *              of the eigenvectors that are kept.
+   * @param pure  receives 1 for a column holding a real part and 2 for a
+   *              column holding the imaginary part of the same eigenvector.
+   * @param st    solver state; reads m and tb_def, writes nb_want,
+   *              nb_unwant, nb_nolong, conv, tb_deftot, tb_defwant and ok.
+   *
+   * NOTE(review): `p` and `tol_vp` are used below but are not declared in
+   * this function; presumably they are meant to come from idgmres_state or
+   * from extra parameters -- confirm before enabling this code.
+   * NOTE(review): `shift` and `SUBYB` are computed but not read afterwards
+   * in this function.
+   */
+  template<typename MAT, typename EVAL, typename PURE>
+  void select_eval(const MAT &Hobl, EVAL &eval, MAT &YB, PURE &pure,
+                   idgmres_state &st) {
+
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type m = st.m;
+
+    // Computation of the Ritz eigenpairs.
+
+    col_matrix< std::vector<T> > evect(m-st.tb_def, m-st.tb_def);
+    // std::vector<std::complex<R> > eval(m);
+    // Entries with index < tb_def are never overwritten and keep the
+    // sentinel value -1, which the classification loops test for.
+    std::vector<R> ritznew(m, T(-1));
+
+    // Eigenvectors of the leading tb_def x tb_def block. The original call
+    // below had this argument commented out, leaving a dangling comma.
+    dense_matrix<T> evect_lock(st.tb_def, st.tb_def);
+
+    sub_interval SUB1(st.tb_def, m-st.tb_def);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB1),
+                          sub_vector(eval, SUB1), evect);
+    sub_interval SUB2(0, st.tb_def);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB2),
+                          sub_vector(eval, SUB2), evect_lock);
+
+    // Residual estimate of each Ritz pair of the trailing block: last
+    // component of the eigenvector times the sub-diagonal entry Hobl(m, m-1).
+    for (size_type l = st.tb_def; l < m; ++l)
+      ritznew[l] = gmm::abs(evect(m-st.tb_def-1, l-st.tb_def) * Hobl(m, m-1));
+
+    // Sort the eigenvalues with compare_vp and remember the permutation.
+    std::vector< std::pair<T, size_type> > eval_sort(m);
+    for (size_type l = 0; l < m; ++l)
+      eval_sort[l] = std::pair<T, size_type>(eval[l], l);
+    std::sort(eval_sort.begin(), eval_sort.end(), compare_vp());
+
+    std::vector<size_type> index(m);
+    for (size_type l = 0; l < m; ++l) index[l] = eval_sort[l].second;
+
+    // The first tb_def eigenvalues start out as kept.
+    std::vector<bool> kept(m, false);
+    std::fill(kept.begin(), kept.begin()+st.tb_def, true);
+
+    apply_permutation(eval, index);
+    apply_permutation(evect, index);
+    apply_permutation(ritznew, index);
+    apply_permutation(kept, index);
+
+    //  Which are the eigenvalues that converged ?
+    //
+    //  nb_want is the number of eigenvalues of
+    //  Hess(tb_def+1:n,tb_def+1:n) that converged and are WANTED
+    //
+    //  nb_unwant is the number of eigenvalues of
+    //  Hess(tb_def+1:n,tb_def+1:n) that converged and are UNWANTED
+    //
+    //  nb_nolong is the number of eigenvalues of
+    //  Hess(1:tb_def,1:tb_def) that are NO LONGER WANTED.
+    //
+    //  tb_deftot is the number of the deflated eigenvalues
+    //  that is tb_def + nb_want + nb_unwant
+    //
+    //  tb_defwant is the number of the wanted deflated eigenvalues
+    //  that is tb_def + nb_want - nb_nolong
+
+    st.nb_want = 0, st.nb_unwant = 0, st.nb_nolong = 0;
+    size_type j, ind;
+
+    // NOTE(review): j is advanced inside the body as well as by the loop
+    // header, so a real eigenvalue moves j by two -- confirm the intended
+    // stepping before relying on this loop.
+    for (j = 0, ind = 0; j < m-p; ++j) {
+      if (ritznew[j] == R(-1)) {
+        if (std::imag(eval[j]) != R(0)) {
+          st.nb_nolong += 2; ++j; // to be adapted for the complex case ...
+        }
+        else st.nb_nolong++;
+      }
+      else {
+        if (ritznew[j]
+            < tol_vp * gmm::abs(eval[j])) {
+
+          for (size_type l = 0; l < m-st.tb_def; ++l)
+            YB(l, ind) = std::real(evect(l, j));
+          kept[j] = true;
+          ++j; ++st.nb_unwant; ind++;
+
+          if (std::imag(eval[j]) != R(0)) {
+            for (size_type l = 0; l < m-st.tb_def; ++l)
+              YB(l, ind) = std::imag(evect(l, j));
+            pure[ind-1] = 1;
+            pure[ind] = 2;
+
+            kept[j] = true;
+
+            st.nb_unwant++;
+            ++ind;
+          }
+        }
+      }
+    }
+
+
+    for (; j < m; ++j) {
+      if (ritznew[j] != R(-1)) {
+
+        for (size_type l = 0; l < m-st.tb_def; ++l)
+          YB(l, ind) = std::real(evect(l, j));
+        pure[ind] = 1;
+        ++ind;
+        kept[j] = true;
+        ++st.nb_want;
+
+        // NOTE(review): the matching branch of the loop above tests the
+        // imaginary part of eval[j] before storing an imaginary column;
+        // this one tests the residual instead -- confirm which is meant.
+        if (ritznew[j]
+            < tol_vp * gmm::abs(eval[j])) {
+          for (size_type l = 0; l < m-st.tb_def; ++l)
+            YB(l, ind) = std::imag(evect(l, j));
+          pure[ind] = 2;
+
+          j++;
+          kept[j] = true;
+
+          st.nb_want++;
+          ++ind;
+        }
+      }
+    }
+
+    // Eigenvalues that were not kept.
+    std::vector<T> shift(m - st.tb_def - st.nb_want - st.nb_unwant);
+    for (size_type j = 0, i = 0; j < m; ++j)
+      if (!kept[j]) shift[i++] = eval[j];
+
+    // st.conv (st.nb_want+st.nb_unwant) is the number of eigenpairs that
+    //   have just converged.
+    // st.tb_deftot is the total number of eigenpairs that have converged.
+
+    // These are members of st (they are assigned as such further down);
+    // the original `size_type st.conv = ...` form was not valid C++.
+    st.conv = ind;
+    st.tb_deftot = st.tb_def + st.conv;
+    st.tb_defwant = st.tb_def + st.nb_want - st.nb_nolong;
+
+    sub_interval SUBYB(0, st.conv);
+
+    if ( st.tb_defwant >= p ) { // An invariant subspace has been found.
+
+      st.nb_unwant = 0;
+      st.nb_want = p + st.nb_nolong - st.tb_def;
+      st.tb_defwant = p;
+
+      if ( pure[st.conv - st.nb_want + 1] == 2 ) {
+        ++st.nb_want; st.tb_defwant = ++p; // p should really be a local copy here
+      }
+
+      SUBYB = sub_interval(st.conv - st.nb_want, st.nb_want);
+      // YB = YB(:, st.conv-st.nb_want+1 : st.conv); // left pending ..
+      // pure = pure(st.conv-st.nb_want+1 : st.conv,1); // left pending ..
+      st.conv = st.nb_want;
+      st.tb_deftot = st.tb_def + st.conv;
+      st.ok = true;
+    }
+
+  }
+
+
+
+  /**
+   * Recomputes the eigenpairs of the leading tb_deftot x tb_deftot block of
+   * Hobl and stores the real / imaginary parts of the eigenvectors in YB.
+   *
+   * @param Hobl  Hessenberg matrix; only the block [0, tb_deftot) is used.
+   * @param eval  receives the eigenvalues of that block; the remaining
+   *              entries are set to zero, then the vector is permuted into
+   *              the order given by compare_vp.
+   * @param YB    column j receives the real part of eigenvector j; for an
+   *              eigenvalue with non-zero imaginary part, column j+1
+   *              receives the imaginary part.
+   * @param pure  set to 1 / 2 for the real / imaginary column of such a
+   *              pair (left untouched for real eigenvalues).
+   * @param st    solver state; reads m and tb_deftot.
+   */
+  template<typename MAT, typename EVAL, typename PURE>
+  void select_eval_for_purging(const MAT &Hobl, EVAL &eval, MAT &YB,
+                               PURE &pure, idgmres_state &st) {
+
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    size_type m = st.m;
+
+    // Computation of the Ritz eigenpairs.
+
+    col_matrix< std::vector<T> > evect(st.tb_deftot, st.tb_deftot);
+
+    sub_interval SUB1(0, st.tb_deftot);
+    implicit_qr_algorithm(sub_matrix(Hobl, SUB1),
+                          sub_vector(eval, SUB1), evect);
+    std::fill(eval.begin() + st.tb_deftot, eval.end(), std::complex<R>(0));
+
+    // Sort the eigenvalues with compare_vp and remember the permutation.
+    std::vector< std::pair<T, size_type> > eval_sort(m);
+    for (size_type l = 0; l < m; ++l)
+      eval_sort[l] = std::pair<T, size_type>(eval[l], l);
+    std::sort(eval_sort.begin(), eval_sort.end(), compare_vp());
+
+    std::vector<size_type> ind(m);
+    for (size_type l = 0; l < m; ++l) ind[l] = eval_sort[l].second;
+
+    apply_permutation(eval, ind);
+    apply_permutation(evect, ind);
+
+    // j is advanced only inside the body: by one column for a real
+    // eigenvalue, by two for a (real part, imaginary part) pair. The
+    // original also incremented j in the loop header, which skipped columns.
+    size_type j = 0;
+    while (j < st.tb_deftot) {
+
+      for (size_type l = 0; l < st.tb_deftot; ++l)
+        YB(l, j) = std::real(evect(l, j));
+
+      if (std::imag(eval[j]) != R(0)) {
+        // evect has tb_deftot rows, so the imaginary part is copied over
+        // the same row range as the real part.
+        for (size_type l = 0; l < st.tb_deftot; ++l)
+          YB(l, j+1) = std::imag(evect(l, j));
+        pure[j] = 1;
+        pure[j+1] = 2;
+
+        j += 2;
+      }
+      else ++j;
+    }
+  }
+  
+
+
+
+
+
+}
+
+#endif
--- a/gmm/gmm_solver_qmr.h
+++ b/gmm/gmm_solver_qmr.h
@ -0,0 +1,210 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+// This file is a modified version of qmr.h from ITL.
+// See http://osl.iu.edu/research/itl/
+// Following the corresponding Copyright notice.
+//===========================================================================
+//
+// Copyright (c) 1997-2001, The Trustees of Indiana University.
+// All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//    * Redistributions of source code must retain the above copyright
+//      notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+//    * Neither the name of the University of Notre Dame nor the
+//      names of its contributors may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE  IS  PROVIDED  BY  THE TRUSTEES  OF  INDIANA UNIVERSITY  AND
+// CONTRIBUTORS  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+// FOR  A PARTICULAR PURPOSE ARE DISCLAIMED. IN  NO  EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY,  OR CONSEQUENTIAL DAMAGES (INCLUDING,  BUT
+// NOT  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA,  OR PROFITS;  OR BUSINESS  INTERRUPTION)  HOWEVER  CAUSED AND ON ANY
+// THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY, OR TORT
+// (INCLUDING  NEGLIGENCE  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS  SOFTWARE,  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_qmr.h
+   @author Andrew Lumsdaine <lums@osl.iu.edu>
+   @author Lie-Quan Lee     <llee@osl.iu.edu>
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Quasi-Minimal Residual iterative solver.
+*/
+#ifndef GMM_QMR_H
+#define GMM_QMR_H
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /** Quasi-Minimal Residual (QMR) iterative solver.
+
+     Solves the unsymmetric linear system Ax = b with the Quasi-Minimal
+     Residual method. On entry x is the initial guess (the first residual
+     is b - A x); on exit it holds the updated approximation.
+
+     Reference: R. W. Freund and N. M. Nachtigal, A quasi-minimal residual
+     method for non-Hermitian linear systems, Numerical Math.,
+     60(1991), pp. 315-339
+
+     M1 is the preconditioner: Incomplete LU, Incomplete LU with threshold,
+     SSOR or identity_preconditioner.
+
+     If the norm of b is zero, x is cleared and the routine returns.
+     Whenever one of rho, xi, delta, ep, beta or gamma becomes zero the
+     iteration cannot continue: GMM_ASSERT1 is raised when
+     iter.get_maxiter() equals size_type(-1), otherwise a warning is
+     emitted and the routine returns with the current x.
+  */
+  template <typename Matrix, typename Vector, typename VectorB,
+            typename Precond1>
+  void qmr(const Matrix &A, Vector &x, const VectorB &b, const Precond1 &M1,
+           iteration& iter) {
+
+    typedef typename linalg_traits<Vector>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename temporary_vector<Vector>::vector_type TmpVec;
+
+    T delta(0), ep(0), beta(0), theta_prev(0), gamma_prev(0);
+    T theta(0), gamma(1), eta(-1);
+    R rho_prev(0), rho, xi;
+
+    size_type nn = vect_size(x);
+    TmpVec r(nn), v_tld(nn), y(nn), w_tld(nn), z(nn), v(nn), w(nn);
+    TmpVec y_tld(nn), z_tld(nn), p(nn), q(nn), p_tld(nn), d(nn), s(nn);
+
+    iter.set_rhsnorm(double(gmm::vect_norm2(b)));
+    if (iter.get_rhsnorm() == 0.0) {
+      clear(x);
+      return;
+    }
+
+    // r = b - A x
+    gmm::mult(A, gmm::scaled(x, T(-1)), b, r);
+
+    gmm::copy(r, v_tld);
+    gmm::left_mult(M1, v_tld, y);
+    rho = gmm::vect_norm2(y);
+
+    gmm::copy(r, w_tld);
+    gmm::transposed_right_mult(M1, w_tld, z);
+    xi = gmm::vect_norm2(z);
+
+    while (! iter.finished_vect(r)) {
+
+      if (rho == R(0) || xi == R(0)) {
+        if (iter.get_maxiter() != size_type(-1)) {
+          GMM_WARNING1("QMR failed to converge");
+          return;
+        }
+        { GMM_ASSERT1(false, "QMR failed to converge"); }
+      }
+
+      // Normalise the two Lanczos directions and their preconditioned images.
+      const T inv_rho = T(R(1)/rho);
+      gmm::copy(gmm::scaled(v_tld, inv_rho), v);
+      gmm::scale(y, inv_rho);
+
+      const T inv_xi = T(R(1)/xi);
+      gmm::copy(gmm::scaled(w_tld, inv_xi), w);
+      gmm::scale(z, inv_xi);
+
+      delta = gmm::vect_sp(z, y);
+      if (delta == T(0)) {
+        if (iter.get_maxiter() != size_type(-1)) {
+          GMM_WARNING1("QMR failed to converge");
+          return;
+        }
+        { GMM_ASSERT1(false, "QMR failed to converge"); }
+      }
+
+      gmm::right_mult(M1, y, y_tld);
+      gmm::transposed_left_mult(M1, z, z_tld);
+
+      if (!iter.first()) {
+        gmm::add(y_tld, gmm::scaled(p, -(T(xi  * delta) / ep)), p);
+        gmm::add(z_tld, gmm::scaled(q, -(T(rho * delta) / ep)), q);
+      } else {
+        gmm::copy(y_tld, p);
+        gmm::copy(z_tld, q);
+      }
+
+      gmm::mult(A, p, p_tld);
+
+      ep = gmm::vect_sp(q, p_tld);
+      if (ep == T(0)) {
+        if (iter.get_maxiter() != size_type(-1)) {
+          GMM_WARNING1("QMR failed to converge");
+          return;
+        }
+        { GMM_ASSERT1(false, "QMR failed to converge"); }
+      }
+
+      beta = ep / delta;
+      if (beta == T(0)) {
+        if (iter.get_maxiter() != size_type(-1)) {
+          GMM_WARNING1("QMR failed to converge");
+          return;
+        }
+        { GMM_ASSERT1(false, "QMR failed to converge"); }
+      }
+
+      gmm::add(p_tld, gmm::scaled(v, -beta), v_tld);
+      gmm::left_mult(M1, v_tld, y);
+
+      // Keep the previous rho: it enters the update of eta below.
+      rho_prev = rho;
+      rho = gmm::vect_norm2(y);
+
+      gmm::mult(gmm::transposed(A), q, w_tld);
+      gmm::add(w_tld, gmm::scaled(w, -beta), w_tld);
+      gmm::transposed_right_mult(M1, w_tld, z);
+
+      xi = gmm::vect_norm2(z);
+
+      gamma_prev = gamma;
+      theta_prev = theta;
+
+      theta = rho / (gamma_prev * beta);
+      gamma = T(1) / gmm::sqrt(T(1) + gmm::sqr(theta));
+
+      if (gamma == T(0)) {
+        if (iter.get_maxiter() != size_type(-1)) {
+          GMM_WARNING1("QMR failed to converge");
+          return;
+        }
+        { GMM_ASSERT1(false, "QMR failed to converge"); }
+      }
+
+      eta = -eta * T(rho_prev) * gmm::sqr(gamma) / (beta * gmm::sqr(gamma_prev));
+
+      if (!iter.first()) {
+        T tmp = gmm::sqr(theta_prev * gamma);
+        gmm::add(gmm::scaled(p, eta), gmm::scaled(d, tmp), d);
+        gmm::add(gmm::scaled(p_tld, eta), gmm::scaled(s, tmp), s);
+      } else {
+        gmm::copy(gmm::scaled(p, eta), d);
+        gmm::copy(gmm::scaled(p_tld, eta), s);
+      }
+
+      // x += d ; r -= s
+      gmm::add(d, x);
+      gmm::add(gmm::scaled(s, T(-1)), r);
+
+      ++iter;
+    }
+  }
+
+
+}
+
+#endif 
+
--- a/gmm/gmm_std.h
+++ b/gmm/gmm_std.h
@ -0,0 +1,424 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_std.h
+@author  Yves Renard <Yves.Renard@insa-lyon.fr>,
+@author  Julien Pommier <Julien.Pommier@insa-toulouse.fr>
+@date June 01, 1995.
+@brief basic setup for gmm (includes, typedefs etc.)
+*/
+#ifndef GMM_STD_H__
+#define GMM_STD_H__
+
+//#include <getfem/getfem_arch_config.h>
+
+#ifndef __USE_STD_IOSTREAM
+# define __USE_STD_IOSTREAM
+#endif
+
+#ifndef __USE_BSD
+# define __USE_BSD
+#endif
+
+#ifndef __USE_ISOC99
+# define __USE_ISOC99
+#endif
+
+// Portability wrappers around C string / stdio functions.
+// With MSVC (_MSC_VER >= 1400) they expand to the bounds-checked *_s
+// variants and GMM_SECURE_CRT is defined; with every other compiler they
+// expand to the classic functions.
+#if defined(_MSC_VER) && _MSC_VER >= 1400 // Secure versions for VC++
+# define GMM_SECURE_CRT
+# define SECURE_NONCHAR_SSCANF sscanf_s
+# define SECURE_NONCHAR_FSCANF fscanf_s
+# define SECURE_STRNCPY(a, la, b, lb) strncpy_s(a, la, b, lb)
+// Clears *F first, then yields the result of fopen_s.
+# define SECURE_FOPEN(F, filename, mode) (*(F) = 0,  fopen_s(F, filename, mode))
+# define SECURE_SPRINTF1(S, l, st, p1) sprintf_s(S, l, st, p1) 
+# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf_s(S, l, st, p1, p2) 
+# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf_s(S, l, st, p1, p2, p3, p4)
+# define SECURE_STRDUP(s) _strdup(s)
+// The build must pass /D_SCL_SECURE_NO_DEPRECATE, otherwise compilation stops here.
+# ifndef _SCL_SECURE_NO_DEPRECATE
+#   error Add the option /D_SCL_SECURE_NO_DEPRECATE to the compilation command
+# endif
+#else
+// Fallbacks: the destination-size arguments (la, l) are accepted but not
+// used, so these expansions perform no bounds checking. Here SECURE_FOPEN
+// yields the FILE* stored in *F rather than an error code.
+# define SECURE_NONCHAR_SSCANF sscanf
+# define SECURE_NONCHAR_FSCANF fscanf
+# define SECURE_STRNCPY(a, la, b, lb) strncpy(a, b, lb)
+# define SECURE_FOPEN(F, filename, mode) ((*(F)) = fopen(filename, mode))
+# define SECURE_SPRINTF1(S, l, st, p1) sprintf(S, st, p1)
+# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf(S, st, p1, p2)
+# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf(S, st, p1, p2, p3, p4) 
+# define SECURE_STRDUP(s) strdup(s)
+#endif
+
+// Sink used by GMM_NOPERATION: takes an int and does nothing with it.
+inline void GMM_NOPERATION_(int) { }
+// Statement that mentions `a` without modifying it: the comparison
+// &(a) != &(a) is always false, so the sink is called with abs(false).
+// Presumably used to silence unused-variable warnings -- confirm at call sites.
+#define GMM_NOPERATION(a) { GMM_NOPERATION_(abs(&(a) != &(a))); }
+
+/* ********************************************************************** */
+/*	Compilers detection.						  */
+/* ********************************************************************** */
+
+/* for sun CC 5.0 ...
+#if defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x500
+# include <stdcomp.h>
+# undef _RWSTD_NO_CLASS_PARTIAL_SPEC
+# undef _RWSTD_NO_NAMESPACE
+#endif
+*/
+/* for VISUAL C++ ...
+#if defined(_MSC_VER) //  && !defined(__MWERKS__)
+#define _GETFEM_MSVCPP_ _MSC_VER
+#endif
+*/
+
+#if defined(__GNUC__)
+#  if (__GNUC__ < 4)
+#    error : PLEASE UPDATE g++ TO AT LEAST 4.8 VERSION
+#  endif
+#endif
+
+/* ********************************************************************** */
+/*	C++ Standard Headers.						  */
+/* ********************************************************************** */
+#include <clocale>
+#include <cstdlib>
+#include <cstddef>
+#include <cmath>
+#include <cstring>
+#include <cctype>
+#include <cassert>
+#include <climits>
+#include <iostream>
+//#include <ios>
+#include <fstream>
+#include <ctime>
+#include <exception>
+#include <typeinfo>
+#include <stdexcept>
+#include <iterator>
+#include <algorithm>
+#include <vector>
+#include <deque>
+#include <string>
+#include <complex>
+#include <limits>
+#include <sstream>
+#include <numeric>
+#include <memory>
+#include <array>
+#include <locale.h>
+
+namespace std {
+#if defined(__GNUC__) && (__cplusplus <= 201103L)
+  // Local std::make_unique for GNU compilers whose language level is C++11
+  // or lower. _MakeUniq exposes a different member type for each kind of
+  // _Tp, so exactly one of the overloads below is viable for a given _Tp.
+  template<typename _Tp>
+    struct _MakeUniq
+    { using __single_object = unique_ptr<_Tp>; };
+  template<typename _Tp>
+    struct _MakeUniq<_Tp[]>
+    { using __array = unique_ptr<_Tp[]>; };
+  template<typename _Tp, size_t _Bound>
+    struct _MakeUniq<_Tp[_Bound]>
+    { struct __invalid_type { }; };
+
+  /// std::make_unique for single objects: forwards its arguments to the
+  /// constructor of _Tp.
+  template<typename _Tp, typename... _Args>
+    inline typename _MakeUniq<_Tp>::__single_object
+    make_unique(_Args&&... __args)
+    {
+      return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...));
+    }
+
+  /// std::make_unique for arrays of unknown bound: allocates __num
+  /// value-initialised elements.
+  template<typename _Tp>
+    inline typename _MakeUniq<_Tp>::__array
+    make_unique(size_t __num)
+    {
+      typedef typename remove_extent<_Tp>::type _Elem;
+      return unique_ptr<_Tp>(new _Elem[__num]());
+    }
+
+  /// std::make_unique is disabled for arrays of known bound.
+  template<typename _Tp, typename... _Args>
+    inline typename _MakeUniq<_Tp>::__invalid_type
+    make_unique(_Args&&...) = delete;
+#endif
+
+
+  // Shared pointer to an array: a shared_ptr<T> whose deleter is
+  // default_delete<T[]>. Should simply be replaced by std::shared_ptr<T[]>
+  // once the STL supports it.
+  template <typename T> class shared_array_ptr : shared_ptr<T> {
+    typedef shared_ptr<T> base_type;
+  public:
+    /// Empty pointer.
+    shared_array_ptr() {}
+    /// Takes ownership of the array q; it is released with delete[].
+    shared_array_ptr(T *q) : base_type(q, default_delete<T[]>()) {}
+    /// Shares ownership with p while pointing at q.
+    template <typename Y> shared_array_ptr(const std::shared_ptr<Y> &p, T *q)
+      : base_type(p, q) {}
+    T *get() const { return base_type::get(); }
+    T& operator*() const { return base_type::operator*(); }
+    T* operator->() const { return base_type::operator->(); }
+  };
+
+  /// Allocates an array of num elements with new T[num] and wraps it in a
+  /// shared_array_ptr.
+  template <typename T> shared_array_ptr<T> make_shared_array(size_t num)
+  { return shared_array_ptr<T>(new T[num]); }
+}
+
+
+
+
+#ifdef GETFEM_HAVE_OPENMP
+
+#include <omp.h>
+	/** Number of OpenMP threads, as reported by omp_get_max_threads(). */
+	inline size_t num_threads()
+	{
+		return omp_get_max_threads();
+	}
+	/** Index of the calling thread, as reported by omp_get_thread_num(). */
+	inline size_t this_thread()
+	{
+		return omp_get_thread_num();
+	}
+	/** True when called from inside an OpenMP parallel section. */
+	inline bool me_is_multithreaded_now()
+	{
+		return omp_in_parallel() != 0;
+	}
+#else
+	/** Without OpenMP there is exactly one thread. */
+	inline size_t num_threads()
+	{
+		return 1;
+	}
+	/** Without OpenMP the calling thread always has index 0. */
+	inline size_t this_thread()
+	{
+		return 0;
+	}
+	/** Without OpenMP the program is never in a parallel section. */
+	inline bool me_is_multithreaded_now()
+	{
+		return false;
+	}
+#endif
+
+namespace gmm {
+
+	using std::endl; using std::cout; using std::cerr;
+        using std::ends; using std::cin; using std::isnan;
+
+#ifdef _WIN32
+
+	/** Scope guard that switches the LC_NUMERIC category of the C locale
+	    to "C" and puts the previous setting back on destruction. Both
+	    steps are skipped while me_is_multithreaded_now() returns true. */
+	class standard_locale {
+		std::string saved_numeric_;   // LC_NUMERIC setting found at construction
+		std::locale saved_cin_;       // locale of cin at construction (stored only)
+	public :
+		inline standard_locale(void) : saved_cin_(cin.getloc())
+		{
+			if (me_is_multithreaded_now()) return;
+			saved_numeric_ = setlocale(LC_NUMERIC, 0);
+			setlocale(LC_NUMERIC, "C");
+		}
+
+		inline ~standard_locale()
+		{
+			if (me_is_multithreaded_now()) return;
+			setlocale(LC_NUMERIC, saved_numeric_.c_str());
+		}
+	};
+#else
+	/** This is the equivalent of the above solution for Linux, but it still needs to be tested. */
+	//class standard_locale {
+	//	locale_t oldloc;
+	//	locale_t temploc;
+
+	//public :
+	//	inline standard_locale(void) : oldloc(uselocale((locale_t)0))
+	//	{
+	//			temploc = newlocale(LC_NUMERIC, "C", NULL);
+    //              uselocale(temploc);
+	//	}
+
+	//	inline ~standard_locale()
+	//	{
+	//		    uselocale(oldloc);
+	//			freelocale(temploc);
+	//	}
+	//};
+
+
+  /** Scope guard that switches the LC_NUMERIC category of the C locale and
+      the locale of cin to "C", and restores both on destruction. */
+  class standard_locale {
+    std::string saved_numeric_;   // LC_NUMERIC setting found at construction
+    std::locale saved_cin_;       // locale of cin found at construction
+
+  public :
+    inline standard_locale(void)
+      : saved_numeric_(setlocale(LC_NUMERIC, 0)), saved_cin_(cin.getloc())
+    {
+      setlocale(LC_NUMERIC, "C");
+      cin.imbue(std::locale("C"));
+    }
+
+    inline ~standard_locale()
+    {
+      setlocale(LC_NUMERIC, saved_numeric_.c_str());
+      cin.imbue(saved_cin_);
+    }
+  };
+
+
+#endif
+
+  /** Scope guard that imbues a stream with the "C" locale and restores the
+      stream's previous locale on destruction. */
+  class stream_standard_locale {
+    std::locale saved_;    // locale of the stream at construction
+    std::ios &stream_;     // stream being guarded
+
+  public :
+    inline stream_standard_locale(std::ios &i)
+      : saved_(i.getloc()), stream_(i)
+    {
+      stream_.imbue(std::locale("C"));
+    }
+
+    inline ~stream_standard_locale()
+    {
+      stream_.imbue(saved_);
+    }
+  };
+
+
+
+
+  /* ******************************************************************* */
+  /*       Clock functions.                                              */
+  /* ******************************************************************* */
+
+# if  defined(HAVE_SYS_TIMES)
+  /** User CPU time in seconds: tms_utime from times() divided by the
+      clock-tick rate, which is read once through sysconf(_SC_CLK_TCK). */
+  inline double uclock_sec(void) {
+    static double ticks_per_sec = 0.;
+    if (ticks_per_sec == 0.) ticks_per_sec = sysconf(_SC_CLK_TCK);
+    tms usage;
+    times(&usage);
+    return double(usage.tms_utime) / ticks_per_sec;
+  }
+# else
+  /** Processor time in seconds: clock() divided by CLOCKS_PER_SEC. */
+  inline double uclock_sec(void) {
+    const double ticks = double(clock());
+    return ticks / double(CLOCKS_PER_SEC);
+  }
+# endif
+
+  /* ******************************************************************** */
+  /*	Fixed size integer types.                     			  */
+  /* ******************************************************************** */
+  // Remark : the test program dynamic_array tests the length of
+  //          resulting integers
+
+  /** Maps a size in bytes to the signed / unsigned built-in integer types
+      of that size. The primary template (no matching type) yields void. */
+  template <size_t s> struct fixed_size_integer_generator {
+    using int_base_type = void;
+    using uint_base_type = void;
+  };
+
+  template <> struct fixed_size_integer_generator<sizeof(char)> {
+    using int_base_type = signed char;
+    using uint_base_type = unsigned char;
+  };
+
+  // In each specialisation below, a constant is subtracted from the size
+  // whenever the type has the same size as a type already handled above.
+  // This moves the specialisation to an otherwise unused argument, so the
+  // same size is never specialised twice.
+  template <> struct fixed_size_integer_generator<sizeof(short int)
+    - ((sizeof(short int) == sizeof(char)) ? 78 : 0)> {
+    using int_base_type = signed short int;
+    using uint_base_type = unsigned short int;
+  };
+
+  template <> struct fixed_size_integer_generator<sizeof(int)
+    - ((sizeof(int) == sizeof(short int)) ? 59 : 0)> {
+    using int_base_type = signed int;
+    using uint_base_type = unsigned int;
+  };
+
+  template <> struct fixed_size_integer_generator<sizeof(long)
+    - ((sizeof(int) == sizeof(long)) ? 93 : 0)> {
+    using int_base_type = signed long;
+    using uint_base_type = unsigned long;
+  };
+
+  template <> struct fixed_size_integer_generator<sizeof(long long)
+    - ((sizeof(long long) == sizeof(long)) ? 99 : 0)> {
+    using int_base_type = signed long long;
+    using uint_base_type = unsigned long long;
+  };
+
+  // Integer types of 1, 2, 4 and 8 bytes.
+  using int8_type   = fixed_size_integer_generator<1>::int_base_type;
+  using uint8_type  = fixed_size_integer_generator<1>::uint_base_type;
+  using int16_type  = fixed_size_integer_generator<2>::int_base_type;
+  using uint16_type = fixed_size_integer_generator<2>::uint_base_type;
+  using int32_type  = fixed_size_integer_generator<4>::int_base_type;
+  using uint32_type = fixed_size_integer_generator<4>::uint_base_type;
+  using int64_type  = fixed_size_integer_generator<8>::int_base_type;
+  using uint64_type = fixed_size_integer_generator<8>::uint_base_type;
+
+// #if INT_MAX == 32767
+//   typedef signed int    int16_type;
+//   typedef unsigned int uint16_type;
+// #elif  SHRT_MAX == 32767
+//   typedef signed short int    int16_type;
+//   typedef unsigned short int uint16_type;
+// #else
+// # error "impossible to build a 16 bits integer"
+// #endif
+
+// #if INT_MAX == 2147483647
+//   typedef signed int    int32_type;
+//   typedef unsigned int uint32_type;
+// #elif  SHRT_MAX == 2147483647
+//   typedef signed short int    int32_type;
+//   typedef unsigned short int uint32_type;
+// #elif LONG_MAX == 2147483647
+//   typedef signed long int    int32_type;
+//   typedef unsigned long int uint32_type;
+// #else
+// # error "impossible to build a 32 bits integer"
+// #endif
+
+// #if INT_MAX == 9223372036854775807L || INT_MAX == 9223372036854775807
+//   typedef signed int    int64_type;
+//   typedef unsigned int uint64_type;
+// #elif LONG_MAX == 9223372036854775807L || LONG_MAX == 9223372036854775807
+//   typedef signed long int    int64_type;
+//   typedef unsigned long int uint64_type;
+// #elif LLONG_MAX == 9223372036854775807LL || LLONG_MAX == 9223372036854775807L || LLONG_MAX == 9223372036854775807
+//   typedef signed long long int int64_type;
+//   typedef unsigned long long int uint64_type;
+// #else
+// # error "impossible to build a 64 bits integer"
+// #endif
+
+#if defined(__GNUC__) && !defined(__ICC)
+/*
+   g++ can issue a warning at each usage of a function declared with this special attribute
+   (also works with typedefs and variable declarations)
+*/
+# define IS_DEPRECATED __attribute__ ((__deprecated__))
+/*
+  the specified function is inlined at any optimization level
+*/
+# define ALWAYS_INLINE __attribute__((always_inline))
+#else
+# define IS_DEPRECATED
+# define ALWAYS_INLINE
+#endif
+
+}
+
+  /* ******************************************************************** */
+  /*	Import/export classes and interfaces from a shared library          */
+  /* ******************************************************************** */
+
+// APIDECL decorates declarations that cross a shared-library boundary.
+//  - EXPORTED_TO_SHARED_LIB   : the library itself is being built;
+//  - IMPORTED_FROM_SHARED_LIB : client code is using the library;
+//  - neither                  : static linking, APIDECL expands to nothing.
+// Defining both macros at once is rejected with an #error.
+#if defined(EXPORTED_TO_SHARED_LIB)
+#  if defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#     define APIDECL __declspec(dllexport)
+#  elif defined(__GNUC__)
+      // The macro name was missing on this line, which made the directive
+      // ill-formed and left APIDECL undefined when exporting with GCC.
+#     define APIDECL __attribute__((visibility("default")))
+#  else
+#     define APIDECL
+#  endif
+#   if defined(IMPORTED_FROM_SHARED_LIB)
+#	  error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE
+#   endif
+#endif
+
+#if defined(IMPORTED_FROM_SHARED_LIB)
+#  if defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#     define APIDECL __declspec(dllimport)
+#  else
+#     define APIDECL
+#  endif
+#   if defined(EXPORTED_TO_SHARED_LIB)
+#	  error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE
+#   endif
+#endif
+
+#ifndef EXPORTED_TO_SHARED_LIB
+#  ifndef IMPORTED_FROM_SHARED_LIB
+#    define APIDECL  //empty, used during static linking
+#  endif
+#endif
+
+#endif /* GMM_STD_H__ */
--- a/gmm/gmm_sub_index.h
+++ b/gmm/gmm_sub_index.h
@ -0,0 +1,224 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_index.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief sub-indices.
+*/
+
+#ifndef GMM_SUB_INDEX_H__
+#define GMM_SUB_INDEX_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*		sub indices                               		  */
+  /* ******************************************************************** */
+
+  /** Vector of indices carrying an intrusive reference counter.
+   *
+   *  nb_ref starts at 1 in every constructor; it is incremented and
+   *  decremented by index_generator::attach / index_generator::unattach.
+   */
+  struct basic_index : public std::vector<size_t> {
+
+    mutable size_type nb_ref;
+    // size_type key1; compute the sum of the components
+    // const basic_index *rind; reverse index, if it exists
+
+    /** Checked read: entry i, or size_type(-1) when i is past the end. */
+    size_t operator[](size_type i) const {
+      if (i >= size()) return size_type(-1);
+      return std::vector<size_t>::operator[](i);
+    }
+
+    /** Empty index. */
+    basic_index() : nb_ref(1) {}
+    /** Index made of j value-initialized entries. */
+    basic_index(size_type j) : std::vector<size_t>(j), nb_ref(1) {}
+    /** Copy of the range [b, e); the size is computed as e - b. */
+    template <typename IT> basic_index(IT b, IT e)
+      : std::vector<size_t>(e-b), nb_ref(1) { std::copy(b, e, begin()); }
+    /** Reverse index of *pbi: entry v holds the position of the value v in
+     *  *pbi, and size_type(-1) when v does not occur in *pbi. */
+    basic_index(const basic_index *pbi) : nb_ref(1) {
+      const const_iterator first = pbi->begin(), last = pbi->end();
+      size_type largest = 0;
+      for (const_iterator p = first; p != last; ++p)
+        if (largest < *p) largest = *p;
+      // The vector is still empty here, so every slot receives size_type(-1).
+      resize(largest + 1, size_type(-1));
+      size_type pos = 0;
+      for (const_iterator p = first; p != last; ++p, ++pos)
+        std::vector<size_t>::operator[](*p) = pos;
+    }
+    /** Exchanges the entries at positions i and j (no bounds check). */
+    void swap(size_type i, size_type j) {
+      size_t &a = std::vector<size_t>::operator[](i);
+      size_t &b = std::vector<size_t>::operator[](j);
+      std::swap(a, b);
+    }
+
+  };
+
+  typedef basic_index *pbasic_index;
+
+  /** Allocation and reference counting helpers for basic_index objects. */
+  struct index_generator {
+
+    /** Allocates a basic_index holding a copy of the range [begin, end). */
+    template <typename IT> static pbasic_index create_index(IT begin, IT end) {
+      pbasic_index created = new basic_index(begin, end);
+      return created;
+    }
+    /** Allocates the reverse index of *pbi. */
+    static pbasic_index create_rindex(pbasic_index pbi) {
+      pbasic_index created = new basic_index(pbi);
+      return created;
+    }
+    /** Adds one reference to *pbi; a null pointer is ignored. */
+    static void attach(pbasic_index pbi) {
+      if (!pbi) return;
+      pbi->nb_ref++;
+    }
+    /** Drops one reference to *pbi and deletes it when none is left;
+     *  a null pointer is ignored. */
+    static void unattach(pbasic_index pbi) {
+      if (!pbi) return;
+      if (--(pbi->nb_ref) == 0) delete pbi;
+    }
+
+  };
+
+  /** Sub-index described by an explicit, reference-counted list of indices.
+   *
+   *  ind holds the index list; rind is its reverse index, created on first
+   *  use by test_rind(). Copies share both lists through the reference
+   *  counting of index_generator.
+   *
+   *  first() / last() are the smallest / largest value of the list (both 0
+   *  for an empty list). Note that last() is therefore inclusive here,
+   *  whereas sub_interval::last() is one past the end.
+   *
+   *  A default-constructed sub_index owns no list (ind == 0): size(),
+   *  index(), rindex() and the iterator accessors dereference ind and must
+   *  not be called on it.
+   */
+  struct sub_index {
+
+    size_type first_, last_;
+    typedef basic_index base_type;
+    typedef base_type::const_iterator const_iterator;
+
+    mutable pbasic_index ind;
+    mutable pbasic_index rind;
+
+    /** Recomputes first_ / last_ as the minimum / maximum of *ind. */
+    void comp_extr(void) {
+      std::vector<size_t>::const_iterator it = ind->begin(), ite = ind->end();
+      if (it != ite) { first_=last_= *it; ++it; } else { first_=last_= 0; }
+      for (; it != ite; ++it) 
+	{ first_ = std::min(first_, *it); last_ = std::max(last_, *it); }
+    }
+
+    /** Creates the reverse index if it does not exist yet. */
+    inline void test_rind(void) const
+    { if (!rind) rind = index_generator::create_rindex(ind); }
+    size_type size(void) const { return ind->size(); }
+    size_type first(void) const { return first_; }
+    size_type last(void) const { return last_; }
+    /** i-th index of the list, or size_type(-1) when i >= size(). */
+    size_type index(size_type i) const { return (*ind)[i]; }
+    /** Position of the value i in the list, or size_type(-1) if absent. */
+    size_type rindex(size_type i) const {
+      test_rind();
+      if (i < rind->size()) return (*rind)[i]; else return size_type(-1);
+    }
+   
+    const_iterator  begin(void) const { return  ind->begin(); }
+    const_iterator    end(void) const { return  ind->end();   }
+    const_iterator rbegin(void) const { test_rind(); return rind->begin(); }
+    const_iterator   rend(void) const { test_rind(); return rind->end();   }
+
+    /** Empty handle without index list. first_ / last_ are zeroed so that
+     *  copying or assigning from such an object never reads an
+     *  indeterminate value. */
+    sub_index() : first_(0), last_(0), ind(0), rind(0) {}
+    /** Index list copied from the range [it, ite). */
+    template <typename IT> sub_index(IT it, IT ite)
+      : ind(index_generator::create_index(it, ite)),
+	rind(0) { comp_extr(); }
+    /** Index list copied from the container c (uses c.begin() / c.end()). */
+    template <typename CONT> sub_index(const CONT &c)
+      : ind(index_generator::create_index(c.begin(), c.end())),
+	rind(0) { comp_extr(); }
+    ~sub_index() {
+      index_generator::unattach(rind);
+      index_generator::unattach(ind);
+    }
+    /** Shares the lists of si (reference counts are incremented). */
+    sub_index(const sub_index &si) : first_(si.first_), last_(si.last_),
+				     ind(si.ind), rind(si.rind)
+    { index_generator::attach(rind); index_generator::attach(ind); }
+    /** Shares the lists of si and releases the ones previously held. */
+    sub_index &operator =(const sub_index &si) {
+      // Retain the source lists BEFORE releasing ours: with the opposite
+      // order a self-assignment (or any alias holding the last reference)
+      // would delete the lists and then keep using the dangling pointers.
+      index_generator::attach(si.rind);
+      index_generator::attach(si.ind);
+      index_generator::unattach(rind);
+      index_generator::unattach(ind);
+      ind = si.ind; rind = si.rind;
+      first_ = si.first_; last_ = si.last_;
+      return *this;
+    }
+  };
+
+  /** sub_index variant that additionally allows two entries of the index
+   *  list to be exchanged in place (see swap()). */
+  struct unsorted_sub_index : public sub_index {
+    typedef basic_index base_type;
+    typedef base_type::const_iterator const_iterator;
+
+    unsorted_sub_index() {}
+    template <typename IT> unsorted_sub_index(IT it, IT ite)
+      : sub_index(it, ite) {}
+    template <typename CONT> unsorted_sub_index(const CONT &c)
+      : sub_index(c) {}
+    // The explicit conversion to the base type selects sub_index's copy
+    // constructor rather than its templated container constructor.
+    unsorted_sub_index(const unsorted_sub_index &si)
+      : sub_index(static_cast<const sub_index &>(si)) { }
+    unsorted_sub_index &operator =(const unsorted_sub_index &si) {
+      sub_index::operator =(si);
+      return *this;
+    }
+    /** Exchanges the entries at positions i and j of the index list and
+     *  keeps an already built reverse index consistent. Asserts that the
+     *  index list is not shared (ind->nb_ref <= 1). */
+    void swap(size_type i, size_type j) {
+      GMM_ASSERT2(ind->nb_ref <= 1, "Operation not allowed on this index");
+      if (rind) {
+        const size_type vi = (*ind)[i];
+        const size_type vj = (*ind)[j];
+        rind->swap(vi, vj);
+      }
+      ind->swap(i, j);
+    }
+  };
+
+  /** Writes si to o as "sub_index(i0, i1, ...)". */
+  inline std::ostream &operator << (std::ostream &o, const sub_index &si) {
+    o << "sub_index(";
+    const size_type count = si.size();
+    for (size_type k = 0; k < count; ++k) {
+      if (k != 0) o << ", ";
+      o << si.index(k);
+    }
+    o << ")";
+    return o;
+  }
+
+  /** Contiguous index range [min, max).
+   *
+   *  first() is min and last() is max, i.e. one past the largest index.
+   */
+  struct sub_interval {
+    size_type min, max; 
+
+    size_type size(void) const { return max - min; }
+    size_type first(void) const { return min; }
+    size_type last(void) const { return max; }
+    /** i-th index of the range (no bounds check). */
+    size_type index(size_type i) const { return min + i; }
+    size_type step(void) const { return 1; }
+    /** Position of i inside the range, or size_type(-1) if i is outside. */
+    size_type rindex(size_type i) const
+    { if (i >= min && i < max) return i - min; return size_type(-1); }
+    /** Range of l consecutive indices starting at mi. */
+    sub_interval(size_type mi, size_type l) : min(mi), max(mi+l) {}
+    /** Empty range [0, 0); the bounds are zeroed so that a default-built
+     *  object can be copied and queried without reading indeterminate
+     *  values. */
+    sub_interval() : min(0), max(0) {}
+  };
+
+  /** Writes si to o as "sub_interval(min, size)". */
+  inline std::ostream &operator << (std::ostream &o, const sub_interval &si) {
+    o << "sub_interval(";
+    o << si.min;
+    o << ", ";
+    o << si.size();
+    o << ")";
+    return o;
+  }
+
+  /** Regularly strided index set min, min + N, min + 2N, ... below max.
+   *
+   *  size() divides by N, so the step must be non-zero. last() is one past
+   *  the largest index of the slice (max itself when the slice is empty).
+   */
+  struct sub_slice {
+    size_type min, max, N;
+
+    size_type size(void) const { return (max - min) / N; }
+    size_type first(void) const { return min; }
+    size_type last(void) const { return (min == max) ? max : max+1-N; }
+    size_type step(void) const { return N; }
+    /** i-th index of the slice (no bounds check). */
+    size_type index(size_type i) const { return min + N * i; }
+    /** Position of i inside the slice, or size_type(-1) when i is out of
+     *  range or does not fall on the stride. */
+    size_type rindex(size_type i) const { 
+      if (i >= min && i < max)
+	{ size_type j = (i - min); if (j % N == 0) return j / N; }
+      return size_type(-1);
+    }
+    /** Slice of l indices starting at mi with step n. */
+    sub_slice(size_type mi, size_type l, size_type n)
+      : min(mi), max(mi+l*n), N(n) {}
+    /** Empty slice; the step is set to 1 so that size() on a default-built
+     *  object neither reads indeterminate values nor divides by zero. */
+    sub_slice(void) : min(0), max(0), N(1) {}
+  };
+
+  /** Writes si to o as "sub_slice(min, size, step)". */
+  inline std::ostream &operator << (std::ostream &o, const sub_slice &si) {
+    o << "sub_slice(";
+    o << si.min;
+    o << ", ";
+    o << si.size();
+    o << ", ";
+    o << si.step();
+    o << ")";
+    return o;
+  }
+
+  /** Type-level flag attached to a sub-index type: bool_type is linalg_true
+   *  for every type except unsorted_sub_index, for which it is linalg_false. */
+  template<class SUBI> struct index_is_sorted {
+    typedef linalg_true bool_type;
+  };
+  template<> struct index_is_sorted<unsorted_sub_index> {
+    typedef linalg_false bool_type;
+  };
+
+}
+
+#endif //  GMM_SUB_INDEX_H__
--- a/gmm/gmm_sub_matrix.h
+++ b/gmm/gmm_sub_matrix.h
@ -0,0 +1,406 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_matrix.h
+   @author Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Generic sub-matrices.
+*/
+
+#ifndef GMM_SUB_MATRIX_H__
+#define GMM_SUB_MATRIX_H__
+
+#include "gmm_sub_vector.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		sub row matrices type                                      */
+  /* ********************************************************************* */
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_row_matrix { // Sub-matrix reference built on the row iterators of a matrix: si1 selects rows, si2 selects columns.
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M; // type PT points to, i.e. the referenced matrix type
+    typedef M * CPT; // pointer-to-M form, parameter of the converting constructor below
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_row_iterator, typename linalg_traits<M>::row_iterator,
+	    PT>::ref_type iterator; // presumably the const or mutable row iterator of M depending on PT -- confirm in select_ref
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    SUBI1 si1; // row selection
+    SUBI2 si2; // column selection
+    iterator begin_; // set from mat_row_begin(m) by the main constructor
+    porigin_type origin; // set from linalg_origin(m) by the main constructor
+    // Element (i, j): forwards to linalg_traits<M>::access on row begin_ + si1.index(i) at position si2.index(j).
+    reference operator()(size_type i, size_type j) const 
+    { return linalg_traits<M>::access(begin_ + si1.index(i), si2.index(j)); }
+   // Dimensions of the sub-matrix are the sizes of the two selections.
+    size_type nrows(void) const { return si1.size(); }
+    size_type ncols(void) const { return si2.size(); }
+    // Constructors: from a matrix and two selections, default (begin_ and origin are not initialised), and from the CPT instantiation.
+    gen_sub_row_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2)
+      : si1(s1), si2(s2), begin_(mat_row_begin(m)),
+	origin(linalg_origin(m)) {}
+    gen_sub_row_matrix() {}
+    gen_sub_row_matrix(const gen_sub_row_matrix<CPT, SUBI1, SUBI2> &cr) :
+      si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {}
+  };
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_row_matrix_iterator { // Iterator over the rows selected by si1; it stores a position ii into si1, not a row iterator.
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT; // parameter of the converting constructor below
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_row_iterator, typename linalg_traits<M>::row_iterator,
+	    PT>::ref_type ITER; // row iterator type of the underlying matrix M
+    typedef ITER value_type;
+    typedef ITER *pointer;
+    typedef ITER &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_sub_row_matrix_iterator<PT, SUBI1, SUBI2> iterator;
+    // State: base row iterator, the two selections, and the current position ii within si1.
+    ITER it;
+    SUBI1 si1;
+    SUBI2 si2;
+    size_type ii;
+    // Arithmetic only changes ii.
+    iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; ii--; return tmp; }
+    iterator &operator ++()   { ii++; return *this; }
+    iterator &operator --()   { ii--; return *this; }
+    iterator &operator +=(difference_type i) { ii += i; return *this; }
+    iterator &operator -=(difference_type i) { ii -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { return ii - i.ii; } // unsigned difference converted to difference_type
+    // Dereferencing yields the underlying row iterator advanced by the selected row index.
+    ITER operator *() const { return it + si1.index(ii); }
+    ITER operator [](int i) { return it + si1.index(ii+i); }
+    // Comparisons look at the position ii only (it, si1 and si2 are not compared).
+    bool operator ==(const iterator &i) const { return (ii == i.ii); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (ii < i.ii); }
+    // Constructors: default (members not initialised), from the MPT instantiation, and from explicit state.
+    gen_sub_row_matrix_iterator(void) {}
+    gen_sub_row_matrix_iterator(const 
+	     gen_sub_row_matrix_iterator<MPT, SUBI1, SUBI2> &itm)
+      : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {}
+    gen_sub_row_matrix_iterator(const ITER &iter, const SUBI1 &s1,
+				const SUBI2 &s2, size_type i)
+      : it(iter), si1(s1), si2(s2), ii(i) { }
+    
+  };
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct linalg_traits<gen_sub_row_matrix<PT, SUBI1, SUBI2> > { // Traits describing gen_sub_row_matrix to the generic gmm algorithms.
+    typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M; // referenced matrix type
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_col_type; // no column access: all column types are abstract_null_type
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef typename sub_vector_type<const typename org_type<typename
+	    linalg_traits<M>::const_sub_row_type>::t *, SUBI2>::vector_type
+            const_sub_row_type; // a row of the sub-matrix: sub-vector of a row of M restricted by SUBI2
+    typedef typename select_ref<abstract_null_type, 
+	    typename sub_vector_type<typename org_type<typename linalg_traits<M>::sub_row_type>::t *,
+	    SUBI2>::vector_type, PT>::ref_type sub_row_type;
+    typedef gen_sub_row_matrix_iterator<typename const_pointer<PT>::pointer,
+	    SUBI1, SUBI2> const_row_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_sub_row_matrix_iterator<PT, SUBI1, SUBI2>, PT>::ref_type
+            row_iterator;
+    typedef typename linalg_traits<const_sub_row_type>::storage_type
+            storage_type;
+    typedef row_major sub_orientation;
+    typedef linalg_true index_sorted;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it) // row of M at *it, restricted to the columns it.si2
+    { return const_sub_row_type(linalg_traits<M>::row(*it), it.si2); }
+    static sub_row_type row(const row_iterator &it)
+    { return sub_row_type(linalg_traits<M>::row(*it), it.si2); }
+    static const_row_iterator row_begin(const this_type &m) // begin / end differ only by the position: 0 versus m.nrows()
+    { return const_row_iterator(m.begin_, m.si1, m.si2, 0); }
+    static row_iterator row_begin(this_type &m)
+    { return row_iterator(m.begin_, m.si1, m.si2, 0); }
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m.begin_, m.si1, m.si2,  m.nrows()); }
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m.begin_, m.si1, m.si2, m.nrows()); }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m) { // calls clear() on every row of the sub-matrix
+      row_iterator it = mat_row_begin(m), ite = mat_row_end(m);
+      for (; it != ite; ++it) clear(row(it));
+    }
+    static value_type access(const const_row_iterator &itrow, size_type i) // i is translated through si2 before reaching M
+    { return linalg_traits<M>::access(*itrow, itrow.si2.index(i)); }
+    static reference access(const row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(*itrow, itrow.si2.index(i)); }
+  };
+  
+  /** Streams the sub-matrix m to o by delegating to gmm::write. */
+  template <typename PT, typename SUBI1, typename SUBI2>
+  std::ostream &operator <<(std::ostream &o,
+			    const gen_sub_row_matrix<PT, SUBI1, SUBI2>& m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+
+  /* ********************************************************************* */
+  /*		sub column matrices type                                   */
+  /* ********************************************************************* */
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_col_matrix { // Sub-matrix reference built on the column iterators of a matrix: si1 selects rows, si2 selects columns.
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M; // type PT points to, i.e. the referenced matrix type
+    typedef M * CPT; // pointer-to-M form, parameter of the converting constructor below
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<M>
+            ::const_col_iterator, typename linalg_traits<M>::col_iterator,
+	    PT>::ref_type iterator; // presumably the const or mutable column iterator of M depending on PT -- confirm in select_ref
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    SUBI1 si1; // row selection
+    SUBI2 si2; // column selection
+    iterator begin_; // set from mat_col_begin(m) by the main constructor
+    porigin_type origin; // set from linalg_origin(m) by the main constructor
+    // Element (i, j): forwards to linalg_traits<M>::access on column begin_ + si2.index(j) at position si1.index(i).
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_ + si2.index(j), si1.index(i)); }
+
+    size_type nrows(void) const { return si1.size(); }
+    size_type ncols(void) const { return si2.size(); }
+    // Constructors: from a matrix and two selections, default (begin_ and origin are not initialised), and from the CPT instantiation.
+    gen_sub_col_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2)
+      : si1(s1), si2(s2), begin_(mat_col_begin(m)),
+        origin(linalg_origin(m)) {}
+    gen_sub_col_matrix() {}
+    gen_sub_col_matrix(const gen_sub_col_matrix<CPT, SUBI1, SUBI2> &cr) :
+      si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {}
+  };
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct gen_sub_col_matrix_iterator { // Iterator over the columns selected by si2; it stores a position ii into si2, not a column iterator.
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT; // parameter of the converting constructor below
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename select_ref<typename linalg_traits<M>::const_col_iterator,
+				typename linalg_traits<M>::col_iterator,
+				PT>::ref_type ITER; // column iterator type of the underlying matrix M
+    typedef ITER value_type;
+    typedef ITER *pointer;
+    typedef ITER &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_sub_col_matrix_iterator<PT, SUBI1, SUBI2> iterator;
+    // State: base column iterator, the two selections, and the current position ii within si2.
+    ITER it;
+    SUBI1 si1;
+    SUBI2 si2;
+    size_type ii;
+    // Arithmetic only changes ii.
+    iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; }
+    iterator operator --(int) { iterator tmp = *this; ii--; return tmp; }
+    iterator &operator ++()   { ii++; return *this; }
+    iterator &operator --()   { ii--; return *this; }
+    iterator &operator +=(difference_type i) { ii += i; return *this; }
+    iterator &operator -=(difference_type i) { ii -= i; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { return ii - i.ii; } // unsigned difference converted to difference_type
+    // Dereferencing yields the underlying column iterator advanced by the selected column index.
+    ITER operator *() const { return it + si2.index(ii); }
+    ITER operator [](int i) { return it + si2.index(ii+i); }
+    // Comparisons look at the position ii only (it, si1 and si2 are not compared).
+    bool operator ==(const iterator &i) const { return (ii == i.ii); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (ii < i.ii); }
+    // Constructors: default (members not initialised), from the MPT instantiation, and from explicit state.
+    gen_sub_col_matrix_iterator(void) {}
+    gen_sub_col_matrix_iterator(const 
+	gen_sub_col_matrix_iterator<MPT, SUBI1, SUBI2> &itm)
+      : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {}
+    gen_sub_col_matrix_iterator(const ITER &iter, const SUBI1 &s1,
+				const SUBI2 &s2, size_type i)
+      : it(iter), si1(s1), si2(s2), ii(i) { }
+  };
+
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct linalg_traits<gen_sub_col_matrix<PT, SUBI1, SUBI2> > { // Traits describing gen_sub_col_matrix to the generic gmm algorithms.
+    typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M; // referenced matrix type
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_row_type; // no row access: all row types are abstract_null_type
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef typename sub_vector_type<const typename org_type<typename linalg_traits<M>::const_sub_col_type>::t *, SUBI1>::vector_type const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, typename sub_vector_type<typename org_type<typename linalg_traits<M>::sub_col_type>::t *, SUBI1>::vector_type, PT>::ref_type sub_col_type;
+    typedef gen_sub_col_matrix_iterator<typename const_pointer<PT>::pointer,
+	    SUBI1, SUBI2> const_col_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_sub_col_matrix_iterator<PT, SUBI1, SUBI2>, PT>::ref_type
+            col_iterator;
+    typedef col_major sub_orientation;
+    typedef linalg_true index_sorted;
+    typedef typename linalg_traits<const_sub_col_type>::storage_type
+    storage_type;
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_col_type col(const const_col_iterator &it) // column of M at *it, restricted to the rows it.si1
+    { return const_sub_col_type(linalg_traits<M>::col(*it), it.si1); }
+    static sub_col_type col(const col_iterator &it)
+    { return sub_col_type(linalg_traits<M>::col(*it), it.si1); }
+    static const_col_iterator col_begin(const this_type &m) // begin / end differ only by the position: 0 versus m.ncols()
+    { return const_col_iterator(m.begin_, m.si1, m.si2, 0); }
+    static col_iterator col_begin(this_type &m)
+    { return col_iterator(m.begin_, m.si1, m.si2, 0); }
+    static const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m.begin_, m.si1, m.si2,  m.ncols()); }
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m.begin_, m.si1, m.si2, m.ncols()); } 
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m) { // calls clear() on every column of the sub-matrix
+      col_iterator it = mat_col_begin(m), ite = mat_col_end(m);
+      for (; it != ite; ++it) clear(col(it));
+    }
+    static value_type access(const const_col_iterator &itcol, size_type i) // i is translated through si1 before reaching M
+    { return linalg_traits<M>::access(*itcol, itcol.si1.index(i)); }
+    static reference access(const col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(*itcol, itcol.si1.index(i)); }
+  };
+
+  /** Streams the sub-matrix m to o by delegating to gmm::write. */
+  template <typename PT, typename SUBI1, typename SUBI2>
+  std::ostream &operator <<(std::ostream &o,
+			    const gen_sub_col_matrix<PT, SUBI1, SUBI2>& m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+  /* ******************************************************************** */
+  /*		sub matrices                                              */
+  /* ******************************************************************** */
+  
+  template <typename PT, typename SUBI1, typename SUBI2, typename ST>
+  struct sub_matrix_type_ { // Primary template, used when ST is neither col_major nor row_major.
+    typedef abstract_null_type return_type; // NOTE(review): named return_type, while the specializations and sub_matrix_type use matrix_type -- confirm this is intended
+  };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type_<PT, SUBI1, SUBI2, col_major> // column-major matrices map to gen_sub_col_matrix
+  { typedef gen_sub_col_matrix<PT, SUBI1, SUBI2> matrix_type; };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type_<PT, SUBI1, SUBI2, row_major> // row-major matrices map to gen_sub_row_matrix
+  { typedef gen_sub_row_matrix<PT, SUBI1, SUBI2> matrix_type; };
+  template <typename PT, typename SUBI1, typename SUBI2>
+  struct sub_matrix_type { // Picks the sub-matrix type from the principal orientation of the matrix PT points to.
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename sub_matrix_type_<PT, SUBI1, SUBI2,
+        typename principal_orientation_type<typename
+        linalg_traits<M>::sub_orientation>::potype>::matrix_type matrix_type;
+  };
+
+  /** Returns the sub-matrix of m made of the rows selected by si1 and the
+   *  columns selected by si2. GMM_ASSERT2 checks that si1.last() and
+   *  si2.last() do not exceed the dimensions of m. */
+  template <typename M, typename SUBI1, typename SUBI2>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI2>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type,
+    M *>::return_type
+  sub_matrix(M &m, const SUBI1 &si1, const SUBI2 &si2) {
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI2>::matrix_type
+      const_view_type;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type
+      view_type;
+    typedef typename select_return<const_view_type, view_type,
+      M *>::return_type result_type;
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m),
+		"sub matrix too large");
+    return result_type(linalg_cast(m), si1, si2);
+  }
+
+  /** Overload for a const matrix: returns the sub-matrix of m made of the
+   *  rows selected by si1 and the columns selected by si2. GMM_ASSERT2
+   *  checks that si1.last() and si2.last() do not exceed the dimensions
+   *  of m. */
+  template <typename M, typename SUBI1, typename SUBI2>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI2>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type,
+    const M *>::return_type
+  sub_matrix(const M &m, const SUBI1 &si1, const SUBI2 &si2) {
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI2>::matrix_type
+      const_view_type;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI2>::matrix_type
+      view_type;
+    typedef typename select_return<const_view_type, view_type,
+      const M *>::return_type result_type;
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m),
+		"sub matrix too large");
+    return result_type(linalg_cast(m), si1, si2);
+  }
+
+  /** Returns the sub-matrix of m that uses the same selection si1 for both
+   *  rows and columns. GMM_ASSERT2 checks that si1.last() exceeds neither
+   *  the number of rows nor the number of columns of m. */
+  template <typename M, typename SUBI1>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    M *>::return_type
+  sub_matrix(M &m, const SUBI1 &si1) {
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI1>::matrix_type
+      const_view_type;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type
+      view_type;
+    typedef typename select_return<const_view_type, view_type,
+      M *>::return_type result_type;
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m),
+		"sub matrix too large");
+    return result_type(linalg_cast(m), si1, si1);
+  }
+
+  /** Overload for a const matrix: returns the sub-matrix of m that uses the
+   *  same selection si1 for both rows and columns. GMM_ASSERT2 checks that
+   *  si1.last() exceeds neither the number of rows nor the number of
+   *  columns of m. */
+  template <typename M, typename SUBI1>  inline
+    typename select_return<typename sub_matrix_type<const M *, SUBI1, SUBI1>
+    ::matrix_type, typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type,
+    const M *>::return_type
+  sub_matrix(const M &m, const SUBI1 &si1) {
+    typedef typename sub_matrix_type<const M *, SUBI1, SUBI1>::matrix_type
+      const_view_type;
+    typedef typename sub_matrix_type<M *, SUBI1, SUBI1>::matrix_type
+      view_type;
+    typedef typename select_return<const_view_type, view_type,
+      const M *>::return_type result_type;
+    GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m),
+		"sub matrix too large");
+    return result_type(linalg_cast(m), si1, si1);
+  }
+
+}
+
+#endif //  GMM_SUB_MATRIX_H__
--- a/gmm/gmm_sub_vector.h
+++ b/gmm/gmm_sub_vector.h
@ -0,0 +1,560 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_sub_vector.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 13, 2002.
+   @brief Generic sub-vectors.
+*/
+
+#ifndef GMM_SUB_VECTOR_H__
+#define GMM_SUB_VECTOR_H__
+
+#include "gmm_interface.h"
+#include "gmm_sub_index.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		sparse sub-vectors                                         */
+  /* ********************************************************************* */
+  /** Iterator over the entries of an underlying iterator range that are
+   *  selected by a sub-index. It wraps the underlying position `itb`, the
+   *  underlying bound `itbe` and a copy of the sub-index `si`. Entries whose
+   *  underlying index maps to size_type(-1) through si.rindex() are stepped
+   *  over by forward() and backward().
+   *  MIT is the iterator type of the sibling iterator accepted by the last
+   *  constructor.
+   */
+  template <typename IT, typename MIT, typename SUBI>
+  struct sparse_sub_vector_iterator {
+    /* Wrapped position, wrapped bound, and the sub-index used for mapping. */
+    IT itb, itbe;
+    SUBI si;
+    /* Iterator typedefs are forwarded from std::iterator_traits<IT>. */
+    typedef std::iterator_traits<IT>                traits_type;
+    typedef typename traits_type::value_type        value_type;
+    typedef typename traits_type::pointer           pointer;
+    typedef typename traits_type::reference         reference;
+    typedef typename traits_type::difference_type   difference_type;
+    typedef std::bidirectional_iterator_tag         iterator_category;
+    typedef size_t                                  size_type;
+    typedef sparse_sub_vector_iterator<IT, MIT, SUBI>    iterator;
+    /* index(): the underlying index translated through si.rindex(). */
+    size_type index(void) const { return si.rindex(itb.index()); }
+    void forward(void);
+    void backward(void);
+    iterator &operator ++()
+    { ++itb; forward(); return *this; } // step once, then skip unselected
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --()
+    { --itb; backward(); return *this; } // step back once, then skip unselected
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    reference operator *() const { return *itb; }
+    /* Comparison looks only at the wrapped position itb. */
+    bool operator ==(const iterator &i) const { return itb == i.itb; }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    /* Only the (it, ite, s) constructor calls forward() after initializing. */
+    sparse_sub_vector_iterator(void) {}
+    sparse_sub_vector_iterator(const IT &it, const IT &ite, const SUBI &s)
+      : itb(it), itbe(ite), si(s) { forward(); }
+    sparse_sub_vector_iterator(const sparse_sub_vector_iterator<MIT, MIT,
+	 SUBI> &it) : itb(it.itb), itbe(it.itbe), si(it.si) {}
+  };
+
+  /** Advances itb until it reaches itbe or an entry whose index() is not
+   *  size_type(-1). Does nothing when itb already satisfies either condition.
+   */
+  template <typename IT, typename MIT, typename SUBI>
+  void sparse_sub_vector_iterator<IT, MIT, SUBI>::forward(void) {
+    for (; itb != itbe; ++itb)
+      if (index() != size_type(-1)) break;
+  }
+
+  /** Moves itb backwards until it equals itbe or designates an entry whose
+   *  index() is not size_type(-1).
+   *  NOTE(review): the loop is bounded by itbe (the bound stored at
+   *  construction), not by a begin position -- confirm callers never step
+   *  back past the first selected entry.
+   */
+  template <typename IT, typename MIT, typename SUBI>
+  void sparse_sub_vector_iterator<IT, MIT, SUBI>::backward(void) {
+    for (; itb != itbe; --itb)
+      if (index() != size_type(-1)) return;
+  }
+  /** Sub-vector object for a vector of type V (the value_type of the pointer
+   *  type PT), restricted by the sub-index `si`. It stores the begin/end
+   *  iterators and the origin of the referenced vector; it does not copy
+   *  the vector's entries in any of its constructors.
+   */
+  template <typename PT, typename SUBI> struct sparse_sub_vector {
+    typedef sparse_sub_vector<PT, SUBI> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    /* Iterator range and origin of the referenced vector, plus the sub-index. */
+    iterator begin_, end_;
+    porigin_type origin;
+    SUBI si;
+    /* The size is the size of the sub-index, not of the referenced vector. */
+    size_type size(void) const { return si.size(); }
+    /* Element i of the sub-vector is element si.index(i) of the referenced vector. */
+    reference operator[](size_type i) const
+    { return linalg_traits<V>::access(origin, begin_, end_, si.index(i)); }
+    /* The const V& constructor casts constness away to obtain iterators and origin. */
+    sparse_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)),
+       end_(vect_end(v)), origin(linalg_origin(v)), si(s) {}
+    sparse_sub_vector(const V &v, const SUBI &s) 
+      : begin_(vect_begin(const_cast<V &>(v))),
+       end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), si(s) {}
+    sparse_sub_vector() {}
+    sparse_sub_vector(const sparse_sub_vector<CPT, SUBI> &cr) // from the V* flavour
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {} 
+  };
+
+  /** Re-seats a sparse sub-vector iterator at the beginning of the vector
+   *  identified by the origin o: it.itb is set to the underlying begin,
+   *  it.itbe to the underlying end, and forward() then skips entries that are
+   *  not selected by the sub-index. Overload taking a non-const
+   *  sparse_sub_vector pointer tag.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_begin(sparse_sub_vector_iterator<IT, MIT, SUBI> &it,
+                           ORG o, sparse_sub_vector<PT, SUBI> *,
+                           linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  /** Same as the non-const overload, selected when the tag argument is a
+   *  pointer to const sparse_sub_vector: it.itb becomes the underlying begin,
+   *  it.itbe the underlying end, then forward() skips unselected entries.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_begin(sparse_sub_vector_iterator<IT, MIT, SUBI> &it,
+                           ORG o, const sparse_sub_vector<PT, SUBI> *,
+                           linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  
+  /** Re-seats a sparse sub-vector iterator at the end of the vector
+   *  identified by the origin o: both it.itb and it.itbe are set to the
+   *  underlying end before forward() is called. Overload taking a non-const
+   *  sparse_sub_vector pointer tag.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_end(sparse_sub_vector_iterator<IT, MIT, SUBI> &it,
+                         ORG o, sparse_sub_vector<PT, SUBI> *,
+                         linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  /** Same as the non-const overload, selected when the tag argument is a
+   *  pointer to const sparse_sub_vector: both it.itb and it.itbe are set to
+   *  the underlying end before forward() is called.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_end(sparse_sub_vector_iterator<IT, MIT, SUBI> &it,
+                         ORG o, const sparse_sub_vector<PT, SUBI> *,
+                         linalg_modifiable) {
+    typedef linalg_traits<sparse_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    set_to_end(it.itbe, o, base_ptr(), base_ref());
+    it.forward();
+  }
+  /** linalg_traits specialization for sparse_sub_vector<PT, SUBI>: exposes
+   *  its associated types, builds begin/end iterators, clears the selected
+   *  entries and gives indexed access. storage_type is abstract_sparse.
+   */
+  template <typename PT, typename SUBI>
+  struct linalg_traits<sparse_sub_vector<PT, SUBI> > {
+    typedef sparse_sub_vector<PT, SUBI> this_type;
+    typedef this_type * pthis_type;
+    typedef PT pV;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename linalg_and<typename index_is_sorted<SUBI>::bool_type,
+	    typename linalg_traits<V>::index_sorted>::bool_type index_sorted; // linalg_and of both flags
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, typename
+            linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+	    typename linalg_traits<V>::iterator, PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    sparse_sub_vector_iterator<pre_iterator, pre_iterator, SUBI>,
+	    PT>::ref_type iterator; // chosen by select_ref on PT
+    typedef sparse_sub_vector_iterator<typename linalg_traits<V>
+            ::const_iterator, pre_iterator, SUBI> const_iterator;
+    typedef abstract_sparse storage_type;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) {
+      iterator it;
+      it.itb = v.begin_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference())) // then re-derive positions from v.origin
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      else it.forward(); // otherwise just skip unselected leading entries
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {
+      const_iterator it; it.itb = v.begin_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      else it.forward();
+      return it;
+    }
+    static iterator end(this_type &v) {
+      iterator it;
+      it.itb = v.end_; it.itbe = v.end_; it.si = v.si; // position and bound are both the end
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      else it.forward();
+      return it;
+    }
+    static const_iterator end(const this_type &v) {
+      const_iterator it; it.itb = v.end_; it.itbe = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      else it.forward();
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type* o, const iterator &begin_,
+		      const iterator &end_) {
+      std::deque<size_type> ind; // indices are collected before anything is written
+      iterator it = begin_;
+      for (; it != end_; ++it) ind.push_front(it.index());
+      for (; !(ind.empty()); ind.pop_back()) // back() is the earliest index pushed
+	access(o, begin_, end_, ind.back()) = value_type(0);
+    }
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+  };
+
+  /** Stream insertion for sparse_sub_vector: delegates to gmm::write and
+   *  returns the stream.
+   */
+  template <typename PT, typename SUBI>
+  std::ostream &operator <<(std::ostream &o,
+                            const sparse_sub_vector<PT, SUBI> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+  /* ********************************************************************* */
+  /*		skyline sub-vectors                                        */
+  /* ********************************************************************* */
+  /** Iterator for skyline sub-vectors. It wraps an underlying iterator `itb`
+   *  and the sub-index `si`, and moves through the underlying range in
+   *  strides of si.step(). MIT is the iterator type of the sibling iterator
+   *  accepted by the last constructor.
+   */
+    template <typename IT, typename MIT, typename SUBI>
+  struct skyline_sub_vector_iterator {
+    /* Wrapped position and the sub-index providing min and step(). */
+    IT itb;
+    SUBI si;
+    /* iterator_category is declared bidirectional although +=, -, [] and < are provided. */
+    typedef std::iterator_traits<IT>                traits_type;
+    typedef typename traits_type::value_type        value_type;
+    typedef typename traits_type::pointer           pointer;
+    typedef typename traits_type::reference         reference;
+    typedef typename traits_type::difference_type   difference_type;
+    typedef std::bidirectional_iterator_tag         iterator_category;
+    typedef size_t                                  size_type;
+    typedef skyline_sub_vector_iterator<IT, MIT, SUBI>    iterator;
+    /* index(): (underlying index - si.min) divided by si.step(), rounded up. */
+    size_type index(void) const
+    { return (itb.index() - si.min + si.step() - 1) / si.step(); }
+    void backward(void); // NOTE(review): no definition is visible in this part of the file
+    iterator &operator ++()
+    { itb += si.step(); return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator &operator --()
+    { itb -= si.step(); return *this; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    /* Arithmetic is expressed in sub-vector positions, i.e. multiples of si.step(). */
+    iterator &operator +=(difference_type i)
+    { itb += si.step() * i; return *this; }
+    iterator &operator -=(difference_type i)
+    { itb -= si.step() * i; return *this; }
+    iterator operator +(difference_type i) const
+    { iterator ii = *this; return (ii += i); }
+    iterator operator -(difference_type i) const
+    { iterator ii = *this; return (ii -= i); }
+    difference_type operator -(const iterator &i) const
+    { return (itb - i.itb) / si.step(); }
+    /* Dereference goes straight to the wrapped iterator. */
+    reference operator *() const  { return *itb; }
+    reference operator [](int ii) { return *(itb + ii * si.step());  }
+    /* Comparisons use index(), not the wrapped iterator itself. */
+    bool operator ==(const iterator &i) const { return index() == i.index();}
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return index()  < i.index();}
+    /* The default constructor leaves itb and si default-initialized. */
+    skyline_sub_vector_iterator(void) {}
+    skyline_sub_vector_iterator(const IT &it, const SUBI &s)
+      : itb(it), si(s) {}
+    skyline_sub_vector_iterator(const skyline_sub_vector_iterator<MIT, MIT,
+	 SUBI> &it) : itb(it.itb), si(it.si) {}
+  };
+  /** Adjusts the iterator pair (it, ite) to the part of the range covered by
+   *  the sub-index si. If the range starts at or after si.max, or ends at or
+   *  before si.min, it is made empty by setting it = ite. Otherwise `it` is
+   *  advanced by dec1 = si.min - it.index() when that is non-negative, else
+   *  by the distance to the next index congruent to si.min modulo si.step();
+   *  `ite` is moved back by dec2 = ite.index() - si.max when that is
+   *  non-negative.
+   *  NOTE(review): si.step() is combined with ptrdiff_t values in the %
+   *  expressions; if step() returns an unsigned type the operands are
+   *  converted first -- confirm the dec2 < 0 branch behaves as intended.
+   */
+  template <typename IT, typename SUBI>
+  void update_for_sub_skyline(IT &it, IT &ite, const SUBI &si) {
+    if (it.index() >= si.max || ite.index() <= si.min) { it = ite; return; } // no overlap
+    ptrdiff_t dec1 = si.min - it.index(), dec2 = ite.index() - si.max;
+    it  += (dec1 < 0) ? ((si.step()-((-dec1) % si.step())) % si.step()) : dec1;
+    ite -= (dec2 < 0) ? -((-dec2) % si.step()) : dec2;
+  }
+  /** Sub-vector object for a skyline vector of type V (the value_type of the
+   *  pointer type PT), restricted by the sub-index `si`. It stores the
+   *  begin/end iterators and the origin of the referenced vector; the two
+   *  constructors taking a vector narrow that range with
+   *  update_for_sub_skyline().
+   */
+  template <typename PT, typename SUBI> struct skyline_sub_vector {
+    typedef skyline_sub_vector<PT, SUBI> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * pV;
+    typedef typename select_ref<typename linalg_traits<V>::const_iterator,
+            typename linalg_traits<V>::iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    /* Narrowed iterator range and origin of the referenced vector, plus the sub-index. */
+    iterator begin_, end_;
+    porigin_type origin;
+    SUBI si;
+    /* The size is the size of the sub-index, not of the stored range. */
+    size_type size(void) const { return si.size(); }
+    /* Element i of the sub-vector is element si.index(i) of the referenced vector. */
+    reference operator[](size_type i) const
+    { return linalg_traits<V>::access(origin, begin_, end_, si.index(i)); }
+    /* The const V& constructor casts constness away to obtain iterators and origin. */
+    skyline_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)),
+       end_(vect_end(v)), origin(linalg_origin(v)), si(s) {
+      update_for_sub_skyline(begin_, end_, si);
+    }
+    skyline_sub_vector(const V &v, const SUBI &s)
+      : begin_(vect_begin(const_cast<V &>(v))),
+	end_(vect_end(const_cast<V &>(v))),
+	origin(linalg_origin(const_cast<V &>(v))), si(s) {
+      update_for_sub_skyline(begin_, end_, si);
+    }
+    skyline_sub_vector() {}
+    skyline_sub_vector(const skyline_sub_vector<pV, SUBI> &cr) // copies the already narrowed range
+      : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {}
+  };
+
+  /** Positions a skyline sub-vector iterator at the start of the sub-range.
+   *  The underlying begin and end are re-derived from the origin o and
+   *  narrowed with update_for_sub_skyline(); only the narrowed begin is kept
+   *  (in it.itb). Overload taking a non-const skyline_sub_vector pointer tag.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_begin(skyline_sub_vector_iterator<IT, MIT, SUBI> &it,
+                           ORG o, skyline_sub_vector<PT, SUBI> *,
+                           linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    IT range_end = it.itb;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(range_end, o, base_ptr(), base_ref());
+    update_for_sub_skyline(it.itb, range_end, it.si);
+  }
+  /** Same as the non-const overload, selected when the tag argument is a
+   *  pointer to const skyline_sub_vector: the underlying range is re-derived
+   *  from o, narrowed with update_for_sub_skyline(), and the narrowed begin
+   *  is left in it.itb.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_begin(skyline_sub_vector_iterator<IT, MIT, SUBI> &it,
+                           ORG o, const skyline_sub_vector<PT, SUBI> *,
+                           linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    IT range_end = it.itb;
+    set_to_begin(it.itb, o, base_ptr(), base_ref());
+    set_to_end(range_end, o, base_ptr(), base_ref());
+    update_for_sub_skyline(it.itb, range_end, it.si);
+  }
+  
+  /** Positions a skyline sub-vector iterator at the end of the sub-range.
+   *  The underlying begin and end are re-derived from the origin o and
+   *  narrowed with update_for_sub_skyline(); only the narrowed end is kept
+   *  (in it.itb). Overload taking a non-const skyline_sub_vector pointer tag.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_end(skyline_sub_vector_iterator<IT, MIT, SUBI> &it,
+                         ORG o, skyline_sub_vector<PT, SUBI> *,
+                         linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    IT range_begin = it.itb;
+    set_to_begin(range_begin, o, base_ptr(), base_ref());
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    update_for_sub_skyline(range_begin, it.itb, it.si);
+  }
+  /** Same as the non-const overload, selected when the tag argument is a
+   *  pointer to const skyline_sub_vector: the underlying range is re-derived
+   *  from o, narrowed with update_for_sub_skyline(), and the narrowed end is
+   *  left in it.itb.
+   */
+  template <typename IT, typename MIT, typename SUBI, typename ORG,
+            typename PT>
+  inline void set_to_end(skyline_sub_vector_iterator<IT, MIT, SUBI> &it,
+                         ORG o, const skyline_sub_vector<PT, SUBI> *,
+                         linalg_modifiable) {
+    typedef linalg_traits<skyline_sub_vector<PT, SUBI> > sub_traits;
+    typedef typename sub_traits::pV base_ptr;
+    typedef typename sub_traits::V_reference base_ref;
+    IT range_begin = it.itb;
+    set_to_begin(range_begin, o, base_ptr(), base_ref());
+    set_to_end(it.itb, o, base_ptr(), base_ref());
+    update_for_sub_skyline(range_begin, it.itb, it.si);
+  }
+
+  /** linalg_traits specialization for skyline_sub_vector<PT, SUBI>: exposes
+   *  its associated types, builds begin/end iterators, clears the range and
+   *  gives indexed access. storage_type is abstract_skyline and index_sorted
+   *  is linalg_true.
+   */
+  template <typename PT, typename SUBI>
+  struct linalg_traits<skyline_sub_vector<PT, SUBI> > {
+    typedef skyline_sub_vector<PT, SUBI> this_type;
+    typedef this_type *pthis_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename linalg_traits<V>::is_reference V_reference;
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef V * pV;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_vector linalg_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, typename
+            linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<V>::const_iterator const_V_iterator;
+    typedef typename linalg_traits<V>::iterator V_iterator;    
+    typedef typename select_ref<const_V_iterator, V_iterator, 
+				PT>::ref_type pre_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    skyline_sub_vector_iterator<pre_iterator, pre_iterator, SUBI>,
+	    PT>::ref_type iterator; // chosen by select_ref on PT
+    typedef skyline_sub_vector_iterator<const_V_iterator, pre_iterator, SUBI>
+            const_iterator;
+    typedef abstract_skyline storage_type;
+    typedef linalg_true index_sorted;
+    static size_type size(const this_type &v) { return v.size(); }
+    static iterator begin(this_type &v) {
+      iterator it;
+      it.itb = v.begin_; it.si = v.si;
+      if (!is_const_reference(is_reference())) // then re-derive the position from v.origin
+	set_to_begin(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator begin(const this_type &v) {
+      const_iterator it; it.itb = v.begin_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	{ set_to_begin(it, v.origin, pthis_type(), is_reference()); }
+      return it;
+    }
+    static iterator end(this_type &v) {
+      iterator it;
+      it.itb = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static const_iterator end(const this_type &v) {
+      const_iterator it; it.itb = v.end_; it.si = v.si;
+      if (!is_const_reference(is_reference()))
+	set_to_end(it, v.origin, pthis_type(), is_reference());
+      return it;
+    }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void clear(origin_type*, const iterator &it, const iterator &ite)
+    { std::fill(it, ite, value_type(0)); } // the origin argument is unused here
+    static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); }
+    static value_type access(const origin_type *o, const const_iterator &it,
+			     const const_iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+    static reference access(origin_type *o, const iterator &it,
+			    const iterator &ite, size_type i)
+    { return linalg_traits<V>::access(o, it.itb, ite.itb, it.si.index(i)); }
+  };
+
+  /** Stream insertion for skyline_sub_vector: delegates to gmm::write and
+   *  returns the stream.
+   */
+  template <typename PT, typename SUBI>
+  std::ostream &operator <<(std::ostream &o,
+                            const skyline_sub_vector<PT, SUBI> &m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+  /* ******************************************************************** */
+  /*		sub vector.                                               */
+  /* ******************************************************************** */
+  /* sub_vector_type<PT, SUBI>::vector_type is the sub vector type        */
+  /* returned by sub_vector(v, sub_index)                                 */
+  /************************************************************************/
+  /* Primary template: any (SUBI, storage type) pair not matched by a specialization yields abstract_null_type. */
+  template <typename PT, typename SUBI, typename st_type> struct svrt_ir {
+    typedef abstract_null_type vector_type;
+  };
+  /* abstract_dense storage + sub_index: tab_ref_index_ref_with_origin over sub_index::const_iterator. */
+  template <typename PT>
+  struct svrt_ir<PT, sub_index, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_index_ref_with_origin<iterator,
+      sub_index::const_iterator, V> vector_type;
+  }; 
+  /* abstract_dense storage + unsorted_sub_index: tab_ref_index_ref_with_origin over unsorted_sub_index::const_iterator. */
+  template <typename PT>
+  struct svrt_ir<PT, unsorted_sub_index, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_index_ref_with_origin<iterator,
+      unsorted_sub_index::const_iterator, V> vector_type;
+  }; 
+  /* abstract_dense storage + sub_interval: tab_ref_with_origin over the vector's iterator. */
+  template <typename PT>
+  struct svrt_ir<PT, sub_interval, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_with_origin<iterator, V> vector_type;
+  }; 
+  /* abstract_dense storage + sub_slice: tab_ref_reg_spaced_with_origin over the vector's iterator. */
+  template <typename PT>
+  struct svrt_ir<PT, sub_slice, abstract_dense> {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename vect_ref_type<PT,  V>::iterator iterator;
+    typedef tab_ref_reg_spaced_with_origin<iterator, V> vector_type;
+  };
+  /* abstract_skyline storage, generic SUBI: skyline_sub_vector (sub_index and unsorted_sub_index are specialized separately). */
+  template <typename PT, typename SUBI>
+  struct svrt_ir<PT, SUBI, abstract_skyline> {
+    typedef skyline_sub_vector<PT, SUBI> vector_type;
+  };
+  /* abstract_skyline storage + sub_index: uses sparse_sub_vector instead of skyline_sub_vector. */
+  template <typename PT>
+  struct svrt_ir<PT, sub_index, abstract_skyline> {
+    typedef sparse_sub_vector<PT, sub_index> vector_type;
+  };
+  /* abstract_skyline storage + unsorted_sub_index: uses sparse_sub_vector instead of skyline_sub_vector. */
+  template <typename PT>
+  struct svrt_ir<PT, unsorted_sub_index, abstract_skyline> {
+    typedef sparse_sub_vector<PT, unsorted_sub_index> vector_type;
+  };
+  /* abstract_sparse storage, any SUBI: sparse_sub_vector. */
+
+  template <typename PT, typename SUBI>
+  struct svrt_ir<PT, SUBI, abstract_sparse> {
+    typedef sparse_sub_vector<PT, SUBI> vector_type;
+  };
+  /* Maps (PT, SUBI) to the sub-vector type by dispatching svrt_ir on linalg_traits<V>::storage_type, V being PT's value_type. */
+  template <typename PT, typename SUBI>
+  struct sub_vector_type {
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef typename svrt_ir<PT, SUBI,
+      typename linalg_traits<V>::storage_type>::vector_type vector_type;
+  };
+
+  /** Builds the sub-vector object of the const vector v selected by si.
+   *  GMM_ASSERT2 reports "sub vector too large, ..." when si.last() exceeds
+   *  vect_size(v).
+   */
+  template <typename V, typename SUBI>
+  typename select_return<
+    typename sub_vector_type<const V *, SUBI>::vector_type,
+    typename sub_vector_type<V *, SUBI>::vector_type, const V *>::return_type
+  sub_vector(const V &v, const SUBI &si) {
+    typedef typename sub_vector_type<const V *, SUBI>::vector_type
+      const_sub_type;
+    typedef typename sub_vector_type<V *, SUBI>::vector_type
+      mutable_sub_type;
+    typedef typename select_return<const_sub_type, mutable_sub_type,
+                                   const V *>::return_type result_type;
+
+    GMM_ASSERT2(si.last() <= vect_size(v),
+                "sub vector too large, " << si.last() << " > " << vect_size(v));
+    return result_type(linalg_cast(v), si);
+  }
+
+  /** Builds the sub-vector object of the modifiable vector v selected by si.
+   *  GMM_ASSERT2 reports "sub vector too large, ..." when si.last() exceeds
+   *  vect_size(v).
+   */
+  template <typename V, typename SUBI>
+  typename select_return<
+    typename sub_vector_type<const V *, SUBI>::vector_type,
+    typename sub_vector_type<V *, SUBI>::vector_type, V *>::return_type
+  sub_vector(V &v, const SUBI &si) {
+    typedef typename sub_vector_type<const V *, SUBI>::vector_type
+      const_sub_type;
+    typedef typename sub_vector_type<V *, SUBI>::vector_type
+      mutable_sub_type;
+    typedef typename select_return<const_sub_type, mutable_sub_type,
+                                   V *>::return_type result_type;
+
+    GMM_ASSERT2(si.last() <= vect_size(v),
+                "sub vector too large, " << si.last() << " > " << vect_size(v));
+    return result_type(linalg_cast(v), si);
+  }
+
+}
+
+#endif //  GMM_SUB_VECTOR_H__
--- a/gmm/gmm_superlu_interface.h
+++ b/gmm/gmm_superlu_interface.h
@ -0,0 +1,410 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_superlu_interface.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date October 17, 2003.
+   @brief Interface with SuperLU (LU direct solver for sparse matrices).
+*/
+#if defined(GMM_USES_SUPERLU) && !defined(GETFEM_VERSION)
+
+#ifndef GMM_SUPERLU_INTERFACE_H
+#define GMM_SUPERLU_INTERFACE_H
+
+#include "gmm_kernel.h"
+
+typedef int int_t;
+
+/* because SRC/util.h defines TRUE and FALSE ... */
+#ifdef TRUE
+# undef TRUE
+#endif
+#ifdef FALSE
+# undef FALSE
+#endif
+
+#include "superlu/slu_Cnames.h"
+#include "superlu/supermatrix.h"
+#include "superlu/slu_util.h"
+
+namespace SuperLU_S {
+#include "superlu/slu_sdefs.h"
+}
+namespace SuperLU_D {
+#include "superlu/slu_ddefs.h"
+}
+namespace SuperLU_C {
+#include "superlu/slu_cdefs.h"
+}
+namespace SuperLU_Z {
+#include "superlu/slu_zdefs.h" 
+}
+
+
+
+namespace gmm {
+
+  /*  interface for Create_CompCol_Matrix */
+
+  /** float overload: forwards every argument to
+   *  SuperLU_S::sCreate_CompCol_Matrix, appending SLU_NC, SLU_S and SLU_GE.
+   */
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    float *a, int *ir, int *jc)
+  {
+    SuperLU_S::sCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc, SLU_NC, SLU_S,
+                                      SLU_GE);
+  }
+  
+  /** double overload: forwards every argument to
+   *  SuperLU_D::dCreate_CompCol_Matrix, appending SLU_NC, SLU_D and SLU_GE.
+   */
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    double *a, int *ir, int *jc)
+  {
+    SuperLU_D::dCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc, SLU_NC, SLU_D,
+                                      SLU_GE);
+  }
+  
+  /** std::complex<float> overload: reinterprets the value array as
+   *  SuperLU_C::complex and forwards to SuperLU_C::cCreate_CompCol_Matrix,
+   *  appending SLU_NC, SLU_C and SLU_GE.
+   */
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    std::complex<float> *a, int *ir, int *jc)
+  {
+    SuperLU_C::complex *values = reinterpret_cast<SuperLU_C::complex *>(a);
+    SuperLU_C::cCreate_CompCol_Matrix(A, m, n, nnz, values, ir, jc,
+                                      SLU_NC, SLU_C, SLU_GE);
+  }
+  
+  /** std::complex<double> overload: reinterprets the value array as
+   *  SuperLU_Z::doublecomplex and forwards to
+   *  SuperLU_Z::zCreate_CompCol_Matrix, appending SLU_NC, SLU_Z and SLU_GE.
+   */
+  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+                                    std::complex<double> *a, int *ir, int *jc)
+  {
+    SuperLU_Z::doublecomplex *values =
+      reinterpret_cast<SuperLU_Z::doublecomplex *>(a);
+    SuperLU_Z::zCreate_CompCol_Matrix(A, m, n, nnz, values, ir, jc,
+                                      SLU_NC, SLU_Z, SLU_GE);
+  }
+
+  /*  interface for Create_Dense_Matrix */
+
+  /** float overload: forwards every argument to
+   *  SuperLU_S::sCreate_Dense_Matrix, appending SLU_DN, SLU_S and SLU_GE.
+   */
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
+                                  float *a, int k) {
+    SuperLU_S::sCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_S, SLU_GE);
+  }
+  /** double overload: forwards every argument to
+   *  SuperLU_D::dCreate_Dense_Matrix, appending SLU_DN, SLU_D and SLU_GE.
+   */
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
+                                  double *a, int k) {
+    SuperLU_D::dCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_D, SLU_GE);
+  }
+  /** std::complex<float> overload: reinterprets the value array as
+   *  SuperLU_C::complex and forwards to SuperLU_C::cCreate_Dense_Matrix,
+   *  appending SLU_DN, SLU_C and SLU_GE.
+   */
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
+                                  std::complex<float> *a, int k)
+  {
+    SuperLU_C::complex *values = reinterpret_cast<SuperLU_C::complex *>(a);
+    SuperLU_C::cCreate_Dense_Matrix(A, m, n, values, k,
+                                    SLU_DN, SLU_C, SLU_GE);
+  }
+  /** std::complex<double> overload: reinterprets the value array as
+   *  SuperLU_Z::doublecomplex and forwards to
+   *  SuperLU_Z::zCreate_Dense_Matrix, appending SLU_DN, SLU_Z and SLU_GE.
+   */
+  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
+                                  std::complex<double> *a, int k)
+  {
+    SuperLU_Z::doublecomplex *values =
+      reinterpret_cast<SuperLU_Z::doublecomplex *>(a);
+    SuperLU_Z::zCreate_Dense_Matrix(A, m, n, values, k,
+                                    SLU_DN, SLU_Z, SLU_GE);
+  }
+
+  /*  interface for gssv */
+  /* DECL_GSSV(NAMESPACE, FNAME, FLOATTYPE, KEYTYPE) defines an inline
+   * SuperLU_gssv overload that forwards its arguments to NAMESPACE::FNAME.
+   * The unnamed trailing KEYTYPE parameter only selects the overload; its
+   * value is ignored. FLOATTYPE is not used by this macro.
+   */
+#define DECL_GSSV(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
+  inline void SuperLU_gssv(superlu_options_t *options, SuperMatrix *A, int *p, \
+  int *q, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,               \
+  SuperLUStat_t *stats, int *info, KEYTYPE) {                           \
+  NAMESPACE::FNAME(options, A, p, q, L, U, B, stats, info);             \
+  }
+
+  DECL_GSSV(SuperLU_S,sgssv,float,float) // one overload per scalar type
+  DECL_GSSV(SuperLU_C,cgssv,float,std::complex<float>)
+  DECL_GSSV(SuperLU_D,dgssv,double,double)
+  DECL_GSSV(SuperLU_Z,zgssv,double,std::complex<double>)
+
+  /*  interface for gssvx */
+  /* DECL_GSSVX(NAMESPACE, FNAME, FLOATTYPE, KEYTYPE) defines an inline
+   * SuperLU_gssvx overload that forwards its arguments to NAMESPACE::FNAME,
+   * supplying a local NAMESPACE::mem_usage_t, and returns that structure's
+   * for_lu member as a float. FLOATTYPE is the type of the R, C,
+   * recip_pivot_growth, rcond, ferr and berr arrays; the unnamed trailing
+   * KEYTYPE parameter only selects the overload.
+   */
+#define DECL_GSSVX(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
+    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,	\
+		     int *perm_c, int *perm_r, int *etree, char *equed,  \
+		     FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,         \
+		     SuperMatrix *U, void *work, int lwork,              \
+		     SuperMatrix *B, SuperMatrix *X,                     \
+		     FLOATTYPE *recip_pivot_growth,                      \
+		     FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \
+		     SuperLUStat_t *stats, int *info, KEYTYPE) {         \
+    NAMESPACE::mem_usage_t mem_usage;                                    \
+    NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
+		     U, work, lwork, B, X, recip_pivot_growth, rcond,    \
+		     ferr, berr, &mem_usage, stats, info);               \
+    return mem_usage.for_lu; /* bytes used by the factor storage */     \
+  }
+
+  DECL_GSSVX(SuperLU_S,sgssvx,float,float) // one overload per scalar type
+  DECL_GSSVX(SuperLU_C,cgssvx,float,std::complex<float>)
+  DECL_GSSVX(SuperLU_D,dgssvx,double,double)
+  DECL_GSSVX(SuperLU_Z,zgssvx,double,std::complex<double>)
+
+  /* ********************************************************************* */
+  /*   SuperLU solve interface                                             */
+  /* ********************************************************************* */
+
+  /** Solve A X = B with the SuperLU gssvx driver.
+   *
+   *  A is copied into a csc_matrix<T> and B into a temporary right-hand
+   *  side; the computed solution is copied into X_ at the end.
+   *
+   *  @param A           matrix of the system.
+   *  @param X_          receives the solution (written through a const_cast).
+   *  @param B           right-hand side.
+   *  @param rcond_      receives the rcond value reported by the driver.
+   *  @param permc_spec  column ordering:
+   *                       0: natural ordering,
+   *                       1: minimum degree ordering on structure of A'*A,
+   *                       2: minimum degree ordering on structure of A'+A,
+   *                       3: approximate minimum degree column ordering.
+   *  @return the info value set by the driver. A negative value triggers
+   *          GMM_ASSERT1, a positive one only emits a warning.
+   */
+  template <typename MAT, typename VECTX, typename VECTB>
+  int SuperLU_solve(const MAT &A, const VECTX &X_, const VECTB &B,
+                    double& rcond_, int permc_spec = 3) {
+    VECTX &X = const_cast<VECTX &>(X_);
+    typedef typename linalg_traits<MAT>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    int m = mat_nrows(A), n = mat_ncols(A), nrhs = 1, info = 0;
+
+    csc_matrix<T> csc_A(m, n); gmm::copy(A, csc_A);
+    std::vector<T> rhs(m), sol(m);
+    gmm::copy(B, rhs);
+
+    int nz = nnz(csc_A);
+    if ((2 * nz / n) >= m)
+      GMM_WARNING2("CAUTION : it seems that SuperLU has a problem"
+                   " for nearly dense sparse matrices");
+
+    superlu_options_t options;
+    set_default_options(&options);
+    options.ColPerm = NATURAL;
+    options.PrintStat = NO;
+    options.ConditionNumber = YES;
+    switch (permc_spec) {
+    case 1 : options.ColPerm = MMD_ATA; break;
+    case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
+    case 3 : options.ColPerm = COLAMD; break;
+    }
+    SuperLUStat_t stat;
+    StatInit(&stat);
+
+    SuperMatrix SA, SL, SU, SB, SX; // SuperLU format.
+    // The values are passed with their real scalar type T (not forced to
+    // double) so that the matrix is created with the same scalar type as
+    // the dense right-hand side / solution and as the T() key given to
+    // SuperLU_gssvx below.
+    Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])),
+                          (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0])));
+    Create_Dense_Matrix(&SB, m, nrhs, &rhs[0], m);
+    Create_Dense_Matrix(&SX, m, nrhs, &sol[0], m);
+    memset(&SL,0,sizeof SL);
+    memset(&SU,0,sizeof SU);
+
+    std::vector<int> etree(n);
+    char equed[] = "B";
+    std::vector<R> Rscale(m),Cscale(n); // row and column scale factors
+    std::vector<R> ferr(nrhs), berr(nrhs);
+    R recip_pivot_gross, rcond;
+    std::vector<int> perm_r(m), perm_c(n);
+
+    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
+                  &etree[0] /* output */, equed /* output         */,
+                  &Rscale[0] /* row scale factors (output)        */,
+                  &Cscale[0] /* col scale factors (output)        */,
+                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
+                  NULL /* work                                    */,
+                  0 /* lwork: superlu auto allocates (input)      */,
+                  &SB /* rhs */, &SX /* solution                  */,
+                  &recip_pivot_gross /* reciprocal pivot growth   */
+                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
+                  &rcond /*estimate of the reciprocal condition   */
+                  /* number of the matrix A after equilibration   */,
+                  &ferr[0] /* estimated forward error             */,
+                  &berr[0] /* relative backward error             */,
+                  &stat, &info, T());
+    rcond_ = rcond;
+    // Release every SuperLU object before reporting errors.
+    Destroy_SuperMatrix_Store(&SB);
+    Destroy_SuperMatrix_Store(&SX);
+    Destroy_SuperMatrix_Store(&SA);
+    Destroy_SuperNode_Matrix(&SL);
+    Destroy_CompCol_Matrix(&SU);
+    StatFree(&stat);
+    GMM_ASSERT1(info >= 0, "SuperLU solve failed: info =" << info);
+    if (info > 0) GMM_WARNING1("SuperLU solve failed: info =" << info);
+    gmm::copy(sol, X);
+    return info;
+  }
+
+  /** Holds a SuperLU factorization built by build_with() so that solve()
+   *  can be called repeatedly on it.
+   *
+   *  Copy construction and assignment are only accepted between objects
+   *  that hold no factorization (checked with GMM_ASSERT2); nothing is
+   *  copied.
+   */
+  template <class T> class SuperLU_factor {
+    typedef typename number_traits<T>::magnitude_type R;
+
+    csc_matrix<T> csc_A;                     // CSC copy made by build_with()
+    mutable SuperMatrix SA, SL, SB, SU, SX;
+    mutable SuperLUStat_t stat;
+    mutable superlu_options_t options;
+    float memory_used;                       // value returned by memsize()
+    mutable std::vector<int> etree, perm_r, perm_c;
+    mutable std::vector<R> Rscale, Cscale;
+    mutable std::vector<R> ferr, berr;
+    mutable std::vector<T> rhs;
+    mutable std::vector<T> sol;
+    mutable bool is_init;                    // set by build_with() on success
+    mutable char equed;
+
+  public :
+    enum { LU_NOTRANSP, LU_TRANSP, LU_CONJUGATED };
+    void free_supermatrix(void);
+    template <class MAT> void build_with(const MAT &A,  int permc_spec = 3);
+    template <typename VECTX, typename VECTB> 
+    /* transp = LU_NOTRANSP   -> solves Ax = B
+       transp = LU_TRANSP     -> solves A'x = B
+       transp = LU_CONJUGATED -> solves conj(A)X = B */
+    void solve(const VECTX &X_, const VECTB &B, int transp=LU_NOTRANSP) const;
+    // memory_used is initialised so that memsize() never reads an
+    // indeterminate value when no factorization has been built yet.
+    SuperLU_factor(void) : memory_used(0), is_init(false) {}
+    SuperLU_factor(const SuperLU_factor& other)
+      : memory_used(0), is_init(false) {
+      GMM_ASSERT2(!(other.is_init),
+                  "copy of initialized SuperLU_factor is forbidden");
+    }
+    SuperLU_factor& operator=(const SuperLU_factor& other) {
+      GMM_ASSERT2(!(other.is_init) && !is_init,
+                  "assignment of initialized SuperLU_factor is forbidden");
+      return *this;
+    }
+    ~SuperLU_factor() { free_supermatrix(); }
+    /* Value returned by SuperLU_gssvx during the last build_with(),
+       0 if build_with() has not been called. */
+    float memsize() const { return memory_used; }
+  };
+
+
+  /* Release the SuperLU objects owned by the factorization, if any.
+     is_init is cleared afterwards so that a second call (for instance an
+     explicit call followed by the destructor) does not free the same
+     stores twice. */
+  template <class T> void SuperLU_factor<T>::free_supermatrix(void) {
+      if (is_init) {
+        if (SB.Store) Destroy_SuperMatrix_Store(&SB);
+        if (SX.Store) Destroy_SuperMatrix_Store(&SX);
+        if (SA.Store) Destroy_SuperMatrix_Store(&SA);
+        if (SL.Store) Destroy_SuperNode_Matrix(&SL);
+        if (SU.Store) Destroy_CompCol_Matrix(&SU);
+        is_init = false;
+      }
+    }
+
+    
+    /* Factorize A and keep the result for later calls to solve().
+     *
+     * Any previous factorization is released first. A is copied into
+     * csc_A, then SuperLU_gssvx is called with dense right-hand side and
+     * solution matrices having 0 columns; they are re-created with one
+     * column afterwards for use by solve().
+     *
+     * Column permutation, according to permc_spec:
+     *   permc_spec = 0: use the natural ordering
+     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
+     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
+     *   permc_spec = 3: use approximate minimum degree column ordering
+     *
+     * GMM_ASSERT1 fails if the driver reports info != 0.
+     */
+    template <class T> template <class MAT>
+    void SuperLU_factor<T>::build_with(const MAT &A,  int permc_spec) {
+      free_supermatrix();
+      // m = number of rows, n = number of columns, as in SuperLU_solve.
+      int m = mat_nrows(A), n = mat_ncols(A), info = 0;
+      csc_A.init_with(A);
+
+      rhs.resize(m); sol.resize(m);
+      gmm::clear(rhs);
+      int nz = nnz(csc_A);
+
+      set_default_options(&options);
+      options.ColPerm = NATURAL;
+      options.PrintStat = NO;
+      options.ConditionNumber = NO;
+      switch (permc_spec) {
+      case 1 : options.ColPerm = MMD_ATA; break;
+      case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
+      case 3 : options.ColPerm = COLAMD; break;
+      }
+      StatInit(&stat);
+
+      // The values are passed with their real scalar type T (not forced to
+      // double) so that the matrix is created with the same scalar type as
+      // the dense matrices and as the T() key given to SuperLU_gssvx.
+      Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])),
+                            (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0])));
+
+      Create_Dense_Matrix(&SB, m, 0, &rhs[0], m);
+      Create_Dense_Matrix(&SX, m, 0, &sol[0], m);
+      memset(&SL,0,sizeof SL);
+      memset(&SU,0,sizeof SU);
+      equed = 'B';
+      Rscale.resize(m); Cscale.resize(n); etree.resize(n);
+      ferr.resize(1); berr.resize(1);
+      R recip_pivot_gross, rcond;
+      perm_r.resize(m); perm_c.resize(n);
+      memory_used = SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
+                    &etree[0] /* output */, &equed /* output        */,
+                    &Rscale[0] /* row scale factors (output)        */,
+                    &Cscale[0] /* col scale factors (output)        */,
+                    &SL /* fact L (output)*/, &SU /* fact U (output)*/,
+                    NULL /* work                                    */,
+                    0 /* lwork: superlu auto allocates (input)      */,
+                    &SB /* rhs */, &SX /* solution                  */,
+                    &recip_pivot_gross /* reciprocal pivot growth   */
+                    /* factor max_j( norm(A_j)/norm(U_j) ).         */,
+                    &rcond /*estimate of the reciprocal condition   */
+                    /* number of the matrix A after equilibration   */,
+                    &ferr[0] /* estimated forward error             */,
+                    &berr[0] /* relative backward error             */,
+                    &stat, &info, T());
+
+      // Replace the 0-column dense matrices by 1-column ones for solve().
+      Destroy_SuperMatrix_Store(&SB);
+      Destroy_SuperMatrix_Store(&SX);
+      Create_Dense_Matrix(&SB, m, 1, &rhs[0], m);
+      Create_Dense_Matrix(&SX, m, 1, &sol[0], m);
+      StatFree(&stat);
+
+      GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
+      is_init = true;
+    }
+    
+    /* Solve with the stored factorization.
+     *   transp = LU_NOTRANSP   -> options.Trans = NOTRANS
+     *   transp = LU_TRANSP     -> options.Trans = TRANS
+     *   transp = LU_CONJUGATED -> options.Trans = CONJ
+     * Any other value fails a GMM_ASSERT1. B is copied into the internal
+     * right-hand side and the internal solution is copied into X_
+     * (written through a const_cast). */
+    template <class T> template <typename VECTX, typename VECTB>
+    void SuperLU_factor<T>::solve(const VECTX &X_, const VECTB &B,
+                                  int transp) const {
+      VECTX &X = const_cast<VECTX &>(X_);
+      gmm::copy(B, rhs);
+
+      // Reuse the existing factors, without iterative refinement.
+      options.Fact = FACTORED;
+      options.IterRefine = NOREFINE;
+      if (transp == LU_NOTRANSP) {
+        options.Trans = NOTRANS;
+      } else if (transp == LU_TRANSP) {
+        options.Trans = TRANS;
+      } else if (transp == LU_CONJUGATED) {
+        options.Trans = CONJ;
+      } else {
+        GMM_ASSERT1(false, "invalid value for transposition option");
+      }
+
+      StatInit(&stat);
+      int status = 0;
+      R pivot_growth, cond_estimate;
+      SuperLU_gssvx(&options, &SA,
+                    &perm_c[0], &perm_r[0],
+                    &etree[0],
+                    &equed,
+                    &Rscale[0],       // row scale factors
+                    &Cscale[0],       // col scale factors
+                    &SL, &SU,         // factors L and U
+                    NULL,             // work
+                    0,                // lwork: superlu auto allocates
+                    &SB, &SX,         // rhs and solution
+                    &pivot_growth,    // reciprocal pivot growth
+                    &cond_estimate,   // reciprocal condition number estimate
+                    &ferr[0],         // estimated forward error
+                    &berr[0],         // relative backward error
+                    &stat, &status, T());
+      StatFree(&stat);
+      GMM_ASSERT1(status == 0, "SuperLU solve failed: info=" << status);
+      gmm::copy(sol, X);
+    }
+
+  /* Calls P.solve with v1 as right-hand side and v2 as the vector
+     receiving the solution, without transposition. */
+  template <typename T, typename V1, typename V2> inline
+  void mult(const SuperLU_factor<T>& P, const V1 &v1, const V2 &v2)
+  { P.solve(v2, v1, SuperLU_factor<T>::LU_NOTRANSP); }
+
+  /* Same as mult, but asks P.solve for the transposed system
+     (LU_TRANSP). */
+  template <typename T, typename V1, typename V2> inline
+  void transposed_mult(const SuperLU_factor<T>& P,
+                       const V1 &v1, const V2 &v2) {
+    const int mode = SuperLU_factor<T>::LU_TRANSP;
+    P.solve(v2, v1, mode);
+  }
+
+}
+
+  
+#endif // GMM_SUPERLU_INTERFACE_H
+
+#endif // GMM_USES_SUPERLU
--- a/gmm/gmm_transposed.h
+++ b/gmm/gmm_transposed.h
@ -0,0 +1,244 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_transposed.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date November 10, 2002.
+   @brief Generic transposed matrices
+*/
+#ifndef GMM_TRANSPOSED_H__
+#define GMM_TRANSPOSED_H__
+
+#include "gmm_def.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*		transposed reference                    		   */
+  /* ********************************************************************* */
+  
+  /* Transposed view of a matrix M that is traversed through its rows.
+     PT is a pointer type whose value_type is M. The object stores the row
+     iterators of the referenced matrix (begin_, end_), its origin and the
+     swapped dimensions: nr = mat_ncols(m), nc = mat_nrows(m). */
+  template <typename PT> struct  transposed_row_ref {
+    
+    typedef transposed_row_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_col_iterator, typename linalg_traits<this_type>
+            ::col_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    /* Build the view on matrix m. */
+    transposed_row_ref(ref_M m)
+      : begin_(mat_row_begin(m)), end_(mat_row_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {}
+
+    /* Build the view from a transposed_row_ref<M *>, copying its members. */
+    transposed_row_ref(const transposed_row_ref<CPT> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+
+    /* Element (i, j) of the view: component i of the j-th stored row. */
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_+j, i); }
+  };
+
+  /* linalg_traits for transposed_row_ref: the view is declared col_major.
+     Its column types and iterators are the row types and iterators of the
+     referenced matrix M, and its row types are abstract_null_type. */
+  template <typename PT> struct linalg_traits<transposed_row_ref<PT> > {
+    typedef transposed_row_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    /* No row access on this view. */
+    typedef abstract_null_type sub_row_type;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_row_iterator;
+    /* Columns of the view are the rows of M. */
+    typedef typename linalg_traits<M>::const_sub_row_type const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, typename
+	    linalg_traits<M>::sub_row_type, PT>::ref_type sub_col_type;
+    typedef typename linalg_traits<M>::const_row_iterator const_col_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::row_iterator, PT>::ref_type col_iterator;
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type ncols(const this_type &v) { return v.nc; }
+    static size_type nrows(const this_type &v) { return v.nr; }
+    /* A column of the view is obtained through linalg_traits<M>::row. */
+    static const_sub_col_type col(const const_col_iterator &it)
+    { return linalg_traits<M>::row(it); }
+    static sub_col_type col(const col_iterator &it)
+    { return linalg_traits<M>::row(it); }
+    static col_iterator col_begin(this_type &m) { return m.begin_; }
+    static col_iterator col_end(this_type &m) { return m.end_; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return m.begin_; }
+    static const_col_iterator col_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &v);
+    /* Element access is forwarded unchanged to linalg_traits<M>::access. */
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(itcol, i); }
+    static reference access(const col_iterator &itcol, size_type i)
+    { return linalg_traits<M>::access(itcol, i); }
+  };
+  
+  /* Clear the view by clearing each of its columns in turn. */
+  template <typename PT>
+  void linalg_traits<transposed_row_ref<PT> >::do_clear(this_type &v) {
+    col_iterator cur = mat_col_begin(v), last = mat_col_end(v);
+    while (cur != last) {
+      clear(col(cur));
+      ++cur;
+    }
+  }
+  
+  /* Stream output: delegates to gmm::write and returns the stream. */
+  template<typename PT>
+  std::ostream &operator <<(std::ostream &o,
+                            const transposed_row_ref<PT>& m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+  /* Transposed view of a matrix M that is traversed through its columns.
+     PT is a pointer type whose value_type is M. The object stores the
+     column iterators of the referenced matrix (begin_, end_), its origin
+     and the swapped dimensions: nr = mat_ncols(m), nc = mat_nrows(m). */
+  template <typename PT> struct  transposed_col_ref {
+    
+    typedef transposed_col_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef M * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_M;
+    typedef typename select_ref<typename linalg_traits<this_type>
+            ::const_row_iterator, typename linalg_traits<this_type>
+            ::row_iterator, PT>::ref_type iterator;
+    typedef typename linalg_traits<this_type>::reference reference;
+    typedef typename linalg_traits<this_type>::porigin_type porigin_type;
+    
+    iterator begin_, end_;
+    porigin_type origin;
+    size_type nr, nc;
+
+    /* Build the view on matrix m. */
+    transposed_col_ref(ref_M m)
+      : begin_(mat_col_begin(m)), end_(mat_col_end(m)),
+	origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {}
+
+    /* Build the view from a transposed_col_ref<M *>, copying its members. */
+    transposed_col_ref(const transposed_col_ref<CPT> &cr) :
+      begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {}
+
+    /* Element (i, j) of the view: component j of the i-th stored column. */
+    reference operator()(size_type i, size_type j) const
+    { return linalg_traits<M>::access(begin_+i, j); }
+  };
+
+  /* linalg_traits for transposed_col_ref: the view is declared row_major.
+     Its row types and iterators are the column types and iterators of the
+     referenced matrix M, and its column types are abstract_null_type. */
+  template <typename PT> struct linalg_traits<transposed_col_ref<PT> > {
+    typedef transposed_col_ref<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type M;
+    typedef typename linalg_traits<M>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+			        PT>::ref_type porigin_type;
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type;
+    typedef typename linalg_traits<M>::value_type value_type;
+    typedef typename select_ref<value_type,
+            typename linalg_traits<M>::reference, PT>::ref_type reference;
+    typedef typename linalg_traits<M>::storage_type storage_type;
+    /* No column access on this view. */
+    typedef abstract_null_type sub_col_type;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_col_iterator;
+    /* Rows of the view are the columns of M. */
+    typedef typename linalg_traits<M>::const_sub_col_type const_sub_row_type;
+    typedef typename select_ref<abstract_null_type, typename
+	    linalg_traits<M>::sub_col_type, PT>::ref_type sub_row_type;
+    typedef typename linalg_traits<M>::const_col_iterator const_row_iterator;
+    typedef typename select_ref<abstract_null_type, typename
+            linalg_traits<M>::col_iterator, PT>::ref_type row_iterator;
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<M>::index_sorted index_sorted;
+    static size_type nrows(const this_type &v)
+    { return v.nr; }
+    static size_type ncols(const this_type &v)
+    { return v.nc; }
+    /* A row of the view is obtained through linalg_traits<M>::col. */
+    static const_sub_row_type row(const const_row_iterator &it)
+    { return linalg_traits<M>::col(it); }
+    static sub_row_type row(const row_iterator &it)
+    { return linalg_traits<M>::col(it); }
+    static row_iterator row_begin(this_type &m) { return m.begin_; }
+    static row_iterator row_end(this_type &m) { return m.end_; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return m.begin_; }
+    static const_row_iterator row_end(const this_type &m) { return m.end_; }
+    static origin_type* origin(this_type &v) { return v.origin; }
+    static const origin_type* origin(const this_type &v) { return v.origin; }
+    static void do_clear(this_type &m);
+    /* Element access is forwarded unchanged to linalg_traits<M>::access. */
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(itrow, i); }
+    static reference access(const row_iterator &itrow, size_type i)
+    { return linalg_traits<M>::access(itrow, i); }
+  };
+
+  /* Clear the view by clearing each of its rows in turn. */
+  template <typename PT>
+  void linalg_traits<transposed_col_ref<PT> >::do_clear(this_type &v) {
+    row_iterator cur = mat_row_begin(v), last = mat_row_end(v);
+    while (cur != last) {
+      clear(row(cur));
+      ++cur;
+    }
+  }
+
+  /* Stream output: delegates to gmm::write and returns the stream. */
+  template<typename PT>
+  std::ostream &operator <<(std::ostream &o,
+                            const transposed_col_ref<PT>& m) {
+    gmm::write(o, m);
+    return o;
+  }
+
+  /* Selects the transposed view type from an orientation tag TYPE.
+     The primary template yields abstract_null_type; the row_major and
+     col_major specialisations yield transposed_row_ref and
+     transposed_col_ref respectively, on const L * or L *. */
+  template <typename TYPE, typename PT> struct transposed_return_ {
+    typedef abstract_null_type return_type;
+  };
+  template <typename PT> struct transposed_return_<row_major, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<transposed_row_ref<const L *>,
+            transposed_row_ref< L *>, PT>::return_type return_type;
+  };
+  template <typename PT> struct transposed_return_<col_major, PT> {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename select_return<transposed_col_ref<const L *>,
+            transposed_col_ref< L *>, PT>::return_type return_type;
+  };
+  /* Return type of transposed(): dispatches on the principal orientation
+     of the sub_orientation of L, the value_type of PT. */
+  template <typename PT> struct transposed_return {
+    typedef typename std::iterator_traits<PT>::value_type L;
+    typedef typename transposed_return_<typename principal_orientation_type<
+            typename linalg_traits<L>::sub_orientation>::potype,
+	    PT>::return_type return_type;
+  };
+
+  /* Transposed view of a const matrix. The constness is removed only to
+     build the view, whose type is transposed_return<const L *>. */
+  template <typename L> inline
+  typename transposed_return<const L *>::return_type transposed(const L &l) {
+    typedef typename transposed_return<const L *>::return_type view_type;
+    L &target = const_cast<L &>(l);
+    return view_type(linalg_cast(target));
+  }
+
+  /* Transposed view of a non-const matrix. */
+  template <typename L> inline
+  typename transposed_return<L *>::return_type transposed(L &l) {
+    typedef typename transposed_return<L *>::return_type view_type;
+    return view_type(linalg_cast(l));
+  }
+
+}
+
+#endif //  GMM_TRANSPOSED_H__
--- a/gmm/gmm_tri_solve.h
+++ b/gmm/gmm_tri_solve.h
@ -0,0 +1,222 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_tri_solve.h
+   @author Yves Renard
+   @date October 13, 2002.
+   @brief Solve triangular linear system for dense matrices.
+*/
+
+#ifndef GMM_TRI_SOLVE_H__
+#define GMM_TRI_SOLVE_H__
+
+#include "gmm_interface.h"
+
+namespace gmm {
+
+  /* Upper triangular solve, column-major sparse storage.
+     Columns k-1 down to 0 are processed: x[j] is divided by the diagonal
+     entry unless is_unit, then x_j times each entry of column j whose
+     index is smaller than j is subtracted from the matching x entry. */
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type xj;
+
+    int j = int(k);
+    while (--j >= 0) {
+      COL column = mat_const_col(T, j);
+      entry_iterator cur = vect_const_begin(column);
+      entry_iterator last = vect_const_end(column);
+      if (!is_unit) x[j] /= column[j];
+      xj = x[j];
+      for (; cur != last; ++cur) {
+        if (int(cur.index()) < j) x[cur.index()] -= xj * (*cur);
+      }
+    }
+  }
+
+  /* Upper triangular solve, column-major dense storage.
+     Columns k-1 down to 0 are processed: x[j] is divided by the diagonal
+     entry unless is_unit, then x_j times the first j entries of column j
+     is subtracted from the first j entries of x. */
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type xj;
+
+    int j = int(k);
+    while (--j >= 0) {
+      COL column = mat_const_col(T, j);
+      entry_iterator cur = vect_const_begin(column);
+      entry_iterator last = cur + j;
+      typename linalg_traits<VecX>::iterator out = vect_begin(x);
+      if (!is_unit) x[j] /= column[j];
+      xj = x[j];
+      while (cur != last) {
+        *out -= xj * (*cur);
+        ++cur; ++out;
+      }
+    }
+  }
+
+  /* Lower triangular solve, column-major sparse storage.
+     Columns 0 to k-1 are processed: x[j] is divided by the diagonal entry
+     unless is_unit, then x_j times each entry of column j whose index is
+     in ]j, k[ is subtracted from the matching x entry. */
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type xj;
+
+    const int order = int(k);
+    for (int j = 0; j < order; ++j) {
+      COL column = mat_const_col(T, j);
+      entry_iterator cur = vect_const_begin(column);
+      entry_iterator last = vect_const_end(column);
+      if (!is_unit) x[j] /= column[j];
+      xj = x[j];
+      for (; cur != last; ++cur) {
+        if (int(cur.index()) > j && cur.index() < k)
+          x[cur.index()] -= xj * (*cur);
+      }
+    }
+  }
+  
+  /* Lower triangular solve, column-major dense storage.
+     Columns 0 to k-1 are processed: x[j] is divided by the diagonal entry
+     unless is_unit, then x_j times entries j+1 .. k-1 of column j is
+     subtracted from entries j+1 .. k-1 of x. */
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         col_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_col_type COL;
+    typedef typename linalg_traits<typename org_type<COL>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type xj;
+
+    const int order = int(k);
+    for (int j = 0; j < order; ++j) {
+      COL column = mat_const_col(T, j);
+      entry_iterator cur = vect_const_begin(column) + (j+1);
+      entry_iterator last = vect_const_begin(column) + k;
+      typename linalg_traits<VecX>::iterator out = vect_begin(x) + (j+1);
+      if (!is_unit) x[j] /= column[j];
+      xj = x[j];
+      while (cur != last) {
+        *out -= xj * (*cur);
+        ++cur; ++out;
+      }
+    }
+  }
+  
+
+  /* Upper triangular solve, row-major sparse storage.
+     Rows k-1 down to 0 are processed: for row i, the products of the
+     entries whose index is in ]i, k[ with the matching x entries are
+     subtracted from x[i], and the result is divided by the diagonal entry
+     unless is_unit.
+     Row i is addressed directly with mat_const_row(T, i), as in the other
+     row-major overloads, so that the right rows are used when k is
+     smaller than the number of rows of T. */
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typename linalg_traits<TriMatrix>::value_type t;
+    for (int i = int(k) - 1; i >= 0; --i) {
+      ROW c = mat_const_row(T, i);
+      typename linalg_traits<typename org_type<ROW>::t>::const_iterator
+        it = vect_const_begin(c), ite = vect_const_end(c);
+      for (t = x[i]; it != ite; ++it)
+        if (int(it.index()) > i && it.index() < k) t -= (*it) * x[it.index()];
+      if (!is_unit) x[i] = t / c[i]; else x[i] = t;
+    }
+  }
+
+  /* Upper triangular solve, row-major dense storage.
+     Rows k-1 down to 0 are processed: for row i, the products of entries
+     i+1 .. k-1 of the row with the matching x entries are subtracted from
+     x[i], and the result is divided by the diagonal entry unless
+     is_unit. */
+  template <typename TriMatrix, typename VecX>
+  void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type acc;
+
+    int i = int(k);
+    while (--i >= 0) {
+      ROW line = mat_const_row(T, i);
+      entry_iterator cur = vect_const_begin(line) + (i + 1);
+      entry_iterator last = vect_const_begin(line) + k;
+      typename linalg_traits<VecX>::iterator in = vect_begin(x) + (i+1);
+
+      acc = x[i];
+      while (cur != last) {
+        acc -= (*cur) * (*in);
+        ++cur; ++in;
+      }
+      if (is_unit) x[i] = acc; else x[i] = acc / line[i];
+    }
+  }
+
+  /* Lower triangular solve, row-major sparse storage.
+     Rows 0 to k-1 are processed: for row i, the products of the entries
+     whose index is smaller than i with the matching x entries are
+     subtracted from x[i], and the result is divided by the diagonal entry
+     unless is_unit. */
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_sparse, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type acc;
+
+    const int order = int(k);
+    for (int i = 0; i < order; ++i) {
+      ROW line = mat_const_row(T, i);
+      entry_iterator cur = vect_const_begin(line);
+      entry_iterator last = vect_const_end(line);
+
+      acc = x[i];
+      for (; cur != last; ++cur) {
+        if (int(cur.index()) < i) acc -= (*cur) * x[cur.index()];
+      }
+      if (is_unit) x[i] = acc; else x[i] = acc / line[i];
+    }
+  }
+
+  /* Lower triangular solve, row-major dense storage.
+     Rows 0 to k-1 are processed: for row i, the products of the first i
+     entries of the row with the first i entries of x are subtracted from
+     x[i], and the result is divided by the diagonal entry unless
+     is_unit. */
+  template <typename TriMatrix, typename VecX>
+  void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k,
+                         row_major, abstract_dense, bool is_unit) {
+    typedef typename linalg_traits<TriMatrix>::const_sub_row_type ROW;
+    typedef typename linalg_traits<typename org_type<ROW>::t>::const_iterator
+      entry_iterator;
+    typename linalg_traits<TriMatrix>::value_type acc;
+
+    const int order = int(k);
+    for (int i = 0; i < order; ++i) {
+      ROW line = mat_const_row(T, i);
+      entry_iterator cur = vect_const_begin(line);
+      entry_iterator last = cur + i;
+      typename linalg_traits<VecX>::iterator in = vect_begin(x);
+
+      acc = x[i];
+      while (cur != last) {
+        acc -= (*cur) * (*in);
+        ++cur; ++in;
+      }
+      if (is_unit) x[i] = acc; else x[i] = acc / line[i];
+    }
+  }
+
+
+// Triangular Solve:  x <-- T^{-1} * x
+
+  /* Upper triangular solve over mat_nrows(T) unknowns; forwards to the
+     overload taking an explicit size. */
+  template <typename TriMatrix, typename VecX> inline
+  void upper_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false) {
+    upper_tri_solve(T, x_, mat_nrows(T), is_unit);
+  }
+  
+  /* Lower triangular solve over mat_nrows(T) unknowns; forwards to the
+     overload taking an explicit size. */
+  template <typename TriMatrix, typename VecX> inline
+  void lower_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false) {
+    lower_tri_solve(T, x_, mat_nrows(T), is_unit);
+  }
+
+  /* Upper triangular solve restricted to the first k unknowns.
+     Checks (GMM_ASSERT2) that T has at least k rows and columns, that x_
+     has at least k entries and is not sparse, then dispatches on the
+     principal orientation and the storage type of T. */
+  template <typename TriMatrix, typename VecX> inline
+  void upper_tri_solve(const TriMatrix& T, VecX &x_, size_t k,
+                       bool is_unit) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<TriMatrix>::sub_orientation>::potype orientation_tag;
+    typedef typename linalg_traits<TriMatrix>::storage_type storage_tag;
+
+    GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x_) >= k
+                && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch");
+    upper_tri_solve__(T, x_, k, orientation_tag(), storage_tag(), is_unit);
+  }
+  
+  /* Lower triangular solve restricted to the first k unknowns.
+     Checks (GMM_ASSERT2) that T has at least k rows and columns, that x_
+     has at least k entries and is not sparse, then dispatches on the
+     principal orientation and the storage type of T. */
+  template <typename TriMatrix, typename VecX> inline
+  void lower_tri_solve(const TriMatrix& T, VecX &x_, size_t k,
+                       bool is_unit) {
+    typedef typename principal_orientation_type<typename
+      linalg_traits<TriMatrix>::sub_orientation>::potype orientation_tag;
+    typedef typename linalg_traits<TriMatrix>::storage_type storage_tag;
+
+    GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x_) >= k
+                && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch");
+    lower_tri_solve__(T, x_, k, orientation_tag(), storage_tag(), is_unit);
+  }
+
+
+ 
+
+
+
+}
+
+
+#endif //  GMM_TRI_SOLVE_H__
--- a/gmm/gmm_vector.h
+++ b/gmm/gmm_vector.h
--- a/gmm/gmm_vector_to_matrix.h
+++ b/gmm/gmm_vector_to_matrix.h
@ -0,0 +1,340 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
+ under  the  terms  of the  GNU  Lesser General Public License as published
+ by  the  Free Software Foundation;  either version 3 of the License,  or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program  is  distributed  in  the  hope  that it will be useful,  but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You  should  have received a copy of the GNU Lesser General Public License
+ along  with  this program;  if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+
+ As a special exception, you  may use  this file  as it is a part of a free
+ software  library  without  restriction.  Specifically,  if   other  files
+ instantiate  templates  or  use macros or inline functions from this file,
+ or  you compile this  file  and  link  it  with other files  to produce an
+ executable, this file  does  not  by itself cause the resulting executable
+ to be covered  by the GNU Lesser General Public License.  This   exception
+ does not  however  invalidate  any  other  reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_vector_to_matrix.h
+   @author  Yves Renard <Yves.Renard@insa-lyon.fr>
+   @date December 6, 2003.
+   @brief View vectors as row or column matrices. */
+#ifndef GMM_VECTOR_TO_MATRIX_H__
+#define GMM_VECTOR_TO_MATRIX_H__
+
+#include "gmm_interface.h"
+
+namespace gmm {
+
+  /* ********************************************************************* */
+  /*	     row vector -> transform a vector into a (1, n) matrix.        */
+  /* ********************************************************************* */
+
+  template <typename PT> struct gen_row_vector {
+    typedef gen_row_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_V;
+    typedef typename linalg_traits<this_type>::reference reference;
+    // Reference to the wrapped vector; it is exposed as the single matrix row.
+    simple_vector_ref<PT> vec;
+    // Default-construct, wrap v, or copy the view held by the V* instantiation.
+    gen_row_vector() {}
+    gen_row_vector(ref_V v) : vec(v) {}
+    gen_row_vector(const gen_row_vector<CPT> &cr) : vec(cr.vec) {}
+    // Shape of the view: always one row, one column per vector entry.
+    size_type nrows() const { return 1; }
+    size_type ncols() const { return vect_size(vec); }
+    // The row index is ignored; only the column index selects an entry.
+    reference operator()(size_type, size_type j) const { return vec[j]; }
+  };
+
+  template <typename PT>
+  struct gen_row_vector_iterator { // row iterator over the single row of a gen_row_vector
+    typedef gen_row_vector<PT> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef simple_vector_ref<PT> value_type;
+    typedef const simple_vector_ref<PT> *pointer;
+    typedef const simple_vector_ref<PT> &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_row_vector_iterator<PT> iterator;
+    // The range has exactly two positions: begin (isend == false) and end (isend == true).
+    simple_vector_ref<PT> vec; // the wrapped vector, i.e. the only row
+    bool isend;                // true once the iterator is past the row
+    
+    iterator &operator ++()   { isend = true; return *this; }
+    iterator &operator --()   { isend = false; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    iterator &operator +=(difference_type i) // agrees with ++/--: forward reaches end, backward begin
+    { if (i > 0) isend = true; else if (i < 0) isend = false; return *this; }
+    iterator &operator -=(difference_type i) // mirror image: it -= i behaves like it += -i
+    { if (i > 0) isend = false; else if (i < 0) isend = true; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { 
+      return (isend == true) ? ((i.isend == true) ? 0 : 1)
+	                     : ((i.isend == true) ? -1 : 0);
+    }
+    // Dereferencing yields the wrapped vector whatever the position or index.
+    const simple_vector_ref<PT>& operator *() const { return vec; }
+    const simple_vector_ref<PT>& operator [](int) const { return vec; }
+
+    bool operator ==(const iterator &i) const { return (isend == i.isend); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (*this - i < 0); }
+    // A default-constructed iterator starts at begin instead of holding an indeterminate flag.
+    gen_row_vector_iterator(void) : isend(false) {}
+    gen_row_vector_iterator(const gen_row_vector_iterator<MPT> &itm)
+      : vec(itm.vec), isend(itm.isend) {}
+    gen_row_vector_iterator(const gen_row_vector<PT> &m, bool iis_end)
+      : vec(m.vec), isend(iis_end) { }
+    
+  };
+
+  template <typename PT>
+  struct linalg_traits<gen_row_vector<PT> > { // traits describing gen_row_vector<PT> as a matrix
+    typedef gen_row_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V; // type of the wrapped vector
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type; // the view is tagged as a matrix
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, // NOTE(review): select_ref is defined elsewhere; presumably keyed on constness of PT - confirm
+            typename linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_col_type; // all four column types are null types
+    typedef abstract_null_type col_iterator;
+    typedef abstract_null_type const_sub_col_type;
+    typedef abstract_null_type const_col_iterator;
+    typedef simple_vector_ref<const V *> const_sub_row_type;
+    typedef typename select_ref<abstract_null_type, 
+            simple_vector_ref<V *>, PT>::ref_type sub_row_type;
+    typedef gen_row_vector_iterator<typename const_pointer<PT>::pointer>
+            const_row_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_row_vector_iterator<PT>, PT>::ref_type row_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type; // taken from the wrapped vector
+    typedef row_major sub_orientation;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type nrows(const this_type &) { return 1; } // always a single row
+    static size_type ncols(const this_type &m) { return m.ncols(); }
+    static const_sub_row_type row(const const_row_iterator &it) { return *it; } // the row is the iterator's vector
+    static sub_row_type row(const row_iterator &it) { return *it; }
+    static const_row_iterator row_begin(const this_type &m)
+    { return const_row_iterator(m, false); } // begin: isend == false
+    static row_iterator row_begin(this_type &m)
+    { return row_iterator(m, false); }
+    static const_row_iterator row_end(const this_type &m)
+    { return const_row_iterator(m, true); } // end: isend == true
+    static row_iterator row_end(this_type &m)
+    { return row_iterator(m, true); }
+    static origin_type* origin(this_type &m) { return m.vec.origin; } // origin of the wrapped vector reference
+    static const origin_type* origin(const this_type &m)
+    { return m.vec.origin; }
+    static void do_clear(this_type &m)
+    { clear(row(mat_row_begin(m))); } // clears the first (and only) row
+    static value_type access(const const_row_iterator &itrow, size_type i)
+    { return itrow.vec[i]; } // i indexes into the iterator's vector
+    static reference access(const row_iterator &itrow, size_type i)
+    { return itrow.vec[i]; }
+  };
+  
+  template <typename PT> // stream insertion: hands the view to gmm::write and returns o
+  std::ostream &operator <<(std::ostream &o, const gen_row_vector<PT>& m)
+  { gmm::write(o,m); return o; }
+
+  /* ********************************************************************* */
+  /*	     col vector -> transform a vector into a (n, 1) matrix.        */
+  /* ********************************************************************* */
+
+  template <typename PT> struct gen_col_vector {
+    typedef gen_col_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef V * CPT;
+    typedef typename std::iterator_traits<PT>::reference ref_V;
+    typedef typename linalg_traits<this_type>::reference reference;
+    // Reference to the wrapped vector; it is exposed as the single matrix column.
+    simple_vector_ref<PT> vec;
+    // Default-construct, wrap v, or copy the view held by the V* instantiation.
+    gen_col_vector() {}
+    gen_col_vector(ref_V v) : vec(v) {}
+    gen_col_vector(const gen_col_vector<CPT> &cr) : vec(cr.vec) {}
+    // Shape of the view: always one column, one row per vector entry.
+    size_type ncols() const { return 1; }
+    size_type nrows() const { return vect_size(vec); }
+    // The column index is ignored; only the row index selects an entry.
+    reference operator()(size_type i, size_type) const { return vec[i]; }
+  };
+
+  template <typename PT>
+  struct gen_col_vector_iterator { // column iterator over the single column of a gen_col_vector
+    typedef gen_col_vector<PT> this_type;
+    typedef typename modifiable_pointer<PT>::pointer MPT;
+    typedef typename std::iterator_traits<PT>::value_type V;
+    typedef simple_vector_ref<PT> value_type;
+    typedef const simple_vector_ref<PT> *pointer;
+    typedef const simple_vector_ref<PT> &reference;
+    typedef ptrdiff_t difference_type;
+    typedef size_t size_type;
+    typedef std::random_access_iterator_tag  iterator_category;
+    typedef gen_col_vector_iterator<PT> iterator;
+    // The range has exactly two positions: begin (isend == false) and end (isend == true).
+    simple_vector_ref<PT> vec; // the wrapped vector, i.e. the only column
+    bool isend;                // true once the iterator is past the column
+    
+    iterator &operator ++()   { isend = true; return *this; }
+    iterator &operator --()   { isend = false; return *this; }
+    iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; }
+    iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; }
+    iterator &operator +=(difference_type i) // agrees with ++/--: forward reaches end, backward begin
+    { if (i > 0) isend = true; else if (i < 0) isend = false; return *this; }
+    iterator &operator -=(difference_type i) // mirror image: it -= i behaves like it += -i
+    { if (i > 0) isend = false; else if (i < 0) isend = true; return *this; }
+    iterator operator +(difference_type i) const 
+    { iterator itt = *this; return (itt += i); }
+    iterator operator -(difference_type i) const
+    { iterator itt = *this; return (itt -= i); }
+    difference_type operator -(const iterator &i) const { 
+      return (isend == true) ? ((i.isend == true) ? 0 : 1)
+	                     : ((i.isend == true) ? -1 : 0);
+    }
+    // Dereferencing yields the wrapped vector whatever the position or index.
+    const simple_vector_ref<PT>& operator *() const { return vec; }
+    const simple_vector_ref<PT>& operator [](int) const { return vec; }
+
+    bool operator ==(const iterator &i) const { return (isend == i.isend); }
+    bool operator !=(const iterator &i) const { return !(i == *this); }
+    bool operator < (const iterator &i) const { return (*this - i < 0); }
+    // A default-constructed iterator starts at begin instead of holding an indeterminate flag.
+    gen_col_vector_iterator(void) : isend(false) {}
+    gen_col_vector_iterator(const gen_col_vector_iterator<MPT> &itm)
+      : vec(itm.vec), isend(itm.isend) {}
+    gen_col_vector_iterator(const gen_col_vector<PT> &m, bool iis_end)
+      : vec(m.vec), isend(iis_end) { }
+    
+  };
+
+  template <typename PT>
+  struct linalg_traits<gen_col_vector<PT> > { // traits describing gen_col_vector<PT> as a matrix
+    typedef gen_col_vector<PT> this_type;
+    typedef typename std::iterator_traits<PT>::value_type V; // type of the wrapped vector
+    typedef typename which_reference<PT>::is_reference is_reference;
+    typedef abstract_matrix linalg_type; // the view is tagged as a matrix
+    typedef typename linalg_traits<V>::origin_type origin_type;
+    typedef typename select_ref<const origin_type *, origin_type *,
+				PT>::ref_type porigin_type;
+    typedef typename linalg_traits<V>::value_type value_type;
+    typedef typename select_ref<value_type, // NOTE(review): select_ref is defined elsewhere; presumably keyed on constness of PT - confirm
+            typename linalg_traits<V>::reference, PT>::ref_type reference;
+    typedef abstract_null_type sub_row_type; // all four row types are null types
+    typedef abstract_null_type row_iterator;
+    typedef abstract_null_type const_sub_row_type;
+    typedef abstract_null_type const_row_iterator;
+    typedef simple_vector_ref<const V *> const_sub_col_type;
+    typedef typename select_ref<abstract_null_type, 
+            simple_vector_ref<V *>, PT>::ref_type sub_col_type;
+    typedef gen_col_vector_iterator<typename const_pointer<PT>::pointer>
+            const_col_iterator;
+    typedef typename select_ref<abstract_null_type, 
+	    gen_col_vector_iterator<PT>, PT>::ref_type col_iterator;
+    typedef typename linalg_traits<V>::storage_type storage_type; // taken from the wrapped vector
+    typedef col_major sub_orientation;
+    typedef typename linalg_traits<V>::index_sorted index_sorted;
+    static size_type ncols(const this_type &) { return 1; } // always a single column
+    static size_type nrows(const this_type &m) { return m.nrows(); }
+    static const_sub_col_type col(const const_col_iterator &it) { return *it; } // the column is the iterator's vector
+    static sub_col_type col(const col_iterator &it) { return *it; }
+    static const_col_iterator col_begin(const this_type &m)
+    { return const_col_iterator(m, false); } // begin: isend == false
+    static col_iterator col_begin(this_type &m)
+    { return col_iterator(m, false); }
+    static const_col_iterator col_end(const this_type &m)
+    { return const_col_iterator(m, true); } // end: isend == true
+    static col_iterator col_end(this_type &m)
+    { return col_iterator(m, true); }
+    static origin_type* origin(this_type &m) { return m.vec.origin; } // origin of the wrapped vector reference
+    static const origin_type* origin(const this_type &m)
+    { return m.vec.origin; }
+    static void do_clear(this_type &m)
+    { clear(col(mat_col_begin(m))); } // clears the first (and only) column
+    static value_type access(const const_col_iterator &itcol, size_type i)
+    { return itcol.vec[i]; } // i indexes into the iterator's vector
+    static reference access(const col_iterator &itcol, size_type i)
+    { return itcol.vec[i]; }
+  };
+  
+  template <typename PT> // stream insertion: hands the view to gmm::write and returns o
+  std::ostream &operator <<(std::ostream &o, const gen_col_vector<PT>& m)
+  { gmm::write(o,m); return o; }
+
+  /* ******************************************************************** */
+  /*		col and row vectors                                       */
+  /* ******************************************************************** */
+
+  
+  template <class V> inline // row-matrix view of a const vector, built from linalg_cast(v)
+  typename select_return< gen_row_vector<const V *>, gen_row_vector<V *>,
+			  const V *>::return_type
+  row_vector(const V& v) { typedef typename select_return<
+      gen_row_vector<const V *>, gen_row_vector<V *>, const V *>::return_type view_t;
+    return view_t(linalg_cast(v));
+  }
+
+  template <class V> inline // row-matrix view of a non-const vector, built from linalg_cast(v)
+  typename select_return< gen_row_vector<const V *>, gen_row_vector<V *>,
+			  V *>::return_type
+  row_vector(V& v) { typedef typename select_return<
+      gen_row_vector<const V *>, gen_row_vector<V *>, V *>::return_type view_t;
+    return view_t(linalg_cast(v));
+  }
+ 
+  template <class V> inline gen_row_vector<const V *>
+  const_row_vector(V& v) // always yields the const-pointer row view of v
+  { gen_row_vector<const V *> view(v); return view; }
+ 
+
+  template <class V> inline // column-matrix view of a const vector, built from linalg_cast(v)
+  typename select_return< gen_col_vector<const V *>, gen_col_vector<V *>,
+			  const V *>::return_type
+  col_vector(const V& v) { typedef typename select_return<
+      gen_col_vector<const V *>, gen_col_vector<V *>, const V *>::return_type view_t;
+    return view_t(linalg_cast(v));
+  }
+
+  template <class V> inline // column-matrix view of a non-const vector, built from linalg_cast(v)
+  typename select_return< gen_col_vector<const V *>, gen_col_vector<V *>,
+			  V *>::return_type
+  col_vector(V& v) { typedef typename select_return<
+      gen_col_vector<const V *>, gen_col_vector<V *>, V *>::return_type view_t;
+    return view_t(linalg_cast(v));
+  }
+ 
+  template <class V> inline gen_col_vector<const V *>
+  const_col_vector(V& v) // always yields the const-pointer column view of v
+  { gen_col_vector<const V *> view(v); return view; }
+ 
+
+}
+
+#endif //  GMM_VECTOR_TO_MATRIX_H__
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 34e28fe18c77efe661e04742f9b3350eba880267
+Subproject commit f949aabf5c4632df97746c273cab27a1ea1bffe4