From 5149128b60a750cc4acf97399d2201834ac3a584 Mon Sep 17 00:00:00 2001 From: Jack Andersen Date: Mon, 16 Oct 2017 19:51:53 -1000 Subject: [PATCH] DCLN cooking and various bug fixes --- DataSpec/DNACommon/CMakeLists.txt | 1 + DataSpec/DNACommon/DeafBabe.cpp | 46 +- DataSpec/DNACommon/OBBTreeBuilder.cpp | 256 ++ DataSpec/DNACommon/OBBTreeBuilder.hpp | 18 + DataSpec/DNAMP1/DCLN.hpp | 80 +- DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp | 2 +- DataSpec/SpecBase.cpp | 6 + DataSpec/SpecBase.hpp | 3 + DataSpec/SpecMP1.cpp | 11 + DataSpec/SpecMP2.cpp | 6 + DataSpec/SpecMP3.cpp | 6 + Editor/ViewManager.cpp | 1 + GMM-LICENSE | 13 + Runtime/Camera/CBallCamera.cpp | 6 +- Runtime/Graphics/CBooRenderer.hpp | 2 + Runtime/Graphics/CModel.hpp | 18 +- Runtime/Graphics/CModelBoo.cpp | 88 +- .../Graphics/Shaders/CModelShadersGLSL.cpp | 2 +- .../Graphics/Shaders/CModelShadersHLSL.cpp | 2 +- .../Graphics/Shaders/CModelShadersMetal.cpp | 4 +- .../Shaders/CParticleSwooshShadersGLSL.cpp | 2 +- .../Shaders/CParticleSwooshShadersHLSL.cpp | 2 +- .../Shaders/CParticleSwooshShadersMetal.cpp | 2 +- Runtime/IMain.hpp | 3 +- Runtime/MP1/MP1.cpp | 65 +- Runtime/MP1/MP1.hpp | 5 + Runtime/World/CGameArea.cpp | 38 + Runtime/World/CGameArea.hpp | 3 + gmm/gmm.h | 54 + gmm/gmm_MUMPS_interface.h | 355 +++ gmm/gmm_algobase.h | 228 ++ gmm/gmm_blas.h | 2221 +++++++++++++++++ gmm/gmm_blas_interface.h | 948 +++++++ gmm/gmm_condition_number.h | 147 ++ gmm/gmm_conjugated.h | 398 +++ gmm/gmm_def.h | 1123 +++++++++ gmm/gmm_dense_Householder.h | 317 +++ gmm/gmm_dense_lu.h | 250 ++ gmm/gmm_dense_matrix_functions.h | 302 +++ gmm/gmm_dense_qr.h | 789 ++++++ gmm/gmm_dense_sylvester.h | 174 ++ gmm/gmm_domain_decomp.h | 165 ++ gmm/gmm_except.h | 328 +++ gmm/gmm_inoutput.h | 1176 +++++++++ gmm/gmm_interface.h | 1068 ++++++++ gmm/gmm_interface_bgeot.h | 83 + gmm/gmm_iter.h | 162 ++ gmm/gmm_iter_solvers.h | 111 + gmm/gmm_kernel.h | 55 + gmm/gmm_lapack_interface.h | 470 ++++ gmm/gmm_least_squares_cg.h | 96 + gmm/gmm_matrix.h | 
1199 +++++++++ gmm/gmm_modified_gram_schmidt.h | 127 + gmm/gmm_opt.h | 128 + gmm/gmm_precond.h | 65 + gmm/gmm_precond_diagonal.h | 132 + gmm/gmm_precond_ildlt.h | 241 ++ gmm/gmm_precond_ildltt.h | 174 ++ gmm/gmm_precond_ilu.h | 280 +++ gmm/gmm_precond_ilut.h | 263 ++ gmm/gmm_precond_ilutp.h | 284 +++ gmm/gmm_precond_mr_approx_inverse.h | 149 ++ gmm/gmm_range_basis.h | 499 ++++ gmm/gmm_real_part.h | 605 +++++ gmm/gmm_ref.h | 526 ++++ gmm/gmm_scaled.h | 434 ++++ gmm/gmm_solver_Schwarz_additive.h | 805 ++++++ gmm/gmm_solver_bfgs.h | 210 ++ gmm/gmm_solver_bicgstab.h | 160 ++ gmm/gmm_solver_cg.h | 180 ++ gmm/gmm_solver_constrained_cg.h | 165 ++ gmm/gmm_solver_gmres.h | 173 ++ gmm/gmm_solver_idgmres.h | 805 ++++++ gmm/gmm_solver_qmr.h | 210 ++ gmm/gmm_std.h | 424 ++++ gmm/gmm_sub_index.h | 224 ++ gmm/gmm_sub_matrix.h | 406 +++ gmm/gmm_sub_vector.h | 560 +++++ gmm/gmm_superlu_interface.h | 410 +++ gmm/gmm_transposed.h | 244 ++ gmm/gmm_tri_solve.h | 222 ++ gmm/gmm_vector.h | 1571 ++++++++++++ gmm/gmm_vector_to_matrix.h | 340 +++ hecl | 2 +- 84 files changed, 23876 insertions(+), 52 deletions(-) create mode 100644 DataSpec/DNACommon/OBBTreeBuilder.cpp create mode 100644 DataSpec/DNACommon/OBBTreeBuilder.hpp create mode 100644 GMM-LICENSE create mode 100644 gmm/gmm.h create mode 100644 gmm/gmm_MUMPS_interface.h create mode 100644 gmm/gmm_algobase.h create mode 100644 gmm/gmm_blas.h create mode 100644 gmm/gmm_blas_interface.h create mode 100644 gmm/gmm_condition_number.h create mode 100644 gmm/gmm_conjugated.h create mode 100644 gmm/gmm_def.h create mode 100644 gmm/gmm_dense_Householder.h create mode 100644 gmm/gmm_dense_lu.h create mode 100644 gmm/gmm_dense_matrix_functions.h create mode 100644 gmm/gmm_dense_qr.h create mode 100644 gmm/gmm_dense_sylvester.h create mode 100644 gmm/gmm_domain_decomp.h create mode 100644 gmm/gmm_except.h create mode 100644 gmm/gmm_inoutput.h create mode 100644 gmm/gmm_interface.h create mode 100644 gmm/gmm_interface_bgeot.h create mode 100644 
gmm/gmm_iter.h create mode 100644 gmm/gmm_iter_solvers.h create mode 100644 gmm/gmm_kernel.h create mode 100644 gmm/gmm_lapack_interface.h create mode 100644 gmm/gmm_least_squares_cg.h create mode 100644 gmm/gmm_matrix.h create mode 100644 gmm/gmm_modified_gram_schmidt.h create mode 100644 gmm/gmm_opt.h create mode 100644 gmm/gmm_precond.h create mode 100644 gmm/gmm_precond_diagonal.h create mode 100644 gmm/gmm_precond_ildlt.h create mode 100644 gmm/gmm_precond_ildltt.h create mode 100644 gmm/gmm_precond_ilu.h create mode 100644 gmm/gmm_precond_ilut.h create mode 100644 gmm/gmm_precond_ilutp.h create mode 100644 gmm/gmm_precond_mr_approx_inverse.h create mode 100644 gmm/gmm_range_basis.h create mode 100644 gmm/gmm_real_part.h create mode 100644 gmm/gmm_ref.h create mode 100644 gmm/gmm_scaled.h create mode 100644 gmm/gmm_solver_Schwarz_additive.h create mode 100644 gmm/gmm_solver_bfgs.h create mode 100644 gmm/gmm_solver_bicgstab.h create mode 100644 gmm/gmm_solver_cg.h create mode 100644 gmm/gmm_solver_constrained_cg.h create mode 100644 gmm/gmm_solver_gmres.h create mode 100644 gmm/gmm_solver_idgmres.h create mode 100644 gmm/gmm_solver_qmr.h create mode 100644 gmm/gmm_std.h create mode 100644 gmm/gmm_sub_index.h create mode 100644 gmm/gmm_sub_matrix.h create mode 100644 gmm/gmm_sub_vector.h create mode 100644 gmm/gmm_superlu_interface.h create mode 100644 gmm/gmm_transposed.h create mode 100644 gmm/gmm_tri_solve.h create mode 100644 gmm/gmm_vector.h create mode 100644 gmm/gmm_vector_to_matrix.h diff --git a/DataSpec/DNACommon/CMakeLists.txt b/DataSpec/DNACommon/CMakeLists.txt index 576397aa5..cb12614a9 100644 --- a/DataSpec/DNACommon/CMakeLists.txt +++ b/DataSpec/DNACommon/CMakeLists.txt @@ -34,6 +34,7 @@ set(DNACOMMON_SOURCES BabeDead.hpp BabeDead.cpp RigInverter.hpp RigInverter.cpp AROTBuilder.hpp AROTBuilder.cpp + OBBTreeBuilder.hpp OBBTreeBuilder.cpp Tweaks/ITweak.hpp Tweaks/TweakWriter.hpp Tweaks/ITweakGame.hpp diff --git a/DataSpec/DNACommon/DeafBabe.cpp 
b/DataSpec/DNACommon/DeafBabe.cpp index eaf384e5e..1f7ce6a94 100644 --- a/DataSpec/DNACommon/DeafBabe.cpp +++ b/DataSpec/DNACommon/DeafBabe.cpp @@ -83,16 +83,40 @@ template void DeafBabeSendToBlender(hecl::BlenderConnection::P template void DeafBabeSendToBlender(hecl::BlenderConnection::PyOutStream& os, const DNAMP2::DeafBabe& db, bool isDcln, atInt32 idx); template void DeafBabeSendToBlender(hecl::BlenderConnection::PyOutStream& os, const DNAMP1::DCLN::Collision& db, bool isDcln, atInt32 idx); +template +static void PopulateAreaFields(DEAFBABE& db, + const hecl::BlenderConnection::DataStream::ColMesh& colMesh, + const zeus::CAABox& fullAABB, + std::enable_if_t::value || + std::is_same::value, int>* = 0) +{ + AROTBuilder builder; + auto octree = builder.buildCol(colMesh, db.rootNodeType); + static_cast&>(db.bspTree) = std::move(octree.first); + db.bspSize = octree.second; + + db.unk1 = 0x1000000; + db.length = db.binarySize(0) - 8; + db.magic = 0xDEAFBABE; + db.version = 3; + db.aabb[0] = fullAABB.min; + db.aabb[1] = fullAABB.max; +} + +template +static void PopulateAreaFields(DEAFBABE& db, + const hecl::BlenderConnection::DataStream::ColMesh& colMesh, + const zeus::CAABox& fullAABB, + std::enable_if_t::value, int>* = 0) +{ + db.magic = 0xDEAFBABE; + db.version = 2; + db.memSize = 0; +} + template void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh) { - { - AROTBuilder builder; - auto octree = builder.buildCol(colMesh, db.rootNodeType); - static_cast&>(db.bspTree) = std::move(octree.first); - db.bspSize = octree.second; - } - db.materials.reserve(colMesh.materials.size()); for (const hecl::BlenderConnection::DataStream::ColMesh::Material& mat : colMesh.materials) { @@ -186,15 +210,11 @@ void DeafBabeBuildFromBlender(DEAFBABE& db, const hecl::BlenderConnection::DataS db.triMatsCount = colMesh.trianges.size(); db.triangleEdgesCount = colMesh.trianges.size() * 3; - db.unk1 = 0x1000000; - db.length = db.binarySize(0) 
- 8; - db.magic = 0xDEAFBABE; - db.version = 3; - db.aabb[0] = fullAABB.min; - db.aabb[1] = fullAABB.max; + PopulateAreaFields(db, colMesh, fullAABB); } template void DeafBabeBuildFromBlender(DNAMP1::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh); template void DeafBabeBuildFromBlender(DNAMP2::DeafBabe& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh); +template void DeafBabeBuildFromBlender(DNAMP1::DCLN::Collision& db, const hecl::BlenderConnection::DataStream::ColMesh& colMesh); } diff --git a/DataSpec/DNACommon/OBBTreeBuilder.cpp b/DataSpec/DNACommon/OBBTreeBuilder.cpp new file mode 100644 index 000000000..dce9fb3e0 --- /dev/null +++ b/DataSpec/DNACommon/OBBTreeBuilder.cpp @@ -0,0 +1,256 @@ +#include +#include "OBBTreeBuilder.hpp" +#include "zeus/CTransform.hpp" +#include "DataSpec/DNAMP1/DCLN.hpp" +#include "gmm/gmm.h" + +namespace DataSpec +{ + +using ColMesh = hecl::BlenderConnection::DataStream::ColMesh; + +struct FittedOBB +{ + zeus::CTransform xf; + zeus::CVector3f he; +}; + +static std::vector MakeRootTriangleIndex(const ColMesh& mesh) +{ + std::vector ret; + ret.reserve(mesh.trianges.size()); + for (int i = 0; i < mesh.trianges.size(); ++i) + ret.push_back(i); + return ret; +} + +static std::unordered_set GetTriangleVerts(const ColMesh& mesh, int triIdx) +{ + const ColMesh::Triangle& T = mesh.trianges[triIdx]; + std::unordered_set verts; + verts.insert(mesh.edges[T.edges[0]].verts[0]); + verts.insert(mesh.edges[T.edges[0]].verts[1]); + verts.insert(mesh.edges[T.edges[1]].verts[0]); + verts.insert(mesh.edges[T.edges[1]].verts[1]); + verts.insert(mesh.edges[T.edges[2]].verts[0]); + verts.insert(mesh.edges[T.edges[2]].verts[1]); + return verts; +} + +// method to set the OBB parameters which produce a box oriented according to +// the covariance matrix C, which just containts the points pnts +static FittedOBB BuildFromCovarianceMatrix(gmm::dense_matrix& C, + const ColMesh& mesh, const std::vector& index) +{ + 
FittedOBB ret; + + // extract the eigenvalues and eigenvectors from C + gmm::dense_matrix eigvec(3,3); + std::vector eigval(3); + gmm::symmetric_qr_algorithm(C, eigval, eigvec); + + // find the right, up and forward vectors from the eigenvectors + zeus::CVector3f r(eigvec(0,0), eigvec(1,0), eigvec(2,0)); + zeus::CVector3f u(eigvec(0,1), eigvec(1,1), eigvec(2,1)); + zeus::CVector3f f(eigvec(0,2), eigvec(1,2), eigvec(2,2)); + r.normalize(); u.normalize(), f.normalize(); + + // set the rotation matrix using the eigvenvectors + ret.xf.basis[0][0]=r.x; ret.xf.basis[1][0]=u.x; ret.xf.basis[2][0]=f.x; + ret.xf.basis[0][1]=r.y; ret.xf.basis[1][1]=u.y; ret.xf.basis[2][1]=f.y; + ret.xf.basis[0][2]=r.z; ret.xf.basis[1][2]=u.z; ret.xf.basis[2][2]=f.z; + + // now build the bounding box extents in the rotated frame + zeus::CVector3f minim(1e10f, 1e10f, 1e10f), maxim(-1e10f, -1e10f, -1e10f); + for (int triIdx : index) + { + std::unordered_set verts = GetTriangleVerts(mesh, triIdx); + for (uint32_t v : verts) + { + const zeus::CVector3f& p = mesh.verts[v].val; + zeus::CVector3f p_prime(r.dot(p), u.dot(p), f.dot(p)); + minim = zeus::min(minim, p_prime); + maxim = zeus::max(maxim, p_prime); + } + } + + // set the center of the OBB to be the average of the + // minimum and maximum, and the extents be half of the + // difference between the minimum and maximum + zeus::CVector3f center = (maxim + minim) * 0.5f; + ret.xf.origin = ret.xf.basis * center; + ret.he = (maxim - minim) * 0.5f; + + return ret; +} + +// builds an OBB from triangles specified as an array of +// points with integer indices into the point array. Forms +// the covariance matrix for the triangles, then uses the +// method build_from_covariance_matrix() method to fit +// the box. ALL points will be fit in the box, regardless +// of whether they are indexed by a triangle or not. 
+static FittedOBB FitOBB(const ColMesh& mesh, const std::vector& index) +{ + float Ai, Am=0.0; + zeus::CVector3f mu, mui; + gmm::dense_matrix C(3,3); + float cxx=0.0, cxy=0.0, cxz=0.0, cyy=0.0, cyz=0.0, czz=0.0; + + // loop over the triangles this time to find the + // mean location + for (int i : index) + { + const ColMesh::Triangle& T = mesh.trianges[i]; + std::unordered_set verts = GetTriangleVerts(mesh, i); + auto it = verts.begin(); + zeus::CVector3f p = mesh.verts[*it++].val; + zeus::CVector3f q = mesh.verts[*it++].val; + zeus::CVector3f r = mesh.verts[*it++].val; + mui = (p+q+r)/3.f; + Ai = (q-p).cross(r-p).magnitude() / 2.f; + mu += mui*Ai; + Am += Ai; + + // these bits set the c terms to Am*E[xx], Am*E[xy], Am*E[xz].... + cxx += ( 9.0*mui.x*mui.x + p.x*p.x + q.x*q.x + r.x*r.x )*(Ai/12.0); + cxy += ( 9.0*mui.x*mui.y + p.x*p.y + q.x*q.y + r.x*r.y )*(Ai/12.0); + cxz += ( 9.0*mui.x*mui.z + p.x*p.z + q.x*q.z + r.x*r.z )*(Ai/12.0); + cyy += ( 9.0*mui.y*mui.y + p.y*p.y + q.y*q.y + r.y*r.y )*(Ai/12.0); + cyz += ( 9.0*mui.y*mui.z + p.y*p.z + q.y*q.z + r.y*r.z )*(Ai/12.0); + } + // divide out the Am fraction from the average position and + // covariance terms + mu = mu / Am; + cxx /= Am; cxy /= Am; cxz /= Am; cyy /= Am; cyz /= Am; czz /= Am; + + // now subtract off the E[x]*E[x], E[x]*E[y], ... 
terms + cxx -= mu.x*mu.x; cxy -= mu.x*mu.y; cxz -= mu.x*mu.z; + cyy -= mu.y*mu.y; cyz -= mu.y*mu.z; czz -= mu.z*mu.z; + + // now build the covariance matrix + C(0,0)=cxx; C(0,1)=cxy; C(0,2)=cxz; + C(1,0)=cxy; C(1,1)=cyy; C(1,2)=cyz; + C(2,0)=cxz; C(1,2)=cyz; C(2,2)=czz; + + // set the obb parameters from the covariance matrix + return BuildFromCovarianceMatrix(C, mesh, index); +} + +template +static void MakeLeaf(const ColMesh& mesh, const std::vector& index, Node& n) +{ + n.left.reset(); + n.right.reset(); + n.isLeaf = true; + n.leafData = std::make_unique(); + n.leafData->edgeIndexCount = atUint32(index.size() * 3); + n.leafData->edgeIndices.reserve(n.leafData->edgeIndexCount); + for (int i : index) + { + const ColMesh::Triangle& T = mesh.trianges[i]; + for (int j = 0; j < 3; ++j) + n.leafData->edgeIndices.push_back(T.edges[j]); + } +} + +template +static std::unique_ptr RecursiveMakeNode(const ColMesh& mesh, const std::vector& index) +{ + // calculate root OBB + FittedOBB obb = FitOBB(mesh, index); + + // make results row-major and also invert the rotation basis + obb.xf.basis.transpose(); + + std::unique_ptr n = std::make_unique(); + for (int i = 0; i < 3; ++i) + { + n->xf[i] = zeus::CVector4f{obb.xf.basis[i]}; + n->xf[i].vec[3] = obb.xf.origin[i]; + } + n->halfExtent = obb.he; + + // terminate branch when volume < 1.0 + if (obb.he[0] * obb.he[1] * obb.he[2] < 1.f) + { + MakeLeaf(mesh, index, *n); + return n; + } + + n->isLeaf = false; + + std::vector indexNeg[3]; + std::vector indexPos[3]; + for (int c = 0; c < 3; ++c) + { + // subdivide negative side + indexNeg[c].reserve(index.size()); + for (int i : index) + { + std::unordered_set verts = GetTriangleVerts(mesh, i); + for (uint32_t vtx : verts) + { + zeus::CVector3f v = mesh.verts[vtx].val; + v = obb.xf.basis * (v - obb.xf.origin); + if (v[c] < 0.f) + { + indexNeg[c].push_back(i); + break; + } + } + } + + // subdivide positive side + indexPos[c].reserve(index.size()); + for (int i : index) + { + 
std::unordered_set verts = GetTriangleVerts(mesh, i); + for (uint32_t vtx : verts) + { + zeus::CVector3f v = mesh.verts[vtx].val; + v = obb.xf.basis * (v - obb.xf.origin); + if (v[c] >= 0.f) + { + indexPos[c].push_back(i); + break; + } + } + } + } + + size_t idxMin = index.size(); + int minComp = -1; + for (int c = 0; c < 3; ++c) + { + size_t test = std::max(indexNeg[c].size(), indexPos[c].size()); + if (test < idxMin && test < index.size() * 3 / 4) + { + minComp = c; + idxMin = test; + } + } + + if (minComp == -1) + { + MakeLeaf(mesh, index, *n); + return n; + } + + n->left = RecursiveMakeNode(mesh, indexNeg[minComp]); + n->right = RecursiveMakeNode(mesh, indexPos[minComp]); + + return n; +} + +template +std::unique_ptr OBBTreeBuilder::buildCol(const ColMesh& mesh) +{ + std::vector root = MakeRootTriangleIndex(mesh); + return RecursiveMakeNode(mesh, root); +} + +template std::unique_ptr +OBBTreeBuilder::buildCol(const ColMesh& mesh); + +} diff --git a/DataSpec/DNACommon/OBBTreeBuilder.hpp b/DataSpec/DNACommon/OBBTreeBuilder.hpp new file mode 100644 index 000000000..612ac894d --- /dev/null +++ b/DataSpec/DNACommon/OBBTreeBuilder.hpp @@ -0,0 +1,18 @@ +#ifndef DNACOMMON_OBBTREEBUILDER_HPP +#define DNACOMMON_OBBTREEBUILDER_HPP + +#include "DNACommon.hpp" + +namespace DataSpec +{ + +struct OBBTreeBuilder +{ + using ColMesh = hecl::BlenderConnection::DataStream::ColMesh; + template + static std::unique_ptr buildCol(const ColMesh& mesh); +}; + +} + +#endif // DNACOMMON_OBBTREEBUILDER_HPP diff --git a/DataSpec/DNAMP1/DCLN.hpp b/DataSpec/DNAMP1/DCLN.hpp index 9a227b539..b73cb29cc 100644 --- a/DataSpec/DNAMP1/DCLN.hpp +++ b/DataSpec/DNAMP1/DCLN.hpp @@ -1,8 +1,10 @@ #ifndef __DNAMP1_DCLN_HPP__ #define __DNAMP1_DCLN_HPP__ +#include #include "../DNACommon/DeafBabe.hpp" #include "../DNACommon/PAK.hpp" +#include "../DNACommon/OBBTreeBuilder.hpp" #include "DNAMP1.hpp" #include "DeafBabe.hpp" @@ -13,6 +15,8 @@ namespace DNAMP1 struct DCLN : BigDNA { + using Mesh = 
hecl::BlenderConnection::DataStream::ColMesh; + DECL_DNA Value colCount; struct Collision : BigDNA @@ -40,19 +44,20 @@ struct DCLN : BigDNA Value vertCount; Vector verts; - struct LeafData : BigDNA - { - DECL_DNA - Value edgeIndexCount; - Vector edgeIndices; - size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; } - }; - struct Node : BigDNA { Delete _d; + + struct LeafData : BigDNA + { + DECL_DNA + Value edgeIndexCount; + Vector edgeIndices; + size_t getMemoryUsage() const { return (((edgeIndices.size() * 2) + 16) + 3) & ~3; } + }; + Value xf[3]; - Value origin; + Value halfExtent; Value isLeaf; std::unique_ptr leafData; std::unique_ptr left; @@ -63,7 +68,7 @@ struct DCLN : BigDNA xf[0] = __dna_reader.readVec4fBig(); xf[1] = __dna_reader.readVec4fBig(); xf[2] = __dna_reader.readVec4fBig(); - origin = __dna_reader.readVec3fBig(); + halfExtent = __dna_reader.readVec3fBig(); isLeaf = __dna_reader.readBool(); if (isLeaf) { @@ -84,7 +89,7 @@ struct DCLN : BigDNA __dna_writer.writeVec4fBig(xf[0]); __dna_writer.writeVec4fBig(xf[1]); __dna_writer.writeVec4fBig(xf[2]); - __dna_writer.writeVec3fBig(origin); + __dna_writer.writeVec3fBig(halfExtent); __dna_writer.writeBool(isLeaf); if (isLeaf && leafData) leafData->write(__dna_writer); @@ -121,6 +126,30 @@ struct DCLN : BigDNA return (ret + 3) & ~3; } + + void sendToBlender(hecl::BlenderConnection::PyOutStream& os) const + { + os.format("obj = bpy.data.objects.new('%s', None)\n" + "obj.empty_draw_type = 'CUBE'\n" + "bpy.context.scene.objects.link(obj)\n" + "mtx = Matrix(((%f,%f,%f,%f),(%f,%f,%f,%f),(%f,%f,%f,%f),(0.0,0.0,0.0,1.0)))\n" + "mtxd = mtx.decompose()\n" + "obj.rotation_mode = 'QUATERNION'\n" + "obj.location = mtxd[0]\n" + "obj.rotation_quaternion = mtxd[1]\n" + "obj.scale = (%f,%f,%f)\n", isLeaf ? 
"leaf" : "branch", + xf[0].vec[0], xf[0].vec[1], xf[0].vec[2], xf[0].vec[3], + xf[1].vec[0], xf[1].vec[1], xf[1].vec[2], xf[1].vec[3], + xf[2].vec[0], xf[2].vec[1], xf[2].vec[2], xf[2].vec[3], + halfExtent.vec[0], halfExtent.vec[1], halfExtent.vec[2]); + if (isLeaf) + os << "obj.show_name = True\n"; + if (!isLeaf) + { + left->sendToBlender(os); + right->sendToBlender(os); + } + } }; Node root; size_t getMemoryUsage() @@ -141,7 +170,8 @@ struct DCLN : BigDNA hecl::BlenderConnection::PyOutStream os = conn.beginPythonOut(true); os.format("import bpy\n" "import bmesh\n" - "from mathutils import Vector\n" + "from mathutils import Vector, Matrix\n" + "\n" "bpy.context.scene.name = '%s'\n" "# Clear Scene\n" @@ -154,7 +184,10 @@ struct DCLN : BigDNA DeafBabe::BlenderInit(os); atInt32 idx = 0; for (const Collision& col : collision) + { DeafBabeSendToBlender(os, col, true, idx++); + col.root.sendToBlender(os); + } os.centerView(); os.close(); } @@ -171,12 +204,33 @@ struct DCLN : BigDNA DCLN dcln; dcln.read(rs); hecl::BlenderConnection& conn = btok.getBlenderConnection(); - if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::Mesh)) + if (!conn.createBlend(outPath, hecl::BlenderConnection::BlendType::ColMesh)) return false; dcln.sendToBlender(conn, pakRouter.getBestEntryName(entry, false)); return conn.saveBlend(); } + + static bool Cook(const hecl::ProjectPath& outPath, + const hecl::ProjectPath& inPath, + const std::vector& meshes, + hecl::BlenderConnection* conn = nullptr) + { + DCLN dcln; + dcln.colCount = atUint32(meshes.size()); + for (const Mesh& mesh : meshes) + { + dcln.collision.emplace_back(); + Collision& colOut = dcln.collision.back(); + DeafBabeBuildFromBlender(colOut, mesh); + colOut.root = std::move(*OBBTreeBuilder::buildCol(mesh)); + colOut.memSize = atUint32(colOut.root.getMemoryUsage()); + } + + athena::io::FileWriter w(outPath.getAbsolutePath()); + dcln.write(w); + return true; + } }; } diff --git 
a/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp b/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp index 371ee9165..c0c42441e 100644 --- a/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp +++ b/DataSpec/DNAMP1/ScriptObjects/CameraHint.hpp @@ -22,7 +22,7 @@ struct CameraHint : IScriptObject { DECL_YAML Value propertyCount; - Value unknown1; // 0x1 + Value calculateCamPos; // 0x1 Value chaseAllowed; // 0x2 Value boostAllowed; // 0x4 Value obscureAvoidance; // 0x8 diff --git a/DataSpec/SpecBase.cpp b/DataSpec/SpecBase.cpp index cfcf36cd8..c52d9e873 100644 --- a/DataSpec/SpecBase.cpp +++ b/DataSpec/SpecBase.cpp @@ -249,6 +249,12 @@ void SpecBase::doCook(const hecl::ProjectPath& path, const hecl::ProjectPath& co cookMesh(cookedPath, path, ds, fast, btok, progress); break; } + case hecl::BlenderConnection::BlendType::ColMesh: + { + hecl::BlenderConnection::DataStream ds = conn.beginData(); + cookColMesh(cookedPath, path, ds, fast, btok, progress); + break; + } case hecl::BlenderConnection::BlendType::Actor: { hecl::BlenderConnection::DataStream ds = conn.beginData(); diff --git a/DataSpec/SpecBase.hpp b/DataSpec/SpecBase.hpp index d9c8bb115..2c93382dd 100644 --- a/DataSpec/SpecBase.hpp +++ b/DataSpec/SpecBase.hpp @@ -71,6 +71,9 @@ struct SpecBase : hecl::Database::IDataSpec virtual void cookMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, hecl::BlenderToken& btok, FCookProgress progress)=0; + virtual void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, + BlendStream& ds, bool fast, hecl::BlenderToken& btok, + FCookProgress progress)=0; virtual void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, hecl::BlenderToken& btok, FCookProgress progress)=0; diff --git a/DataSpec/SpecMP1.cpp b/DataSpec/SpecMP1.cpp index 44618df66..b1a943a09 100644 --- a/DataSpec/SpecMP1.cpp +++ b/DataSpec/SpecMP1.cpp @@ -10,6 +10,7 @@ #include "DNAMP1/STRG.hpp" #include "DNAMP1/SCAN.hpp" 
#include "DNAMP1/CMDL.hpp" +#include "DNAMP1/DCLN.hpp" #include "DNAMP1/MREA.hpp" #include "DNAMP1/ANCS.hpp" #include "DNAMP1/AGSC.hpp" @@ -555,6 +556,8 @@ struct SpecMP1 : SpecBase { case hecl::BlenderConnection::BlendType::Mesh: return {SBIG('CMDL'), path.hash().val32()}; + case hecl::BlenderConnection::BlendType::ColMesh: + return {SBIG('DCLN'), path.hash().val32()}; case hecl::BlenderConnection::BlendType::Actor: if (path.getAuxInfo().size()) { @@ -728,6 +731,14 @@ struct SpecMP1 : SpecBase DNAMP1::CMDL::Cook(out, in, mesh); } + void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, + hecl::BlenderToken& btok, FCookProgress progress) + { + std::vector mesh = ds.compileColMeshes(); + ds.close(); + DNAMP1::DCLN::Cook(out, in, mesh); + } + void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, hecl::BlenderToken& btok, FCookProgress progress) { diff --git a/DataSpec/SpecMP2.cpp b/DataSpec/SpecMP2.cpp index 17b709a72..f9cddac9d 100644 --- a/DataSpec/SpecMP2.cpp +++ b/DataSpec/SpecMP2.cpp @@ -329,6 +329,12 @@ struct SpecMP2 : SpecBase { } + void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, + BlendStream& ds, bool fast, hecl::BlenderToken& btok, + FCookProgress progress) + { + } + void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, hecl::BlenderToken& btok, FCookProgress progress) diff --git a/DataSpec/SpecMP3.cpp b/DataSpec/SpecMP3.cpp index bc8ba7836..b9e52f66a 100644 --- a/DataSpec/SpecMP3.cpp +++ b/DataSpec/SpecMP3.cpp @@ -523,6 +523,12 @@ struct SpecMP3 : SpecBase { } + void cookColMesh(const hecl::ProjectPath& out, const hecl::ProjectPath& in, + BlendStream& ds, bool fast, hecl::BlenderToken& btok, + FCookProgress progress) + { + } + void cookActor(const hecl::ProjectPath& out, const hecl::ProjectPath& in, BlendStream& ds, bool fast, hecl::BlenderToken& btok, FCookProgress progress) diff 
--git a/Editor/ViewManager.cpp b/Editor/ViewManager.cpp index 5bdbc89e1..628831a76 100644 --- a/Editor/ViewManager.cpp +++ b/Editor/ViewManager.cpp @@ -37,6 +37,7 @@ void ViewManager::BuildTestPART() void ViewManager::InitMP1(MP1::CMain& main) { main.Init(m_fileStoreManager, m_mainWindow.get(), m_voiceEngine.get(), *m_amuseAllocWrapper); + main.WarmupShaders(); } void ViewManager::TestGameView::resized(const boo::SWindowRect& root, const boo::SWindowRect& sub) diff --git a/GMM-LICENSE b/GMM-LICENSE new file mode 100644 index 000000000..65bf0a958 --- /dev/null +++ b/GMM-LICENSE @@ -0,0 +1,13 @@ +GetFEM++ is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation; either version 3 of the License, or +(at your option) any later version along with the GCC Runtime Library +Exception either version 3.1 or (at your option) any later version. +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License and the GCC Runtime Library Exception for more details. +You should have received a copy of the GNU Lesser General Public License +along with this program (see GNU_GPL_V3, GNU_LGPL_V3 and +GNU_GCC_RUNTIME_EXCEPTION files); if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
\ No newline at end of file diff --git a/Runtime/Camera/CBallCamera.cpp b/Runtime/Camera/CBallCamera.cpp index d1514edf6..8a56f6f35 100644 --- a/Runtime/Camera/CBallCamera.cpp +++ b/Runtime/Camera/CBallCamera.cpp @@ -2423,9 +2423,9 @@ void CBallCamera::ApplyCameraHint(CStateManager& mgr) zeus::CVector3f camPos = mgr.GetPlayer().GetBallPosition() + hint->GetHint().GetBallToCam(); if ((hint->GetHint().GetOverrideFlags() & 0x1) != 0) { - float f30 = hint->GetHint().GetBallToCam().toVec2f().magnitude(); - zeus::CVector3f x23c = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized(); - camPos = FindDesiredPosition(f30, hint->GetHint().GetBallToCam().z, x23c, mgr, false); + float distance = hint->GetHint().GetBallToCam().toVec2f().magnitude(); + zeus::CVector3f camToBall = -zeus::CVector3f(hint->GetHint().GetBallToCam().toVec2f()).normalized(); + camPos = FindDesiredPosition(distance, hint->GetHint().GetBallToCam().z, camToBall, mgr, false); } TeleportCamera(zeus::lookAt(camPos, x1d8_lookPos), mgr); break; diff --git a/Runtime/Graphics/CBooRenderer.hpp b/Runtime/Graphics/CBooRenderer.hpp index afdf5e508..bd3741e25 100644 --- a/Runtime/Graphics/CBooRenderer.hpp +++ b/Runtime/Graphics/CBooRenderer.hpp @@ -53,6 +53,8 @@ public: class CBooRenderer : public IRenderer { friend class CBooModel; + friend class CModel; + friend class CGameArea; friend class CWorldTransManager; friend class CMorphBallShadow; diff --git a/Runtime/Graphics/CModel.hpp b/Runtime/Graphics/CModel.hpp index 74381ce09..6d3b7161e 100644 --- a/Runtime/Graphics/CModel.hpp +++ b/Runtime/Graphics/CModel.hpp @@ -73,6 +73,7 @@ struct CBooSurface class CBooModel { friend class CModel; + friend class CGameArea; friend class CBooRenderer; friend class CMetroidModelInstance; friend class CSkinnedModel; @@ -151,6 +152,8 @@ private: void DrawNormalSurfaces(const CModelFlags& flags) const; void DrawSurfaces(const CModelFlags& flags) const; void DrawSurface(const CBooSurface& surf, const CModelFlags& 
flags) const; + void WarmupDrawSurfaces() const; + void WarmupDrawSurface(const CBooSurface& surf) const; static zeus::CVector3f g_PlayerPosition; static float g_ModSeconds; @@ -177,6 +180,7 @@ public: void RemapMaterialData(SShader& shader); bool TryLockTextures() const; void UnlockTextures() const; + void SyncLoadTextures() const; void Touch(int shaderIdx) const; void VerifyCurrentShader(int shaderIdx); boo::IGraphicsBufferD* UpdateUniformData(const CModelFlags& flags, @@ -214,15 +218,8 @@ public: static boo::ITexture* g_shadowMap; static zeus::CTransform g_shadowTexXf; - static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf) - { - g_shadowMap = map; - g_shadowTexXf = texXf; - } - static void DisableShadowMaps() - { - g_shadowMap = nullptr; - } + static void EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf); + static void DisableShadowMaps(); }; class CModel @@ -270,6 +267,9 @@ public: zeus::CVector3f GetPoolNormal(size_t idx) const; void ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf, const std::vector>& vn) const; + + void _WarmupShaders(); + static void WarmupShaders(const SObjectTag& cmdlTag); }; CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag, diff --git a/Runtime/Graphics/CModelBoo.cpp b/Runtime/Graphics/CModelBoo.cpp index d2dba2be0..9bbdf46c1 100644 --- a/Runtime/Graphics/CModelBoo.cpp +++ b/Runtime/Graphics/CModelBoo.cpp @@ -126,6 +126,16 @@ void CBooModel::EnsureViewDepStateCached(const CBooModel& model, const CBooSurfa boo::ITexture* CBooModel::g_shadowMap = nullptr; zeus::CTransform CBooModel::g_shadowTexXf; +void CBooModel::EnableShadowMaps(boo::ITexture* map, const zeus::CTransform& texXf) +{ + g_shadowMap = map; + g_shadowTexXf = texXf; +} +void CBooModel::DisableShadowMaps() +{ + g_shadowMap = nullptr; +} + CBooModel::~CBooModel() { if (m_prev) @@ -488,6 +498,16 @@ void CBooModel::UnlockTextures() const const_cast(this)->x40_24_texturesLoaded = false; } +void CBooModel::SyncLoadTextures() 
const +{ + if (!x40_24_texturesLoaded) + { + for (TCachedToken& tex : const_cast>&>(x1c_textures)) + tex.GetObj(); + const_cast(this)->x40_24_texturesLoaded = true; + } +} + void CBooModel::DrawFlat(ESurfaceSelection sel, EExtendedShader extendedIdx) const { const CBooSurface* surf; @@ -571,6 +591,39 @@ void CBooModel::DrawSurface(const CBooSurface& surf, const CModelFlags& flags) c CGraphics::DrawArrayIndexed(surf.m_data.idxStart, surf.m_data.idxCount); } +void CBooModel::WarmupDrawSurfaces() const +{ + const CBooSurface* surf = x38_firstUnsortedSurface; + while (surf) + { + WarmupDrawSurface(*surf); + surf = surf->m_next; + } + + surf = x3c_firstSortedSurface; + while (surf) + { + WarmupDrawSurface(*surf); + surf = surf->m_next; + } +} + +void CBooModel::WarmupDrawSurface(const CBooSurface& surf) const +{ + if (m_uniUpdateCount > m_instances.size()) + return; + const ModelInstance& inst = m_instances[m_uniUpdateCount-1]; + + for (const std::vector& extendeds : inst.m_shaderDataBindings) + { + for (boo::IShaderDataBinding* binding : extendeds) + { + CGraphics::SetShaderDataBinding(binding); + CGraphics::DrawArrayIndexed(surf.m_data.idxStart, std::min(u32(3), surf.m_data.idxCount)); + } + } +} + void CBooModel::UVAnimationBuffer::ProcessAnimation(u8*& bufOut, const UVAnimation& anim) { zeus::CMatrix4f& texMtxOut = reinterpret_cast(*bufOut); @@ -1041,6 +1094,9 @@ CModel::CModel(std::unique_ptr&& in, u32 /* dataLen */, IObjectStore* stor m_gfxToken = CGraphics::CommitResources([&](boo::IGraphicsDataFactory::Context& ctx) -> bool { + /* Index buffer is always static */ + m_ibo = ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount); + if (!m_hmdlMeta.bankCount) { /* Non-skinned models use static vertex buffers shared with CBooModel instances */ @@ -1056,8 +1112,6 @@ CModel::CModel(std::unique_ptr&& in, u32 /* dataLen */, IObjectStore* stor memmove(m_dynamicVertexData.get(), vboData, vboSz); } - /* Index buffer is always static */ - m_ibo = 
ctx.newStaticBuffer(boo::BufferUse::Index, iboData, 4, m_hmdlMeta.indexCount); return true; }); @@ -1172,6 +1226,36 @@ void CModel::ApplyVerticesCPU(boo::IGraphicsBufferD* vertBuf, vertBuf->unmap(); } +void CModel::_WarmupShaders() +{ + CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity()); + CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState(); + zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix; + zeus::CTransform backupModel = CGraphics::g_GXModelMatrix; + CGraphics::SetModelMatrix(zeus::CTransform::Translate(-m_aabb.center())); + CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f)); + CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f); + CModelFlags defaultFlags; + for (CBooModel::SShader& shader : x18_matSets) + { + GetInstance().RemapMaterialData(shader); + GetInstance().SyncLoadTextures(); + GetInstance().UpdateUniformData(defaultFlags, nullptr, nullptr); + GetInstance().WarmupDrawSurfaces(); + } + CGraphics::SetProjectionState(backupProj); + CGraphics::SetViewPointMatrix(backupViewPoint); + CGraphics::SetModelMatrix(backupModel); + CBooModel::DisableShadowMaps(); +} + +void CModel::WarmupShaders(const SObjectTag& cmdlTag) +{ + TToken model = g_SimplePool->GetObj(cmdlTag); + CModel* modelObj = model.GetObj(); + modelObj->_WarmupShaders(); +} + CFactoryFnReturn FModelFactory(const urde::SObjectTag& tag, std::unique_ptr&& in, u32 len, const urde::CVParamTransfer& vparms, diff --git a/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp b/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp index 6457c5712..ec5090ad0 100644 --- a/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp +++ b/Runtime/Graphics/Shaders/CModelShadersGLSL.cpp @@ -93,7 +93,7 @@ static const char* LightingShadowGLSL = " lights[0].angAtt[1] * angDot +\n" " lights[0].angAtt[0];\n" " ret += lights[0].color * clamp(angAtt, 0.0, 1.0) * att * clamp(dot(normalize(-delta), mvNormIn.xyz), 0.0, 1.0) *\n" -" 
texture(extTex0, vtf.extTcgs[0]).r;\n" +" texture(extTex7, vtf.extTcgs[0]).r;\n" " \n" " for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n" " {\n" diff --git a/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp b/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp index c5f113295..0a2b87d4e 100644 --- a/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp +++ b/Runtime/Graphics/Shaders/CModelShadersHLSL.cpp @@ -92,7 +92,7 @@ static const char* LightingShadowHLSL = " lights[0].angAtt[1] * angDot +\n" " lights[0].angAtt[0];\n" " ret += lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n" -" extTex0.Sample(clampSamp, vtf.extTcgs[0]).r;\n" +" extTex7.Sample(clampSamp, vtf.extTcgs[0]).r;\n" " \n" " for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n" " {\n" diff --git a/Runtime/Graphics/Shaders/CModelShadersMetal.cpp b/Runtime/Graphics/Shaders/CModelShadersMetal.cpp index bc79754ec..57a7805fd 100644 --- a/Runtime/Graphics/Shaders/CModelShadersMetal.cpp +++ b/Runtime/Graphics/Shaders/CModelShadersMetal.cpp @@ -79,7 +79,7 @@ static const char* LightingShadowMetal = "};\n" "\n" "static float4 EXTLightingShadowFunc(constant LightingUniform& lu, float4 mvPosIn, float4 mvNormIn,\n" -" thread VertToFrag& vtf, texture2d extTex0)\n" +" thread VertToFrag& vtf, texture2d extTex7)\n" "{\n" " float4 ret = lu.ambient;\n" " \n" @@ -93,7 +93,7 @@ static const char* LightingShadowMetal = " lu.lights[0].angAtt[1] * angDot +\n" " lu.lights[0].angAtt[0];\n" " ret += lu.lights[0].color * saturate(angAtt) * att * saturate(dot(normalize(-delta), mvNormIn.xyz)) *\n" -" extTex0.sample(clampSamp, vtf.extTcgs0);\n" +" extTex7.sample(clampSamp, vtf.extTcgs0);\n" " \n" " for (int i=1 ; i<" _XSTR(URDE_MAX_LIGHTS) " ; ++i)\n" " {\n" diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp index e9f59e739..88fe99026 100644 --- a/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp +++ 
b/Runtime/Graphics/Shaders/CParticleSwooshShadersGLSL.cpp @@ -27,7 +27,7 @@ BOO_GLSL_BINDING_HEAD "void main()\n" "{\n" " vtf.color = colorIn;\n" -" vtf.uv = uvIn;\n" +" vtf.uv = uvIn.xy;\n" " gl_Position = mvp * vec4(posIn.xyz, 1.0);\n" "}\n"; diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp index 3d7f1e985..64e1d3810 100644 --- a/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp +++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersHLSL.cpp @@ -29,7 +29,7 @@ static const char* VS = "{\n" " VertToFrag vtf;\n" " vtf.color = v.colorIn;\n" -" vtf.uv = v.uvIn;\n" +" vtf.uv = v.uvIn.xy;\n" " vtf.pos = mul(mvp, float4(v.posIn.xyz, 1.0));\n" " return vtf;\n" "}\n"; diff --git a/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp b/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp index 0d1ef934c..f775ff8d9 100644 --- a/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp +++ b/Runtime/Graphics/Shaders/CParticleSwooshShadersMetal.cpp @@ -31,7 +31,7 @@ static const char* VS = "{\n" " VertToFrag vtf;\n" " vtf.color = v.colorIn;\n" -" vtf.uv = v.uvIn;\n" +" vtf.uv = v.uvIn.xy;\n" " vtf.pos = su.mvp * float4(v.posIn.xyz, 1.0);\n" " return vtf;\n" "}\n"; diff --git a/Runtime/IMain.hpp b/Runtime/IMain.hpp index 7fd01bb8a..eb9671202 100644 --- a/Runtime/IMain.hpp +++ b/Runtime/IMain.hpp @@ -40,9 +40,10 @@ public: virtual void Draw()=0; virtual bool Proc()=0; virtual void Shutdown()=0; - virtual boo::IWindow* GetMainWindow() const=0; + virtual boo::IWindow* GetMainWindow() const= 0; virtual void SetFlowState(EFlowState) = 0; virtual size_t GetExpectedIdSize() const = 0; + virtual void WarmupShaders() = 0; }; } diff --git a/Runtime/MP1/MP1.cpp b/Runtime/MP1/MP1.cpp index 265b189ca..f06ef2519 100644 --- a/Runtime/MP1/MP1.cpp +++ b/Runtime/MP1/MP1.cpp @@ -17,14 +17,14 @@ #include "Graphics/Shaders/CFluidPlaneShader.hpp" #include "Graphics/Shaders/CAABoxShader.hpp" #include 
"Graphics/Shaders/CWorldShadowShader.hpp" -#include "Character/CCharLayoutInfo.hpp" +#include "Graphics/Shaders/CParticleSwooshShaders.hpp" #include "Audio/CStreamAudioManager.hpp" #include "CGBASupport.hpp" -#include "CBasics.hpp" #include "Audio/CAudioGroupSet.hpp" namespace urde { +URDE_DECL_SPECIALIZE_SHADER(CParticleSwooshShaders) URDE_DECL_SPECIALIZE_SHADER(CThermalColdFilter) URDE_DECL_SPECIALIZE_SHADER(CThermalHotFilter) URDE_DECL_SPECIALIZE_SHADER(CSpaceWarpFilter) @@ -223,6 +223,7 @@ CMain::BooSetter::BooSetter(boo::IGraphicsDataFactory* factory, boo::ITextureR* spareTex) { CGraphics::InitializeBoo(factory, cmdQ, spareTex); + TShader::Initialize(); TShader::Initialize(); TShader::Initialize(); TShader::Initialize(); @@ -318,8 +319,40 @@ void CMain::Init(const hecl::Runtime::FileStoreManager& storeMgr, //CStreamAudioManager::Start(false, "Audio/rui_samusL.dsp|Audio/rui_samusR.dsp", 0x7f, true, 1.f, 1.f); } +static logvisor::Module WarmupLog("Shader Warmup"); + +void CMain::WarmupShaders() +{ + if (m_warmupTags.size()) + return; + + size_t modelCount = 0; + g_ResFactory->EnumerateResources([&](const SObjectTag& tag) + { + if (tag.type == FOURCC('CMDL') || tag.type == FOURCC('MREA')) + ++modelCount; + return true; + }); + m_warmupTags.reserve(modelCount); + + g_ResFactory->EnumerateResources([&](const SObjectTag& tag) + { + if (tag.type == FOURCC('CMDL') || tag.type == FOURCC('MREA')) + m_warmupTags.push_back(tag); + return true; + }); + + m_warmupIt = m_warmupTags.begin(); + + WarmupLog.report(logvisor::Info, "Began warmup of %" PRISize " objects", modelCount); +} + bool CMain::Proc() { + // Warmup cycle overrides update + if (m_warmupTags.size()) + return false; + CGBASupport::GlobalPoll(); x164_archSupport->UpdateTicks(); x164_archSupport->Update(); @@ -340,6 +373,33 @@ bool CMain::Proc() void CMain::Draw() { + // Warmup cycle overrides draw + if (m_warmupTags.size()) + { + auto startTime = std::chrono::steady_clock::now(); + while (m_warmupIt != 
m_warmupTags.end()) + { + WarmupLog.report(logvisor::Info, "Warming %.4s %08X", m_warmupIt->type.getChars(), m_warmupIt->id.Value()); + + if (m_warmupIt->type == FOURCC('CMDL')) + CModel::WarmupShaders(*m_warmupIt); + else if (m_warmupIt->type == FOURCC('MREA')) + CGameArea::WarmupShaders(*m_warmupIt); + ++m_warmupIt; + + // Approximately 3/4 frame of warmups + auto curTime = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(curTime - startTime).count() > 12) + break; + } + if (m_warmupIt == m_warmupTags.end()) + { + m_warmupTags = std::vector(); + WarmupLog.report(logvisor::Info, "Finished warmup"); + } + return; + } + x164_archSupport->Draw(); } @@ -359,6 +419,7 @@ void CMain::Shutdown() { x164_archSupport.reset(); ShutdownSubsystems(); + TShader::Shutdown(); TShader::Shutdown(); TShader::Shutdown(); TShader::Shutdown(); diff --git a/Runtime/MP1/MP1.hpp b/Runtime/MP1/MP1.hpp index b9ee98d07..7b3c90232 100644 --- a/Runtime/MP1/MP1.hpp +++ b/Runtime/MP1/MP1.hpp @@ -240,6 +240,10 @@ private: boo::IWindow* m_mainWindow = nullptr; + // Warmup state + std::vector m_warmupTags; + std::vector::iterator m_warmupIt; + void InitializeSubsystems(const hecl::Runtime::FileStoreManager& storeMgr); public: @@ -259,6 +263,7 @@ public: boo::IWindow* window, boo::IAudioVoiceEngine* voiceEngine, amuse::IBackendVoiceAllocator& backend); + void WarmupShaders(); bool Proc(); void Draw(); void Shutdown(); diff --git a/Runtime/World/CGameArea.cpp b/Runtime/World/CGameArea.cpp index 9bcc0937a..80b611b77 100644 --- a/Runtime/World/CGameArea.cpp +++ b/Runtime/World/CGameArea.cpp @@ -411,6 +411,39 @@ CGameArea::CGameArea(CInputStream& in, int idx, int mlvlVersion) xec_totalResourcesSize += g_ResFactory->ResourceSize(SObjectTag{FOURCC('MREA'), x84_mrea}); } +CGameArea::CGameArea(CAssetId mreaId) +: x84_mrea(mreaId) +{ + while (StartStreamingMainArea()) {} + + for (auto& req : xf8_loadTransactions) + req->WaitForComplete(); + + MREAHeader header = VerifyHeader(); + 
x12c_postConstructed->x4c_insts.reserve(header.modelCount); + + FillInStaticGeometry(); + + CBooModel::EnableShadowMaps(g_Renderer->x220_sphereRamp, zeus::CTransform::Identity()); + CGraphics::CProjectionState backupProj = CGraphics::GetProjectionState(); + zeus::CTransform backupViewPoint = CGraphics::g_ViewMatrix; + zeus::CTransform backupModel = CGraphics::g_GXModelMatrix; + CGraphics::SetViewPointMatrix(zeus::CTransform::Translate(0.f, -2048.f, 0.f)); + CGraphics::SetOrtho(-2048.f, 2048.f, 2048.f, -2048.f, 0.f, 4096.f); + CModelFlags defaultFlags; + for (CMetroidModelInstance& inst : x12c_postConstructed->x4c_insts) + { + CGraphics::SetModelMatrix(zeus::CTransform::Translate(-inst.x34_aabb.center())); + inst.m_instance->SyncLoadTextures(); + inst.m_instance->UpdateUniformData(defaultFlags, nullptr, nullptr); + inst.m_instance->WarmupDrawSurfaces(); + } + CGraphics::SetProjectionState(backupProj); + CGraphics::SetViewPointMatrix(backupViewPoint); + CGraphics::SetModelMatrix(backupModel); + CBooModel::DisableShadowMaps(); +} + bool CGameArea::IGetScriptingMemoryAlways() const { return false; @@ -1192,5 +1225,10 @@ bool CGameArea::CAreaObjectList::IsQualified(const CEntity& ent) { return (ent.GetAreaIdAlways() == x200c_areaIdx); } +void CGameArea::WarmupShaders(const SObjectTag& mreaTag) +{ + // Calling this version of the constructor performs warmup implicitly + CGameArea area(mreaTag.id); +} } diff --git a/Runtime/World/CGameArea.hpp b/Runtime/World/CGameArea.hpp index 78c02ac80..cbe2bb63f 100644 --- a/Runtime/World/CGameArea.hpp +++ b/Runtime/World/CGameArea.hpp @@ -289,6 +289,7 @@ private: public: CGameArea(CInputStream& in, int idx, int mlvlVersion); + CGameArea(CAssetId mreaId); // Warmup constructor bool IsFinishedOccluding() const; void ReadDependencyList(); @@ -372,6 +373,8 @@ public: CObjectList& GetAreaObjects() const { return *GetPostConstructed()->x10c0_areaObjs.get(); } CGameArea* GetNext() const { return x130_next; } + + static void 
WarmupShaders(const SObjectTag& mreaTag); }; } diff --git a/gmm/gmm.h b/gmm/gmm.h new file mode 100644 index 000000000..feeb299fa --- /dev/null +++ b/gmm/gmm.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm.h + @author Yves Renard + @date October 13, 2002. + @brief Include common gmm files. 
+*/ +#ifndef GMM_H__ +#define GMM_H__ + +#include "gmm_kernel.h" +#include "gmm_dense_lu.h" +#include "gmm_dense_qr.h" + +#include "gmm_iter_solvers.h" +#include "gmm_condition_number.h" +#include "gmm_inoutput.h" + +#include "gmm_lapack_interface.h" +#include "gmm_superlu_interface.h" +#include "gmm_range_basis.h" + +#include "gmm_domain_decomp.h" + +#endif // GMM_H__ diff --git a/gmm/gmm_MUMPS_interface.h b/gmm/gmm_MUMPS_interface.h new file mode 100644 index 000000000..bc68777fc --- /dev/null +++ b/gmm/gmm_MUMPS_interface.h @@ -0,0 +1,355 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard, Julien Pommier + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_MUMPS_interface.h + @author Yves Renard , + @author Julien Pommier + @date December 8, 2005. + @brief Interface with MUMPS (LU direct solver for sparse matrices). +*/ +#if defined(GMM_USES_MUMPS) || defined(HAVE_DMUMPS_C_H) + +#ifndef GMM_MUMPS_INTERFACE_H +#define GMM_MUMPS_INTERFACE_H + +#include "gmm_kernel.h" + + +extern "C" { + +#include +#undef F_INT +#undef F_DOUBLE +#undef F_DOUBLE2 +#include +#undef F_INT +#undef F_DOUBLE +#undef F_DOUBLE2 +#include +#undef F_INT +#undef F_DOUBLE +#undef F_DOUBLE2 +#include +#undef F_INT +#undef F_DOUBLE +#undef F_DOUBLE2 + +} + +namespace gmm { + +#define ICNTL(I) icntl[(I)-1] +#define INFO(I) info[(I)-1] +#define INFOG(I) infog[(I)-1] +#define RINFOG(I) rinfog[(I)-1] + + template struct ij_sparse_matrix { + std::vector irn; + std::vector jcn; + std::vector a; + bool sym; + + template void store(const L& l, size_type i) { + typename linalg_traits::const_iterator it = vect_const_begin(l), + ite = vect_const_end(l); + for (; it != ite; ++it) { + int ir = (int)i + 1, jc = (int)it.index() + 1; + if (*it != T(0) && (!sym || ir >= jc)) + { irn.push_back(ir); jcn.push_back(jc); a.push_back(*it); } + } + } + + template void build_from(const L& l, row_major) { + for (size_type i = 0; i < mat_nrows(l); ++i) + store(mat_const_row(l, i), i); + } + + template void build_from(const L& l, col_major) { + for (size_type i = 0; i < mat_ncols(l); ++i) + store(mat_const_col(l, i), i); + irn.swap(jcn); 
+ } + + template ij_sparse_matrix(const L& A, bool sym_) { + size_type nz = nnz(A); + sym = sym_; + irn.reserve(nz); jcn.reserve(nz); a.reserve(nz); + build_from(A, typename principal_orientation_type::sub_orientation>::potype()); + } + }; + + /* ********************************************************************* */ + /* MUMPS solve interface */ + /* ********************************************************************* */ + + template struct mumps_interf {}; + + template <> struct mumps_interf { + typedef SMUMPS_STRUC_C MUMPS_STRUC_C; + typedef float value_type; + + static void mumps_c(MUMPS_STRUC_C &id) { smumps_c(&id); } + }; + + template <> struct mumps_interf { + typedef DMUMPS_STRUC_C MUMPS_STRUC_C; + typedef double value_type; + static void mumps_c(MUMPS_STRUC_C &id) { dmumps_c(&id); } + }; + + template <> struct mumps_interf > { + typedef CMUMPS_STRUC_C MUMPS_STRUC_C; + typedef mumps_complex value_type; + static void mumps_c(MUMPS_STRUC_C &id) { cmumps_c(&id); } + }; + + template <> struct mumps_interf > { + typedef ZMUMPS_STRUC_C MUMPS_STRUC_C; + typedef mumps_double_complex value_type; + static void mumps_c(MUMPS_STRUC_C &id) { zmumps_c(&id); } + }; + + + template + static inline bool mumps_error_check(MUMPS_STRUCT &id) { + if (id.INFO(1) < 0) { + switch (id.INFO(1)) { + case -2: + GMM_ASSERT1(false, "Solve with MUMPS failed: NZ = " << id.INFO(2) + << " is out of range"); + case -6 : case -10 : + GMM_WARNING1("Solve with MUMPS failed: matrix is singular"); + return false; + case -9: + GMM_ASSERT1(false, "Solve with MUMPS failed: error " + << id.INFO(1) << ", increase ICNTL(14)"); + case -13 : + GMM_ASSERT1(false, "Solve with MUMPS failed: not enough memory"); + default : + GMM_ASSERT1(false, "Solve with MUMPS failed with error " + << id.INFO(1)); + } + } + return true; + } + + + /** MUMPS solve interface + * Works only with sparse or skyline matrices + */ + template + bool MUMPS_solve(const MAT &A, const VECTX &X_, const VECTB &B, + bool sym = false, 
bool distributed = false) { + VECTX &X = const_cast(X_); + + typedef typename linalg_traits::value_type T; + typedef typename mumps_interf::value_type MUMPS_T; + GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix"); + + std::vector rhs(gmm::vect_size(B)); gmm::copy(B, rhs); + + ij_sparse_matrix AA(A, sym); + + const int JOB_INIT = -1; + const int JOB_END = -2; + const int USE_COMM_WORLD = -987654; + + typename mumps_interf::MUMPS_STRUC_C id; + + int rank(0); +#ifdef GMM_USES_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#endif + + id.job = JOB_INIT; + id.par = 1; + id.sym = sym ? 2 : 0; + id.comm_fortran = USE_COMM_WORLD; + mumps_interf::mumps_c(id); + + if (rank == 0 || distributed) { + id.n = int(gmm::mat_nrows(A)); + if (distributed) { + id.nz_loc = int(AA.irn.size()); + id.irn_loc = &(AA.irn[0]); + id.jcn_loc = &(AA.jcn[0]); + id.a_loc = (MUMPS_T*)(&(AA.a[0])); + } else { + id.nz = int(AA.irn.size()); + id.irn = &(AA.irn[0]); + id.jcn = &(AA.jcn[0]); + id.a = (MUMPS_T*)(&(AA.a[0])); + } + if (rank == 0) + id.rhs = (MUMPS_T*)(&(rhs[0])); + } + + id.ICNTL(1) = -1; // output stream for error messages + id.ICNTL(2) = -1; // output stream for other messages + id.ICNTL(3) = -1; // output stream for global information + id.ICNTL(4) = 0; // verbosity level + + if (distributed) + id.ICNTL(5) = 0; // assembled input matrix (default) + + id.ICNTL(14) += 80; /* small boost to the workspace size as we have encountered some problem + who did not fit in the default settings of mumps.. 
+ by default, ICNTL(14) = 15 or 20 + */ + //cout << "ICNTL(14): " << id.ICNTL(14) << "\n"; + + if (distributed) + id.ICNTL(18) = 3; // strategy for distributed input matrix + + // id.ICNTL(22) = 1; /* enables out-of-core support */ + + id.job = 6; + mumps_interf::mumps_c(id); + bool ok = mumps_error_check(id); + + id.job = JOB_END; + mumps_interf::mumps_c(id); + +#ifdef GMM_USES_MPI + MPI_Bcast(&(rhs[0]),id.n,gmm::mpi_type(T()),0,MPI_COMM_WORLD); +#endif + + gmm::copy(rhs, X); + + return ok; + + } + + + + /** MUMPS solve interface for distributed matrices + * Works only with sparse or skyline matrices + */ + template + bool MUMPS_distributed_matrix_solve(const MAT &A, const VECTX &X_, + const VECTB &B, bool sym = false) { + return MUMPS_solve(A, X_, B, sym, true); + } + + + + template + inline T real_or_complex(std::complex a) { return a.real(); } + template + inline T real_or_complex(T &a) { return a; } + + + /** Evaluate matrix determinant with MUMPS + * Works only with sparse or skyline matrices + */ + template ::value_type> + T MUMPS_determinant(const MAT &A, int &exponent, + bool sym = false, bool distributed = false) { + exponent = 0; + typedef typename mumps_interf::value_type MUMPS_T; + typedef typename number_traits::magnitude_type R; + GMM_ASSERT2(gmm::mat_nrows(A) == gmm::mat_ncols(A), "Non-square matrix"); + + ij_sparse_matrix AA(A, sym); + + const int JOB_INIT = -1; + const int JOB_END = -2; + const int USE_COMM_WORLD = -987654; + + typename mumps_interf::MUMPS_STRUC_C id; + + int rank(0); +#ifdef GMM_USES_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); +#endif + + id.job = JOB_INIT; + id.par = 1; + id.sym = sym ? 
2 : 0; + id.comm_fortran = USE_COMM_WORLD; + mumps_interf::mumps_c(id); + + if (rank == 0 || distributed) { + id.n = int(gmm::mat_nrows(A)); + if (distributed) { + id.nz_loc = int(AA.irn.size()); + id.irn_loc = &(AA.irn[0]); + id.jcn_loc = &(AA.jcn[0]); + id.a_loc = (MUMPS_T*)(&(AA.a[0])); + } else { + id.nz = int(AA.irn.size()); + id.irn = &(AA.irn[0]); + id.jcn = &(AA.jcn[0]); + id.a = (MUMPS_T*)(&(AA.a[0])); + } + } + + id.ICNTL(1) = -1; // output stream for error messages + id.ICNTL(2) = -1; // output stream for other messages + id.ICNTL(3) = -1; // output stream for global information + id.ICNTL(4) = 0; // verbosity level + + if (distributed) + id.ICNTL(5) = 0; // assembled input matrix (default) + +// id.ICNTL(14) += 80; // small boost to the workspace size + + if (distributed) + id.ICNTL(18) = 3; // strategy for distributed input matrix + + id.ICNTL(31) = 1; // only factorization, no solution to follow + id.ICNTL(33) = 1; // request determinant calculation + + id.job = 4; // abalysis (job=1) + factorization (job=2) + mumps_interf::mumps_c(id); + mumps_error_check(id); + + T det = real_or_complex(std::complex(id.RINFOG(12),id.RINFOG(13))); + exponent = id.INFOG(34); + + id.job = JOB_END; + mumps_interf::mumps_c(id); + + return det; + } + +#undef ICNTL +#undef INFO +#undef INFOG +#undef RINFOG + +} + + +#endif // GMM_MUMPS_INTERFACE_H + +#endif // GMM_USES_MUMPS diff --git a/gmm/gmm_algobase.h b/gmm/gmm_algobase.h new file mode 100644 index 000000000..64a859da1 --- /dev/null +++ b/gmm/gmm_algobase.h @@ -0,0 +1,228 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2000-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version 
along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/** @file gmm_algobase.h + @author Yves Renard + @date September 28, 2000. + @brief Miscelleanous algorithms on containers. +*/ + +#ifndef GMM_ALGOBASE_H__ +#define GMM_ALGOBASE_H__ +#include "gmm_std.h" +#include "gmm_except.h" +#include + +namespace gmm { + + /* ********************************************************************* */ + /* Definitition de classes de comparaison. */ + /* retournant un int. */ + /* ********************************************************************* */ + + template + struct less : public std::binary_function { + inline int operator()(const T& x, const T& y) const + { return (x < y) ? -1 : ((y < x) ? 
1 : 0); } + }; + + template<> struct less : public std::binary_function + { int operator()(int x, int y) const { return x-y; } }; + template<> struct less : public std::binary_function + { int operator()(char x, char y) const { return int(x-y); } }; + template<> struct less : public std::binary_function + { int operator()(short x, short y) const { return int(x-y); } }; + template<> struct less + : public std::binary_function { + int operator()(unsigned char x, unsigned char y) const + { return int(x)-int(y); } + }; + + + template + struct greater : public std::binary_function { + inline int operator()(const T& x, const T& y) const + { return (y < x) ? -1 : ((x < y) ? 1 : 0); } + }; + + template<> struct greater : public std::binary_function + { int operator()(int x, int y) const { return y-x; } }; + template<> struct greater : public std::binary_function + { int operator()(char x, char y) const { return int(y-x); } }; + template<> struct greater + : public std::binary_function + { int operator()(short x, short y) const { return int(y-x); } }; + template<> struct greater + : public std::binary_function { + int operator()(unsigned char x, unsigned char y) const + { return int(y)-int(x); } + }; + + template inline T my_abs(T a) { return (a < T(0)) ? 
T(-a) : a; } + + template + struct approx_less : public std::binary_function { + double eps; + inline int operator()(const T &x, const T &y) const + { if (my_abs(x - y) <= eps) return 0; if (x < y) return -1; return 1; } + approx_less(double e = 1E-13) { eps = e; } + }; + + template + struct approx_greater : public std::binary_function { + double eps; + inline int operator()(const T &x, const T &y) const + { if (my_abs(x - y) <= eps) return 0; if (x > y) return -1; return 1; } + approx_greater(double e = 1E-13) { eps = e; } + }; + + template + int lexicographical_compare(ITER1 b1, const ITER1 &e1, + ITER2 b2, const ITER2 &e2, const COMP &c) { + int i; + for ( ; b1 != e1 && b2 != e2; ++b1, ++b2) + if ((i = c(*b1, *b2)) != 0) return i; + if (b1 != e1) return 1; + if (b2 != e2) return -1; + return 0; + } + + template > + struct lexicographical_less : public std::binary_function + { + COMP c; + int operator()(const CONT &x, const CONT &y) const { + return gmm::lexicographical_compare(x.begin(), x.end(), + y.begin(), y.end(), c); + } + lexicographical_less(const COMP &d = COMP()) { c = d; } + }; + + template > + struct lexicographical_greater + : public std::binary_function { + COMP c; + int operator()(const CONT &x, const CONT &y) const { + return -gmm::lexicographical_compare(x.begin(), x.end(), + y.begin(), y.end(), c); + } + lexicographical_greater(const COMP &d = COMP()) { c = d; } + }; + + + /* ********************************************************************* */ + /* "Virtual" iterators on sequences. */ + /* The class T represent a class of sequence. 
*/ + /* ********************************************************************* */ + + template struct sequence_iterator { + + typedef T value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef std::forward_iterator_tag iterator_category; + + T Un; + + sequence_iterator(T U0 = T(0)) { Un = U0; } + + sequence_iterator &operator ++() + { ++Un; return *this; } + sequence_iterator operator ++(int) + { sequence_iterator tmp = *this; (*this)++; return tmp; } + + const_reference operator *() const { return Un; } + reference operator *() { return Un; } + + bool operator ==(const sequence_iterator &i) const { return (i.Un==Un);} + bool operator !=(const sequence_iterator &i) const { return (i.Un!=Un);} + }; + + /* ********************************************************************* */ + /* generic algorithms. */ + /* ********************************************************************* */ + + template + ITER2 copy_n(ITER1 first, SIZE count, ITER2 result) { + for ( ; count > 0; --count, ++first, ++result) *result = *first; + return result; + } + + template + typename std::iterator_traits::value_type + mean_value(ITER first, const ITER &last) { + GMM_ASSERT2(first != last, "mean value of empty container"); + size_t n = 1; + typename std::iterator_traits::value_type res = *first++; + while (first != last) { res += *first; ++first; ++n; } + res /= float(n); + return res; + } + + template + typename CONT::value_type + mean_value(const CONT &c) { return mean_value(c.begin(), c.end()); } + + template /* hum ... 
*/ + void minmax_box(typename std::iterator_traits::value_type &pmin, + typename std::iterator_traits::value_type &pmax, + ITER first, const ITER &last) { + typedef typename std::iterator_traits::value_type PT; + if (first != last) { pmin = pmax = *first; ++first; } + while (first != last) { + typename PT::const_iterator b = (*first).begin(), e = (*first).end(); + typename PT::iterator b1 = pmin.begin(), b2 = pmax.begin(); + while (b != e) + { *b1 = std::min(*b1, *b); *b2 = std::max(*b2, *b); ++b; ++b1; ++b2; } + } + } + + template struct sorted_indexes_aux { + const VEC &v; + public: + sorted_indexes_aux(const VEC& v_) : v(v_) {} + template + bool operator()(const IDX &ia, const IDX &ib) const + { return v[ia] < v[ib]; } + }; + + template + void sorted_indexes(const VEC &v, IVEC &iv) { + iv.clear(); iv.resize(v.size()); + for (size_t i=0; i < v.size(); ++i) iv[i] = i; + std::sort(iv.begin(), iv.end(), sorted_indexes_aux(v)); + } + +} + + +#endif /* GMM_ALGOBASE_H__ */ diff --git a/gmm/gmm_blas.h b/gmm/gmm_blas.h new file mode 100644 index 000000000..b23735559 --- /dev/null +++ b/gmm/gmm_blas.h @@ -0,0 +1,2221 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_blas.h + @author Yves Renard + @date October 13, 2002. + @brief Basic linear algebra functions. +*/ + +#ifndef GMM_BLAS_H__ +#define GMM_BLAS_H__ + +#include "gmm_scaled.h" +#include "gmm_transposed.h" +#include "gmm_conjugated.h" + +namespace gmm { + + /* ******************************************************************** */ + /* */ + /* Generic algorithms */ + /* */ + /* ******************************************************************** */ + + + /* ******************************************************************** */ + /* Miscellaneous */ + /* ******************************************************************** */ + + /** clear (fill with zeros) a vector or matrix. */ + template inline void clear(L &l) + { linalg_traits::do_clear(l); } + /** @cond DOXY_SHOW_ALL_FUNCTIONS + skip all these redundant definitions in doxygen documentation.. + */ + template inline void clear(const L &l) + { linalg_traits::do_clear(linalg_const_cast(l)); } + + ///@endcond + /** count the number of non-zero entries of a vector or matrix. 
*/ template inline size_type nnz(const L& l) + { return nnz(l, typename linalg_traits::linalg_type()); } + + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template inline size_type nnz(const L& l, abstract_vector) { + auto it = vect_const_begin(l), ite = vect_const_end(l); + size_type res(0); + for (; it != ite; ++it) ++res; + return res; + } + + template inline size_type nnz(const L& l, abstract_matrix) { + return nnz(l, typename principal_orientation_type::sub_orientation>::potype()); + } + + template inline size_type nnz(const L& l, row_major) { + size_type res(0); + for (size_type i = 0; i < mat_nrows(l); ++i) + res += nnz(mat_const_row(l, i)); + return res; + } + + template inline size_type nnz(const L& l, col_major) { + size_type res(0); + for (size_type i = 0; i < mat_ncols(l); ++i) + res += nnz(mat_const_col(l, i)); + return res; + } + + ///@endcond + + + /** fill a vector or matrix with x. */ + template inline + void fill(L& l, typename gmm::linalg_traits::value_type x) { + typedef typename gmm::linalg_traits::value_type T; + if (x == T(0)) gmm::clear(l); + fill(l, x, typename linalg_traits::linalg_type()); + } + + template inline + void fill(const L& l, typename gmm::linalg_traits::value_type x) { + fill(linalg_const_cast(l), x); + } + + template inline // to be optimized for dense vectors ... + void fill(L& l, typename gmm::linalg_traits::value_type x, + abstract_vector) { + for (size_type i = 0; i < vect_size(l); ++i) l[i] = x; + } + + template inline // to be optimized for dense matrices ... + void fill(L& l, typename gmm::linalg_traits::value_type x, + abstract_matrix) { + for (size_type i = 0; i < mat_nrows(l); ++i) + for (size_type j = 0; j < mat_ncols(l); ++j) + l(i,j) = x; + } + + /** fill a vector or matrix with random value (uniform [-1,1]). 
*/ + template inline void fill_random(L& l) + { fill_random(l, typename linalg_traits::linalg_type()); } + + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template inline void fill_random(const L& l) { + fill_random(linalg_const_cast(l), + typename linalg_traits::linalg_type()); + } + + template inline void fill_random(L& l, abstract_vector) { + for (size_type i = 0; i < vect_size(l); ++i) + l[i] = gmm::random(typename linalg_traits::value_type()); + } + + template inline void fill_random(L& l, abstract_matrix) { + for (size_type i = 0; i < mat_nrows(l); ++i) + for (size_type j = 0; j < mat_ncols(l); ++j) + l(i,j) = gmm::random(typename linalg_traits::value_type()); + } + + ///@endcond + /** fill a vector or matrix with random value. + @param l a vector or matrix. + @param cfill probability of a non-zero value. + */ + template inline void fill_random(L& l, double cfill) + { fill_random(l, cfill, typename linalg_traits::linalg_type()); } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline void fill_random(const L& l, double cfill) { + fill_random(linalg_const_cast(l), cfill, + typename linalg_traits::linalg_type()); + } + + template inline + void fill_random(L& l, double cfill, abstract_vector) { + typedef typename linalg_traits::value_type T; + size_type ntot = std::min(vect_size(l), + size_type(double(vect_size(l))*cfill) + 1); + for (size_type nb = 0; nb < ntot;) { + size_type i = gmm::irandom(vect_size(l)); + if (l[i] == T(0)) { + l[i] = gmm::random(typename linalg_traits::value_type()); + ++nb; + } + } + } + + template inline + void fill_random(L& l, double cfill, abstract_matrix) { + fill_random(l, cfill, typename principal_orientation_type::sub_orientation>::potype()); + } + + template inline + void fill_random(L& l, double cfill, row_major) { + for (size_type i=0; i < mat_nrows(l); ++i) fill_random(mat_row(l,i),cfill); + } + + template inline + void fill_random(L& l, double cfill, col_major) { + for (size_type j=0; j < mat_ncols(l); ++j) 
fill_random(mat_col(l,j),cfill); + } + + /* resize a vector */ + template inline + void resize(V &v, size_type n, linalg_false) + { linalg_traits::resize(v, n); } + + template inline + void resize(V &, size_type , linalg_modifiable) + { GMM_ASSERT1(false, "You cannot resize a reference"); } + + template inline + void resize(V &, size_type , linalg_const) + { GMM_ASSERT1(false, "You cannot resize a reference"); } + + ///@endcond + /** resize a vector. */ + template inline + void resize(V &v, size_type n) { + resize(v, n, typename linalg_traits::is_reference()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + /** resize a matrix **/ + template inline + void resize(M &v, size_type m, size_type n, linalg_false) { + linalg_traits::resize(v, m, n); + } + + template inline + void resize(M &, size_type, size_type, linalg_modifiable) + { GMM_ASSERT1(false, "You cannot resize a reference"); } + + template inline + void resize(M &, size_type, size_type, linalg_const) + { GMM_ASSERT1(false, "You cannot resize a reference"); } + + ///@endcond + /** resize a matrix */ + template inline + void resize(M &v, size_type m, size_type n) + { resize(v, m, n, typename linalg_traits::is_reference()); } + ///@cond + + template inline + void reshape(M &v, size_type m, size_type n, linalg_false) + { linalg_traits::reshape(v, m, n); } + + template inline + void reshape(M &, size_type, size_type, linalg_modifiable) + { GMM_ASSERT1(false, "You cannot reshape a reference"); } + + template inline + void reshape(M &, size_type, size_type, linalg_const) + { GMM_ASSERT1(false, "You cannot reshape a reference"); } + + ///@endcond + /** reshape a matrix */ + template inline + void reshape(M &v, size_type m, size_type n) + { reshape(v, m, n, typename linalg_traits::is_reference()); } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + + /* ******************************************************************** */ + /* Scalar product */ + /* ******************************************************************** */ + + 
///@endcond + /** scalar product between two vectors */ + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2) { + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch, " + << vect_size(v1) << " !=" << vect_size(v2)); + return vect_sp(v1, v2, + typename linalg_traits::storage_type(), + typename linalg_traits::storage_type()); + } + + /** scalar product between two vectors, using a matrix. + @param ps the matrix of the scalar product. + @param v1 the first vector + @param v2 the second vector + */ + template inline + typename strongest_value_type3::value_type + vect_sp(const MATSP &ps, const V1 &v1, const V2 &v2) { + return vect_sp_with_mat(ps, v1, v2, + typename linalg_traits::sub_orientation()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline + typename strongest_value_type3::value_type + vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2, row_major) { + return vect_sp_with_matr(ps, v1, v2, + typename linalg_traits::storage_type()); + } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_sparse) { + GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) && + vect_size(v2) == mat_nrows(ps), "dimensions mismatch"); + size_type nr = mat_nrows(ps); + typename linalg_traits::const_iterator + it = vect_const_begin(v2), ite = vect_const_end(v2); + typename strongest_value_type3::value_type res(0); + for (; it != ite; ++it) + res += vect_sp(mat_const_row(ps, it.index()), v1)* (*it); + return res; + } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_skyline) + { return vect_sp_with_matr(ps, v1, v2, abstract_sparse()); } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_dense) { + GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) && + vect_size(v2) 
== mat_nrows(ps), "dimensions mismatch"); + typename linalg_traits::const_iterator + it = vect_const_begin(v2), ite = vect_const_end(v2); + typename strongest_value_type3::value_type res(0); + for (size_type i = 0; it != ite; ++i, ++it) + res += vect_sp(mat_const_row(ps, i), v1) * (*it); + return res; + } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,row_and_col) + { return vect_sp_with_mat(ps, v1, v2, row_major()); } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,col_major){ + return vect_sp_with_matc(ps, v1, v2, + typename linalg_traits::storage_type()); + } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_sparse) { + GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) && + vect_size(v2) == mat_nrows(ps), "dimensions mismatch"); + typename linalg_traits::const_iterator + it = vect_const_begin(v1), ite = vect_const_end(v1); + typename strongest_value_type3::value_type res(0); + for (; it != ite; ++it) + res += vect_sp(mat_const_col(ps, it.index()), v2) * (*it); + return res; + } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_skyline) + { return vect_sp_with_matc(ps, v1, v2, abstract_sparse()); } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_dense) { + GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) && + vect_size(v2) == mat_nrows(ps), "dimensions mismatch"); + typename linalg_traits::const_iterator + it = vect_const_begin(v1), ite = vect_const_end(v1); + typename strongest_value_type3::value_type res(0); + for (size_type i = 0; it != ite; ++i, ++it) + res += vect_sp(mat_const_col(ps, i), v2) * (*it); + return res; + } + + template inline + 
typename strongest_value_type3::value_type + vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,col_and_row) + { return vect_sp_with_mat(ps, v1, v2, col_major()); } + + template inline + typename strongest_value_type3::value_type + vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2, + abstract_null_type) { + typename temporary_vector::vector_type w(mat_nrows(ps)); + GMM_WARNING2("Warning, a temporary is used in scalar product\n"); + mult(ps, v1, w); + return vect_sp(w, v2); + } + + template inline + typename strongest_numeric_type::value_type, + typename std::iterator_traits::value_type>::T + vect_sp_dense_(IT1 it, IT1 ite, IT2 it2) { + typename strongest_numeric_type::value_type, + typename std::iterator_traits::value_type>::T res(0); + for (; it != ite; ++it, ++it2) res += (*it) * (*it2); + return res; + } + + template inline + typename strongest_numeric_type::value_type, + typename linalg_traits::value_type>::T + vect_sp_sparse_(IT1 it, IT1 ite, const V &v) { + typename strongest_numeric_type::value_type, + typename linalg_traits::value_type>::T res(0); + for (; it != ite; ++it) res += (*it) * v[it.index()]; + return res; + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_dense) { + return vect_sp_dense_(vect_const_begin(v1), vect_const_end(v1), + vect_const_begin(v2)); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_dense) { + typename linalg_traits::const_iterator it1 = vect_const_begin(v1), + ite = vect_const_end(v1); + typename linalg_traits::const_iterator it2 = vect_const_begin(v2); + return vect_sp_dense_(it1, ite, it2 + it1.index()); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_skyline) { + typename linalg_traits::const_iterator it1 = vect_const_begin(v2), + ite = vect_const_end(v2); + 
typename linalg_traits::const_iterator it2 = vect_const_begin(v1); + return vect_sp_dense_(it1, ite, it2 + it1.index()); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_skyline) { + typedef typename strongest_value_type::value_type T; + auto it1 = vect_const_begin(v1), ite1 = vect_const_end(v1); + auto it2 = vect_const_begin(v2), ite2 = vect_const_end(v2); + size_type n = std::min(ite1.index(), ite2.index()); + size_type l = std::max(it1.index(), it2.index()); + + if (l < n) { + size_type m = l - it1.index(), p = l - it2.index(), q = m + n - l; + return vect_sp_dense_(it1+m, it1+q, it2 + p); + } + return T(0); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_dense) { + return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_sparse, abstract_skyline) { + return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_sparse) { + return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2, abstract_dense,abstract_sparse) { + return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1); + } + + + template inline + typename strongest_value_type::value_type + vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_true) { + typename linalg_traits::const_iterator it1 = vect_const_begin(v1), + ite1 = vect_const_end(v1); + typename linalg_traits::const_iterator it2 = vect_const_begin(v2), + ite2 = vect_const_end(v2); + typename strongest_value_type::value_type res(0); + + while (it1 != ite1 && it2 != ite2) { + if (it1.index() 
== it2.index()) + { res += (*it1) * *it2; ++it1; ++it2; } + else if (it1.index() < it2.index()) ++it1; else ++it2; + } + return res; + } + + template inline + typename strongest_value_type::value_type + vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_false) { + return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2); + } + + template inline + typename strongest_value_type::value_type + vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_sparse) { + return vect_sp_sparse_sparse(v1, v2, + typename linalg_and::index_sorted, + typename linalg_traits::index_sorted>::bool_type()); + } + + /* ******************************************************************** */ + /* Hermitian product */ + /* ******************************************************************** */ + ///@endcond + /** Hermitian product. */ + template + inline typename strongest_value_type::value_type + vect_hp(const V1 &v1, const V2 &v2) + { return vect_sp(v1, conjugated(v2)); } + + /** Hermitian product with a matrix. */ + template inline + typename strongest_value_type3::value_type + vect_hp(const MATSP &ps, const V1 &v1, const V2 &v2) { + return vect_sp(ps, v1, gmm::conjugated(v2)); + } + + /* ******************************************************************** */ + /* Trace of a matrix */ + /* ******************************************************************** */ + + /** Trace of a matrix */ + template + typename linalg_traits::value_type + mat_trace(const M &m) { + typedef typename linalg_traits::value_type T; + T res(0); + for (size_type i = 0; i < std::min(mat_nrows(m), mat_ncols(m)); ++i) + res += m(i,i); + return res; + } + + /* ******************************************************************** */ + /* Euclidean norm */ + /* ******************************************************************** */ + + /** squared Euclidean norm of a vector. 
*/ + template + typename number_traits::value_type> + ::magnitude_type + vect_norm2_sqr(const V &v) { + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + auto it = vect_const_begin(v), ite = vect_const_end(v); + R res(0); + for (; it != ite; ++it) res += gmm::abs_sqr(*it); + return res; + } + + /** Euclidean norm of a vector. */ + template inline + typename number_traits::value_type> + ::magnitude_type + vect_norm2(const V &v) + { return sqrt(vect_norm2_sqr(v)); } + + + /** squared Euclidean distance between two vectors */ + template inline + typename number_traits::value_type> + ::magnitude_type + vect_dist2_sqr(const V1 &v1, const V2 &v2) { // not fully optimized + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + auto it1 = vect_const_begin(v1), ite1 = vect_const_end(v1); + auto it2 = vect_const_begin(v2), ite2 = vect_const_end(v2); + size_type k1(0), k2(0); + R res(0); + while (it1 != ite1 && it2 != ite2) { + size_type i1 = index_of_it(it1, k1, + typename linalg_traits::storage_type()); + size_type i2 = index_of_it(it2, k2, + typename linalg_traits::storage_type()); + + if (i1 == i2) { + res += gmm::abs_sqr(*it2 - *it1); ++it1; ++k1; ++it2; ++k2; + } + else if (i1 < i2) { + res += gmm::abs_sqr(*it1); ++it1; ++k1; + } + else { + res += gmm::abs_sqr(*it2); ++it2; ++k2; + } + } + while (it1 != ite1) { res += gmm::abs_sqr(*it1); ++it1; } + while (it2 != ite2) { res += gmm::abs_sqr(*it2); ++it2; } + return res; + } + + /** Euclidean distance between two vectors */ + template inline + typename number_traits::value_type> + ::magnitude_type + vect_dist2(const V1 &v1, const V2 &v2) + { return sqrt(vect_dist2_sqr(v1, v2)); } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template + typename number_traits::value_type> + ::magnitude_type + mat_euclidean_norm_sqr(const M &m, row_major) { + typename number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < 
mat_nrows(m); ++i) + res += vect_norm2_sqr(mat_const_row(m, i)); + return res; + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_euclidean_norm_sqr(const M &m, col_major) { + typename number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < mat_ncols(m); ++i) + res += vect_norm2_sqr(mat_const_col(m, i)); + return res; + } + ///@endcond + /** squared Euclidean norm of a matrix. */ + template inline + typename number_traits::value_type> + ::magnitude_type + mat_euclidean_norm_sqr(const M &m) { + return mat_euclidean_norm_sqr(m, + typename principal_orientation_type::sub_orientation>::potype()); + } + + /** Euclidean norm of a matrix. */ + template inline + typename number_traits::value_type> + ::magnitude_type + mat_euclidean_norm(const M &m) + { return gmm::sqrt(mat_euclidean_norm_sqr(m)); } + + /* ******************************************************************** */ + /* vector norm1 */ + /* ******************************************************************** */ + /** 1-norm of a vector */ + template + typename number_traits::value_type> + ::magnitude_type + vect_norm1(const V &v) { + auto it = vect_const_begin(v), ite = vect_const_end(v); + typename number_traits::value_type> + ::magnitude_type res(0); + for (; it != ite; ++it) res += gmm::abs(*it); + return res; + } + + /* ******************************************************************** */ + /* vector Infinity norm */ + /* ******************************************************************** */ + /** Infinity norm of a vector. 
*/ + template + typename number_traits::value_type> + ::magnitude_type + vect_norminf(const V &v) { + auto it = vect_const_begin(v), ite = vect_const_end(v); + typename number_traits::value_type> + ::magnitude_type res(0); + for (; it != ite; ++it) res = std::max(res, gmm::abs(*it)); + return res; + } + + /* ******************************************************************** */ + /* matrix norm1 */ + /* ******************************************************************** */ + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template + typename number_traits::value_type> + ::magnitude_type + mat_norm1(const M &m, col_major) { + typename number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < mat_ncols(m); ++i) + res = std::max(res, vect_norm1(mat_const_col(m,i))); + return res; + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norm1(const M &m, row_major) { + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + typedef typename linalg_traits::storage_type store_type; + + std::vector aux(mat_ncols(m)); + for (size_type i = 0; i < mat_nrows(m); ++i) { + typename linalg_traits::const_sub_row_type row = mat_const_row(m, i); + auto it = vect_const_begin(row), ite = vect_const_end(row); + for (size_type k = 0; it != ite; ++it, ++k) + aux[index_of_it(it, k, store_type())] += gmm::abs(*it); + } + return vect_norminf(aux); + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norm1(const M &m, col_and_row) + { return mat_norm1(m, col_major()); } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norm1(const M &m, row_and_col) + { return mat_norm1(m, col_major()); } + ///@endcond + /** 1-norm of a matrix */ + template + typename number_traits::value_type> + ::magnitude_type + mat_norm1(const M &m) { + return mat_norm1(m, typename linalg_traits::sub_orientation()); + } + + + /* 
******************************************************************** */ + /* matrix Infinity norm */ + /* ******************************************************************** */ + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template + typename number_traits::value_type> + ::magnitude_type + mat_norminf(const M &m, row_major) { + typename number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < mat_nrows(m); ++i) + res = std::max(res, vect_norm1(mat_const_row(m,i))); + return res; + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norminf(const M &m, col_major) { + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + typedef typename linalg_traits::storage_type store_type; + + std::vector aux(mat_nrows(m)); + for (size_type i = 0; i < mat_ncols(m); ++i) { + typename linalg_traits::const_sub_col_type col = mat_const_col(m, i); + auto it = vect_const_begin(col), ite = vect_const_end(col); + for (size_type k = 0; it != ite; ++it, ++k) + aux[index_of_it(it, k, store_type())] += gmm::abs(*it); + } + return vect_norminf(aux); + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norminf(const M &m, col_and_row) + { return mat_norminf(m, row_major()); } + + template + typename number_traits::value_type> + ::magnitude_type + mat_norminf(const M &m, row_and_col) + { return mat_norminf(m, row_major()); } + ///@endcond + /** infinity-norm of a matrix.*/ + template + typename number_traits::value_type> + ::magnitude_type + mat_norminf(const M &m) { + return mat_norminf(m, typename linalg_traits::sub_orientation()); + } + + /* ******************************************************************** */ + /* Max norm for matrices */ + /* ******************************************************************** */ + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template + typename number_traits::value_type> + ::magnitude_type + mat_maxnorm(const M &m, row_major) { + typename 
number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < mat_nrows(m); ++i) + res = std::max(res, vect_norminf(mat_const_row(m,i))); + return res; + } + + template + typename number_traits::value_type> + ::magnitude_type + mat_maxnorm(const M &m, col_major) { + typename number_traits::value_type> + ::magnitude_type res(0); + for (size_type i = 0; i < mat_ncols(m); ++i) + res = std::max(res, vect_norminf(mat_const_col(m,i))); + return res; + } + ///@endcond + /** max-norm of a matrix. */ + template + typename number_traits::value_type> + ::magnitude_type + mat_maxnorm(const M &m) { + return mat_maxnorm(m, + typename principal_orientation_type::sub_orientation>::potype()); + } + + /* ******************************************************************** */ + /* Clean */ + /* ******************************************************************** */ + /** Clean a vector or matrix (replace near-zero entries with zeroes). */ + + template inline void clean(L &l, double threshold); + + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template + void clean(L &l, double threshold, abstract_dense, T) { + typedef typename number_traits::magnitude_type R; + auto it = vect_begin(l), ite = vect_end(l); + for (; it != ite; ++it) + if (gmm::abs(*it) < R(threshold)) *it = T(0); + } + + template + void clean(L &l, double threshold, abstract_skyline, T) + { gmm::clean(l, threshold, abstract_dense(), T()); } + + template + void clean(L &l, double threshold, abstract_sparse, T) { + typedef typename number_traits::magnitude_type R; + auto it = vect_begin(l), ite = vect_end(l); + std::vector ind; + for (; it != ite; ++it) + if (gmm::abs(*it) < R(threshold)) ind.push_back(it.index()); + for (size_type i = 0; i < ind.size(); ++i) l[ind[i]] = T(0); + } + + template + void clean(L &l, double threshold, abstract_dense, std::complex) { + auto it = vect_begin(l), ite = vect_end(l); + for (; it != ite; ++it){ + if (gmm::abs((*it).real()) < T(threshold)) + *it = std::complex(T(0), 
(*it).imag()); + if (gmm::abs((*it).imag()) < T(threshold)) + *it = std::complex((*it).real(), T(0)); + } + } + + template + void clean(L &l, double threshold, abstract_skyline, std::complex) + { gmm::clean(l, threshold, abstract_dense(), std::complex()); } + + template + void clean(L &l, double threshold, abstract_sparse, std::complex) { + auto it = vect_begin(l), ite = vect_end(l); + std::vector ind; + for (; it != ite; ++it) { + bool r = (gmm::abs((*it).real()) < T(threshold)); + bool i = (gmm::abs((*it).imag()) < T(threshold)); + if (r && i) ind.push_back(it.index()); + else if (r) *it = std::complex(T(0), (*it).imag()); + else if (i) *it = std::complex((*it).real(), T(0)); + } + for (size_type i = 0; i < ind.size(); ++i) + l[ind[i]] = std::complex(T(0),T(0)); + } + + template inline void clean(L &l, double threshold, + abstract_vector) { + gmm::clean(l, threshold, typename linalg_traits::storage_type(), + typename linalg_traits::value_type()); + } + + template inline void clean(const L &l, double threshold); + + template void clean(L &l, double threshold, row_major) { + for (size_type i = 0; i < mat_nrows(l); ++i) + gmm::clean(mat_row(l, i), threshold); + } + + template void clean(L &l, double threshold, col_major) { + for (size_type i = 0; i < mat_ncols(l); ++i) + gmm::clean(mat_col(l, i), threshold); + } + + template inline void clean(L &l, double threshold, + abstract_matrix) { + gmm::clean(l, threshold, + typename principal_orientation_type::sub_orientation>::potype()); + } + + template inline void clean(L &l, double threshold) + { clean(l, threshold, typename linalg_traits::linalg_type()); } + + template inline void clean(const L &l, double threshold) + { gmm::clean(linalg_const_cast(l), threshold); } + + /* ******************************************************************** */ + /* Copy */ + /* ******************************************************************** */ + ///@endcond + /** Copy vectors or matrices. + @param l1 source vector or matrix. 
+ @param l2 destination. + */ + template inline + void copy(const L1& l1, L2& l2) { + if ((const void *)(&l1) != (const void *)(&l2)) { + if (same_origin(l1,l2)) + GMM_WARNING2("Warning : a conflict is possible in copy\n"); + + copy(l1, l2, typename linalg_traits::linalg_type(), + typename linalg_traits::linalg_type()); + } + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline + void copy(const L1& l1, const L2& l2) { copy(l1, linalg_const_cast(l2)); } + + template inline + void copy(const L1& l1, L2& l2, abstract_vector, abstract_vector) { + GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, " + << vect_size(l1) << " !=" << vect_size(l2)); + copy_vect(l1, l2, typename linalg_traits::storage_type(), + typename linalg_traits::storage_type()); + } + + template inline + void copy(const L1& l1, L2& l2, abstract_matrix, abstract_matrix) { + size_type m = mat_nrows(l1), n = mat_ncols(l1); + if (!m || !n) return; + GMM_ASSERT2(n==mat_ncols(l2) && m==mat_nrows(l2), "dimensions mismatch"); + copy_mat(l1, l2, typename linalg_traits::sub_orientation(), + typename linalg_traits::sub_orientation()); + } + + template inline + void copy_vect(const V1 &v1, const V2 &v2, C1, C2) + { copy_vect(v1, const_cast(v2), C1(), C2()); } + + + template + void copy_mat_by_row(const L1& l1, L2& l2) { + size_type nbr = mat_nrows(l1); + for (size_type i = 0; i < nbr; ++i) + copy(mat_const_row(l1, i), mat_row(l2, i)); + } + + template + void copy_mat_by_col(const L1 &l1, L2 &l2) { + size_type nbc = mat_ncols(l1); + for (size_type i = 0; i < nbc; ++i) { + copy(mat_const_col(l1, i), mat_col(l2, i)); + } + } + + template inline + void copy_mat(const L1& l1, L2& l2, row_major, row_major) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, row_major, row_and_col) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, row_and_col, row_and_col) + { copy_mat_by_row(l1, l2); } + + template inline + void 
copy_mat(const L1& l1, L2& l2, row_and_col, row_major) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_and_row, row_major) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, row_major, col_and_row) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_and_row, row_and_col) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, row_and_col, col_and_row) + { copy_mat_by_row(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_major, col_major) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_major, col_and_row) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_major, row_and_col) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, row_and_col, col_major) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_and_row, col_major) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat(const L1& l1, L2& l2, col_and_row, col_and_row) + { copy_mat_by_col(l1, l2); } + + template inline + void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i) { + copy_mat_mixed_rc(l1, l2, i, typename linalg_traits::storage_type()); + } + + template + void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) + l2(i, it.index()) = *it; + } + + template + void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) + l2(i, it.index()) = *it; + } + + template + void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type j = 0; it != ite; ++it, ++j) l2(i, 
j) = *it; + } + + template inline + void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i) { + copy_mat_mixed_cr(l1, l2, i, typename linalg_traits::storage_type()); + } + + template + void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2(it.index(), i) = *it; + } + + template + void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2(it.index(), i) = *it; + } + + template + void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) = *it; + } + + template + void copy_mat(const L1& l1, L2& l2, row_major, col_major) { + clear(l2); + size_type nbr = mat_nrows(l1); + for (size_type i = 0; i < nbr; ++i) + copy_mat_mixed_rc(mat_const_row(l1, i), l2, i); + } + + template + void copy_mat(const L1& l1, L2& l2, col_major, row_major) { + clear(l2); + size_type nbc = mat_ncols(l1); + for (size_type i = 0; i < nbc; ++i) + copy_mat_mixed_cr(mat_const_col(l1, i), l2, i); + } + + template inline + void copy_vect(const L1 &l1, L2 &l2, abstract_dense, abstract_dense) { + std::copy(vect_const_begin(l1), vect_const_end(l1), vect_begin(l2)); + } + + template inline // to be optimised ? 
+ void copy_vect(const L1 &l1, L2 &l2, abstract_skyline, abstract_skyline) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + while (it1 != ite1 && *it1 == typename linalg_traits::value_type(0)) + ++it1; + + if (ite1 - it1 > 0) { + clear(l2); + auto it2 = vect_begin(l2), ite2 = vect_end(l2); + while (*(ite1-1) == typename linalg_traits::value_type(0)) ite1--; + + if (it2 == ite2) { + l2[it1.index()] = *it1; ++it1; + l2[ite1.index()-1] = *(ite1-1); --ite1; + if (it1 < ite1) + { it2 = vect_begin(l2); ++it2; std::copy(it1, ite1, it2); } + } + else { + ptrdiff_t m = it1.index() - it2.index(); + if (m >= 0 && ite1.index() <= ite2.index()) + std::copy(it1, ite1, it2 + m); + else { + if (m < 0) l2[it1.index()] = *it1; + if (ite1.index() > ite2.index()) l2[ite1.index()-1] = *(ite1-1); + it2 = vect_begin(l2); ite2 = vect_end(l2); + m = it1.index() - it2.index(); + std::copy(it1, ite1, it2 + m); + } + } + } + } + + template + void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_dense) { + clear(l2); + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) { l2[it.index()] = *it; } + } + + template + void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) { + clear(l2); + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2[it.index()] = *it; + } + + template + void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_dense) { + typedef typename linalg_traits::value_type T; + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + if (it == ite) + gmm::clear(l2); + else { + auto it2 = vect_begin(l2), ite2 = vect_end(l2); + + size_type i = it.index(), j; + for (j = 0; j < i; ++j, ++it2) *it2 = T(0); + for (; it != ite; ++it, ++it2) *it2 = *it; + for (; it2 != ite2; ++it2) *it2 = T(0); + } + } + + template + void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + clear(l2); + // cout << 
"copy " << l1 << " of size " << vect_size(l1) << " nnz = " << nnz(l1) << endl; + for (; it != ite; ++it) { + // cout << "*it = " << *it << endl; + // cout << "it.index() = " << it.index() << endl; + if (*it != (typename linalg_traits::value_type)(0)) + l2[it.index()] = *it; + } + } + + template + void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_sparse) { + clear(l2); + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type i = 0; it != ite; ++it, ++i) + if (*it != (typename linalg_traits::value_type)(0)) + l2[i] = *it; + } + + template // to be optimised ... + void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_skyline) { + clear(l2); + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type i = 0; it != ite; ++it, ++i) + if (*it != (typename linalg_traits::value_type)(0)) + l2[i] = *it; + } + + + template + void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) { + clear(l2); + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) + if (*it != (typename linalg_traits::value_type)(0)) + l2[it.index()] = *it; + } + + /* ******************************************************************** */ + /* Matrix and vector addition */ + /* algorithms are built in order to avoid some conflicts with */ + /* repeated arguments or with overlapping part of a same object. */ + /* In the latter case, conflicts are still possible. */ + /* ******************************************************************** */ + ///@endcond + /** Add two vectors or matrices + @param l1 + @param l2 contains on output, l2+l1. 
+ */ + template inline + void add(const L1& l1, L2& l2) { + add_spec(l1, l2, typename linalg_traits::linalg_type()); + } + ///@cond + + template inline + void add(const L1& l1, const L2& l2) { add(l1, linalg_const_cast(l2)); } + + template inline + void add_spec(const L1& l1, L2& l2, abstract_vector) { + GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, " + << vect_size(l1) << " !=" << vect_size(l2)); + add(l1, l2, typename linalg_traits::storage_type(), + typename linalg_traits::storage_type()); + } + + template inline + void add_spec(const L1& l1, L2& l2, abstract_matrix) { + GMM_ASSERT2(mat_nrows(l1)==mat_nrows(l2) && mat_ncols(l1)==mat_ncols(l2), + "dimensions mismatch l1 is " << mat_nrows(l1) << "x" + << mat_ncols(l1) << " and l2 is " << mat_nrows(l2) + << "x" << mat_ncols(l2)); + add(l1, l2, typename linalg_traits::sub_orientation(), + typename linalg_traits::sub_orientation()); + } + + template + void add(const L1& l1, L2& l2, row_major, row_major) { + auto it1 = mat_row_begin(l1), ite = mat_row_end(l1); + auto it2 = mat_row_begin(l2); + for ( ; it1 != ite; ++it1, ++it2) + add(linalg_traits::row(it1), linalg_traits::row(it2)); + } + + template + void add(const L1& l1, L2& l2, col_major, col_major) { + auto it1 = mat_col_const_begin(l1), ite = mat_col_const_end(l1); + typename linalg_traits::col_iterator it2 = mat_col_begin(l2); + for ( ; it1 != ite; ++it1, ++it2) + add(linalg_traits::col(it1), linalg_traits::col(it2)); + } + + template inline + void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i) { + add_mat_mixed_rc(l1, l2, i, typename linalg_traits::storage_type()); + } + + template + void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2(i, it.index()) += *it; + } + + template + void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != 
ite; ++it) l2(i, it.index()) += *it; + } + + template + void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type j = 0; it != ite; ++it, ++j) l2(i, j) += *it; + } + + template inline + void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i) { + add_mat_mixed_cr(l1, l2, i, typename linalg_traits::storage_type()); + } + + template + void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2(it.index(), i) += *it; + } + + template + void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (; it != ite; ++it) l2(it.index(), i) += *it; + } + + template + void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) { + auto it = vect_const_begin(l1), ite = vect_const_end(l1); + for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) += *it; + } + + template + void add(const L1& l1, L2& l2, row_major, col_major) { + size_type nbr = mat_nrows(l1); + for (size_type i = 0; i < nbr; ++i) + add_mat_mixed_rc(mat_const_row(l1, i), l2, i); + } + + template + void add(const L1& l1, L2& l2, col_major, row_major) { + size_type nbc = mat_ncols(l1); + for (size_type i = 0; i < nbc; ++i) + add_mat_mixed_cr(mat_const_col(l1, i), l2, i); + } + + template inline + void add(const L1& l1, L2& l2, row_and_col, row_major) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, row_and_col, row_and_col) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, row_and_col, col_and_row) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, col_and_row, row_and_col) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, row_major, 
row_and_col) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, col_and_row, row_major) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, row_major, col_and_row) + { add(l1, l2, row_major(), row_major()); } + + template inline + void add(const L1& l1, L2& l2, row_and_col, col_major) + { add(l1, l2, col_major(), col_major()); } + + template inline + void add(const L1& l1, L2& l2, col_major, row_and_col) + { add(l1, l2, col_major(), col_major()); } + + template inline + void add(const L1& l1, L2& l2, col_and_row, col_major) + { add(l1, l2, col_major(), col_major()); } + + template inline + void add(const L1& l1, L2& l2, col_and_row, col_and_row) + { add(l1, l2, col_major(), col_major()); } + + template inline + void add(const L1& l1, L2& l2, col_major, col_and_row) + { add(l1, l2, col_major(), col_major()); } + + ///@endcond + /** Addition of two vectors/matrices + @param l1 + @param l2 + @param l3 contains l1+l2 on output + */ + template inline + void add(const L1& l1, const L2& l2, L3& l3) { + add_spec(l1, l2, l3, typename linalg_traits::linalg_type()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline + void add(const L1& l1, const L2& l2, const L3& l3) + { add(l1, l2, linalg_const_cast(l3)); } + + template inline + void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_matrix) /* Matrix sum: l3 receives l1 plus l2. If l1 and l3 are the same object, copying l2 into l3 first would destroy l1, so l2 is accumulated in place instead (same address test as the abstract_vector overload). */ + { if ((const void *)(&l1) == (const void *)(&l3)) add(l2, l3); else { copy(l2, l3); add(l1, l3); } } + + template inline + void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_vector) { + GMM_ASSERT2(vect_size(l1) == vect_size(l2) && + vect_size(l1) == vect_size(l3), "dimensions mismatch"); + if ((const void *)(&l1) == (const void *)(&l3)) + add(l2, l3); + else if ((const void *)(&l2) == (const void *)(&l3)) + add(l1, l3); + else + add(l1, l2, l3, typename linalg_traits::storage_type(), + typename linalg_traits::storage_type(), + typename linalg_traits::storage_type()); + } + + template + void add_full_(IT1 it1, IT2 it2, IT3 it3, IT3 ite) { + 
for (; it3 != ite; ++it3, ++it2, ++it1) *it3 = *it1 + *it2; + } + + template + void add_almost_full_(IT1 it1, IT1 ite1, IT2 it2, IT3 it3, IT3 ite3) { + IT3 it = it3; + for (; it != ite3; ++it, ++it2) *it = *it2; + for (; it1 != ite1; ++it1) + *(it3 + it1.index()) += *it1; + } + + template + void add_to_full_(IT1 it1, IT1 ite1, IT2 it2, IT2 ite2, + IT3 it3, IT3 ite3) { + typedef typename std::iterator_traits::value_type T; + IT3 it = it3; + for (; it != ite3; ++it) *it = T(0); + for (; it1 != ite1; ++it1) *(it3 + it1.index()) = *it1; + for (; it2 != ite2; ++it2) *(it3 + it2.index()) += *it2; + } + + template inline + void add(const L1& l1, const L2& l2, L3& l3, + abstract_dense, abstract_dense, abstract_dense) { + add_full_(vect_const_begin(l1), vect_const_begin(l2), + vect_begin(l3), vect_end(l3)); + } + + // generic function for add(v1, v2, v3). + // Need to be specialized to optimize particular additions. + template + inline void add(const L1& l1, const L2& l2, L3& l3, ST1, ST2, ST3) + { copy(l2, l3); add(l1, l3, ST1(), ST3()); } + + template inline + void add(const L1& l1, const L2& l2, L3& l3, + abstract_sparse, abstract_dense, abstract_dense) { + add_almost_full_(vect_const_begin(l1), vect_const_end(l1), + vect_const_begin(l2), vect_begin(l3), vect_end(l3)); + } + + template inline + void add(const L1& l1, const L2& l2, L3& l3, + abstract_dense, abstract_sparse, abstract_dense) + { add(l2, l1, l3, abstract_sparse(), abstract_dense(), abstract_dense()); } + + template inline + void add(const L1& l1, const L2& l2, L3& l3, + abstract_sparse, abstract_sparse, abstract_dense) { + add_to_full_(vect_const_begin(l1), vect_const_end(l1), + vect_const_begin(l2), vect_const_end(l2), + vect_begin(l3), vect_end(l3)); + } + + + template + void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_true) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + auto it2 = vect_const_begin(l2), ite2 = vect_const_end(l2); + clear(l3); + while (it1 != ite1 && it2 != 
ite2) { + ptrdiff_t d = it1.index() - it2.index(); + if (d < 0) + { l3[it1.index()] += *it1; ++it1; } + else if (d > 0) + { l3[it2.index()] += *it2; ++it2; } + else + { l3[it1.index()] = *it1 + *it2; ++it1; ++it2; } + } + for (; it1 != ite1; ++it1) l3[it1.index()] += *it1; + for (; it2 != ite2; ++it2) l3[it2.index()] += *it2; + } + + template + void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_false) /* Fallback chosen by the index_sorted dispatch in the sparse add overload: l3 is set to l2, then l1 is accumulated into it, so l3 holds the sum of l1 and l2. */ + { copy(l2, l3); add(l1, l3); } + + template + void add(const L1& l1, const L2& l2, L3& l3, + abstract_sparse, abstract_sparse, abstract_sparse) { + add_spspsp(l1, l2, l3, typename linalg_and::index_sorted, + typename linalg_traits::index_sorted>::bool_type()); + } + + template + void add(const L1& l1, L2& l2, abstract_dense, abstract_dense) { + auto it1 = vect_const_begin(l1); + auto it2 = vect_begin(l2), ite = vect_end(l2); + for (; it2 != ite; ++it2, ++it1) *it2 += *it1; + } + + template + void add(const L1& l1, L2& l2, abstract_dense, abstract_skyline) { + typedef typename linalg_traits::value_type T; + + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + size_type i1 = 0, ie1 = vect_size(l1); + while (it1 != ite1 && *it1 == T(0)) { ++it1; ++i1; } + if (it1 != ite1) { + auto it2 = vect_begin(l2), ite2 = vect_end(l2); + while (ie1 && *(ite1-1) == T(0)) { ite1--; --ie1; } + + if (it2 == ite2 || i1 < it2.index()) { + l2[i1] = *it1; ++i1; ++it1; + if (it1 == ite1) return; + it2 = vect_begin(l2); ite2 = vect_end(l2); + } + if (ie1 > ite2.index()) { + --ite1; l2[ie1 - 1] = *ite1; + it2 = vect_begin(l2); + } + it2 += i1 - it2.index(); + for (; it1 != ite1; ++it1, ++it2) { *it2 += *it1; } + } + } + + + template + void add(const L1& l1, L2& l2, abstract_skyline, abstract_dense) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + if (it1 != ite1) { + auto it2 = vect_begin(l2); + it2 += it1.index(); + for (; it1 != ite1; ++it2, ++it1) *it2 += *it1; + } + } + + + template + void add(const L1& l1, L2& l2, abstract_sparse, abstract_dense) { + 
auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + for (; it1 != ite1; ++it1) l2[it1.index()] += *it1; + } + + template + void add(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + for (; it1 != ite1; ++it1) l2[it1.index()] += *it1; + } + + template + void add(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + for (; it1 != ite1; ++it1) l2[it1.index()] += *it1; + } + + + template + void add(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + for (; it1 != ite1; ++it1) + if (*it1 != typename linalg_traits::value_type(0)) + l2[it1.index()] += *it1; + } + + template + void add(const L1& l1, L2& l2, abstract_skyline, abstract_skyline) { + typedef typename linalg_traits::value_type T1; + typedef typename linalg_traits::value_type T2; + + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + + while (it1 != ite1 && *it1 == T1(0)) ++it1; + if (ite1 != it1) { + auto it2 = vect_begin(l2), ite2 = vect_end(l2); + while (*(ite1-1) == T1(0)) ite1--; + if (it2 == ite2 || it1.index() < it2.index()) { + l2[it1.index()] = T2(0); + it2 = vect_begin(l2); ite2 = vect_end(l2); + } + if (ite1.index() > ite2.index()) { + l2[ite1.index() - 1] = T2(0); + it2 = vect_begin(l2); + } + it2 += it1.index() - it2.index(); + for (; it1 != ite1; ++it1, ++it2) *it2 += *it1; + } + } + + template + void add(const L1& l1, L2& l2, abstract_dense, abstract_sparse) { + auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1); + for (size_type i = 0; it1 != ite1; ++it1, ++i) + if (*it1 != typename linalg_traits::value_type(0)) l2[i] += *it1; + } + + /* ******************************************************************** */ + /* Matrix-vector mult */ + /* ******************************************************************** */ + ///@endcond + /** matrix-vector or matrix-matrix 
product. + @param l1 a matrix. + @param l2 a vector or matrix. + @param l3 the product l1*l2. + */ + template inline + void mult(const L1& l1, const L2& l2, L3& l3) { + mult_dispatch(l1, l2, l3, typename linalg_traits::linalg_type()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline + void mult(const L1& l1, const L2& l2, const L3& l3) + { mult(l1, l2, linalg_const_cast(l3)); } + + template inline + void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_vector) { + size_type m = mat_nrows(l1), n = mat_ncols(l1); + if (!m || !n) { gmm::clear(l3); return; } + GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch"); + if (!same_origin(l2, l3)) + mult_spec(l1, l2, l3, typename principal_orientation_type::sub_orientation>::potype()); + else { + GMM_WARNING2("Warning, A temporary is used for mult\n"); + typename temporary_vector::vector_type temp(vect_size(l3)); + mult_spec(l1, l2, temp, typename principal_orientation_type::sub_orientation>::potype()); + copy(temp, l3); + } + } + + template + void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) { + typedef typename linalg_traits::value_type T; + clear(l3); + size_type nr = mat_nrows(l1); + for (size_type i = 0; i < nr; ++i) { + T aux = vect_sp(mat_const_row(l1, i), l2); + if (aux != T(0)) l3[i] = aux; + } + } + + template + void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) { + typedef typename linalg_traits::value_type T; + clear(l3); + size_type nr = mat_nrows(l1); + for (size_type i = 0; i < nr; ++i) { + T aux = vect_sp(mat_const_row(l1, i), l2); + if (aux != T(0)) l3[i] = aux; + } + } + + template + void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) { + typename linalg_traits::iterator it=vect_begin(l3), ite=vect_end(l3); + auto itr = mat_row_const_begin(l1); + for (; it != ite; ++it, ++itr) + *it = vect_sp(linalg_traits::row(itr), l2, + typename linalg_traits::storage_type(), + typename linalg_traits::storage_type()); + } + + 
template + void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) { + clear(l3); + size_type nc = mat_ncols(l1); + for (size_type i = 0; i < nc; ++i) + add(scaled(mat_const_col(l1, i), l2[i]), l3); + } + + template + void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) { + typedef typename linalg_traits::value_type T; + clear(l3); + auto it = vect_const_begin(l2), ite = vect_const_end(l2); + for (; it != ite; ++it) + if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3); + } + + template + void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) { + typedef typename linalg_traits::value_type T; + clear(l3); + auto it = vect_const_begin(l2), ite = vect_const_end(l2); + for (; it != ite; ++it) + if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3); + } + + template inline + void mult_spec(const L1& l1, const L2& l2, L3& l3, row_major) + { mult_by_row(l1, l2, l3, typename linalg_traits::storage_type()); } + + template inline + void mult_spec(const L1& l1, const L2& l2, L3& l3, col_major) + { mult_by_col(l1, l2, l3, typename linalg_traits::storage_type()); } + + template inline + void mult_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type) + { mult_ind(l1, l2, l3, typename linalg_traits::storage_type()); } + + template + void mult_ind(const L1& l1, const L2& l2, L3& l3, abstract_indirect) { + GMM_ASSERT1(false, "gmm::mult(m, ., .) 
undefined for this kind of matrix"); + } + + template inline + void mult(const L1& l1, const L2& l2, const L3& l3, L4& l4) { + size_type m = mat_nrows(l1), n = mat_ncols(l1); + copy(l3, l4); + if (!m || !n) { gmm::copy(l3, l4); return; } + GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l4), "dimensions mismatch"); + if (!same_origin(l2, l4)) { + mult_add_spec(l1, l2, l4, typename principal_orientation_type::sub_orientation>::potype()); + } + else { + GMM_WARNING2("Warning, A temporary is used for mult\n"); + typename temporary_vector::vector_type temp(vect_size(l2)); + copy(l2, temp); + mult_add_spec(l1,temp, l4, typename principal_orientation_type::sub_orientation>::potype()); + } + } + + template inline + void mult(const L1& l1, const L2& l2, const L3& l3, const L4& l4) + { mult(l1, l2, l3, linalg_const_cast(l4)); } + + ///@endcond + /** Multiply-accumulate. l3 += l1*l2; */ + template inline + void mult_add(const L1& l1, const L2& l2, L3& l3) { + size_type m = mat_nrows(l1), n = mat_ncols(l1); + if (!m || !n) return; + GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch"); + if (!same_origin(l2, l3)) { + mult_add_spec(l1, l2, l3, typename principal_orientation_type::sub_orientation>::potype()); + } + else { + GMM_WARNING2("Warning, A temporary is used for mult\n"); + typename temporary_vector::vector_type temp(vect_size(l2)); + copy(l2, temp); + mult_add_spec(l1,temp, l3, typename principal_orientation_type::sub_orientation>::potype()); + } + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + template inline + void mult_add(const L1& l1, const L2& l2, const L3& l3) + { mult_add(l1, l2, linalg_const_cast(l3)); } + + template + void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) { + typedef typename linalg_traits::value_type T; + size_type nr = mat_nrows(l1); + for (size_type i = 0; i < nr; ++i) { + T aux = vect_sp(mat_const_row(l1, i), l2); + if (aux != T(0)) l3[i] += aux; + } + } + + template + void mult_add_by_row(const L1& l1, const 
L2& l2, L3& l3, abstract_skyline) { + typedef typename linalg_traits::value_type T; + size_type nr = mat_nrows(l1); + for (size_type i = 0; i < nr; ++i) { + T aux = vect_sp(mat_const_row(l1, i), l2); + if (aux != T(0)) l3[i] += aux; + } + } + + template + void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) { + auto it=vect_begin(l3), ite=vect_end(l3); + auto itr = mat_row_const_begin(l1); + for (; it != ite; ++it, ++itr) + *it += vect_sp(linalg_traits::row(itr), l2); + } + + template + void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) { + size_type nc = mat_ncols(l1); + for (size_type i = 0; i < nc; ++i) + add(scaled(mat_const_col(l1, i), l2[i]), l3); + } + + template + void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) { + auto it = vect_const_begin(l2), ite = vect_const_end(l2); + for (; it != ite; ++it) + if (*it != typename linalg_traits::value_type(0)) + add(scaled(mat_const_col(l1, it.index()), *it), l3); + } + + template + void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) { + auto it = vect_const_begin(l2), ite = vect_const_end(l2); + for (; it != ite; ++it) + if (*it != typename linalg_traits::value_type(0)) + add(scaled(mat_const_col(l1, it.index()), *it), l3); + } + + template inline + void mult_add_spec(const L1& l1, const L2& l2, L3& l3, row_major) + { mult_add_by_row(l1, l2, l3, typename linalg_traits::storage_type()); } + + template inline + void mult_add_spec(const L1& l1, const L2& l2, L3& l3, col_major) + { mult_add_by_col(l1, l2, l3, typename linalg_traits::storage_type()); } + + template inline + void mult_add_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type) + { mult_ind(l1, l2, l3, typename linalg_traits::storage_type()); } + + template + void transposed_mult(const L1& l1, const L2& l2, const L3& l3) + { mult(gmm::transposed(l1), l2, l3); } + + + /* ******************************************************************** */ + /* Matrix-matrix mult 
*/ + /* ******************************************************************** */ + + + struct g_mult {}; // generic mult, less optimized + struct c_mult {}; // col x col -> col mult + struct r_mult {}; // row x row -> row mult + struct rcmult {}; // row x col -> col mult + struct crmult {}; // col x row -> row mult + + + template struct mult_t; + #define DEFMU__ template<> struct mult_t + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef g_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef crmult t; }; + DEFMU__ { typedef g_mult t; }; + DEFMU__ { typedef crmult t; }; + DEFMU__ { typedef crmult t; }; + DEFMU__ { typedef g_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef crmult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef crmult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult 
t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef rcmult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef c_mult t; }; + DEFMU__ { typedef r_mult t; }; + DEFMU__ { typedef r_mult t; }; + + template + void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_matrix) { + typedef typename temporary_matrix::matrix_type temp_mat_type; + size_type n = mat_ncols(l1); + if (n == 0) { gmm::clear(l3); return; } + GMM_ASSERT2(n == mat_nrows(l2) && mat_nrows(l1) == mat_nrows(l3) && + mat_ncols(l2) == mat_ncols(l3), "dimensions mismatch"); + + if (same_origin(l2, l3) || same_origin(l1, l3)) { + GMM_WARNING2("A temporary is used for mult"); + temp_mat_type temp(mat_nrows(l3), mat_ncols(l3)); + mult_spec(l1, l2, temp, typename mult_t< + typename linalg_traits::sub_orientation, + typename linalg_traits::sub_orientation, + typename linalg_traits::sub_orientation>::t()); + copy(temp, l3); + } + else + mult_spec(l1, l2, l3, typename mult_t< + typename linalg_traits::sub_orientation, + typename linalg_traits::sub_orientation, + typename linalg_traits::sub_orientation>::t()); + } + + // Completely generic but inefficient + + template + void mult_spec(const L1& l1, const L2& l2, L3& l3, g_mult) { + typedef typename linalg_traits::value_type T; + GMM_WARNING2("Inefficient generic matrix-matrix mult is used"); + for (size_type i = 0; i < mat_nrows(l3) ; ++i) + for (size_type j = 0; j < mat_ncols(l3) ; ++j) { + T a(0); + for (size_type k = 0; k < mat_nrows(l2) ; ++k) a += l1(i, k)*l2(k, j); + l3(i, j) = a; + } + } + + // row x col matrix-matrix mult + + template + void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, col_major) { + typedef typename 
temporary_col_matrix::matrix_type temp_col_mat;
+    temp_col_mat temp(mat_nrows(l1), mat_ncols(l1));
+    copy(l1, temp);
+    mult(temp, l2, l3);
+  }
+
+  // Row matrix - col matrix product, row_major variant: l2 is copied into a
+  // temporary row matrix of the same size and mult() is called again with
+  // that temporary in place of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, row_major) {
+    typedef typename temporary_row_matrix<L2>::matrix_type temp_row_mat;
+    temp_row_mat temp(mat_nrows(l2), mat_ncols(l2));
+    copy(l2, temp);
+    mult(l1, temp, l3);
+  }
+
+  // Row matrix - col matrix product l3 = l1 * l2.
+  // When both operands are sparse, one of them is first copied into a
+  // temporary (see mult_row_col_with_temp). Otherwise each coefficient
+  // l3(i,j) is the scalar product of row i of l1 with column j of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, rcmult) {
+    if (is_sparse(l1) && is_sparse(l2)) {
+      GMM_WARNING3("Inefficient row matrix - col matrix mult for "
+                   "sparse matrices, using temporary");
+      mult_row_col_with_temp(l1, l2, l3,
+                             typename principal_orientation_type<typename
+                             linalg_traits<L3>::sub_orientation>::potype());
+    }
+    else {
+      auto it2b = linalg_traits<L2>::col_begin(l2), it2 = it2b,
+        ite = linalg_traits<L2>::col_end(l2);
+      size_type i,j, k = mat_nrows(l1);
+
+      for (i = 0; i < k; ++i) {
+        typename linalg_traits<L1>::const_sub_row_type r1=mat_const_row(l1, i);
+        for (it2 = it2b, j = 0; it2 != ite; ++it2, ++j)
+          l3(i,j) = vect_sp(r1, linalg_traits<L2>::col(it2));
+      }
+    }
+  }
+
+  // row - row matrix-matrix mult
+
+  // Dispatches on the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult) {
+    mult_spec(l1, l2, l3,r_mult(),typename linalg_traits<L1>::storage_type());
+  }
+
+  // Dense l1: row i of l3 is the sum over j of l1(i,j) times row j of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_dense) {
+    // optimizable
+    clear(l3);
+    size_type nn = mat_nrows(l3), mm = mat_nrows(l2);
+    for (size_type i = 0; i < nn; ++i) {
+      for (size_type j = 0; j < mm; ++j) {
+        add(scaled(mat_const_row(l2, j), l1(i, j)), mat_row(l3, i));
+      }
+    }
+  }
+
+  // Sparse l1: same accumulation, but only the stored entries of row i of
+  // l1 are visited (it.index() is the column of the entry, *it its value).
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_sparse) {
+    // optimizable
+    clear(l3);
+    size_type nn = mat_nrows(l3);
+    for (size_type i = 0; i < nn; ++i) {
+      typename linalg_traits<L1>::const_sub_row_type rl1=mat_const_row(l1, i);
+      auto it = vect_const_begin(rl1), ite = vect_const_end(rl1);
+      for (; it != ite; ++it)
+        add(scaled(mat_const_row(l2, it.index()), *it), mat_row(l3, i));
+    }
+  }
+
+  // Skyline storage is handled by the sparse overload.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_skyline)
+  { mult_spec(l1, l2, l3, r_mult(), abstract_sparse()); }
+
+  // col - col matrix-matrix mult
+
+  // Dispatches on the storage type and the sub-orientation of l2.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult) {
+    mult_spec(l1, l2,l3,c_mult(),typename linalg_traits<L2>::storage_type(),
+              typename linalg_traits<L2>::sub_orientation());
+  }
+
+
+  // Dense l2: column i of l3 is the sum over j of l2(j,i) times column j of
+  // l1; zero coefficients of l2 are skipped.
+  template <typename L1, typename L2, typename L3, typename ORIEN>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
+                 abstract_dense, ORIEN) {
+    typedef typename linalg_traits<L2>::value_type T;
+    size_type nn = mat_ncols(l3), mm = mat_ncols(l1);
+
+    for (size_type i = 0; i < nn; ++i) {
+      clear(mat_col(l3, i));
+      for (size_type j = 0; j < mm; ++j) {
+        T b = l2(j, i);
+        if (b != T(0)) add(scaled(mat_const_col(l1, j), b), mat_col(l3, i));
+      }
+    }
+  }
+
+  // Sparse l2: only the stored entries of column i of l2 are visited
+  // (it.index() is the row of the entry, *it its value).
+  template <typename L1, typename L2, typename L3, typename ORIEN>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
+                 abstract_sparse, ORIEN) {
+    // optimizable
+    clear(l3);
+    size_type nn = mat_ncols(l3);
+    for (size_type i = 0; i < nn; ++i) {
+      typename linalg_traits<L2>::const_sub_col_type rc2 = mat_const_col(l2, i);
+      auto it = vect_const_begin(rc2), ite = vect_const_end(rc2);
+      for (; it != ite; ++it)
+        add(scaled(mat_const_col(l1, it.index()), *it), mat_col(l3, i));
+    }
+  }
+
+  // Sparse l2 selected through the row_major tag: no column of l2 is
+  // iterated, every coefficient is read individually (hence the warning).
+  // Column i of l3 accumulates l2(j,i) times column j of l1, exactly as in
+  // the abstract_dense overload above.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
+                 abstract_sparse, row_major) {
+    typedef typename linalg_traits<L2>::value_type T;
+    GMM_WARNING3("Inefficient matrix-matrix mult for sparse matrices");
+    clear(l3);
+    size_type mm = mat_nrows(l2), nn = mat_ncols(l3);
+    for (size_type i = 0; i < nn; ++i)
+      for (size_type j = 0; j < mm; ++j) {
+        // Row j, column i. Reading l2(i,j) here would multiply by the
+        // transpose of l2 and index out of range when l2 is not square.
+        T a = l2(j, i);
+        if (a != T(0)) add(scaled(mat_const_col(l1, j), a), mat_col(l3, i));
+      }
+  }
+
+  // Skyline storage is handled by the sparse overloads.
+  template <typename L1, typename L2, typename L3, typename ORIEN> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
+                 abstract_skyline, ORIEN)
+  { mult_spec(l1, l2, l3, c_mult(), abstract_sparse(), ORIEN()); }
+
+
+  // col - row matrix-matrix mult
+
+  // Dispatches on the storage type of l1.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult)
+  { mult_spec(l1,l2,l3,crmult(), typename linalg_traits<L1>::storage_type()); }
+
+
+  // Dense l1: row j of l3 accumulates l1(j,i) times row i of l2.
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_dense) {
+    // optimizable
+    clear(l3);
+    size_type nn = mat_ncols(l1), mm = mat_nrows(l1);
+    for (size_type i = 0; i < nn; ++i) {
+      for (size_type j = 0; j < mm; ++j)
+        add(scaled(mat_const_row(l2, i), l1(j, i)), mat_row(l3, j));
+    }
+  }
+
+  // Sparse l1: only the stored entries of column i of l1 are visited
+  // (it.index() is the row of the entry, *it its value).
+  template <typename L1, typename L2, typename L3>
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_sparse) {
+    // optimizable
+    clear(l3);
+    size_type nn = mat_ncols(l1);
+    for (size_type i = 0; i < nn; ++i) {
+      typename linalg_traits<L1>::const_sub_col_type rc1 = mat_const_col(l1, i);
+      auto it = vect_const_begin(rc1), ite = vect_const_end(rc1);
+      for (; it != ite; ++it)
+        add(scaled(mat_const_row(l2, i), *it), mat_row(l3, it.index()));
+    }
+  }
+
+  // Skyline storage is handled by the sparse overload.
+  template <typename L1, typename L2, typename L3> inline
+  void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_skyline)
+  { mult_spec(l1, l2, l3, crmult(), abstract_sparse()); }
+
+
+  /* ******************************************************************** */
+  /*		Symmetry test.                                     	  */
+  /* ******************************************************************** */
+
+  ///@endcond
+  /** test if A is symmetric.
+      @param A a matrix.
+      @param tol a threshold.
+      When tol is negative (the default), default_tol(R()) * mat_maxnorm(A)
+      is used instead, R being magnitude_of_linalg(MAT).
+      A matrix that is not square is reported as not symmetric.
+  */
+  template <typename MAT> inline
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol
+                    = magnitude_of_linalg(MAT)(-1)) {
+    typedef magnitude_of_linalg(MAT) R;
+    // Shape test first: no norm is needed to reject a non-square matrix.
+    if (mat_nrows(A) != mat_ncols(A)) return false;
+    if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
+    return is_symmetric(A, tol, typename linalg_traits<MAT>::storage_type());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Dense storage: every coefficient strictly below the diagonal is
+  // compared with its mirror above the diagonal.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_dense) {
+    size_type m = mat_nrows(A);
+    for (size_type i = 1; i < m; ++i)
+      for (size_type j = 0; j < i; ++j)
+        if (gmm::abs(A(i, j)-A(j, i)) > tol) return false;
+    return true;
+  }
+
+  // Sparse storage: dispatches on the principal orientation of the matrix.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_sparse) {
+    return is_symmetric(A, tol, typename principal_orientation_type<typename
+                        linalg_traits<MAT>::sub_orientation>::potype());
+  }
+
+  // Row-wise traversal: each stored entry (i, it.index()) is compared with
+  // the coefficient at the transposed position.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    row_major) {
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_row_type row = mat_const_row(A, i);
+      auto it = vect_const_begin(row), ite = vect_const_end(row);
+      for (; it != ite; ++it)
+        if (gmm::abs(*it - A(it.index(), i)) > tol) return false;
+    }
+    return true;
+  }
+
+  // Column-wise traversal: each stored entry (it.index(), i) is compared
+  // with the coefficient at the transposed position.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    col_major) {
+    for (size_type i = 0; i < mat_ncols(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_col_type col = mat_const_col(A, i);
+      auto it = vect_const_begin(col), ite = vect_const_end(col);
+      for (; it != ite; ++it)
+        if (gmm::abs(*it - A(i, it.index())) > tol) return false;
+    }
+    return true;
+  }
+
+  // Skyline storage is handled by the sparse overload.
+  template <typename MAT>
+  bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_skyline)
+  { return is_symmetric(A, tol, abstract_sparse()); }
+
+  ///@endcond
+  /** test if A is Hermitian.
+      @param A a matrix.
+      @param tol a threshold.
+      When tol is negative (the default), default_tol(R()) * mat_maxnorm(A)
+      is used instead, R being magnitude_of_linalg(MAT).
+      A matrix that is not square is reported as not Hermitian.
+  */
+  template <typename MAT> inline
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol
+                    = magnitude_of_linalg(MAT)(-1)) {
+    typedef magnitude_of_linalg(MAT) R;
+    // Shape test first: no norm is needed to reject a non-square matrix.
+    if (mat_nrows(A) != mat_ncols(A)) return false;
+    if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
+    return is_hermitian(A, tol, typename linalg_traits<MAT>::storage_type());
+  }
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+
+  // Dense storage: every coefficient strictly below the diagonal is
+  // compared with the conjugate of its mirror above the diagonal.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_dense) {
+    size_type m = mat_nrows(A);
+    for (size_type i = 1; i < m; ++i)
+      for (size_type j = 0; j < i; ++j)
+        if (gmm::abs(A(i, j)-gmm::conj(A(j, i))) > tol) return false;
+    return true;
+  }
+
+  // Sparse storage: dispatches on the principal orientation of the matrix.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_sparse) {
+    return is_hermitian(A, tol, typename principal_orientation_type<typename
+                        linalg_traits<MAT>::sub_orientation>::potype());
+  }
+
+  // Row-wise traversal: the conjugate of each stored entry (i, it.index())
+  // is compared with the coefficient at the transposed position.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    row_major) {
+    for (size_type i = 0; i < mat_nrows(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_row_type row = mat_const_row(A, i);
+      auto it = vect_const_begin(row), ite = vect_const_end(row);
+      for (; it != ite; ++it)
+        if (gmm::abs(gmm::conj(*it) - A(it.index(), i)) > tol) return false;
+    }
+    return true;
+  }
+
+  // Column-wise traversal: the conjugate of each stored entry
+  // (it.index(), i) is compared with the coefficient at the transposed
+  // position.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    col_major) {
+    for (size_type i = 0; i < mat_ncols(A); ++i) {
+      typename linalg_traits<MAT>::const_sub_col_type col = mat_const_col(A, i);
+      auto it = vect_const_begin(col), ite = vect_const_end(col);
+      for (; it != ite; ++it)
+        if (gmm::abs(gmm::conj(*it) - A(i, it.index())) > tol) return false;
+    }
+    return true;
+  }
+
+  // Skyline storage is handled by the sparse overload.
+  template <typename MAT>
+  bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
+                    abstract_skyline)
+  { return is_hermitian(A, tol, abstract_sparse()); }
+  ///@endcond
+}
+
+
+#endif // GMM_BLAS_H__
diff --git a/gmm/gmm_blas_interface.h b/gmm/gmm_blas_interface.h
new file mode 100644
index 000000000..c41ae95d3
--- /dev/null
+++ 
b/gmm/gmm_blas_interface.h @@ -0,0 +1,948 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_blas_interface.h + @author Yves Renard + @date October 7, 2003. + @brief gmm interface for fortran BLAS. 
+*/
+
+#if defined(GETFEM_USES_BLAS) || defined(GMM_USES_BLAS) \
+ || defined(GMM_USES_LAPACK) || defined(GMM_USES_ATLAS)
+
+#ifndef GMM_BLAS_INTERFACE_H
+#define GMM_BLAS_INTERFACE_H
+
+#include "gmm_blas.h"
+#include "gmm_interface.h"
+#include "gmm_matrix.h"
+
+namespace gmm {
+
+  // Use ./configure --enable-blas-interface to activate this interface.
+
+  // Tracing hook called at the top of every interfaced function; it expands
+  // to nothing unless the commented-out definition below is enabled.
+#define GMMLAPACK_TRACE(f)
+  // #define GMMLAPACK_TRACE(f) cout << "function " << f << " called" << endl;
+
+  /* ********************************************************************* */
+  /* Operations interfaced for T = float, double, std::complex<float>      */
+  /*    or std::complex<double> :                                          */
+  /*                                                                       */
+  /* vect_norm2(std::vector<T>)                                            */
+  /*                                                                       */
+  /* vect_sp(std::vector<T>, std::vector<T>)                               */
+  /* vect_sp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_sp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_sp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* vect_hp(std::vector<T>, std::vector<T>)                               */
+  /* vect_hp(scaled(std::vector<T>), std::vector<T>)                       */
+  /* vect_hp(std::vector<T>, scaled(std::vector<T>))                       */
+  /* vect_hp(scaled(std::vector<T>), scaled(std::vector<T>))               */
+  /*                                                                       */
+  /* add(std::vector<T>, std::vector<T>)                                   */
+  /* add(scaled(std::vector<T>, a), std::vector<T>)                        */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)               */
+  /* mult(transposed(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, transposed(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(transposed(dense_matrix<T>), transposed(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /* mult(conjugated(dense_matrix<T>), dense_matrix<T>, dense_matrix<T>)   */
+  /* mult(dense_matrix<T>, conjugated(dense_matrix<T>), dense_matrix<T>)   */
+  /* mult(conjugated(dense_matrix<T>), conjugated(dense_matrix<T>),        */
+  /*      dense_matrix<T>)                                                 */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>)                 */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>)     */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)         */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>)                                                  */
+  /*                                                                       */
+  /* mult_add(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
+  /* mult_add(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>) */
+  /* mult_add(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>)     */
+  /* mult_add(transposed(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /* mult_add(conjugated(dense_matrix<T>), scaled(std::vector<T>),         */
+  /*          std::vector<T>)                                              */
+  /*                                                                       */
+  /* mult(dense_matrix<T>, std::vector<T>, std::vector<T>, std::vector<T>) */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>, std::vector<T>,     */
+  /*      std::vector<T>)                                                  */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), std::vector<T>,         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      std::vector<T>, std::vector<T>)                                  */
+  /* mult(dense_matrix<T>, std::vector<T>, scaled(std::vector<T>),         */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), std::vector<T>,                     */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(dense_matrix<T>, scaled(std::vector<T>), scaled(std::vector<T>), */
+  /*      std::vector<T>)                                                  */
+  /* mult(transposed(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /* mult(conjugated(dense_matrix<T>), scaled(std::vector<T>),             */
+  /*      scaled(std::vector<T>), std::vector<T>)                          */
+  /*                                                                       */
+  /* lower_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* upper_tri_solve(dense_matrix<T>, std::vector<T>, k, b)                */
+  /* lower_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(transposed(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* lower_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /* upper_tri_solve(conjugated(dense_matrix<T>), std::vector<T>, k, b)    */
+  /*                                                                       */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>, std::vector<T>)      */
+  /* rank_one_update(dense_matrix<T>, scaled(std::vector<T>),              */
+  /*                                   std::vector<T>)                     */
+  /* rank_one_update(dense_matrix<T>, std::vector<T>,                      */
+  /*                                   scaled(std::vector<T>))             */
+  /*                                                                       */
+  /* ********************************************************************* */
+
+  /* ********************************************************************* */
+  /* Basic defines.                                                        */
+  /* ********************************************************************* */
+
+  // Scalar types of the four BLAS precisions (s, d, c, z prefixes).
+# define BLAS_S float
+# define BLAS_D double
+# define BLAS_C std::complex<float>
+# define BLAS_Z std::complex<double>
+
+  /* ********************************************************************* */
+  /* BLAS functions used.                                                  */
+  /* ********************************************************************* */
+  // Only daxpy_ and dgemm_ are given a full prototype; every other routine
+  // is declared with an unchecked (...) parameter list.
+  extern "C" {
+    void daxpy_(const int *n, const double *alpha, const double *x,
+                const int *incx, double *y, const int *incy);
+    void dgemm_(const char *tA, const char *tB, const int *m,
+                const int *n, const int *k, const double *alpha,
+                const double *A, const int *ldA, const double *B,
+                const int *ldB, const double *beta, double *C,
+                const int *ldC);
+    void sgemm_(...); void cgemm_(...); void zgemm_(...);
+    void sgemv_(...); void dgemv_(...); void cgemv_(...); void zgemv_(...);
+    void strsv_(...); void dtrsv_(...); void ctrsv_(...); void ztrsv_(...);
+    void saxpy_(...); /*void daxpy_(...); */void caxpy_(...); void zaxpy_(...);
+    BLAS_S sdot_ (...); BLAS_D ddot_ (...);
+    BLAS_C cdotu_(...); BLAS_Z zdotu_(...);
+    BLAS_C cdotc_(...); BLAS_Z zdotc_(...);
+    BLAS_S snrm2_(...); BLAS_D dnrm2_(...);
+    BLAS_S scnrm2_(...); BLAS_D dznrm2_(...);
+    void sger_(...); void dger_(...); void cgerc_(...); void zgerc_(...);
+  }
+
+#if 1
+
+  /* ********************************************************************* */
+  /* vect_norm2(x).                                                        
*/
+  /* ********************************************************************* */
+
+  // Generates vect_norm2 for std::vector<base_type> on top of the BLAS
+  // xNRM2 routine blas_name. An empty vector yields 0 without calling BLAS,
+  // so that &x[0] is never formed on an empty vector.
+# define nrm2_interface(param1, trans1, blas_name, base_type)             \
+  inline number_traits<base_type >::magnitude_type                        \
+  vect_norm2(param1(base_type)) {                                         \
+    GMMLAPACK_TRACE("nrm2_interface");                                    \
+    int inc(1), n(int(vect_size(x))); trans1(base_type);                  \
+    if (n == 0) return number_traits<base_type >::magnitude_type(0);      \
+    return blas_name(&n, &x[0], &inc);                                    \
+  }
+
+# define nrm2_p1(base_type) const std::vector<base_type > &x
+# define nrm2_trans1(base_type)
+
+  nrm2_interface(nrm2_p1, nrm2_trans1, snrm2_ , BLAS_S)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dnrm2_ , BLAS_D)
+  nrm2_interface(nrm2_p1, nrm2_trans1, scnrm2_, BLAS_C)
+  nrm2_interface(nrm2_p1, nrm2_trans1, dznrm2_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* vect_sp(x, y).                                                        */
+  /* ********************************************************************* */
+
+  // Generates vect_sp(x, y) on top of the BLAS dot routine blas_name.
+  // param1/param2 declare the two arguments, trans1/trans2 unpack them into
+  // the plain vectors x and y, and mult1/mult2 are prefixes applied to the
+  // BLAS result: either a no-op cast such as (BLAS_S), or "a*" / "b*" with
+  // the scaling factor of a scaled argument. The length is taken from y;
+  // empty input yields base_type(0) without calling BLAS.
+# define dot_interface(param1, trans1, mult1, param2, trans2, mult2,      \
+                       blas_name, base_type)                              \
+  inline base_type vect_sp(param1(base_type), param2(base_type)) {        \
+    GMMLAPACK_TRACE("dot_interface");                                     \
+    trans1(base_type); trans2(base_type); int inc(1), n(int(vect_size(y)));\
+    if (n == 0) return base_type(0);                                      \
+    return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc);           \
+  }
+
+  // First argument: plain vector, or scaled vector (factor stored in a).
+# define dot_p1(base_type) const std::vector<base_type > &x
+# define dot_trans1(base_type)
+# define dot_p1_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dot_trans1_s(base_type)                                          \
+         std::vector<base_type > &x =                                     \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));     \
+         base_type a(x_.r)
+
+  // Second argument: plain vector, or scaled vector (factor stored in b).
+# define dot_p2(base_type) const std::vector<base_type > &y
+# define dot_trans2(base_type)
+# define dot_p2_s(base_type)                                              \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dot_trans2_s(base_type)                                          \
+         std::vector<base_type > &y =                                     \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));     \
+         base_type b(y_.r)
+
+  // vect_sp(x, y)
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2, dot_trans2, (BLAS_S),
+                sdot_ , BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2, dot_trans2, (BLAS_D),
+                ddot_ , BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2, dot_trans2, (BLAS_C),
+                cdotu_, BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2, dot_trans2, (BLAS_Z),
+                zdotu_, BLAS_Z)
+
+  // vect_sp(scaled(x), y)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_S),
+                sdot_ ,BLAS_S)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_D),
+                ddot_ ,BLAS_D)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_C),
+                cdotu_,BLAS_C)
+  dot_interface(dot_p1_s, dot_trans1_s, a*, dot_p2, dot_trans2, (BLAS_Z),
+                zdotu_,BLAS_Z)
+
+  // vect_sp(x, scaled(y))
+  dot_interface(dot_p1, dot_trans1, (BLAS_S), dot_p2_s, dot_trans2_s, b*,
+                sdot_ ,BLAS_S)
+  dot_interface(dot_p1, dot_trans1, (BLAS_D), dot_p2_s, dot_trans2_s, b*,
+                ddot_ ,BLAS_D)
+  dot_interface(dot_p1, dot_trans1, (BLAS_C), dot_p2_s, dot_trans2_s, b*,
+                cdotu_,BLAS_C)
+  dot_interface(dot_p1, dot_trans1, (BLAS_Z), dot_p2_s, dot_trans2_s, b*,
+                zdotu_,BLAS_Z)
+
+  // vect_sp(scaled(x), scaled(y))
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,sdot_ ,
+                BLAS_S)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,ddot_ ,
+                BLAS_D)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,cdotu_,
+                BLAS_C)
+  dot_interface(dot_p1_s,dot_trans1_s,a*,dot_p2_s,dot_trans2_s,b*,zdotu_,
+                BLAS_Z)
+
+
+  /* ********************************************************************* */
+  /* vect_hp(x, y).                                                        
*/
+  /* ********************************************************************* */
+
+  // Generates vect_hp(x, y) on top of the BLAS routine blas_name (xDOT for
+  // real types, xDOTC for complex types). Arguments work as for
+  // dot_interface; empty input yields base_type(0) without calling BLAS.
+  // NOTE(review): xDOTC conjugates its FIRST vector argument, whereas the
+  // scaled variant below conjugates the factor of the SECOND one (y_.r).
+  // Confirm the (x, y) argument order against the generic vect_hp.
+# define dotc_interface(param1, trans1, mult1, param2, trans2, mult2,     \
+                        blas_name, base_type)                             \
+  inline base_type vect_hp(param1(base_type), param2(base_type)) {        \
+    GMMLAPACK_TRACE("dotc_interface");                                    \
+    trans1(base_type); trans2(base_type); int inc(1), n(int(vect_size(y)));\
+    if (n == 0) return base_type(0);                                      \
+    return mult1 mult2 blas_name(&n, &x[0], &inc, &y[0], &inc);           \
+  }
+
+  // First argument: plain vector, or scaled vector (factor stored in a).
+# define dotc_p1(base_type) const std::vector<base_type > &x
+# define dotc_trans1(base_type)
+# define dotc_p1_s(base_type)                                             \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define dotc_trans1_s(base_type)                                         \
+         std::vector<base_type > &x =                                     \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));     \
+         base_type a(x_.r)
+
+  // Second argument: plain vector, or scaled vector (conjugated factor
+  // stored in b).
+# define dotc_p2(base_type) const std::vector<base_type > &y
+# define dotc_trans2(base_type)
+# define dotc_p2_s(base_type)                                             \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &y_
+# define dotc_trans2_s(base_type)                                         \
+         std::vector<base_type > &y =                                     \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(y_)));     \
+         base_type b(gmm::conj(y_.r))
+
+  // vect_hp(x, y)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2, dotc_trans2,
+                 (BLAS_S),sdot_ ,BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2, dotc_trans2,
+                 (BLAS_D),ddot_ ,BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2, dotc_trans2,
+                 (BLAS_C),cdotc_,BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2, dotc_trans2,
+                 (BLAS_Z),zdotc_,BLAS_Z)
+
+  // vect_hp(scaled(x), y)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+                 (BLAS_S),sdot_, BLAS_S)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+                 (BLAS_D),ddot_ , BLAS_D)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+                 (BLAS_C),cdotc_, BLAS_C)
+  dotc_interface(dotc_p1_s, dotc_trans1_s, a*, dotc_p2, dotc_trans2,
+                 (BLAS_Z),zdotc_, BLAS_Z)
+
+  // vect_hp(x, scaled(y))
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_S), dotc_p2_s, dotc_trans2_s,
+                 b*,sdot_ , BLAS_S)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_D), dotc_p2_s, dotc_trans2_s,
+                 b*,ddot_ , BLAS_D)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_C), dotc_p2_s, dotc_trans2_s,
+                 b*,cdotc_, BLAS_C)
+  dotc_interface(dotc_p1, dotc_trans1, (BLAS_Z), dotc_p2_s, dotc_trans2_s,
+                 b*,zdotc_, BLAS_Z)
+
+  // vect_hp(scaled(x), scaled(y))
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,sdot_ ,
+                 BLAS_S)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,ddot_ ,
+                 BLAS_D)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,cdotc_,
+                 BLAS_C)
+  dotc_interface(dotc_p1_s,dotc_trans1_s,a*,dotc_p2_s,dotc_trans2_s,b*,zdotc_,
+                 BLAS_Z)
+
+  /* ********************************************************************* */
+  /* add(x, y).                                                            */
+  /* ********************************************************************* */
+
+  // Generates add(x, y), i.e. y <- a*x + y, on top of the BLAS xAXPY
+  // routine blas_name. trans1 defines both the plain vector x and the
+  // factor a (1 for a plain vector, x_.r for a scaled one). Nothing is done
+  // for an empty y.
+# define axpy_interface(param1, trans1, blas_name, base_type)             \
+  inline void add(param1(base_type), std::vector<base_type > &y) {        \
+    GMMLAPACK_TRACE("axpy_interface");                                    \
+    int inc(1), n(int(vect_size(y))); trans1(base_type);                  \
+    if (n == 0) return;                                                   \
+    blas_name(&n, &a, &x[0], &inc, &y[0], &inc);                          \
+  }
+
+# define axpy_p1(base_type) const std::vector<base_type > &x
+# define axpy_trans1(base_type) base_type a(1)
+# define axpy_p1_s(base_type)                                             \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define axpy_trans1_s(base_type)                                         \
+         std::vector<base_type > &x =                                     \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));     \
+         base_type a(x_.r)
+
+  // add(x, y)
+  axpy_interface(axpy_p1, axpy_trans1, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1, axpy_trans1, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1, axpy_trans1, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1, axpy_trans1, zaxpy_, BLAS_Z)
+
+  // add(scaled(x, a), y)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, saxpy_, BLAS_S)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, daxpy_, BLAS_D)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, caxpy_, BLAS_C)
+  axpy_interface(axpy_p1_s, axpy_trans1_s, zaxpy_, BLAS_Z)
+
+
+  /* ********************************************************************* */
+  /* mult_add(A, x, z).                                                    
*/
+  /* ********************************************************************* */
+
+  // Generates mult_add_spec(A, x, z, orien), i.e. z <- op(A) * alpha*x + z,
+  // on top of the BLAS xGEMV routine blas_name (called with beta = 1).
+  // trans1 unpacks the matrix argument into the dense matrix A and the BLAS
+  // transposition flag t; trans2 unpacks the vector argument into x and the
+  // factor alpha.
+  // When A has a zero dimension the product contributes nothing, so z is
+  // left untouched (clearing it would discard the "+ z" term).
+# define gemv_interface(param1, trans1, param2, trans2, blas_name,        \
+                        base_type, orien)                                 \
+  inline void mult_add_spec(param1(base_type), param2(base_type),         \
+              std::vector<base_type > &z, orien) {                        \
+    GMMLAPACK_TRACE("gemv_interface");                                    \
+    trans1(base_type); trans2(base_type); base_type beta(1);              \
+    int m(int(mat_nrows(A))), lda(m), n(int(mat_ncols(A))), inc(1);       \
+    if (m && n) blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc, \
+                          &beta, &z[0], &inc);                            \
+  }
+
+  // First parameter
+# define gem_p1_n(base_type)  const dense_matrix<base_type > &A
+# define gem_trans1_n(base_type) const char t = 'N'
+# define gem_p1_t(base_type)                                              \
+         const transposed_col_ref<dense_matrix<base_type > *> &A_
+# define gem_trans1_t(base_type) dense_matrix<base_type > &A =            \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+         const char t = 'T'
+# define gem_p1_tc(base_type)                                             \
+         const transposed_col_ref<const dense_matrix<base_type > *> &A_
+# define gem_p1_c(base_type)                                              \
+         const conjugated_col_matrix_const_ref<dense_matrix<base_type > > &A_
+# define gem_trans1_c(base_type) dense_matrix<base_type > &A =            \
+         const_cast<dense_matrix<base_type > &>(*(linalg_origin(A_)));    \
+         const char t = 'C'
+
+  // second parameter
+# define gemv_p2_n(base_type)  const std::vector<base_type > &x
+# define gemv_trans2_n(base_type) base_type alpha(1)
+# define gemv_p2_s(base_type)                                             \
+    const scaled_vector_const_ref<std::vector<base_type >, base_type > &x_
+# define gemv_trans2_s(base_type) std::vector<base_type > &x =            \
+         const_cast<std::vector<base_type > &>(*(linalg_origin(x_)));     \
+         base_type alpha(x_.r)
+
+  // Z <- AX + Z.
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+                 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+                 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+                 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+                 BLAS_Z, col_major)
+
+  // Z <- transposed(A)X + Z.
+  // (gem_p1_t takes a transposed reference to a non-const dense matrix;
+  //  gem_trans1_t recovers the underlying matrix and passes t = 'T'.)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+                 BLAS_Z, row_major)
+
+  // Z <- transposed(const A)X + Z.
+  // (Same as above for a transposed reference to a const dense matrix.)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+                 BLAS_Z, row_major)
+
+  // Z <- conjugated(A)X + Z.
+  // (gem_trans1_c recovers the underlying matrix and passes t = 'C'.)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+                 BLAS_Z, row_major)
+
+  // Z <- A scaled(X) + Z.
+  // (gemv_trans2_s takes alpha from the scaling factor x_.r.)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+                 BLAS_S, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+                 BLAS_D, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+                 BLAS_C, col_major)
+  gemv_interface(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+                 BLAS_Z, col_major)
+
+  // Z <- transposed(A) scaled(X) + Z.
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+                 BLAS_Z, row_major)
+
+  // Z <- transposed(const A) scaled(X) + Z.
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+                 BLAS_Z, row_major)
+
+  // Z <- conjugated(A) scaled(X) + Z.
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+                 BLAS_S, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+                 BLAS_D, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+                 BLAS_C, row_major)
+  gemv_interface(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+                 BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* mult(A, x, y).                                                        */
+  /* ********************************************************************* */
+
+  // Generates mult_spec(A, x, z, orien), i.e. z <- op(A) * alpha*x, on top
+  // of the BLAS xGEMV routine blas_name (called with beta = 0). The
+  // param/trans arguments are the gem_* / gemv_* helper macros also used by
+  // gemv_interface. When A has a zero dimension the product is the zero
+  // vector, so z is cleared instead of calling BLAS.
+# define gemv_interface2(param1, trans1, param2, trans2, blas_name,       \
+                         base_type, orien)                                \
+  inline void mult_spec(param1(base_type), param2(base_type),             \
+              std::vector<base_type > &z, orien) {                        \
+    GMMLAPACK_TRACE("gemv_interface2");                                   \
+    trans1(base_type); trans2(base_type); base_type beta(0);              \
+    int m(int(mat_nrows(A))), lda(m), n(int(mat_ncols(A))), inc(1);       \
+    if (m && n)                                                           \
+      blas_name(&t, &m, &n, &alpha, &A(0,0), &lda, &x[0], &inc, &beta,    \
+                &z[0], &inc);                                             \
+    else gmm::clear(z);                                                   \
+  }
+
+  // Y <- AX.
+  // (Plain dense matrix and plain vector, one instance per BLAS scalar
+  //  type.)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, sgemv_,
+                  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, dgemv_,
+                  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, cgemv_,
+                  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_n, gemv_trans2_n, zgemv_,
+                  BLAS_Z, col_major)
+
+  // Y <- transposed(A)X.
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+                  BLAS_Z, row_major)
+
+  // Y <- transposed(const A)X.
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_n, gemv_trans2_n, zgemv_,
+                  BLAS_Z, row_major)
+
+  // Y <- conjugated(A)X.
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_n, gemv_trans2_n, zgemv_,
+                  BLAS_Z, row_major)
+
+  // Y <- A scaled(X).
+  // (Scaled-vector variants: the second argument is unpacked by
+  //  gemv_p2_s / gemv_trans2_s.)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, sgemv_,
+                  BLAS_S, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, dgemv_,
+                  BLAS_D, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, cgemv_,
+                  BLAS_C, col_major)
+  gemv_interface2(gem_p1_n, gem_trans1_n, gemv_p2_s, gemv_trans2_s, zgemv_,
+                  BLAS_Z, col_major)
+
+  // Y <- transposed(A) scaled(X).
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_t, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+                  BLAS_Z, row_major)
+
+  // Y <- transposed(const A) scaled(X).
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_tc, gem_trans1_t, gemv_p2_s, gemv_trans2_s, zgemv_,
+                  BLAS_Z, row_major)
+
+  // Y <- conjugated(A) scaled(X).
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, sgemv_,
+                  BLAS_S, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, dgemv_,
+                  BLAS_D, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, cgemv_,
+                  BLAS_C, row_major)
+  gemv_interface2(gem_p1_c, gem_trans1_c, gemv_p2_s, gemv_trans2_s, zgemv_,
+                  BLAS_Z, row_major)
+
+
+  /* ********************************************************************* */
+  /* Rank one update.                                                      
*/
+  /* ********************************************************************* */
+
+  // Generates rank_one_update(A, V, W) on top of the BLAS xGER / xGERC
+  // routine blas_name, called with alpha = 1. Nothing is done when A has a
+  // zero dimension. Note that A is taken by const reference although BLAS
+  // updates its storage in place.
+# define ger_interface(blas_name, base_type)                              \
+  inline void rank_one_update(const dense_matrix<base_type > &A,          \
+                              const std::vector<base_type > &V,           \
+                              const std::vector<base_type > &W) {         \
+    GMMLAPACK_TRACE("ger_interface");                                     \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));              \
+    int incx = 1, incy = 1;                                               \
+    base_type alpha(1);                                                   \
+    if (m && n)                                                           \
+      blas_name(&m, &n, &alpha, &V[0], &incx, &W[0], &incy, &A(0,0), &lda);\
+  }
+
+  ger_interface(sger_, BLAS_S)
+  ger_interface(dger_, BLAS_D)
+  ger_interface(cgerc_, BLAS_C)
+  ger_interface(zgerc_, BLAS_Z)
+
+  // Same with a scaled first vector: gemv_p2_s / gemv_trans2_s unpack it
+  // into the plain vector x and the factor alpha handed to BLAS.
+# define ger_interface_sn(blas_name, base_type)                           \
+  inline void rank_one_update(const dense_matrix<base_type > &A,          \
+                              gemv_p2_s(base_type),                       \
+                              const std::vector<base_type > &W) {         \
+    GMMLAPACK_TRACE("ger_interface");                                     \
+    gemv_trans2_s(base_type);                                             \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));              \
+    int incx = 1, incy = 1;                                               \
+    if (m && n)                                                           \
+      blas_name(&m, &n, &alpha, &x[0], &incx, &W[0], &incy, &A(0,0), &lda);\
+  }
+
+  ger_interface_sn(sger_, BLAS_S)
+  ger_interface_sn(dger_, BLAS_D)
+  ger_interface_sn(cgerc_, BLAS_C)
+  ger_interface_sn(zgerc_, BLAS_Z)
+
+  // Same with a scaled second vector: its factor is conjugated (al2)
+  // before being handed to BLAS as alpha.
+# define ger_interface_ns(blas_name, base_type)                           \
+  inline void rank_one_update(const dense_matrix<base_type > &A,          \
+                              const std::vector<base_type > &V,           \
+                              gemv_p2_s(base_type)) {                     \
+    GMMLAPACK_TRACE("ger_interface");                                     \
+    gemv_trans2_s(base_type);                                             \
+    int m(int(mat_nrows(A))), lda = m, n(int(mat_ncols(A)));              \
+    int incx = 1, incy = 1;                                               \
+    base_type al2 = gmm::conj(alpha);                                     \
+    if (m && n)                                                           \
+      blas_name(&m, &n, &al2, &V[0], &incx, &x[0], &incy, &A(0,0), &lda); \
+  }
+
+  ger_interface_ns(sger_, BLAS_S)
+  ger_interface_ns(dger_, BLAS_D)
+  ger_interface_ns(cgerc_, BLAS_C)
+  ger_interface_ns(zgerc_, BLAS_Z)
+
+  /* ********************************************************************* */
+  /* dense matrix x dense matrix multiplication.                           
*/ + /* ********************************************************************* */ + /* gemm_interface_nn: mult_spec overload computing the product of A and B into C through blas_name with both operands flagged N (alpha = 1, beta = 0). lda and ldc are the row count of A, ldb is the column count of A. If any of m, k, n is zero the BLAS call is skipped and C is cleared instead. */ +# define gemm_interface_nn(blas_name, base_type) \ + inline void mult_spec(const dense_matrix &A, \ + const dense_matrix &B, \ + dense_matrix &C, c_mult) { \ + GMMLAPACK_TRACE("gemm_interface_nn"); \ + const char t = 'N'; \ + int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A))); \ + int n(int(mat_ncols(B))); \ + int ldb = k, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &t, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_nn(sgemm_, BLAS_S) + gemm_interface_nn(dgemm_, BLAS_D) + gemm_interface_nn(cgemm_, BLAS_C) + gemm_interface_nn(zgemm_, BLAS_Z) + + /* ********************************************************************* */ + /* transposed(dense matrix) x dense matrix multiplication. */ + /* ********************************************************************* */ + /* gemm_interface_tn: same scheme for a transposed view A_ times B. The matrix behind the view is recovered with linalg_origin (constness cast away) and passed with flag T, so m is its column count and k (also lda) its row count. NOTE(review): is_const presumably names the const or non-const matrix type wrapped by the view - the template arguments are not visible in this excerpt. */ +# define gemm_interface_tn(blas_name, base_type, is_const) \ + inline void mult_spec( \ + const transposed_col_ref *> &A_,\ + const dense_matrix &B, \ + dense_matrix &C, rcmult) { \ + GMMLAPACK_TRACE("gemm_interface_tn"); \ + dense_matrix &A \ + = const_cast &>(*(linalg_origin(A_))); \ + const char t = 'T', u = 'N'; \ + int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B))); \ + int lda = k, ldb = k, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_tn(sgemm_, BLAS_S, dense_matrix) + gemm_interface_tn(dgemm_, BLAS_D, dense_matrix) + gemm_interface_tn(cgemm_, BLAS_C, dense_matrix) + gemm_interface_tn(zgemm_, BLAS_Z, dense_matrix) + gemm_interface_tn(sgemm_, BLAS_S, const dense_matrix) + gemm_interface_tn(dgemm_, BLAS_D, const dense_matrix) + gemm_interface_tn(cgemm_, BLAS_C, const dense_matrix) + gemm_interface_tn(zgemm_, BLAS_Z, const
dense_matrix) + + /* ********************************************************************* */ + /* dense matrix x transposed(dense matrix) multiplication. */ + /* ********************************************************************* */ + /* gemm_interface_nt: mult_spec overload for A times a transposed view B_. The matrix behind B_ is passed with flag T; n and ldb are its row count. C is cleared when m, k or n is zero. */ +# define gemm_interface_nt(blas_name, base_type, is_const) \ + inline void mult_spec(const dense_matrix &A, \ + const transposed_col_ref *> &B_, \ + dense_matrix &C, r_mult) { \ + GMMLAPACK_TRACE("gemm_interface_nt"); \ + dense_matrix &B \ + = const_cast &>(*(linalg_origin(B_))); \ + const char t = 'N', u = 'T'; \ + int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A))); \ + int n(int(mat_nrows(B))); \ + int ldb = n, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_nt(sgemm_, BLAS_S, dense_matrix) + gemm_interface_nt(dgemm_, BLAS_D, dense_matrix) + gemm_interface_nt(cgemm_, BLAS_C, dense_matrix) + gemm_interface_nt(zgemm_, BLAS_Z, dense_matrix) + gemm_interface_nt(sgemm_, BLAS_S, const dense_matrix) + gemm_interface_nt(dgemm_, BLAS_D, const dense_matrix) + gemm_interface_nt(cgemm_, BLAS_C, const dense_matrix) + gemm_interface_nt(zgemm_, BLAS_Z, const dense_matrix) + + /* ********************************************************************* */ + /* transposed(dense matrix) x transposed(dense matrix) multiplication.
*/ + /* ********************************************************************* */ + /* gemm_interface_tt: mult_spec overload for the product of two transposed views. Both underlying matrices are recovered via linalg_origin and passed to blas_name with flag T; C is cleared when m, k or n is zero. Instantiated below for every const / non-const combination of the two views and the four BLAS scalar types. */ +# define gemm_interface_tt(blas_name, base_type, isA_const, isB_const) \ + inline void mult_spec( \ + const transposed_col_ref *> &A_, \ + const transposed_col_ref *> &B_, \ + dense_matrix &C, r_mult) { \ + GMMLAPACK_TRACE("gemm_interface_tt"); \ + dense_matrix &A \ + = const_cast &>(*(linalg_origin(A_))); \ + dense_matrix &B \ + = const_cast &>(*(linalg_origin(B_))); \ + const char t = 'T', u = 'T'; \ + int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_nrows(B))); \ + int lda = k, ldb = n, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, dense_matrix) + gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, dense_matrix) + gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, dense_matrix) + gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, dense_matrix) + gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, dense_matrix) + gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, dense_matrix) + gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, dense_matrix) + gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, dense_matrix) + gemm_interface_tt(sgemm_, BLAS_S, dense_matrix, const dense_matrix) + gemm_interface_tt(dgemm_, BLAS_D, dense_matrix, const dense_matrix) + gemm_interface_tt(cgemm_, BLAS_C, dense_matrix, const dense_matrix) + gemm_interface_tt(zgemm_, BLAS_Z, dense_matrix, const dense_matrix) + gemm_interface_tt(sgemm_, BLAS_S, const dense_matrix, const dense_matrix) + gemm_interface_tt(dgemm_, BLAS_D, const dense_matrix, const dense_matrix) + gemm_interface_tt(cgemm_, BLAS_C, const dense_matrix, const dense_matrix) + gemm_interface_tt(zgemm_, BLAS_Z, const dense_matrix, const dense_matrix) + + + /* ********************************************************************* */ + /*
conjugated(dense matrix) x dense matrix multiplication. */ + /* ********************************************************************* */ + /* gemm_interface_cn: mult_spec overload for a conjugated view A_ times B. The matrix behind the view is recovered with linalg_origin and passed to blas_name with flag C (m is its column count, k and lda its row count). C is cleared when m, k or n is zero. */ +# define gemm_interface_cn(blas_name, base_type) \ + inline void mult_spec( \ + const conjugated_col_matrix_const_ref > &A_,\ + const dense_matrix &B, \ + dense_matrix &C, rcmult) { \ + GMMLAPACK_TRACE("gemm_interface_cn"); \ + dense_matrix &A \ + = const_cast &>(*(linalg_origin(A_))); \ + const char t = 'C', u = 'N'; \ + int m(int(mat_ncols(A))), k(int(mat_nrows(A))), n(int(mat_ncols(B))); \ + int lda = k, ldb = k, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_cn(sgemm_, BLAS_S) + gemm_interface_cn(dgemm_, BLAS_D) + gemm_interface_cn(cgemm_, BLAS_C) + gemm_interface_cn(zgemm_, BLAS_Z) + + /* ********************************************************************* */ + /* dense matrix x conjugated(dense matrix) multiplication.
*/ + /* ********************************************************************* */ + /* gemm_interface_nc: mult_spec overload for A times a conjugated view B_, passing the matrix behind B_ with flag C (n and ldb are its row count). Unlike its siblings this overload takes an extra row_major tag after c_mult. */ +# define gemm_interface_nc(blas_name, base_type) \ + inline void mult_spec(const dense_matrix &A, \ + const conjugated_col_matrix_const_ref > &B_,\ + dense_matrix &C, c_mult, row_major) { \ + GMMLAPACK_TRACE("gemm_interface_nc"); \ + dense_matrix &B \ + = const_cast &>(*(linalg_origin(B_))); \ + const char t = 'N', u = 'C'; \ + int m(int(mat_nrows(A))), lda = m, k(int(mat_ncols(A))); \ + int n(int(mat_nrows(B))), ldb = n, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_nc(sgemm_, BLAS_S) + gemm_interface_nc(dgemm_, BLAS_D) + gemm_interface_nc(cgemm_, BLAS_C) + gemm_interface_nc(zgemm_, BLAS_Z) + + /* ********************************************************************* */ + /* conjugated(dense matrix) x conjugated(dense matrix) multiplication. */ + /* ********************************************************************* */ + /* gemm_interface_cc: mult_spec overload for the product of two conjugated views; both underlying matrices are recovered via linalg_origin and passed with flag C. C is cleared when m, k or n is zero. */ +# define gemm_interface_cc(blas_name, base_type) \ + inline void mult_spec( \ + const conjugated_col_matrix_const_ref > &A_,\ + const conjugated_col_matrix_const_ref > &B_,\ + dense_matrix &C, r_mult) { \ + GMMLAPACK_TRACE("gemm_interface_cc"); \ + dense_matrix &A \ + = const_cast &>(*(linalg_origin(A_))); \ + dense_matrix &B \ + = const_cast &>(*(linalg_origin(B_))); \ + const char t = 'C', u = 'C'; \ + int m(int(mat_ncols(A))), k(int(mat_nrows(A))), lda = k; \ + int n(int(mat_nrows(B))), ldb = n, ldc = m; \ + base_type alpha(1), beta(0); \ + if (m && k && n) \ + blas_name(&t, &u, &m, &n, &k, &alpha, \ + &A(0,0), &lda, &B(0,0), &ldb, &beta, &C(0,0), &ldc); \ + else gmm::clear(C); \ + } + + gemm_interface_cc(sgemm_, BLAS_S) + gemm_interface_cc(dgemm_, BLAS_D) + gemm_interface_cc(cgemm_, BLAS_C) + gemm_interface_cc(zgemm_, BLAS_Z) + + /*
********************************************************************* */ + /* Tri solve. */ + /* ********************************************************************* */ + /* trsv_interface: defines f_name(A, x, k, is_unit), a triangular solve performed in place on x by blas_name. loru declares the triangle flag l (see trsv_upper / trsv_lower below); trans1(base_type) is expected to declare the transposition flag t and the matrix A (macro defined before this excerpt). d is U when is_unit is true and N otherwise, k is passed as the system order n, lda is the row count of A, and the call is skipped when A has no rows. */ +# define trsv_interface(f_name, loru, param1, trans1, blas_name, base_type)\ + inline void f_name(param1(base_type), std::vector &x, \ + size_type k, bool is_unit) { \ + GMMLAPACK_TRACE("trsv_interface"); \ + loru; trans1(base_type); char d = is_unit ? 'U' : 'N'; \ + int lda(int(mat_nrows(A))), inc(1), n = int(k); \ + if (lda) blas_name(&l, &t, &d, &n, &A(0,0), &lda, &x[0], &inc); \ + } + /* Triangle selectors used as the loru argument: each declares the character l that is passed first to blas_name. */ +# define trsv_upper const char l = 'U' +# define trsv_lower const char l = 'L' + + // X <- LOWER(A)^{-1}X. + trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n, + strsv_, BLAS_S) + trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n, + dtrsv_, BLAS_D) + trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n, + ctrsv_, BLAS_C) + trsv_interface(lower_tri_solve, trsv_lower, gem_p1_n, gem_trans1_n, + ztrsv_, BLAS_Z) + + // X <- UPPER(A)^{-1}X. + trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n, + strsv_, BLAS_S) + trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n, + dtrsv_, BLAS_D) + trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n, + ctrsv_, BLAS_C) + trsv_interface(upper_tri_solve, trsv_upper, gem_p1_n, gem_trans1_n, + ztrsv_, BLAS_Z) + /* The transposed and conjugated variants below pair lower_tri_solve with trsv_upper and upper_tri_solve with trsv_lower: the triangle flag refers to the stored matrix, whose upper part is the lower part of its transpose. */ + // X <- LOWER(transposed(A))^{-1}X. + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t, + strsv_, BLAS_S) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t, + dtrsv_, BLAS_D) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t, + ctrsv_, BLAS_C) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_t, gem_trans1_t, + ztrsv_, BLAS_Z) + + // X <- UPPER(transposed(A))^{-1}X.
+ trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t, + strsv_, BLAS_S) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t, + dtrsv_, BLAS_D) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t, + ctrsv_, BLAS_C) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_t, gem_trans1_t, + ztrsv_, BLAS_Z) + + // X <- LOWER(transposed(const A))^{-1}X. + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t, + strsv_, BLAS_S) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t, + dtrsv_, BLAS_D) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t, + ctrsv_, BLAS_C) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_tc, gem_trans1_t, + ztrsv_, BLAS_Z) + + // X <- UPPER(transposed(const A))^{-1}X. + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t, + strsv_, BLAS_S) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t, + dtrsv_, BLAS_D) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t, + ctrsv_, BLAS_C) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_tc, gem_trans1_t, + ztrsv_, BLAS_Z) + + // X <- LOWER(conjugated(A))^{-1}X. + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c, + strsv_, BLAS_S) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c, + dtrsv_, BLAS_D) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c, + ctrsv_, BLAS_C) + trsv_interface(lower_tri_solve, trsv_upper, gem_p1_c, gem_trans1_c, + ztrsv_, BLAS_Z) + + // X <- UPPER(conjugated(A))^{-1}X.
+ trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c, + strsv_, BLAS_S) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c, + dtrsv_, BLAS_D) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c, + ctrsv_, BLAS_C) + trsv_interface(upper_tri_solve, trsv_lower, gem_p1_c, gem_trans1_c, + ztrsv_, BLAS_Z) + +#endif +} + +#endif // GMM_BLAS_INTERFACE_H + +#endif // GMM_USES_BLAS diff --git a/gmm/gmm_condition_number.h b/gmm/gmm_condition_number.h new file mode 100644 index 000000000..0dac20e6b --- /dev/null +++ b/gmm/gmm_condition_number.h @@ -0,0 +1,147 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard, Julien Pommier + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction.
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_condition_number.h + @author Yves Renard , Julien Pommier + @date August 27, 2003. + @brief computation of the condition number of dense matrices. +*/ +#ifndef GMM_CONDITION_NUMBER_H__ +#define GMM_CONDITION_NUMBER_H__ + +#include "gmm_dense_qr.h" + +namespace gmm { + + /** computation of the condition number of dense matrices using SVD. + + Uses symmetric_qr_algorithm => dense matrices only. + + @param M a matrix. + @param emin output: smallest absolute value among the computed eigenvalues. + @param emax output: largest absolute value among the computed eigenvalues. + @return emax / emin; gmm::default_max(R()) when emin is zero; R(0) when M has no rows and no columns. + */ + template + typename number_traits::value_type>::magnitude_type + condition_number(const MAT& M, + typename number_traits::value_type>::magnitude_type& emin, + typename number_traits::value_type>::magnitude_type& emax) { + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + /* The sizeof(T) != sizeof(R) test restricts this shortcut to value types that differ in size from their magnitude type (the complex case named below). emin and emax are not written on this early return. */ + // Added because of errors in complex with zero det + if (sizeof(T) != sizeof(R) && gmm::abs(gmm::lu_det(M)) == R(0)) + return gmm::default_max(R()); + + size_type m = mat_nrows(M), n = mat_ncols(M); + emax = emin = R(0); + std::vector eig(m+n); + + if (m+n == 0) return R(0); + if (is_hermitian(M)) { + eig.resize(m); + gmm::symmetric_qr_algorithm(M, eig); + } + else { + /* Non-hermitian case: M and conjugated(M) are copied into the two off-diagonal blocks of an (m+n) x (m+n) matrix B, which is then given to symmetric_qr_algorithm. */ dense_matrix B(m+n, m+n); // not very efficient ??
+ gmm::copy(conjugated(M), sub_matrix(B, sub_interval(m, n), sub_interval(0, m))); + gmm::copy(M, sub_matrix(B, sub_interval(0, m), + sub_interval(m, n))); + gmm::symmetric_qr_algorithm(B, eig); + } + emin = emax = gmm::abs(eig[0]); + for (size_type i = 1; i < eig.size(); ++i) { + R e = gmm::abs(eig[i]); + emin = std::min(emin, e); + emax = std::max(emax, e); + } + // cout << "emin = " << emin << " emax = " << emax << endl; + if (emin == R(0)) return gmm::default_max(R()); + return emax / emin; + } + /** Convenience overload: same computation as the three-argument condition_number, with the extreme values discarded. */ + template + typename number_traits::value_type>::magnitude_type + condition_number(const MAT& M) { + typename number_traits::value_type>::magnitude_type emax, emin; + return condition_number(M, emin, emax); + } + /** Returns abs(trace(B)) * abs(trace(inverse of B)), where B is M * conjugated(M) when M has fewer rows than columns and conjugated(M) * M otherwise. B is inverted in place with lu_inverse. */ + template + typename number_traits::value_type>::magnitude_type + Frobenius_condition_number_sqr(const MAT& M) { + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + size_type m = mat_nrows(M), n = mat_ncols(M); + dense_matrix B(std::min(m,n), std::min(m,n)); + if (m < n) mult(M,gmm::conjugated(M),B); + else mult(gmm::conjugated(M),M,B); + R trB = abs(mat_trace(B)); + lu_inverse(B); + return trB*abs(mat_trace(B)); + } + /** Square root of Frobenius_condition_number_sqr(M). */ + template + typename number_traits::value_type>::magnitude_type + Frobenius_condition_number(const MAT& M) + { return sqrt(Frobenius_condition_number_sqr(M)); } + + /** estimation of the condition number (TO BE DONE...)
+ For now this simply forwards to condition_number(M, emin, emax). + */ + template + typename number_traits::value_type>::magnitude_type + condest(const MAT& M, + typename number_traits::value_type>::magnitude_type& emin, + typename number_traits::value_type>::magnitude_type& emax) { + return condition_number(M, emin, emax); + } + /** Overload of condest that discards emin and emax. */ + template + typename number_traits::value_type>::magnitude_type + condest(const MAT& M) { + typename number_traits::value_type>::magnitude_type emax, emin; + return condest(M, emin, emax); + } +} + +#endif diff --git a/gmm/gmm_conjugated.h b/gmm/gmm_conjugated.h new file mode 100644 index 000000000..1e3e7fc61 --- /dev/null +++ b/gmm/gmm_conjugated.h @@ -0,0 +1,398 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction.
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_conjugated.h + @author Yves Renard + @date September 18, 2003. + @brief handle conjugation of complex matrices/vectors. +*/ +#ifndef GMM_CONJUGATED_H__ +#define GMM_CONJUGATED_H__ + +#include "gmm_def.h" + +namespace gmm { + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + /* ********************************************************************* */ + /* Conjugated references on vectors */ + /* ********************************************************************* */ + /* Iterator adaptor: stores an iterator it and forwards increment, arithmetic, comparison and index() to it, while operator* and operator[] return gmm::conj of the referenced value (by value). */ + template struct conjugated_const_iterator { + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::difference_type difference_type; + typedef typename std::iterator_traits::iterator_category + iterator_category; + + IT it; + + conjugated_const_iterator(void) {} + conjugated_const_iterator(const IT &i) : it(i) {} + + inline size_type index(void) const { return it.index(); } + conjugated_const_iterator operator ++(int) + { conjugated_const_iterator tmp = *this; ++it; return tmp; } + conjugated_const_iterator operator --(int) + { conjugated_const_iterator tmp = *this; --it; return tmp; } + conjugated_const_iterator &operator ++() { ++it; return *this; } + conjugated_const_iterator &operator --() { --it; return *this; } + conjugated_const_iterator &operator +=(difference_type i) + { it += i; return *this; } +
conjugated_const_iterator &operator -=(difference_type i) + { it -= i; return *this; } + conjugated_const_iterator operator +(difference_type i) const + { conjugated_const_iterator itb = *this; return (itb += i); } + conjugated_const_iterator operator -(difference_type i) const + { conjugated_const_iterator itb = *this; return (itb -= i); } + difference_type operator -(const conjugated_const_iterator &i) const + { return difference_type(it - i.it); } + + value_type operator *() const { return gmm::conj(*it); } + value_type operator [](size_type ii) const { return gmm::conj(it[ii]); } + + bool operator ==(const conjugated_const_iterator &i) const + { return (i.it == it); } + bool operator !=(const conjugated_const_iterator &i) const + { return (i.it != it); } + bool operator < (const conjugated_const_iterator &i) const + { return (it < i.it); } + }; + /* Read-only conjugated view of a vector v: keeps the const begin/end iterators, the origin and the size of v; operator[] returns gmm::conj of the element obtained through the linalg_traits access function. */ + template struct conjugated_vector_const_ref { + typedef conjugated_vector_const_ref this_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::const_iterator iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + size_type size_; + + conjugated_vector_const_ref(const V &v) + : begin_(vect_const_begin(v)), end_(vect_const_end(v)), + origin(linalg_origin(v)), + size_(vect_size(v)) {} + + reference operator[](size_type i) const + { return gmm::conj(linalg_traits::access(origin, begin_, end_, i)); } + }; + /* linalg_traits specialisation describing the conjugated vector view as a constant (linalg_const) abstract_vector whose const_iterator is a conjugated_const_iterator and whose access function conjugates the underlying element. */ + template struct linalg_traits > { + typedef conjugated_vector_const_ref this_type; + typedef typename linalg_traits::origin_type origin_type; + typedef linalg_const is_reference; + typedef abstract_vector linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef value_type reference; + typedef abstract_null_type iterator; + typedef conjugated_const_iterator::const_iterator> const_iterator; + typedef typename
linalg_traits::storage_type storage_type; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type size(const this_type &v) { return v.size_; } + static iterator begin(this_type &v) { return iterator(v.begin_); } + static const_iterator begin(const this_type &v) + { return const_iterator(v.begin_); } + static iterator end(this_type &v) + { return iterator(v.end_); } + static const_iterator end(const this_type &v) + { return const_iterator(v.end_); } + static value_type access(const origin_type *o, const const_iterator &it, + const const_iterator &ite, size_type i) + { return gmm::conj(linalg_traits::access(o, it.it, ite.it, i)); } + static const origin_type* origin(const this_type &v) { return v.origin; } + }; + + template std::ostream &operator << + (std::ostream &o, const conjugated_vector_const_ref& m) + { gmm::write(o,m); return o; } + + /* ********************************************************************* */ + /* Conjugated references on matrices */ + /* ********************************************************************* */ + /* Wrapper around the const row iterator of M. Dereferencing yields the wrapped iterator itself; the conjugation is applied by the linalg_traits specialisation of the matrix view that consumes it. */ + template struct conjugated_row_const_iterator { + typedef conjugated_row_const_iterator iterator; + typedef typename linalg_traits::const_row_iterator ITER; + typedef typename linalg_traits::value_type value_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + ITER it; + + iterator operator ++(int) { iterator tmp = *this; it++; return tmp; } + iterator operator --(int) { iterator tmp = *this; it--; return tmp; } + iterator &operator ++() { it++; return *this; } + iterator &operator --() { it--; return *this; } + iterator &operator +=(difference_type i) { it += i; return *this; } + iterator &operator -=(difference_type i) { it -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const +
{ return it - i.it; } + + ITER operator *() const { return it; } + ITER operator [](int i) { return it + i; } + + bool operator ==(const iterator &i) const { return (it == i.it); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (it < i.it); } + + conjugated_row_const_iterator(void) {} + conjugated_row_const_iterator(const ITER &i) : it(i) { } + + }; + /* Conjugated view of a matrix m accessed through its rows. The stored dimensions are swapped (nr = mat_ncols(m), nc = mat_nrows(m)) and operator()(i, j) returns gmm::conj of entry i of row j of m. */ + template struct conjugated_row_matrix_const_ref { + + typedef conjugated_row_matrix_const_ref this_type; + typedef typename linalg_traits::const_row_iterator iterator; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + size_type nr, nc; + + conjugated_row_matrix_const_ref(const M &m) + : begin_(mat_row_begin(m)), end_(mat_row_end(m)), + origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} + + value_type operator()(size_type i, size_type j) const + { return gmm::conj(linalg_traits::access(begin_+j, i)); } + }; + + template std::ostream &operator << + (std::ostream &o, const conjugated_row_matrix_const_ref& m) + { gmm::write(o,m); return o; } + /* Column counterpart of conjugated_row_const_iterator: wraps the const column iterator of M and returns it unchanged on dereference. */ + + template struct conjugated_col_const_iterator { + typedef conjugated_col_const_iterator iterator; + typedef typename linalg_traits::const_col_iterator ITER; + typedef typename linalg_traits::value_type value_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + ITER it; + + iterator operator ++(int) { iterator tmp = *this; it++; return tmp; } + iterator operator --(int) { iterator tmp = *this; it--; return tmp; } + iterator &operator ++() { it++; return *this; } + iterator &operator --() { it--; return *this; } + iterator &operator +=(difference_type i) { it += i; return *this; } + iterator &operator -=(difference_type i) { it -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } +
iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const + { return it - i.it; } + + ITER operator *() const { return it; } + ITER operator [](int i) { return it + i; } + + bool operator ==(const iterator &i) const { return (it == i.it); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (it < i.it); } + + conjugated_col_const_iterator(void) {} + conjugated_col_const_iterator(const ITER &i) : it(i) { } + + }; + /* Conjugated view of a matrix m accessed through its columns, again with swapped stored dimensions; operator()(i, j) returns gmm::conj of entry j of column i of m. */ + template struct conjugated_col_matrix_const_ref { + + typedef conjugated_col_matrix_const_ref this_type; + typedef typename linalg_traits::const_col_iterator iterator; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + size_type nr, nc; + + conjugated_col_matrix_const_ref(const M &m) + : begin_(mat_col_begin(m)), end_(mat_col_end(m)), + origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} + + value_type operator()(size_type i, size_type j) const + { return gmm::conj(linalg_traits::access(begin_+i, j)); } + }; + + + + template std::ostream &operator << + (std::ostream &o, const conjugated_col_matrix_const_ref& m) + { gmm::write(o,m); return o; } + /* Helper metafunctions giving the return type of conjugated(): the default conjugated_return_ yields a const reference to L, while the specialisations select conjugated_vector_const_ref or, through conjugated_return__, the row or column matrix view. NOTE(review): the template parameter lists are not visible in this excerpt, so the exact selection criteria should be checked against the upstream header. */ + + template struct conjugated_return__ { + typedef conjugated_row_matrix_const_ref return_type; + }; + template struct conjugated_return__ { + typedef conjugated_col_matrix_const_ref return_type; + }; + template struct conjugated_return_ { + typedef const L & return_type; + }; + template + struct conjugated_return_, abstract_vector> { + typedef conjugated_vector_const_ref return_type; + }; + template + struct conjugated_return_ { + typedef typename conjugated_return__::sub_orientation>::potype + >::return_type return_type; + }; + template struct conjugated_return { + typedef typename + conjugated_return_::value_type, +
typename linalg_traits::linalg_type + >::return_type return_type; + }; + + ///@endcond + /** return a conjugated view of the input matrix or vector. */ + template inline + typename conjugated_return::return_type + conjugated(const L &v) { + return conjugated(v, typename linalg_traits::value_type(), + typename linalg_traits::linalg_type()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + /* Tag-dispatched implementations behind conjugated(v): the generic overload returns v unchanged, the std::complex / abstract_vector overload builds a conjugated_vector_const_ref, and the abstract_matrix overload dispatches on the principal orientation to the row_major or col_major overload. */ + template inline + const L & conjugated(const L &v, T, LT) { return v; } + + template inline + conjugated_vector_const_ref conjugated(const L &v, std::complex, + abstract_vector) + { return conjugated_vector_const_ref(v); } + + template inline + typename conjugated_return__::sub_orientation>::potype>::return_type + conjugated(const L &v, T, abstract_matrix) { + return conjugated(v, typename principal_orientation_type::sub_orientation>::potype()); + } + + template inline + conjugated_row_matrix_const_ref conjugated(const L &v, row_major) + { return conjugated_row_matrix_const_ref(v); } + + template inline + conjugated_col_matrix_const_ref conjugated(const L &v, col_major) + { return conjugated_col_matrix_const_ref(v); } + /* Traits of the conjugated row-matrix view: it is exposed as a constant col_major matrix whose columns are the conjugated rows of the original matrix (col(it) returns conjugated(row)). Row types and row iterators are abstract_null_type. */ + template + struct linalg_traits > { + typedef conjugated_row_matrix_const_ref this_type; + typedef typename linalg_traits::origin_type origin_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef value_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef typename org_type::const_sub_row_type>::t vector_type; + typedef conjugated_vector_const_ref sub_col_type; + typedef conjugated_vector_const_ref const_sub_col_type; + typedef conjugated_row_const_iterator col_iterator; + typedef conjugated_row_const_iterator const_col_iterator; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type row_iterator; + typedef col_major
sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static inline size_type ncols(const this_type &m) { return m.nc; } + static inline size_type nrows(const this_type &m) { return m.nr; } + static inline const_sub_col_type col(const const_col_iterator &it) + { return conjugated(linalg_traits::row(it.it)); } + static inline const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.begin_); } + static inline const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m.end_); } + static inline const origin_type* origin(const this_type &m) + { return m.origin; } + static value_type access(const const_col_iterator &it, size_type i) + { return gmm::conj(linalg_traits::access(it.it, i)); } + }; + /* Traits of the conjugated column-matrix view: the mirror image of the specialisation above. The view is exposed as a constant row_major matrix whose rows are the conjugated columns of the original matrix (row(it) returns conjugated(col)). */ + template + struct linalg_traits > { + typedef conjugated_col_matrix_const_ref this_type; + typedef typename linalg_traits::origin_type origin_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef value_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef typename org_type::const_sub_col_type>::t vector_type; + typedef conjugated_vector_const_ref sub_row_type; + typedef conjugated_vector_const_ref const_sub_row_type; + typedef conjugated_col_const_iterator row_iterator; + typedef conjugated_col_const_iterator const_row_iterator; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type col_iterator; + typedef row_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static inline size_type nrows(const this_type &m) { return m.nr; } + static inline size_type ncols(const this_type &m) { return m.nc; } + static inline const_sub_row_type row(const const_row_iterator &it) + { return conjugated(linalg_traits::col(it.it)); } + static inline
const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.begin_); } + static inline const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m.end_); } + static inline const origin_type* origin(const this_type &m) + { return m.origin; } + static value_type access(const const_row_iterator &it, size_type i) + { return gmm::conj(linalg_traits::access(it.it, i)); } + }; + + ///@endcond + + +} + +#endif // GMM_CONJUGATED_H__ diff --git a/gmm/gmm_def.h b/gmm/gmm_def.h new file mode 100644 index 000000000..603c57b69 --- /dev/null +++ b/gmm/gmm_def.h @@ -0,0 +1,1123 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction.
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_def.h + @author Yves Renard + @date October 13, 2002. + @brief Basic definitions and tools of GMM. +*/ +#ifndef GMM_DEF_H__ +#define GMM_DEF_H__ + +#include "gmm_ref.h" +#include + +#ifndef M_PI +# define M_E 2.7182818284590452354 /* e */ +# define M_LOG2E 1.4426950408889634074 /* 1/ln(2) */ +# define M_LOG10E 0.43429448190325182765 /* 1/ln(10) */ +# define M_LN2 0.69314718055994530942 /* ln(2) */ +# define M_LN10 2.30258509299404568402 /* ln(10) */ +# define M_PI 3.14159265358979323846 /* pi */ +# define M_PI_2 1.57079632679489661923 /* pi/2 */ +# define M_PI_4 0.78539816339744830962 /* pi/4 */ +# define M_1_PI 0.31830988618379067154 /* 1/pi */ +# define M_2_PI 0.63661977236758134308 /* 2/pi */ +# define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +# define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +# define M_SQRT1_2 0.70710678118654752440 /* sqrt(2)/2 */ +#endif + +#ifndef M_PIl +# define M_PIl 3.1415926535897932384626433832795029L /* pi */ +# define M_PI_2l 1.5707963267948966192313216916397514L /* pi/2 */ +# define M_PI_4l 0.7853981633974483096156608458198757L /* pi/4 */ +# define M_1_PIl 0.3183098861837906715377675267450287L /* 1/pi */ +# define M_2_PIl 0.6366197723675813430755350534900574L /* 2/pi */ +# define M_2_SQRTPIl 1.1283791670955125738961589031215452L /* 2/sqrt(pi) */ +#endif + +namespace gmm { + + typedef size_t size_type; + + /* ******************************************************************** */ 
+ /* Specifier types */ + /* ******************************************************************** */ + /* not perfectly null, required by aCC 3.33 */ + struct abstract_null_type { + abstract_null_type(int=0) {} + template <typename A, typename B, typename C> void operator()(A,B,C) {} + }; // placeholder type: specifies a lack of information. + + struct linalg_true {}; // compile-time "true" tag + struct linalg_false {}; // compile-time "false" tag + + template <typename V, typename W> struct linalg_and + { typedef linalg_false bool_type; }; // primary template: false + template <> struct linalg_and<linalg_true, linalg_true> + { typedef linalg_true bool_type; }; // true only for (true, true) + template <typename V, typename W> struct linalg_or + { typedef linalg_true bool_type; }; // primary template: true + template <> struct linalg_or<linalg_false, linalg_false> + { typedef linalg_false bool_type; }; // false only for (false, false) + + struct linalg_const {}; // A reference is either linalg_const, + struct linalg_modifiable {}; // linalg_modifiable or linalg_false. + + struct abstract_vector {}; // The object is a vector + struct abstract_matrix {}; // The object is a matrix + + struct abstract_sparse {}; // sparse matrix or vector + struct abstract_skyline {}; // 'sky-line' matrix or vector + struct abstract_dense {}; // dense matrix or vector + struct abstract_indirect {}; // matrix given by the product with a vector + + struct row_major {}; // matrix with a row access. 
+ struct col_major {}; // matrix with a column access + struct row_and_col {}; // both accesses but row preference + struct col_and_row {}; // both accesses but column preference + + template struct transposed_type; + template<> struct transposed_type {typedef col_major t_type;}; + template<> struct transposed_type {typedef row_major t_type;}; + template<> struct transposed_type {typedef col_and_row t_type;}; + template<> struct transposed_type {typedef row_and_col t_type;}; + + template struct principal_orientation_type + { typedef abstract_null_type potype; }; + template<> struct principal_orientation_type + { typedef row_major potype; }; + template<> struct principal_orientation_type + { typedef col_major potype; }; + template<> struct principal_orientation_type + { typedef row_major potype; }; + template<> struct principal_orientation_type + { typedef col_major potype; }; + + // template struct linalg_traits; + template struct linalg_traits { + typedef abstract_null_type this_type; + typedef abstract_null_type linalg_type; + typedef abstract_null_type value_type; + typedef abstract_null_type is_reference; + typedef abstract_null_type& reference; + typedef abstract_null_type* iterator; + typedef const abstract_null_type* const_iterator; + typedef abstract_null_type index_sorted; + typedef abstract_null_type storage_type; + typedef abstract_null_type origin_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type col_iterator; + typedef abstract_null_type sub_orientation; + }; + + template struct vect_ref_type; + template struct vect_ref_type

{ + typedef typename linalg_traits::reference access_type; + typedef typename linalg_traits::iterator iterator; + }; + template struct vect_ref_type { + typedef typename linalg_traits::value_type access_type; + typedef typename linalg_traits::const_iterator iterator; + }; + + template struct const_pointer; + template struct const_pointer

+ { typedef const P* pointer; }; + template struct const_pointer + { typedef const P* pointer; }; + + template struct modifiable_pointer; + template struct modifiable_pointer

+ { typedef P* pointer; }; + template struct modifiable_pointer + { typedef P* pointer; }; + + template struct const_reference; + template struct const_reference + { typedef const R &reference; }; + template struct const_reference + { typedef const R &reference; }; + + + inline bool is_sparse(abstract_sparse) { return true; } + inline bool is_sparse(abstract_dense) { return false; } + inline bool is_sparse(abstract_skyline) { return true; } + inline bool is_sparse(abstract_indirect) { return false; } + + template inline bool is_sparse(const L &) + { return is_sparse(typename linalg_traits::storage_type()); } + + inline bool is_row_matrix_(row_major) { return true; } + inline bool is_row_matrix_(col_major) { return false; } + inline bool is_row_matrix_(row_and_col) { return true; } + inline bool is_row_matrix_(col_and_row) { return true; } + + template inline bool is_row_matrix(const L &) + { return is_row_matrix_(typename linalg_traits::sub_orientation()); } + + inline bool is_col_matrix_(row_major) { return false; } + inline bool is_col_matrix_(col_major) { return true; } + inline bool is_col_matrix_(row_and_col) { return true; } + inline bool is_col_matrix_(col_and_row) { return true; } + + template inline bool is_col_matrix(const L &) + { return is_col_matrix_(typename linalg_traits::sub_orientation()); } + + inline bool is_col_matrix(row_major) { return false; } + inline bool is_col_matrix(col_major) { return true; } + inline bool is_row_matrix(row_major) { return true; } + inline bool is_row_matrix(col_major) { return false; } + + template inline bool is_const_reference(L) { return false; } + inline bool is_const_reference(linalg_const) { return true; } + + + template struct is_gmm_interfaced_ { + typedef linalg_true result; + }; + + template<> struct is_gmm_interfaced_ { + typedef linalg_false result; + }; + + template struct is_gmm_interfaced { + typedef typename is_gmm_interfaced_::this_type >::result result; + }; + + /* 
******************************************************************** */ + /* Original type from a pointer or a reference. */ + /* ******************************************************************** */ + + template struct org_type { typedef V t; }; + template struct org_type { typedef V t; }; + template struct org_type { typedef V t; }; + template struct org_type { typedef V t; }; + template struct org_type { typedef V t; }; + + /* ******************************************************************** */ + /* Types to deal with const object representing a modifiable reference */ + /* ******************************************************************** */ + + template struct mref_type_ + { typedef abstract_null_type return_type; }; + template struct mref_type_ + { typedef typename org_type::t & return_type; }; + template struct mref_type_ + { typedef const typename org_type::t & return_type; }; + template struct mref_type_ + { typedef const typename org_type::t & return_type; }; + template struct mref_type_ + { typedef const typename org_type::t & return_type; }; + template struct mref_type_ + { typedef typename org_type::t & return_type; }; + template struct mref_type_ + { typedef typename org_type::t & return_type; }; + + template struct mref_type { + typedef typename std::iterator_traits::value_type L; + typedef typename mref_type_::is_reference>::return_type return_type; + }; + + template typename mref_type::return_type + linalg_cast(const L &l) + { return const_cast::return_type>(l); } + + template typename mref_type::return_type linalg_cast(L &l) + { return const_cast::return_type>(l); } + + template struct cref_type_ + { typedef abstract_null_type return_type; }; + template struct cref_type_ + { typedef typename org_type::t & return_type; }; + template struct cref_type { + typedef typename cref_type_::is_reference>::return_type return_type; + }; + + template typename cref_type::return_type + linalg_const_cast(const L &l) + { return const_cast::return_type>(l); 
} + + + // To be used to select between a reference or a const refercence for + // the return type of a function + // select_return return C1 if L is a const reference, + // C2 otherwise. + // select_return return C2 if L is a modifiable reference + // C1 otherwise. + template struct select_return_ { + typedef abstract_null_type return_type; + }; + template + struct select_return_ { typedef C1 return_type; }; + template + struct select_return_ { typedef C2 return_type; }; + template struct select_return { + typedef typename std::iterator_traits::value_type L; + typedef typename select_return_::return_type>::return_type return_type; + }; + + + // To be used to select between a reference or a const refercence inside + // a structure or a linagl_traits + // select_ref return C1 if L is a const reference, + // C2 otherwise. + // select_ref return C2 in any case. + template struct select_ref_ + { typedef abstract_null_type ref_type; }; + template + struct select_ref_ { typedef C1 ref_type; }; + template + struct select_ref_ { typedef C2 ref_type; }; + template struct select_ref { + typedef typename std::iterator_traits::value_type L; + typedef typename select_ref_::return_type>::ref_type ref_type; + }; + template + struct select_ref + { typedef C1 ref_type; }; + + + template struct is_a_reference_ + { typedef linalg_true reference; }; + template<> struct is_a_reference_ + { typedef linalg_false reference; }; + + template struct is_a_reference { + typedef typename is_a_reference_::is_reference> + ::reference reference; + }; + + + template inline bool is_original_linalg(const L &) + { return is_original_linalg(typename is_a_reference::reference()); } + inline bool is_original_linalg(linalg_false) { return true; } + inline bool is_original_linalg(linalg_true) { return false; } + + + template struct which_reference + { typedef abstract_null_type is_reference; }; + template struct which_reference + { typedef linalg_modifiable is_reference; }; + template struct 
which_reference + { typedef linalg_const is_reference; }; + + + template struct select_orientation_ + { typedef abstract_null_type return_type; }; + template + struct select_orientation_ + { typedef C1 return_type; }; + template + struct select_orientation_ + { typedef C2 return_type; }; + template struct select_orientation { + typedef typename select_orientation_::sub_orientation>::potype>::return_type return_type; + }; + + /* ******************************************************************** */ + /* Operations on scalars */ + /* ******************************************************************** */ + + template inline T sqr(T a) { return T(a * a); } + template inline T abs(T a) { return (a < T(0)) ? T(-a) : a; } + template inline T abs(std::complex a) + { T x = a.real(), y = a.imag(); return T(::sqrt(x*x+y*y)); } + template inline T abs_sqr(T a) { return T(a*a); } + template inline T abs_sqr(std::complex a) + { return gmm::sqr(a.real()) + gmm::sqr(a.imag()); } + template inline T pos(T a) { return (a < T(0)) ? T(0) : a; } + template inline T neg(T a) { return (a < T(0)) ? T(-a) : T(0); } + template inline T sgn(T a) { return (a < T(0)) ? T(-1) : T(1); } + template inline T Heaviside(T a) { return (a < T(0)) ? 
T(0) : T(1); } + inline double random() { return double(rand())/(RAND_MAX+0.5); } + template inline T random(T) + { return T(rand()*2.0)/(T(RAND_MAX)+T(1)/T(2)) - T(1); } + template inline std::complex random(std::complex) + { return std::complex(gmm::random(T()), gmm::random(T())); } + template inline T irandom(T max) + { return T(gmm::random() * double(max)); } + template inline T conj(T a) { return a; } + template inline std::complex conj(std::complex a) + { return std::conj(a); } + template inline T real(T a) { return a; } + template inline T real(std::complex a) { return a.real(); } + template inline T imag(T ) { return T(0); } + template inline T imag(std::complex a) { return a.imag(); } + template inline T sqrt(T a) { return T(::sqrt(a)); } + template inline std::complex sqrt(std::complex a) { + T x = a.real(), y = a.imag(); + if (x == T(0)) { + T t = T(::sqrt(gmm::abs(y) / T(2))); + return std::complex(t, y < T(0) ? -t : t); + } + T t = T(::sqrt(T(2) * (gmm::abs(a) + gmm::abs(x)))), u = t / T(2); + return x > T(0) ? std::complex(u, y / t) + : std::complex(gmm::abs(y) / t, y < T(0) ? -u : u); + } + using std::swap; + + + template struct number_traits { + typedef T magnitude_type; + }; + + template struct number_traits > { + typedef T magnitude_type; + }; + + template inline T conj_product(T a, T b) { return a * b; } + template inline + std::complex conj_product(std::complex a, std::complex b) + { return std::conj(a) * b; } // to be optimized ? 
+ + template inline bool is_complex(T) { return false; } + template inline bool is_complex(std::complex ) + { return true; } + +# define magnitude_of_linalg(M) typename number_traits::value_type>::magnitude_type + + /* ******************************************************************** */ + /* types promotion */ + /* ******************************************************************** */ + + /* should be completed for more specific cases etc */ + template + struct strongest_numeric_type_aux { + typedef T1 T; + }; + template + struct strongest_numeric_type_aux { + typedef T2 T; + }; + + template + struct strongest_numeric_type { + typedef typename + strongest_numeric_type_auxsizeof(T2))>::T T; + }; + template + struct strongest_numeric_type > { + typedef typename number_traits::magnitude_type R1; + typedef std::complex::T > T; + }; + template + struct strongest_numeric_type,T2 > { + typedef typename number_traits::magnitude_type R2; + typedef std::complex::T > T; + }; + template + struct strongest_numeric_type,std::complex > { + typedef std::complex::T > T; + }; + + template<> struct strongest_numeric_type { typedef float T; }; + template<> struct strongest_numeric_type { typedef float T; }; + template<> struct strongest_numeric_type { typedef float T; }; + template<> struct strongest_numeric_type { typedef float T; }; + template<> struct strongest_numeric_type { typedef double T; }; + template<> struct strongest_numeric_type { typedef double T; }; + + template + struct strongest_value_type { + typedef typename + strongest_numeric_type::value_type, + typename linalg_traits::value_type>::T + value_type; + }; + template + struct strongest_value_type3 { + typedef typename + strongest_value_type::value_type>::value_type + value_type; + }; + + + + /* ******************************************************************** */ + /* Basic vectors used */ + /* ******************************************************************** */ + + template struct dense_vector_type + { 
typedef std::vector vector_type; }; + + template class wsvector; + template class rsvector; + template class dsvector; + template struct sparse_vector_type + { typedef wsvector vector_type; }; + + template class slvector; + template class dense_matrix; + template class row_matrix; + template class col_matrix; + + + /* ******************************************************************** */ + /* Selects a temporary vector type */ + /* V if V is a valid vector type, */ + /* wsvector if V is a reference on a sparse vector, */ + /* std::vector if V is a reference on a dense vector. */ + /* ******************************************************************** */ + + + template + struct temporary_vector_ { + typedef abstract_null_type vector_type; + }; + template + struct temporary_vector_ + { typedef wsvector::value_type> vector_type; }; + template + struct temporary_vector_ + { typedef slvector::value_type> vector_type; }; + template + struct temporary_vector_ + { typedef std::vector::value_type> vector_type; }; + template + struct temporary_vector_ + { typedef V vector_type; }; + template + struct temporary_vector_ + { typedef std::vector::value_type> vector_type; }; + template + struct temporary_vector_ + { typedef wsvector::value_type> vector_type; }; + + template struct temporary_vector { + typedef typename temporary_vector_::reference, + typename linalg_traits::storage_type, + typename linalg_traits::linalg_type, + V>::vector_type vector_type; + }; + + /* ******************************************************************** */ + /* Selects a temporary matrix type */ + /* M if M is a valid matrix type, */ + /* row_matrix if M is a reference on a sparse matrix, */ + /* dense_matrix if M is a reference on a dense matrix. 
*/ + /* ******************************************************************** */ + + + template + struct temporary_matrix_ { typedef abstract_null_type matrix_type; }; + template + struct temporary_matrix_ { + typedef typename linalg_traits::value_type T; + typedef row_matrix > matrix_type; + }; + template + struct temporary_matrix_ { + typedef typename linalg_traits::value_type T; + typedef row_matrix > matrix_type; + }; + template + struct temporary_matrix_ + { typedef dense_matrix::value_type> matrix_type; }; + template + struct temporary_matrix_ + { typedef V matrix_type; }; + + template struct temporary_matrix { + typedef typename temporary_matrix_::reference, + typename linalg_traits::storage_type, + typename linalg_traits::linalg_type, + V>::matrix_type matrix_type; + }; + + + template + struct temporary_col_matrix_ { typedef abstract_null_type matrix_type; }; + template + struct temporary_col_matrix_ { + typedef typename linalg_traits::value_type T; + typedef col_matrix > matrix_type; + }; + template + struct temporary_col_matrix_ { + typedef typename linalg_traits::value_type T; + typedef col_matrix > matrix_type; + }; + template + struct temporary_col_matrix_ + { typedef dense_matrix::value_type> matrix_type; }; + + template struct temporary_col_matrix { + typedef typename temporary_col_matrix_< + typename linalg_traits::storage_type, + typename linalg_traits::linalg_type, + V>::matrix_type matrix_type; + }; + + + + + template + struct temporary_row_matrix_ { typedef abstract_null_type matrix_type; }; + template + struct temporary_row_matrix_ { + typedef typename linalg_traits::value_type T; + typedef row_matrix > matrix_type; + }; + template + struct temporary_row_matrix_ { + typedef typename linalg_traits::value_type T; + typedef row_matrix > matrix_type; + }; + template + struct temporary_row_matrix_ + { typedef dense_matrix::value_type> matrix_type; }; + + template struct temporary_row_matrix { + typedef typename temporary_row_matrix_< + typename 
linalg_traits::storage_type, + typename linalg_traits::linalg_type, + V>::matrix_type matrix_type; + }; + + + + /* ******************************************************************** */ + /* Selects a temporary dense vector type */ + /* V if V is a valid dense vector type, */ + /* std::vector if V is a reference or another type of vector */ + /* ******************************************************************** */ + + template + struct temporary_dense_vector_ { typedef abstract_null_type vector_type; }; + template + struct temporary_dense_vector_ + { typedef std::vector::value_type> vector_type; }; + template + struct temporary_dense_vector_ + { typedef std::vector::value_type> vector_type; }; + template + struct temporary_dense_vector_ + { typedef std::vector::value_type> vector_type; }; + template + struct temporary_dense_vector_ + { typedef V vector_type; }; + + template struct temporary_dense_vector { + typedef typename temporary_dense_vector_::reference, + typename linalg_traits::storage_type, V>::vector_type vector_type; + }; + + /* ******************************************************************** */ + /* Selects a temporary sparse vector type */ + /* V if V is a valid sparse vector type, */ + /* wsvector if V is a reference or another type of vector */ + /* ******************************************************************** */ + + template + struct temporary_sparse_vector_ { typedef abstract_null_type vector_type; }; + template + struct temporary_sparse_vector_ + { typedef wsvector::value_type> vector_type; }; + template + struct temporary_sparse_vector_ + { typedef V vector_type; }; + template + struct temporary_sparse_vector_ + { typedef wsvector::value_type> vector_type; }; + template + struct temporary_sparse_vector_ + { typedef wsvector::value_type> vector_type; }; + + template struct temporary_sparse_vector { + typedef typename temporary_sparse_vector_::reference, + typename linalg_traits::storage_type, V>::vector_type vector_type; + }; + + /* 
******************************************************************** */ + /* Selects a temporary sky-line vector type */ + /* V if V is a valid sky-line vector type, */ + /* slvector if V is a reference or another type of vector */ + /* ******************************************************************** */ + + template + struct temporary_skyline_vector_ + { typedef abstract_null_type vector_type; }; + template + struct temporary_skyline_vector_ + { typedef slvector::value_type> vector_type; }; + template + struct temporary_skyline_vector_ + { typedef V vector_type; }; + template + struct temporary_skyline_vector_ + { typedef slvector::value_type> vector_type; }; + template + struct temporary_skyline_vector_ + { typedef slvector::value_type> vector_type; }; + + template struct temporary_skylines_vector { + typedef typename temporary_skyline_vector_::reference, + typename linalg_traits::storage_type, V>::vector_type vector_type; + }; + + /* ********************************************************************* */ + /* Definition & Comparison of origins. 
*/ + /* ********************************************************************* */ + + template + typename select_return::origin_type *, + typename linalg_traits::origin_type *, + L *>::return_type + linalg_origin(L &l) + { return linalg_traits::origin(linalg_cast(l)); } + + template + typename select_return::origin_type *, + typename linalg_traits::origin_type *, + const L *>::return_type + linalg_origin(const L &l) + { return linalg_traits::origin(linalg_cast(l)); } + + template + bool same_porigin(PT1, PT2) { return false; } + + template + bool same_porigin(PT pt1, PT pt2) { return (pt1 == pt2); } + + template + bool same_origin(const L1 &l1, const L2 &l2) + { return same_porigin(linalg_origin(l1), linalg_origin(l2)); } + + + /* ******************************************************************** */ + /* Miscellaneous */ + /* ******************************************************************** */ + + template inline size_type vect_size(const V &v) + { return linalg_traits::size(v); } + + template inline size_type mat_nrows(const MAT &m) + { return linalg_traits::nrows(m); } + + template inline size_type mat_ncols(const MAT &m) + { return linalg_traits::ncols(m); } + + + template inline + typename select_return::const_iterator, + typename linalg_traits::iterator, V *>::return_type + vect_begin(V &v) + { return linalg_traits::begin(linalg_cast(v)); } + + template inline + typename select_return::const_iterator, + typename linalg_traits::iterator, const V *>::return_type + vect_begin(const V &v) + { return linalg_traits::begin(linalg_cast(v)); } + + template inline + typename linalg_traits::const_iterator + vect_const_begin(const V &v) + { return linalg_traits::begin(v); } + + template inline + typename select_return::const_iterator, + typename linalg_traits::iterator, V *>::return_type + vect_end(V &v) + { return linalg_traits::end(linalg_cast(v)); } + + template inline + typename select_return::const_iterator, + typename linalg_traits::iterator, const V 
*>::return_type + vect_end(const V &v) + { return linalg_traits::end(linalg_cast(v)); } + + template inline + typename linalg_traits::const_iterator + vect_const_end(const V &v) + { return linalg_traits::end(v); } + + template inline + typename select_return::const_row_iterator, + typename linalg_traits::row_iterator, M *>::return_type + mat_row_begin(M &m) { return linalg_traits::row_begin(linalg_cast(m)); } + + template inline + typename select_return::const_row_iterator, + typename linalg_traits::row_iterator, const M *>::return_type + mat_row_begin(const M &m) + { return linalg_traits::row_begin(linalg_cast(m)); } + + template inline typename linalg_traits::const_row_iterator + mat_row_const_begin(const M &m) + { return linalg_traits::row_begin(m); } + + template inline + typename select_return::const_row_iterator, + typename linalg_traits::row_iterator, M *>::return_type + mat_row_end(M &v) { + return linalg_traits::row_end(linalg_cast(v)); + } + + template inline + typename select_return::const_row_iterator, + typename linalg_traits::row_iterator, const M *>::return_type + mat_row_end(const M &v) { + return linalg_traits::row_end(linalg_cast(v)); + } + + template inline + typename linalg_traits::const_row_iterator + mat_row_const_end(const M &v) + { return linalg_traits::row_end(v); } + + template inline + typename select_return::const_col_iterator, + typename linalg_traits::col_iterator, M *>::return_type + mat_col_begin(M &v) { + return linalg_traits::col_begin(linalg_cast(v)); + } + + template inline + typename select_return::const_col_iterator, + typename linalg_traits::col_iterator, const M *>::return_type + mat_col_begin(const M &v) { + return linalg_traits::col_begin(linalg_cast(v)); + } + + template inline + typename linalg_traits::const_col_iterator + mat_col_const_begin(const M &v) + { return linalg_traits::col_begin(v); } + + template inline + typename linalg_traits::const_col_iterator + mat_col_const_end(const M &v) + { return 
linalg_traits::col_end(v); } + + template inline + typename select_return::const_col_iterator, + typename linalg_traits::col_iterator, + M *>::return_type + mat_col_end(M &m) + { return linalg_traits::col_end(linalg_cast(m)); } + + template inline + typename select_return::const_col_iterator, + typename linalg_traits::col_iterator, + const M *>::return_type + mat_col_end(const M &m) + { return linalg_traits::col_end(linalg_cast(m)); } + + template inline + typename select_return::const_sub_row_type, + typename linalg_traits::sub_row_type, + const MAT *>::return_type + mat_row(const MAT &m, size_type i) + { return linalg_traits::row(mat_row_begin(m) + i); } + + template inline + typename select_return::const_sub_row_type, + typename linalg_traits::sub_row_type, + MAT *>::return_type + mat_row(MAT &m, size_type i) + { return linalg_traits::row(mat_row_begin(m) + i); } + + template inline + typename linalg_traits::const_sub_row_type + mat_const_row(const MAT &m, size_type i) + { return linalg_traits::row(mat_row_const_begin(m) + i); } + + template inline + typename select_return::const_sub_col_type, + typename linalg_traits::sub_col_type, + const MAT *>::return_type + mat_col(const MAT &m, size_type i) + { return linalg_traits::col(mat_col_begin(m) + i); } + + + template inline + typename select_return::const_sub_col_type, + typename linalg_traits::sub_col_type, + MAT *>::return_type + mat_col(MAT &m, size_type i) + { return linalg_traits::col(mat_col_begin(m) + i); } + + template inline + typename linalg_traits::const_sub_col_type + mat_const_col(const MAT &m, size_type i) + { return linalg_traits::col(mat_col_const_begin(m) + i); } + + /* ********************************************************************* */ + /* Set to begin end set to end for iterators on non-const sparse vectors.*/ + /* ********************************************************************* */ + + template inline + void set_to_begin(IT &it, ORG o, VECT *, linalg_false) + { it = vect_begin(*o); } 
+ + template inline + void set_to_begin(IT &it, ORG o, const VECT *, linalg_false) + { it = vect_const_begin(*o); } + + template inline + void set_to_end(IT &it, ORG o, VECT *, linalg_false) + { it = vect_end(*o); } + + template inline + void set_to_end(IT &it, ORG o, const VECT *, linalg_false) + { it = vect_const_end(*o); } + + + template inline + void set_to_begin(IT &, ORG, VECT *, linalg_const) { } + + template inline + void set_to_begin(IT &, ORG, const VECT *, linalg_const) { } + + template inline + void set_to_end(IT &, ORG, VECT *, linalg_const) { } + + template inline + void set_to_end(IT &, ORG, const VECT *, linalg_const) { } + + + template inline + void set_to_begin(IT &, ORG, VECT *v, linalg_modifiable) + { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; } + + template inline + void set_to_begin(IT &, ORG, const VECT *v, linalg_modifiable) + { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; } + + template inline + void set_to_end(IT &, ORG, VECT *v, linalg_modifiable) + { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; } + + template inline + void set_to_end(IT &, ORG, const VECT *v, linalg_modifiable) + { GMM_ASSERT3(!is_sparse(*v), "internal_error"); v = 0; } + + /* ******************************************************************** */ + /* General index for certain algorithms. */ + /* ******************************************************************** */ + + template + size_type index_of_it(const IT &it, size_type, abstract_sparse) + { return it.index(); } + template + size_type index_of_it(const IT &it, size_type, abstract_skyline) + { return it.index(); } + template + size_type index_of_it(const IT &, size_type k, abstract_dense) + { return k; } + + /* ********************************************************************* */ + /* Numeric limits. 
*/ + /* ********************************************************************* */ + + template inline T default_tol(T) { + using namespace std; + static T tol(10); + if (tol == T(10)) { + if (numeric_limits::is_specialized) + tol = numeric_limits::epsilon(); + else { + int i=int(sizeof(T)/4); while(i-- > 0) tol*=T(1E-8); + GMM_WARNING1("The numeric type " << typeid(T).name() + << " has no numeric_limits defined !!\n" + << "Taking " << tol << " as default tolerance"); + } + } + return tol; + } + template inline T default_tol(std::complex) + { return default_tol(T()); } + + template inline T default_min(T) { + using namespace std; + static T mi(10); + if (mi == T(10)) { + if (numeric_limits::is_specialized) + mi = std::numeric_limits::min(); + else { + mi = T(0); + GMM_WARNING1("The numeric type " << typeid(T).name() + << " has no numeric_limits defined !!\n" + << "Taking 0 as default minimum"); + } + } + return mi; + } + template inline T default_min(std::complex) + { return default_min(T()); } + + template inline T default_max(T) { + using namespace std; + static T mi(10); + if (mi == T(10)) { + if (numeric_limits::is_specialized) + mi = std::numeric_limits::max(); + else { + mi = T(1); + GMM_WARNING1("The numeric type " << typeid(T).name() + << " has no numeric_limits defined !!\n" + << "Taking 1 as default maximum !"); + } + } + return mi; + } + template inline T default_max(std::complex) + { return default_max(T()); } + + + /* + use safe_divide to avoid NaNs when dividing very small complex + numbers, for example + std::complex(1e-23,1e-30)/std::complex(1e-23,1e-30) + */ + template inline T safe_divide(T a, T b) { return a/b; } + template inline std::complex + safe_divide(std::complex a, std::complex b) { + T m = std::max(gmm::abs(b.real()), gmm::abs(b.imag())); + a = std::complex(a.real()/m, a.imag()/m); + b = std::complex(b.real()/m, b.imag()/m); + return a / b; + } + + + /* ******************************************************************** */ + /* Write */ + 
/* ******************************************************************** */ + + template struct cast_char_type { typedef T return_type; }; + template <> struct cast_char_type { typedef int return_type; }; + template <> struct cast_char_type + { typedef unsigned int return_type; }; + template inline typename cast_char_type::return_type + cast_char(const T &c) { return typename cast_char_type::return_type(c); } + + + template inline void write(std::ostream &o, const L &l) + { write(o, l, typename linalg_traits::linalg_type()); } + + template void write(std::ostream &o, const L &l, + abstract_vector) { + o << "vector(" << vect_size(l) << ") ["; + write(o, l, typename linalg_traits::storage_type()); + o << " ]"; + } + + template void write(std::ostream &o, const L &l, + abstract_sparse) { + typename linalg_traits::const_iterator it = vect_const_begin(l), + ite = vect_const_end(l); + for (; it != ite; ++it) + o << " (r" << it.index() << ", " << cast_char(*it) << ")"; + } + + template void write(std::ostream &o, const L &l, + abstract_dense) { + typename linalg_traits::const_iterator it = vect_const_begin(l), + ite = vect_const_end(l); + if (it != ite) o << " " << cast_char(*it++); + for (; it != ite; ++it) o << ", " << cast_char(*it); + } + + template void write(std::ostream &o, const L &l, + abstract_skyline) { + typedef typename linalg_traits::const_iterator const_iterator; + const_iterator it = vect_const_begin(l), ite = vect_const_end(l); + if (it != ite) { + o << ""; + if (it != ite) o << " " << cast_char(*it++); + for (; it != ite; ++it) { o << ", " << cast_char(*it); } + } + } + + template inline void write(std::ostream &o, const L &l, + abstract_matrix) { + write(o, l, typename linalg_traits::sub_orientation()); + } + + + template void write(std::ostream &o, const L &l, + row_major) { + o << "matrix(" << mat_nrows(l) << ", " << mat_ncols(l) << ")" << endl; + for (size_type i = 0; i < mat_nrows(l); ++i) { + o << "("; + write(o, mat_const_row(l, i), typename 
linalg_traits::storage_type()); + o << " )\n"; + } + } + + template inline + void write(std::ostream &o, const L &l, row_and_col) + { write(o, l, row_major()); } + + template inline + void write(std::ostream &o, const L &l, col_and_row) + { write(o, l, row_major()); } + + template void write(std::ostream &o, const L &l, col_major) { + o << "matrix(" << mat_nrows(l) << ", " << mat_ncols(l) << ")" << endl; + for (size_type i = 0; i < mat_nrows(l); ++i) { + o << "("; + if (is_sparse(l)) { // not optimized ... + for (size_type j = 0; j < mat_ncols(l); ++j) + if (l(i,j) != typename linalg_traits::value_type(0)) + o << " (r" << j << ", " << l(i,j) << ")"; + } + else { + if (mat_ncols(l) != 0) o << ' ' << l(i, 0); + for (size_type j = 1; j < mat_ncols(l); ++j) o << ", " << l(i, j); + } + o << " )\n"; + } + } + +} + +#endif // GMM_DEF_H__ diff --git a/gmm/gmm_dense_Householder.h b/gmm/gmm_dense_Householder.h new file mode 100644 index 000000000..4dcb3cd24 --- /dev/null +++ b/gmm/gmm_dense_Householder.h @@ -0,0 +1,317 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_dense_Householder.h + @author Caroline Lecalvez + @author Yves Renard + @date June 5, 2003. + @brief Householder for dense matrices. 
+*/ + +#ifndef GMM_DENSE_HOUSEHOLDER_H +#define GMM_DENSE_HOUSEHOLDER_H + +#include "gmm_kernel.h" + +namespace gmm { + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + /* ********************************************************************* */ + /* Rank one update (complex and real version) */ + /* ********************************************************************* */ + + template + inline void rank_one_update(Matrix &A, const VecX& x, + const VecY& y, row_major) { + typedef typename linalg_traits::value_type T; + size_type N = mat_nrows(A); + GMM_ASSERT2(N <= vect_size(x) && mat_ncols(A) <= vect_size(y), + "dimensions mismatch"); + typename linalg_traits::const_iterator itx = vect_const_begin(x); + for (size_type i = 0; i < N; ++i, ++itx) { + typedef typename linalg_traits::sub_row_type row_type; + row_type row = mat_row(A, i); + typename linalg_traits::t>::iterator + it = vect_begin(row), ite = vect_end(row); + typename linalg_traits::const_iterator ity = vect_const_begin(y); + T tx = *itx; + for (; it != ite; ++it, ++ity) *it += conj_product(*ity, tx); + } + } + + template + inline void rank_one_update(Matrix &A, const VecX& x, + const VecY& y, col_major) { + typedef typename linalg_traits::value_type T; + size_type M = mat_ncols(A); + GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && M <= vect_size(y), + "dimensions mismatch"); + typename linalg_traits::const_iterator ity = vect_const_begin(y); + for (size_type i = 0; i < M; ++i, ++ity) { + typedef typename linalg_traits::sub_col_type col_type; + col_type col = mat_col(A, i); + typename linalg_traits::t>::iterator + it = vect_begin(col), ite = vect_end(col); + typename linalg_traits::const_iterator itx = vect_const_begin(x); + T ty = *ity; + for (; it != ite; ++it, ++itx) *it += conj_product(ty, *itx); + } + } + + ///@endcond + template + inline void rank_one_update(const Matrix &AA, const VecX& x, + const VecY& y) { + Matrix& A = const_cast(AA); + rank_one_update(A, x, y, typename 
principal_orientation_type::sub_orientation>::potype()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + /* ********************************************************************* */ + /* Rank two update (complex and real version) */ + /* ********************************************************************* */ + + template + inline void rank_two_update(Matrix &A, const VecX& x, + const VecY& y, row_major) { + typedef typename linalg_traits::value_type value_type; + size_type N = mat_nrows(A); + GMM_ASSERT2(N <= vect_size(x) && mat_ncols(A) <= vect_size(y), + "dimensions mismatch"); + typename linalg_traits::const_iterator itx1 = vect_const_begin(x); + typename linalg_traits::const_iterator ity2 = vect_const_begin(y); + for (size_type i = 0; i < N; ++i, ++itx1, ++ity2) { + typedef typename linalg_traits::sub_row_type row_type; + row_type row = mat_row(A, i); + typename linalg_traits::t>::iterator + it = vect_begin(row), ite = vect_end(row); + typename linalg_traits::const_iterator itx2 = vect_const_begin(x); + typename linalg_traits::const_iterator ity1 = vect_const_begin(y); + value_type tx = *itx1, ty = *ity2; + for (; it != ite; ++it, ++ity1, ++itx2) + *it += conj_product(*ity1, tx) + conj_product(*itx2, ty); + } + } + + template + inline void rank_two_update(Matrix &A, const VecX& x, + const VecY& y, col_major) { + typedef typename linalg_traits::value_type value_type; + size_type M = mat_ncols(A); + GMM_ASSERT2(mat_nrows(A) <= vect_size(x) && M <= vect_size(y), + "dimensions mismatch"); + typename linalg_traits::const_iterator itx2 = vect_const_begin(x); + typename linalg_traits::const_iterator ity1 = vect_const_begin(y); + for (size_type i = 0; i < M; ++i, ++ity1, ++itx2) { + typedef typename linalg_traits::sub_col_type col_type; + col_type col = mat_col(A, i); + typename linalg_traits::t>::iterator + it = vect_begin(col), ite = vect_end(col); + typename linalg_traits::const_iterator itx1 = vect_const_begin(x); + typename linalg_traits::const_iterator ity2 = 
vect_const_begin(y); + value_type ty = *ity1, tx = *itx2; + for (; it != ite; ++it, ++itx1, ++ity2) + *it += conj_product(ty, *itx1) + conj_product(tx, *ity2); + } + } + + ///@endcond + template + inline void rank_two_update(const Matrix &AA, const VecX& x, + const VecY& y) { + Matrix& A = const_cast(AA); + rank_two_update(A, x, y, typename principal_orientation_type::sub_orientation>::potype()); + } + ///@cond DOXY_SHOW_ALL_FUNCTIONS + + /* ********************************************************************* */ + /* Householder vector computation (complex and real version) */ + /* ********************************************************************* */ + + template void house_vector(const VECT &VV) { + VECT &V = const_cast(VV); + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + R mu = vect_norm2(V), abs_v0 = gmm::abs(V[0]); + if (mu != R(0)) + gmm::scale(V, (abs_v0 == R(0)) ? T(R(1) / mu) + : (safe_divide(T(abs_v0), V[0]) / (abs_v0 + mu))); + if (gmm::real(V[vect_size(V)-1]) * R(0) != R(0)) gmm::clear(V); + V[0] = T(1); + } + + template void house_vector_last(const VECT &VV) { + VECT &V = const_cast(VV); + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type m = vect_size(V); + R mu = vect_norm2(V), abs_v0 = gmm::abs(V[m-1]); + if (mu != R(0)) + gmm::scale(V, (abs_v0 == R(0)) ? T(R(1) / mu) + : ((abs_v0 / V[m-1]) / (abs_v0 + mu))); + if (gmm::real(V[0]) * R(0) != R(0)) gmm::clear(V); + V[m-1] = T(1); + } + + /* ********************************************************************* */ + /* Householder updates (complex and real version) */ + /* ********************************************************************* */ + + // multiply A to the left by the reflector stored in V. W is a temporary. 
+ template inline + void row_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) { + VECT2 &W = const_cast(WW); MAT &A = const_cast(AA); + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + + gmm::mult(conjugated(A), + scaled(V, value_type(magnitude_type(-2)/vect_norm2_sqr(V))), W); + rank_one_update(A, V, W); + } + + // multiply A to the right by the reflector stored in V. W is a temporary. + template inline + void col_house_update(const MAT &AA, const VECT1 &V, const VECT2 &WW) { + VECT2 &W = const_cast(WW); MAT &A = const_cast(AA); + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + + gmm::mult(A, + scaled(V, value_type(magnitude_type(-2)/vect_norm2_sqr(V))), W); + rank_one_update(A, W, V); + } + + ///@endcond + + /* ********************************************************************* */ + /* Hessenberg reduction with Householder. */ + /* ********************************************************************* */ + + template + void Hessenberg_reduction(const MAT1& AA, const MAT2 &QQ, bool compute_Q){ + MAT1& A = const_cast(AA); MAT2& Q = const_cast(QQ); + typedef typename linalg_traits::value_type value_type; + if (compute_Q) gmm::copy(identity_matrix(), Q); + size_type n = mat_nrows(A); if (n < 2) return; + std::vector v(n), w(n); + sub_interval SUBK(0,n); + for (size_type k = 1; k+1 < n; ++k) { + sub_interval SUBI(k, n-k), SUBJ(k-1,n-k+1); + v.resize(n-k); + for (size_type j = k; j < n; ++j) v[j-k] = A(j, k-1); + house_vector(v); + row_house_update(sub_matrix(A, SUBI, SUBJ), v, sub_vector(w, SUBJ)); + col_house_update(sub_matrix(A, SUBK, SUBI), v, w); + // is it possible to "unify" the two on the common part of the matrix? 
+ if (compute_Q) col_house_update(sub_matrix(Q, SUBK, SUBI), v, w); + } + } + + /* ********************************************************************* */ + /* Householder tridiagonalization for symmetric matrices */ + /* ********************************************************************* */ + + template + void Householder_tridiagonalization(const MAT1 &AA, const MAT2 &QQ, + bool compute_q) { + MAT1 &A = const_cast(AA); MAT2 &Q = const_cast(QQ); + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type n = mat_nrows(A); if (n < 2) return; + std::vector v(n), p(n), w(n), ww(n); + sub_interval SUBK(0,n); + + for (size_type k = 1; k+1 < n; ++k) { // not optimized ... + sub_interval SUBI(k, n-k); + v.resize(n-k); p.resize(n-k); w.resize(n-k); + for (size_type l = k; l < n; ++l) + { v[l-k] = w[l-k] = A(l, k-1); A(l, k-1) = A(k-1, l) = T(0); } + house_vector(v); + R norm = vect_norm2_sqr(v); + A(k-1, k) = gmm::conj(A(k, k-1) = w[0] - T(2)*v[0]*vect_hp(w, v)/norm); + + gmm::mult(sub_matrix(A, SUBI), gmm::scaled(v, T(-2) / norm), p); + gmm::add(p, gmm::scaled(v, -vect_hp(v, p) / norm), w); + rank_two_update(sub_matrix(A, SUBI), v, w); + // it should be possible to compute only the upper or lower part + + if (compute_q) col_house_update(sub_matrix(Q, SUBK, SUBI), v, ww); + } + } + + /* ********************************************************************* */ + /* Real and complex Givens rotations */ + /* ********************************************************************* */ + + template void Givens_rotation(T a, T b, T &c, T &s) { + typedef typename number_traits::magnitude_type R; + R aa = gmm::abs(a), bb = gmm::abs(b); + if (bb == R(0)) { c = T(1); s = T(0); return; } + if (aa == R(0)) { c = T(0); s = b / bb; return; } + if (bb > aa) + { T t = -safe_divide(a,b); s = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(t)))); c = s * t; } + else + { T t = -safe_divide(b,a); c = T(R(1) / (sqrt(R(1)+gmm::abs_sqr(t)))); s = c * t; } + } 
+ + // Apply Q* v + template inline + void Apply_Givens_rotation_left(T &x, T &y, T c, T s) + { T t1=x, t2=y; x = gmm::conj(c)*t1 - gmm::conj(s)*t2; y = c*t2 + s*t1; } + + // Apply v^T Q + template inline + void Apply_Givens_rotation_right(T &x, T &y, T c, T s) + { T t1=x, t2=y; x = c*t1 - s*t2; y = gmm::conj(c)*t2 + gmm::conj(s)*t1; } + + template + void row_rot(const MAT &AA, T c, T s, size_type i, size_type k) { + MAT &A = const_cast(AA); // can be specialized for row matrices + for (size_type j = 0; j < mat_ncols(A); ++j) + Apply_Givens_rotation_left(A(i,j), A(k,j), c, s); + } + + template + void col_rot(const MAT &AA, T c, T s, size_type i, size_type k) { + MAT &A = const_cast(AA); // can be specialized for column matrices + for (size_type j = 0; j < mat_nrows(A); ++j) + Apply_Givens_rotation_right(A(j,i), A(j,k), c, s); + } + +} + +#endif + diff --git a/gmm/gmm_dense_lu.h b/gmm/gmm_dense_lu.h new file mode 100644 index 000000000..5107abebf --- /dev/null +++ b/gmm/gmm_dense_lu.h @@ -0,0 +1,250 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of lu.h from MTL. +// See http://osl.iu.edu/research/mtl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +/**@file gmm_dense_lu.h + @author Andrew Lumsdaine, Jeremy G. Siek, Lie-Quan Lee, Y. Renard + @date June 5, 2003. + @brief LU factorizations and determinant computation for dense matrices. +*/ +#ifndef GMM_DENSE_LU_H +#define GMM_DENSE_LU_H + +#include "gmm_dense_Householder.h" +#include "gmm_opt.h" + +namespace gmm { + + + /** LU Factorization of a general (dense) matrix (real or complex). + + This is the outer product (a level-2 operation) form of the LU + Factorization with pivoting algorithm . This is equivalent to + LAPACK's dgetf2. Also see "Matrix Computations" 3rd Ed. by Golub + and Van Loan section 3.2.5 and especially page 115. + + The pivot indices in ipvt are indexed starting from 1 + so that this is compatible with LAPACK (Fortran). 
+ */ + template + size_type lu_factor(DenseMatrix& A, Pvector& ipvt) { + typedef typename linalg_traits::value_type T; + typedef typename linalg_traits::value_type int_T; + typedef typename number_traits::magnitude_type R; + size_type info(0), i, j, jp, M(mat_nrows(A)), N(mat_ncols(A)); + size_type NN = std::min(M, N); + std::vector c(M), r(N); + + GMM_ASSERT2(ipvt.size()+1 >= NN, "IPVT too small"); + for (i = 0; i+1 < NN; ++i) ipvt[i] = int_T(i); + + if (M || N) { + for (j = 0; j+1 < NN; ++j) { + R max = gmm::abs(A(j,j)); jp = j; + for (i = j+1; i < M; ++i) /* find pivot. */ + if (gmm::abs(A(i,j)) > max) { jp = i; max = gmm::abs(A(i,j)); } + ipvt[j] = int_T(jp + 1); + + if (max == R(0)) { info = j + 1; break; } + if (jp != j) for (i = 0; i < N; ++i) std::swap(A(jp, i), A(j, i)); + + for (i = j+1; i < M; ++i) { A(i, j) /= A(j,j); c[i-j-1] = -A(i, j); } + for (i = j+1; i < N; ++i) r[i-j-1] = A(j, i); // avoid the copy ? + rank_one_update(sub_matrix(A, sub_interval(j+1, M-j-1), + sub_interval(j+1, N-j-1)), c, conjugated(r)); + } + ipvt[NN-1] = int_T(NN); + } + return info; + } + + /** LU Solve : Solve equation Ax=b, given an LU factored matrix.*/ + // Thanks to Valient Gough for this routine! + template + void lu_solve(const DenseMatrix &LU, const Pvector& pvector, + VectorX &x, const VectorB &b) { + typedef typename linalg_traits::value_type T; + copy(b, x); + for(size_type i = 0; i < pvector.size(); ++i) { + size_type perm = pvector[i]-1; // permutations stored in 1's offset + if(i != perm) { T aux = x[i]; x[i] = x[perm]; x[perm] = aux; } + } + /* solve Ax = b -> LUx = b -> Ux = L^-1 b. 
*/ + lower_tri_solve(LU, x, true); + upper_tri_solve(LU, x, false); + } + + template + void lu_solve(const DenseMatrix &A, VectorX &x, const VectorB &b) { + typedef typename linalg_traits::value_type T; + dense_matrix B(mat_nrows(A), mat_ncols(A)); + std::vector ipvt(mat_nrows(A)); + gmm::copy(A, B); + size_type info = lu_factor(B, ipvt); + GMM_ASSERT1(!info, "Singular system, pivot = " << info); + lu_solve(B, ipvt, x, b); + } + + template + void lu_solve_transposed(const DenseMatrix &LU, const Pvector& pvector, + VectorX &x, const VectorB &b) { + typedef typename linalg_traits::value_type T; + copy(b, x); + lower_tri_solve(transposed(LU), x, false); + upper_tri_solve(transposed(LU), x, true); + for(size_type i = pvector.size(); i > 0; --i) { + size_type perm = pvector[i-1]-1; // permutations stored in 1's offset + if(i-1 != perm) { T aux = x[i-1]; x[i-1] = x[perm]; x[perm] = aux; } + } + + } + + + ///@cond DOXY_SHOW_ALL_FUNCTIONS + template + void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector, + DenseMatrix& AInv, col_major) { + typedef typename linalg_traits::value_type T; + std::vector tmp(pvector.size(), T(0)); + std::vector result(pvector.size()); + for(size_type i = 0; i < pvector.size(); ++i) { + tmp[i] = T(1); + lu_solve(LU, pvector, result, tmp); + copy(result, mat_col(AInv, i)); + tmp[i] = T(0); + } + } + + template + void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector, + DenseMatrix& AInv, row_major) { + typedef typename linalg_traits::value_type T; + std::vector tmp(pvector.size(), T(0)); + std::vector result(pvector.size()); + for(size_type i = 0; i < pvector.size(); ++i) { + tmp[i] = T(1); // to be optimized !! + // on peut sur le premier tri solve reduire le systeme + // et peut etre faire un solve sur une serie de vecteurs au lieu + // de vecteur a vecteur (accumulation directe de l'inverse dans la + // matrice au fur et a mesure du calcul ... 
-> evite la copie finale + lu_solve_transposed(LU, pvector, result, tmp); + copy(result, mat_row(AInv, i)); + tmp[i] = T(0); + } + } + ///@endcond + + /** Given an LU factored matrix, build the inverse of the matrix. */ + template + void lu_inverse(const DenseMatrixLU& LU, const Pvector& pvector, + const DenseMatrix& AInv_) { + DenseMatrix& AInv = const_cast(AInv_); + lu_inverse(LU, pvector, AInv, typename principal_orientation_type::sub_orientation>::potype()); + } + + /** Given a dense matrix, build the inverse of the matrix, and + return the determinant */ + template + typename linalg_traits::value_type + lu_inverse(const DenseMatrix& A_, bool doassert = true) { + typedef typename linalg_traits::value_type T; + DenseMatrix& A = const_cast(A_); + dense_matrix B(mat_nrows(A), mat_ncols(A)); + std::vector ipvt(mat_nrows(A)); + gmm::copy(A, B); + size_type info = lu_factor(B, ipvt); + if (doassert) GMM_ASSERT1(!info, "Non invertible matrix, pivot = "< + typename linalg_traits::value_type + lu_det(const DenseMatrixLU& LU, const Pvector &pvector) { + typedef typename linalg_traits::value_type T; + T det(1); + for (size_type j = 0; j < std::min(mat_nrows(LU), mat_ncols(LU)); ++j) + det *= LU(j,j); + for(size_type i = 0; i < pvector.size(); ++i) + if (i != size_type(pvector[i]-1)) { det = -det; } + return det; + } + + template + typename linalg_traits::value_type + lu_det(const DenseMatrix& A) { + typedef typename linalg_traits::value_type T; + dense_matrix B(mat_nrows(A), mat_ncols(A)); + std::vector ipvt(mat_nrows(A)); + gmm::copy(A, B); + lu_factor(B, ipvt); + return lu_det(B, ipvt); + } + +} + +#endif + diff --git a/gmm/gmm_dense_matrix_functions.h b/gmm/gmm_dense_matrix_functions.h new file mode 100644 index 000000000..6005918a4 --- /dev/null +++ b/gmm/gmm_dense_matrix_functions.h @@ -0,0 +1,302 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2014-2017 Konstantinos 
Poulios + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_dense_matrix_functions.h + @author Konstantinos Poulios + @date December 10, 2014. + @brief Common matrix functions for dense matrices. +*/ +#ifndef GMM_DENSE_MATRIX_FUNCTIONS_H +#define GMM_DENSE_MATRIX_FUNCTIONS_H + + +namespace gmm { + + + /** + Matrix square root for upper triangular matrices (from GNU Octave). 
+  */
+  template
+  void sqrtm_utri_inplace(dense_matrix& A)
+  {
+    typedef typename number_traits::magnitude_type R;
+    bool singular = false;
+
+    // The following code is equivalent to this triple loop:
+    //
+    //   n = rows (A);
+    //   for j = 1:n
+    //     A(j,j) = sqrt (A(j,j));
+    //     for i = j-1:-1:1
+    //       A(i,j) /= (A(i,i) + A(j,j));
+    //       k = 1:i-1;
+    //       A(k,j) -= A(k,i) * A(i,j);
+    //     endfor
+    //   endfor
+
+    // The tolerance is hard-wired to zero (the scaled alternative is left
+    // commented out), so only a diagonal entry whose modulus is exactly
+    // zero sets `singular`.
+    R tol = R(0); // default_tol(R()) * gmm::mat_maxnorm(A);
+
+    const size_type n = mat_nrows(A);
+    for (int j=0; j < int(n); j++) {
+      // colj / coli address the entries of column j / i through raw
+      // iterators; assumes contiguous column-major storage with n entries
+      // per column -- TODO confirm against dense_matrix.
+      typename dense_matrix::iterator colj = A.begin() + j*n;
+      if (gmm::abs(colj[j]) > tol)
+        colj[j] = gmm::sqrt(colj[j]);
+      else
+        singular = true;
+
+      for (int i=j-1; i >= 0; i--) {
+        typename dense_matrix::const_iterator coli = A.begin() + i*n;
+        T colji = colj[i] = safe_divide(colj[i], (coli[i] + colj[j]));
+        for (int k = 0; k < i; k++)
+          colj[k] -= coli[k] * colji;
+      }
+    }
+
+    // Only a warning is issued; the (possibly meaningless) result is kept.
+    if (singular)
+      GMM_WARNING1("Matrix is singular, may not have a square root");
+  }
+
+
+  // Square root of a square complex matrix A, written to SQRTMA.
+  // With GMM_USES_LAPACK: schur(TMP, S, Q) is called on a copy of A, S is
+  // replaced by its square root (sqrtm_utri_inplace) and the result is
+  // recomposed as Q * S * transposed(Q). Without LAPACK the function
+  // asserts.
+  // NOTE(review): Q is complex here but a plain transpose (not a conjugate
+  // transpose) is used for the back-transformation -- confirm this is
+  // intended.
+  template
+  void sqrtm(const dense_matrix >& A,
+             dense_matrix >& SQRTMA)
+  {
+    GMM_ASSERT1(gmm::mat_nrows(A) == gmm::mat_ncols(A),
+                "Matrix square root requires a square matrix");
+    gmm::resize(SQRTMA, gmm::mat_nrows(A), gmm::mat_ncols(A));
+    dense_matrix > S(A), Q(A), TMP(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(TMP, S, Q);
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use sqrtm matrix function.");
+    #endif
+    sqrtm_utri_inplace(S);
+    gmm::mult(Q, S, TMP);
+    gmm::mult(TMP, gmm::transposed(Q), SQRTMA);
+  }
+
+  // Overload taking a non-complex A: A is copied into the real part of a
+  // complex matrix of the same size and the complex overload is called.
+  template
+  void sqrtm(const dense_matrix& A,
+             dense_matrix >& SQRTMA)
+  {
+    dense_matrix > cA(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, gmm::real_part(cA));
+    sqrtm(cA, SQRTMA);
+  }
+
+  // Overload with non-complex input and output: the square root is computed
+  // in complex arithmetic and only its real part is copied to SQRTMA (any
+  // imaginary part is dropped without a check).
+  template
+  void sqrtm(const dense_matrix& A, dense_matrix& SQRTMA)
+  {
+    dense_matrix > cA(mat_nrows(A), mat_ncols(A));
+    gmm::copy(A, gmm::real_part(cA));
+    dense_matrix > cSQRTMA(cA);
+    sqrtm(cA, cSQRTMA);
+    gmm::resize(SQRTMA, gmm::mat_nrows(A), gmm::mat_ncols(A));
+    gmm::copy(gmm::real_part(cSQRTMA), SQRTMA);
+//    dense_matrix >::const_reference
+//      it = cSQRTMA.begin(), ite = cSQRTMA.end();
+//    dense_matrix >::reference
+//      rit = SQRTMA.begin();
+//    for (; it != ite; ++it, ++rit) *rit = it->real();
+  }
+
+
+  /**
+     Matrix logarithm for upper triangular matrices (from GNU/Octave)
+
+     S is overwritten by its logarithm. The matrix is repeatedly replaced
+     by its square root (at most opt_iters = 100 times) until the 1-norm of
+     S - I falls below the theta[] thresholds, a quadrature of order m is
+     applied to S - I, and the result is multiplied by `scaling`.
+  */
+  template
+  void logm_utri_inplace(dense_matrix& S)
+  {
+    typedef typename number_traits::magnitude_type R;
+
+    size_type n = gmm::mat_nrows(S);
+    GMM_ASSERT1(n == gmm::mat_ncols(S),
+                "Matrix logarithm is not defined for non-square matrices");
+    // Only the first sub-diagonal is inspected for non-zero entries.
+    // NOTE(review): if size_type is unsigned, n-1 wraps around for n == 0
+    // in the two loops below -- confirm empty input cannot reach here.
+    for (size_type i=0; i < n-1; ++i)
+      if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+        GMM_ASSERT1(false, "An upper triangular matrix is expected");
+        break;
+      }
+    // NOTE(review): this loop also stops at n-1, so the last diagonal
+    // entry S(n-1,n-1) is never tested for being a negative real
+    // eigenvalue -- confirm whether `i < n` was intended.
+    for (size_type i=0; i < n-1; ++i)
+      if (gmm::real(S(i,i)) <= -default_tol(R()) &&
+          gmm::abs(gmm::imag(S(i,i))) <= default_tol(R())) {
+        GMM_ASSERT1(false, "Principal matrix logarithm is not defined "
+                    "for matrices with negative eigenvalues");
+        break;
+      }
+
+    // Algorithm 11.9 in "Function of matrices", by N. Higham
+    R theta[] = { R(0),R(0),R(1.61e-2),R(5.38e-2),R(1.13e-1),R(1.86e-1),R(2.6429608311114350e-1) };
+
+    // `scaling` doubles after every iteration that does not break out, so
+    // it is 2^(number of square roots actually taken).
+    R scaling(1);
+    size_type p(0), m(6), opt_iters(100);
+    for (size_type k=0; k < opt_iters; ++k, scaling *= R(2)) {
+      dense_matrix auxS(S);
+      for (size_type i = 0; i < n; ++i) auxS(i,i) -= R(1);
+      R tau = gmm::mat_norm1(auxS);   // tau = || S - I ||_1
+      if (tau <= theta[6]) {
+        ++p;
+        size_type j1(6), j2(6);
+        for (size_type j=0; j < 6; ++j)
+          if (tau <= theta[j]) { j1 = j; break; }
+        for (size_type j=0; j < j1; ++j)
+          if (tau <= 2*theta[j]) { j2 = j; break; }
+        if (j1 - j2 <= 1 || p == 2) { m = j1; break; }
+      }
+      sqrtm_utri_inplace(S);
+      if (k == opt_iters-1)
+        GMM_WARNING1 ("Maximum number of square roots exceeded; "
+                      "the calculated matrix logarithm may still be accurate");
+    }
+
+    for (size_type i = 0; i < n; ++i) S(i,i) -= R(1);
+
+    // m == 0 skips the quadrature entirely, so `case 0` below cannot be
+    // reached from this guard.
+    if (m > 0) {
+
+      // Tabulated m+1 nodes and weights; presumably Gauss-Legendre rules
+      // on [0,1] -- TODO confirm against the reference.
+      std::vector nodes, wts;
+      switch(m) {
+        case 0: {
+          R nodes_[] = { R(0.5) };
+          R wts_[] = { R(1) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 1: {
+          R nodes_[] = { R(0.211324865405187),R(0.788675134594813) };
+          R wts_[] = { R(0.5),R(0.5) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 2: {
+          R nodes_[] = { R(0.112701665379258),R(0.500000000000000),R(0.887298334620742) };
+          R wts_[] = { R(0.277777777777778),R(0.444444444444444),R(0.277777777777778) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 3: {
+          R nodes_[] = { R(0.0694318442029737),R(0.3300094782075718),R(0.6699905217924281),R(0.9305681557970263) };
+          R wts_[] = { R(0.173927422568727),R(0.326072577431273),R(0.326072577431273),R(0.173927422568727) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 4: {
+          R nodes_[] = { R(0.0469100770306681),R(0.2307653449471584),R(0.5000000000000000),R(0.7692346550528415),R(0.9530899229693319) };
+          R wts_[] = { R(0.118463442528095),R(0.239314335249683),R(0.284444444444444),R(0.239314335249683),R(0.118463442528094) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 5: {
+          R nodes_[] = { R(0.0337652428984240),R(0.1693953067668678),R(0.3806904069584015),R(0.6193095930415985),R(0.8306046932331322),R(0.9662347571015761) };
+          R wts_[] = { R(0.0856622461895853),R(0.1803807865240693),R(0.2339569672863452),R(0.2339569672863459),R(0.1803807865240693),R(0.0856622461895852) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+        case 6: {
+          R nodes_[] = { R(0.0254460438286208),R(0.1292344072003028),R(0.2970774243113015),R(0.4999999999999999),R(0.7029225756886985),R(0.8707655927996973),R(0.9745539561713792) };
+          R wts_[] = { R(0.0647424830844348),R(0.1398526957446384),R(0.1909150252525594),R(0.2089795918367343),R(0.1909150252525595),R(0.1398526957446383),R(0.0647424830844349) };
+          nodes.assign(nodes_, nodes_+m+1);
+          wts.assign(wts_, wts_+m+1);
+        } break;
+      }
+
+      // auxS1 keeps S - I; auxS2 is rebuilt for every node as
+      // I + nodes[j] * (S - I).
+      dense_matrix auxS1(S), auxS2(S);
+      std::vector auxvec(n);
+      gmm::clear(S);
+      for (size_type j=0; j <= m; ++j) {
+        gmm::copy(gmm::scaled(auxS1, nodes[j]), auxS2);
+        gmm::add(gmm::identity_matrix(), auxS2);
+        // S += wts[j] * auxS1 * inv(auxS2), computed one row at a time by
+        // a triangular solve against transposed(auxS2)
+        for (size_type i=0; i < n; ++i) {
+          gmm::copy(gmm::mat_row(auxS1, i), auxvec);
+          gmm::lower_tri_solve(gmm::transposed(auxS2), auxvec, false);
+          gmm::add(gmm::scaled(auxvec, wts[j]), gmm::mat_row(S, i));
+        }
+      }
+    }
+    gmm::scale(S, scaling);
+  }
+
+  /**
+     Matrix logarithm (from GNU/Octave)
+
+     A must be square; LOGMA is resized to n x n and receives the result.
+     Requires GMM_USES_LAPACK for schur(); otherwise the function asserts.
+     When T is not complex and the Schur factor S has a non-negligible
+     sub-diagonal entry, the computation is redone in complex arithmetic
+     and only the real part of the result is returned.
+  */
+  template
+  void logm(const dense_matrix& A, dense_matrix& LOGMA)
+  {
+    typedef typename number_traits::magnitude_type R;
+    size_type n = gmm::mat_nrows(A);
+    GMM_ASSERT1(n == gmm::mat_ncols(A),
+                "Matrix logarithm is not defined for non-square matrices");
+    dense_matrix S(A), Q(A);
+    #if defined(GMM_USES_LAPACK)
+    schur(A, S, Q); // A = Q * S * Q^T
+    #else
+    GMM_ASSERT1(false, "Please recompile with lapack and blas librairies "
+                "to use logm matrix function.");
+    #endif
+
+    // NOTE(review): n-1 wraps for n == 0 if size_type is unsigned --
+    // confirm empty matrices are excluded by callers.
+    bool convert_to_complex(false);
+    if (!is_complex(T()))
+      for (size_type i=0; i < n-1; ++i)
+        if (gmm::abs(S(i+1,i)) > default_tol(T())) {
+          convert_to_complex = true;
+          break;
+        }
+
+    gmm::resize(LOGMA, n, n);
+    if (convert_to_complex) {
+      dense_matrix > cS(n,n), cQ(n,n), auxmat(n,n);
+      gmm::copy(gmm::real_part(S), gmm::real_part(cS));
+      gmm::copy(gmm::real_part(Q), gmm::real_part(cQ));
+      block2x2_reduction(cS, cQ, default_tol(R())*R(3));
+      // Force the strictly lower triangle to zero before taking the
+      // triangular logarithm.
+      for (size_type j=0; j < n-1; ++j)
+        for (size_type i=j+1; i < n; ++i)
+          cS(i,j) = T(0);
+      logm_utri_inplace(cS);
+      gmm::mult(cQ, cS, auxmat);
+      // NOTE(review): cQ is complex but a plain transpose is used --
+      // confirm a conjugate transpose is not required.
+      gmm::mult(auxmat, gmm::transposed(cQ), cS);
+      // Remove small complex values which may have entered calculation
+      gmm::copy(gmm::real_part(cS), LOGMA);
+//      GMM_ASSERT1(gmm::mat_norm1(gmm::imag_part(cS)) < n*default_tol(T()),
+//                  "Internal error, imag part should be zero");
+    } else {
+      dense_matrix auxmat(n,n);
+      logm_utri_inplace(S);
+      gmm::mult(Q, S, auxmat);
+      gmm::mult(auxmat, gmm::transposed(Q), LOGMA);
+    }
+
+  }
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_qr.h b/gmm/gmm_dense_qr.h
new file mode 100644
index 000000000..9de7dbeb8
--- /dev/null
+++ b/gmm/gmm_dense_qr.h
@@ -0,0 +1,789 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++ is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_dense_qr.h + @author Caroline Lecalvez, Caroline.Lecalvez@gmm.insa-tlse.fr, Yves Renard + @date September 12, 2003. + @brief Dense QR factorization. +*/ +#ifndef GMM_DENSE_QR_H +#define GMM_DENSE_QR_H + +#include "gmm_dense_Householder.h" + +namespace gmm { + + + /** + QR factorization using Householder method (complex and real version). 
+  */
+  template
+  void qr_factor(const MAT1 &A_) {
+    // A_ is declared const so that temporaries can be bound to it; the
+    // matrix is nevertheless overwritten in place through this cast.
+    MAT1 &A = const_cast(A_);
+    typedef typename linalg_traits::value_type value_type;
+
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+
+    std::vector W(m), V(m);
+
+    for (size_type j = 0; j < n; ++j) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+      V.resize(m-j); W.resize(n-j);
+
+      // V <- column j from the diagonal downwards, then turned into a
+      // Householder vector by house_vector().
+      for (size_type i = j; i < m; ++i) V[i-j] = A(i, j);
+      house_vector(V);
+
+      row_house_update(sub_matrix(A, SUBI, SUBJ), V, W);
+      // Store the reflector below the diagonal (V[0] is not stored).
+      for (size_type i = j+1; i < m; ++i) A(i, j) = V[i-j];
+    }
+  }
+
+
+  // QR comes from QR_factor(QR) where the upper triangular part stands for R
+  // and the lower part contains the Householder reflectors.
+  // A <- AQ
+  // A_ is modified in place despite being passed as a const reference.
+  template
+  void apply_house_right(const MAT1 &QR, const MAT2 &A_) {
+    MAT2 &A = const_cast(A_);
+    typedef typename linalg_traits::value_type T;
+    size_type m = mat_nrows(QR), n = mat_ncols(QR);
+    GMM_ASSERT2(m == mat_ncols(A), "dimensions mismatch");
+    if (m == 0) return;
+    std::vector V(m), W(mat_nrows(A));
+    // The leading component of every reflector is set to 1 once; only the
+    // components below it are reloaded from QR for each column.
+    V[0] = T(1);
+    for (size_type j = 0; j < n; ++j) {
+      V.resize(m-j);
+      for (size_type i = j+1; i < m; ++i) V[i-j] = QR(i, j);
+      col_house_update(sub_matrix(A, sub_interval(0, mat_nrows(A)),
+                                  sub_interval(j, m-j)), V, W);
+    }
+  }
+
+  // QR comes from QR_factor(QR) where the upper triangular part stands for R
+  // and the lower part contains the Householder reflectors.
+  // A <- Q*A
+  // A_ is modified in place despite being passed as a const reference.
+  template
+  void apply_house_left(const MAT1 &QR, const MAT2 &A_) {
+    MAT2 &A = const_cast(A_);
+    typedef typename linalg_traits::value_type T;
+    size_type m = mat_nrows(QR), n = mat_ncols(QR);
+    GMM_ASSERT2(m == mat_nrows(A), "dimensions mismatch");
+    if (m == 0) return;
+    std::vector V(m), W(mat_ncols(A));
+    // Leading reflector component is 1; the rest is read from below the
+    // diagonal of QR.
+    V[0] = T(1);
+    for (size_type j = 0; j < n; ++j) {
+      V.resize(m-j);
+      for (size_type i = j+1; i < m; ++i) V[i-j] = QR(i, j);
+      row_house_update(sub_matrix(A, sub_interval(j, m-j),
+                                  sub_interval(0, mat_ncols(A))), V, W);
+    }
+  }
+
+  /** Compute the QR factorization, where Q is assembled.
+      A is left untouched; QQ and RR are outputs (written through
+      const_cast). Requires mat_nrows(A) >= mat_ncols(A). */
+  template
+  void qr_factor(const MAT1 &A, const MAT2 &QQ, const MAT3 &RR) {
+    MAT2 &Q = const_cast(QQ); MAT3 &R = const_cast(RR);
+    typedef typename linalg_traits::value_type value_type;
+
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m >= n, "dimensions mismatch");
+    gmm::copy(A, Q);
+
+    std::vector W(m);
+    dense_matrix VV(m, n);   // column j holds the j-th Householder vector
+
+    // First pass: reduce the working copy held in Q, saving each
+    // reflector in VV.
+    for (size_type j = 0; j < n; ++j) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+
+      for (size_type i = j; i < m; ++i) VV(i,j) = Q(i, j);
+      house_vector(sub_vector(mat_col(VV,j), SUBI));
+
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+
+    // R is the leading n x n block of the reduced matrix; Q is then reset
+    // to the identity and rebuilt from the stored reflectors.
+    gmm::copy(sub_matrix(Q, sub_interval(0, n), sub_interval(0, n)), R);
+    gmm::copy(identity_matrix(), Q);
+
+    // Counts j down from n-1 to 0; the loop ends when j wraps to
+    // size_type(-1).
+    for (size_type j = n-1; j != size_type(-1); --j) {
+      sub_interval SUBI(j, m-j), SUBJ(j, n-j);
+      row_house_update(sub_matrix(Q, SUBI, SUBJ),
+                       sub_vector(mat_col(VV,j), SUBI), sub_vector(W, SUBJ));
+    }
+  }
+
+  ///@cond DOXY_SHOW_ALL_FUNCTIONS
+  // extract_eig overloads: read the eigenvalues off a (quasi-)triangular
+  // matrix A into V. The two trailing unnamed arguments are type tags used
+  // only for overload selection. This overload stores plain (non
+  // std::complex) values in V: for a 2x2 diagonal block with negative
+  // discriminant it warns and stores tr/2 (the real part) twice.
+  template
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, TV) {
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i);
+      }
+      // A non-negligible sub-diagonal entry marks a 2x2 block whose
+      // eigenvalues are the roots of its characteristic polynomial.
+      if ((i < n-1) && gmm::abs(A(i+1,i)) >= tol_i) {
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det;
+        if (delta < -tol_cplx) {
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                       << std::complex(tr/TA(2), gmm::sqrt(-delta)/TA(2)));
+          V[i] = V[i+1] = tr / TA(2);
+        }
+        else {
+          delta = std::max(TA(0), delta);
+          V[i  ] = TA(tr + gmm::sqrt(delta))/ TA(2);
+          V[i+1] = TA(tr - gmm::sqrt(delta))/ TA(2);
+        }
+        ++i;   // the block consumed two diagonal positions
+      }
+      else
+        V[i] = TV(A(i,i));
+    }
+  }
+
+  // Overload selected when the second tag is a std::complex: a 2x2 block
+  // with negative discriminant yields a conjugate pair in V.
+  template
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, TA, std::complex) {
+    size_type n = mat_nrows(A);
+    tol *= Ttol(2);
+    for (size_type i = 0; i < n; ++i)
+      if ((i == n-1) ||
+          gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol)
+        V[i] = std::complex(A(i,i));
+      else {
+        TA tr = A(i,i) + A(i+1, i+1);
+        TA det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        TA delta = tr*tr - TA(4) * det;
+        if (delta < TA(0)) {
+          V[i] = std::complex(tr / TA(2), gmm::sqrt(-delta) / TA(2));
+          V[i+1] = std::complex(tr / TA(2), -gmm::sqrt(-delta)/ TA(2));
+        }
+        else {
+          V[i  ] = TA(tr + gmm::sqrt(delta)) / TA(2);
+          V[i+1] = TA(tr - gmm::sqrt(delta)) / TA(2);
+        }
+        ++i;
+      }
+  }
+
+  // Overload selected when the first tag is a std::complex and the second
+  // is not: only real parts are stored in V, with a warning whenever the
+  // discarded imaginary part exceeds tol_cplx.
+  template
+  void extract_eig(const MAT &A, VECT &V, Ttol tol, std::complex, TV) {
+    typedef std::complex T;
+    size_type n = mat_nrows(A);
+    if (n == 0) return;
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(A(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n; ++i) {
+      if (i < n-1) {
+        tol_i = (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol;
+        tol_cplx = std::max(tol_cplx, tol_i);
+      }
+      if ((i == n-1) || gmm::abs(A(i+1,i)) < tol_i) {
+        if (gmm::abs(std::imag(A(i,i))) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : "
+                       << T(A(i,i)) << " : " << gmm::abs(std::imag(A(i,i)))
+                       / gmm::abs(std::real(A(i,i))) << " : " << tol_cplx);
+        V[i] = std::real(A(i,i));
+      }
+      else {
+        T tr = A(i,i) + A(i+1, i+1);
+        T det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        T delta = tr*tr - TA(4) * det;
+        T a1 = (tr + gmm::sqrt(delta)) / TA(2);
+        T a2 = (tr - gmm::sqrt(delta)) / TA(2);
+        if (gmm::abs(std::imag(a1)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a1);
+        if (gmm::abs(std::imag(a2)) > tol_cplx)
+          GMM_WARNING1("A complex eigenvalue has been detected : " << a2);
+
+        V[i] = std::real(a1); V[i+1] = std::real(a2);
+        ++i;
+      }
+    }
+  }
+
+  // Overload selected when both tags are std::complex: eigenvalues of 2x2
+  // blocks are computed directly in complex arithmetic.
+  template
+  void extract_eig(const MAT &A, VECT &V, Ttol tol,
+                   std::complex, std::complex) {
+    size_type n = mat_nrows(A);
+    tol *= Ttol(2);
+    for (size_type i = 0; i < n; ++i)
+      if ((i == n-1) ||
+          gmm::abs(A(i+1,i)) < (gmm::abs(A(i,i))+gmm::abs(A(i+1,i+1)))*tol)
+        V[i] = std::complex(A(i,i));
+      else {
+        std::complex tr = A(i,i) + A(i+1, i+1);
+        std::complex det = A(i,i)*A(i+1, i+1) - A(i,i+1)*A(i+1, i);
+        std::complex delta = tr*tr - TA(4) * det;
+        V[i] = (tr + gmm::sqrt(delta)) / TA(2);
+        V[i+1] = (tr - gmm::sqrt(delta)) / TA(2);
+        ++i;
+      }
+  }
+
+  ///@endcond
+  /**
+     Compute eigenvalue vector.
+     Entry point: dispatches to one of the tagged overloads above using
+     default-constructed value_type objects as tags. V is an output and is
+     written through const_cast.
+  */
+  template inline
+  void extract_eig(const MAT &A, const VECT &V, Ttol tol) {
+    extract_eig(A, const_cast(V), tol,
+                typename linalg_traits::value_type(),
+                typename linalg_traits::value_type());
+  }
+
+  /* ********************************************************************* */
+  /*    Stop criterion for QR algorithms                                   */
+  /* ********************************************************************* */
+
+  // Deflation test for the unsymmetric QR iteration. Negligible
+  // sub-diagonal entries of A are set to exactly zero, then q and p are
+  // updated; the callers in this file iterate on rows/columns
+  // [p, n-q) until q == n. q is read on entry (loop bound n-q), so it must
+  // be initialised by the caller.
+  template
+  void qr_stop_criterion(MAT &A, size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    size_type n = mat_nrows(A);
+    if (n <= 2) { q = n; p = 0; }
+    else {
+      for (size_type i = 1; i < n-q; ++i)
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+
+      // Grow q while the trailing block ends with a zero in its last or
+      // next-to-last sub-diagonal position.
+      while ((q < n-1 && A(n-1-q, n-2-q) == T(0)) ||
+             (q < n-2 && A(n-2-q, n-3-q) == T(0))) ++q;
+      if (q >= n-2) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p;
+    }
+  }
+
+  // Same test for the symmetric case on a full matrix; AA is modified
+  // (sub-diagonal entries zeroed) through const_cast.
+  template inline
+  void symmetric_qr_stop_criterion(const MAT &AA, size_type &p, size_type &q,
+                                   Ttol tol) {
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    MAT& A = const_cast(AA);
+    size_type n = mat_nrows(A);
+    if (n <= 1) { q = n; p = 0; }
+    else {
+      for (size_type i = 1; i < n-q; ++i)
+        if (gmm::abs(A(i,i-1)) < (gmm::abs(A(i,i))+ gmm::abs(A(i-1,i-1)))*tol
+            || gmm::abs(A(i,i-1)) < rmin)
+          A(i,i-1) = T(0);
+
+      while (q < n-1 && A(n-1-q, n-2-q) == T(0)) ++q;
+      if (q >= n-1) q = n;
+      p = n-q; if (p) --p; if (p) --p;
+      while (p > 0 && A(p,p-1) != T(0)) --p;
+    }
+  }
+
+  // Same test for a tridiagonal matrix given by its diagonal `diag` and
+  // sub-diagonal `sdiag_` (sdiag[i-1] plays the role of A(i,i-1)); sdiag_
+  // is modified through const_cast.
+  template inline
+  void symmetric_qr_stop_criterion(const VECT1 &diag, const VECT2 &sdiag_,
+                                   size_type &p, size_type &q, Ttol tol) {
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+    R rmin = default_min(R()) * R(2);
+    VECT2 &sdiag = const_cast(sdiag_);
+    size_type n = vect_size(diag);
+    if (n <= 1) { q = n; p = 0; return; }
+    for (size_type i = 1; i < n-q; ++i)
+      if (gmm::abs(sdiag[i-1]) < (gmm::abs(diag[i])+ gmm::abs(diag[i-1]))*tol
+          || gmm::abs(sdiag[i-1]) < rmin)
+        sdiag[i-1] = T(0);
+    while (q < n-1 && sdiag[n-2-q] == T(0)) ++q;
+    if (q >= n-1) q = n;
+    p = n-q; if (p) --p; if (p) --p;
+    while (p > 0 && sdiag[p-1] != T(0)) --p;
+  }
+
+  /* ********************************************************************* */
+  /*     2x2 blocks reduction for Schur vectors                            */
+  /* ********************************************************************* */
+
+  // For every 2x2 diagonal block of H (non-negligible H(i+1,i)) whose
+  // discriminant is non-negative -- or for any such block when T is
+  // complex -- applies a Householder transformation to H from both sides
+  // and to the columns i, i+1 of Q.
+  template
+  void block2x2_reduction(MATH &H, MATQ &Q, Ttol tol) {
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+    if (n < 2) return;
+    sub_interval SUBQ(0, nq), SUBL(0, 2);
+    std::vector v(2), w(std::max(n, nq)); v[0] = T(1);
+    tol *= Ttol(2);
+    Ttol tol_i = tol * gmm::abs(H(0,0)), tol_cplx = tol_i;
+    for (size_type i = 0; i < n-1; ++i) {
+      tol_i = (gmm::abs(H(i,i))+gmm::abs(H(i+1,i+1)))*tol;
+      tol_cplx = std::max(tol_cplx, tol_i);
+
+      if (gmm::abs(H(i+1,i)) > tol_i) { // 2x2 block detected
+        T tr = (H(i+1, i+1) - H(i,i)) / T(2);
+        T delta = tr*tr + H(i,i+1)*H(i+1, i);
+
+        if (is_complex(T()) || gmm::real(delta) >= R(0)) {
+          sub_interval SUBI(i, 2);
+          T theta = (tr - gmm::sqrt(delta)) / H(i+1,i);
+          R a = gmm::abs(theta);
+          v[1] = (a == R(0)) ? T(-1)
+            : gmm::conj(theta) * (R(1) - gmm::sqrt(a*a + R(1)) / a);
+          row_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(H, SUBI), v, sub_vector(w, SUBL));
+          col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+        }
+        ++i;   // skip the second row of the block just handled
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*         Basic qr algorithm.                                           */
+  /* ********************************************************************* */
+
+  // Helper macros used as the type and default value of the `tol`
+  // parameter of the QR drivers below (default: 3 * default_tol).
+  #define tol_type_for_qr typename number_traits::value_type>::magnitude_type
+  #define default_tol_for_qr \
+    (gmm::default_tol(tol_type_for_qr()) * tol_type_for_qr(3))
+
+  // QR method for real or complex square matrices based on QR factorisation.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // Very slow method. Use implicit_qr_method instead.
+  template
+  void rudimentary_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                                const MAT2 &eigvect_,
+                                tol_type_for_qr tol = default_tol_for_qr,
+                                bool compvect = true) {
+    // eigval_ and eigvect_ are outputs, written through const_cast.
+    VECT &eigval = const_cast(eigval_);
+    MAT2 &eigvect = const_cast(eigvect_);
+
+    typedef typename linalg_traits::value_type value_type;
+
+    size_type n = mat_nrows(A), p, q = 0, ite = 0;
+    dense_matrix Q(n, n), R(n,n), A1(n,n);
+    gmm::copy(A, A1);
+
+    Hessenberg_reduction(A1, eigvect, compvect);
+    qr_stop_criterion(A1, p, q, tol);
+
+    // Unshifted iteration A1 <- R*Q, accumulating eigvect <- eigvect*Q
+    // (R is reused as scratch storage for that product).
+    while (q < n) {
+      qr_factor(A1, Q, R);
+      gmm::mult(R, Q, A1);
+      if (compvect) { gmm::mult(eigvect, Q, R); gmm::copy(R, eigvect); }
+
+      qr_stop_criterion(A1, p, q, tol);
+      ++ite;
+      GMM_ASSERT1(ite < n*1000, "QR algorithm failed");
+    }
+    // NOTE(review): the reduction is applied to the local factor Q, not to
+    // eigvect, so the caller's vectors are not updated by it -- confirm
+    // whether eigvect was intended here.
+    if (compvect) block2x2_reduction(A1, Q, tol);
+    extract_eig(A1, eigval, tol);
+  }
+
+  // Eigenvalues only: forwards with a 0x0 placeholder matrix and
+  // compvect = false.
+  template
+  void rudimentary_qr_algorithm(const MAT1 &a, VECT &eigval,
+                                tol_type_for_qr tol = default_tol_for_qr) {
+    dense_matrix::value_type> m(0,0);
+    rudimentary_qr_algorithm(a, eigval, m, tol, false);
+  }
+
+  /* ********************************************************************* */
+  /*    Francis QR step.                                                   */
+  /* ********************************************************************* */
+
+  // One implicit double-shift step on H (modified in place through
+  // const_cast); when compute_Q is true the same column transformations
+  // are applied to Q. The indexing H(n-2,.), H(2,1) implies n >= 3 --
+  // presumably guaranteed by the caller; TODO confirm.
+  template
+  void Francis_qr_step(const MAT1& HH, const MAT2 &QQ, bool compute_Q) {
+    MAT1& H = const_cast(HH); MAT2& Q = const_cast(QQ);
+    typedef typename linalg_traits::value_type value_type;
+    size_type n = mat_nrows(H), nq = mat_nrows(Q);
+
+    std::vector v(3), w(std::max(n, nq));
+
+    // s and t are the trace and determinant of the trailing 2x2 block;
+    // (x, y, z) is the first column of H*H - s*H + t*I.
+    value_type s = H(n-2, n-2) + H(n-1, n-1);
+    value_type t = H(n-2, n-2) * H(n-1, n-1) - H(n-2, n-1) * H(n-1, n-2);
+    value_type x = H(0, 0) * H(0, 0) + H(0,1) * H(1, 0) - s * H(0,0) + t;
+    value_type y = H(1, 0) * (H(0,0) + H(1,1) - s);
+    value_type z = H(1, 0) * H(2, 1);
+
+    sub_interval SUBQ(0, nq);
+
+    for (size_type k = 0; k < n - 2; ++k) {
+      v[0] = x; v[1] = y; v[2] = z;
+      house_vector(v);
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+
+      x = H(k+1, k); y = H(k+2, k);
+      if (k < n-3) z = H(k+3, k);
+    }
+    // Final 2-element reflector acting on the last two rows/columns.
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2);
+    v[0] = x; v[1] = y;
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Wilkinson Double shift QR step (from Lapack).                      */
+  /* ********************************************************************* */
+
+  // One double-shift step on H (modified in place through const_cast).
+  // `exc` selects the exceptional shift built from the last two
+  // sub-diagonal magnitudes instead of the trailing 2x2 block; when
+  // compute_Q is true the column transformations are also applied to Q.
+  // The indexing H(n-2, n-3) implies n >= 3 -- presumably guaranteed by
+  // the caller; TODO confirm.
+  template
+  void Wilkinson_double_shift_qr_step(const MAT1& HH, const MAT2 &QQ,
+                                      Ttol tol, bool exc, bool compute_Q) {
+    MAT1& H = const_cast(HH); MAT2& Q = const_cast(QQ);
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+
+    size_type n = mat_nrows(H), nq = mat_nrows(Q), m;
+    std::vector v(3), w(std::max(n, nq));
+    const R dat1(0.75), dat2(-0.4375);
+    T h33, h44, h43h34, v1(0), v2(0), v3(0);
+
+    if (exc) {                    /* Exceptional shift.                     */
+      R s = gmm::abs(H(n-1, n-2)) + gmm::abs(H(n-2, n-3));
+      h33 = h44 = dat1 * s;
+      h43h34 = dat2*s*s;
+    }
+    else {                        /* Wilkinson double shift.                */
+      h44 = H(n-1,n-1); h33 = H(n-2, n-2);
+      h43h34 = H(n-1, n-2) * H(n-2, n-1);
+    }
+
+    /* Look for two consecutive small subdiagonal elements.                */
+    /* Determine the effect of starting the double-shift QR iteration at   */
+    /* row m, and see if this would make H(m-1, m-2) negligible.           */
+    for (m = n-2; m != 0; --m) {
+      T h11  = H(m-1, m-1), h22  = H(m, m);
+      T h21  = H(m, m-1),   h12  = H(m-1, m);
+      T h44s = h44 - h11,   h33s = h33 - h11;
+      v1 = (h33s*h44s-h43h34) / h21 + h12;
+      v2 = h22 - h11 - h33s - h44s;
+      v3 = H(m+1, m);
+      R s = gmm::abs(v1) + gmm::abs(v2) + gmm::abs(v3);
+      v1 /= s; v2 /= s; v3 /= s;
+      if (m == 1) break;
+      T h00 = H(m-2, m-2);
+      T h10 = H(m-1, m-2);
+      R tst1 = gmm::abs(v1)*(gmm::abs(h00)+gmm::abs(h11)+gmm::abs(h22));
+      if (gmm::abs(h10)*(gmm::abs(v2)+gmm::abs(v3)) <= tol * tst1) break;
+    }
+
+    /* Double shift QR step.                                               */
+    sub_interval SUBQ(0, nq);
+    for (size_type k = (m == 0) ? 0 : m-1; k < n-2; ++k) {
+      v[0] = v1; v[1] = v2; v[2] = v3;
+      house_vector(v);
+      size_type r = std::min(k+4, n), q = (k==0) ? 0 : k-1;
+      sub_interval SUBI(k, 3), SUBJ(0, r), SUBK(q, n-q);
+
+      row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBK));
+      col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+      // Entries below the sub-diagonal in column k-1 are set to exactly
+      // zero after the first step of the sweep.
+      if (k > m-1) { H(k+1, k-1) = T(0); if (k < n-3) H(k+2, k-1) = T(0); }
+
+      if (compute_Q)
+        col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+
+      v1 = H(k+1, k); v2 = H(k+2, k);
+      if (k < n-3) v3 = H(k+3, k);
+    }
+    sub_interval SUBI(n-2,2), SUBJ(0, n), SUBK(n-3,3), SUBL(0, 3);
+    v.resize(2); v[0] = v1; v[1] = v2;
+    house_vector(v);
+    row_house_update(sub_matrix(H, SUBI, SUBK), v, sub_vector(w, SUBL));
+    col_house_update(sub_matrix(H, SUBJ, SUBI), v, sub_vector(w, SUBJ));
+    if (compute_Q)
+      col_house_update(sub_matrix(Q, SUBQ, SUBI), v, sub_vector(w, SUBQ));
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm.                                             */
+  /* ********************************************************************* */
+
+  // QR method for real or complex square matrices based on an
+  // implicit QR factorisation. eigval has to be a complex vector
+  // if A has complex eigenvalues.
Complexity about 10n^3, 25n^3 if
+  // eigenvectors are computed
+  // eigval_ and Q_ are outputs, written through const_cast. A is copied,
+  // reduced by Hessenberg_reduction, then iterated with double-shift steps
+  // on the active window [p, n-q) until qr_stop_criterion reports q == n.
+  template
+  void implicit_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                             const MAT2 &Q_,
+                             tol_type_for_qr tol = default_tol_for_qr,
+                             bool compvect = true) {
+    VECT &eigval = const_cast(eigval_);
+    MAT2 &Q = const_cast(Q_);
+    typedef typename linalg_traits::value_type value_type;
+
+    // ite counts all steps (failure guard); its counts steps since the
+    // last deflation and triggers the exceptional shift at 10 and 20.
+    size_type n(mat_nrows(A)), q(0), q_old, p(0), ite(0), its(0);
+    dense_matrix H(n,n);
+    sub_interval SUBK(0,0);
+
+    gmm::copy(A, H);
+    Hessenberg_reduction(H, Q, compvect);
+    qr_stop_criterion(H, p, q, tol);
+
+    while (q < n) {
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(Q));
+      // Without eigenvectors SUBK stays empty, so the Q sub-matrix passed
+      // below has no columns.
+      if (compvect) SUBK = SUBI;
+//       Francis_qr_step(sub_matrix(H, SUBI),
+//                       sub_matrix(Q, SUBJ, SUBK), compvect);
+      Wilkinson_double_shift_qr_step(sub_matrix(H, SUBI),
+                                     sub_matrix(Q, SUBJ, SUBK),
+                                     tol, (its == 10 || its == 20), compvect);
+      q_old = q;
+      // Inside the loop the stop test uses twice the tolerance.
+      qr_stop_criterion(H, p, q, tol*2);
+      if (q != q_old) its = 0;
+      ++its; ++ite;
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed");
+    }
+    if (compvect) block2x2_reduction(H, Q, tol);
+    extract_eig(H, eigval, tol);
+  }
+
+
+  // Eigenvalues only: forwards with a 1x1 placeholder matrix and
+  // compvect = false.
+  template
+  void implicit_qr_algorithm(const MAT1 &a, VECT &eigval,
+                             tol_type_for_qr tol = default_tol_for_qr) {
+    dense_matrix::value_type> m(1,1);
+    implicit_qr_algorithm(a, eigval, m, tol, false);
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit symmetric QR step with Wilkinson Shift.
*/
+  /* ********************************************************************* */
+
+  // Full-matrix version: one implicit QR step with Wilkinson shift on M
+  // (modified in place through const_cast); when compute_z is true the
+  // Givens rotations are also applied to the columns of Z. The indexing
+  // M(n-2, n-2) implies n >= 2 -- presumably guaranteed by the caller;
+  // TODO confirm.
+  template
+  void symmetric_Wilkinson_qr_step(const MAT1& MM, const MAT2 &ZZ,
+                                   bool compute_z) {
+    MAT1& M = const_cast(MM); MAT2& Z = const_cast(ZZ);
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+    size_type n = mat_nrows(M);
+
+    // Re-symmetrise the tridiagonal part: real diagonal, and each
+    // off-diagonal pair replaced by the average of M(i,i-1) and
+    // conj(M(i-1,i)).
+    for (size_type i = 0; i < n; ++i) {
+      M(i, i) = T(gmm::real(M(i, i)));
+      if (i > 0) {
+        T a = (M(i, i-1) + gmm::conj(M(i-1, i)))/R(2);
+        M(i, i-1) = a; M(i-1, i) = gmm::conj(a);
+      }
+    }
+
+    // Shift mu computed from the trailing 2x2 block.
+    R d = gmm::real(M(n-2, n-2) - M(n-1, n-1)) / R(2);
+    R e = gmm::abs_sqr(M(n-1, n-2));
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    if (nu == R(0)) { M(n-1, n-2) = T(0); return; }
+    R mu = gmm::real(M(n-1, n-1)) - e / nu;
+    T x = M(0,0) - T(mu), z = M(1, 0), c, s;
+
+    for (size_type k = 1; k < n; ++k) {
+      Givens_rotation(x, z, c, s);
+
+      if (k > 1) Apply_Givens_rotation_left(M(k-1,k-2), M(k,k-2), c, s);
+      Apply_Givens_rotation_left(M(k-1,k-1), M(k,k-1), c, s);
+      Apply_Givens_rotation_left(M(k-1,k  ), M(k,k  ), c, s);
+      if (k < n-1) Apply_Givens_rotation_left(M(k-1,k+1), M(k,k+1), c, s);
+      if (k > 1) Apply_Givens_rotation_right(M(k-2,k-1), M(k-2,k), c, s);
+      Apply_Givens_rotation_right(M(k-1,k-1), M(k-1,k), c, s);
+      Apply_Givens_rotation_right(M(k  ,k-1), M(k,k)  , c, s);
+      if (k < n-1) Apply_Givens_rotation_right(M(k+1,k-1), M(k+1,k), c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+      if (k < n-1) { x = M(k, k-1); z = M(k+1, k-1); }
+    }
+
+  }
+
+  // Tridiagonal version: same step on a matrix given by its diagonal
+  // `diag_` and sub-diagonal `sdiag_` (both modified through const_cast).
+  // The a.. locals hold a sliding 4x4 window of the matrix around rows
+  // k-1 and k. sdiag[1] and diag[1] are read unconditionally, so both
+  // vectors need at least two entries -- presumably guaranteed by the
+  // caller; TODO confirm.
+  template
+  void symmetric_Wilkinson_qr_step(const VECT1& diag_, const VECT2& sdiag_,
+                                   const MAT &ZZ, bool compute_z) {
+    VECT1& diag = const_cast(diag_);
+    VECT2& sdiag = const_cast(sdiag_);
+    MAT& Z = const_cast(ZZ);
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+
+    size_type n = vect_size(diag);
+    R d = (diag[n-2] - diag[n-1]) / R(2);
+    R e = gmm::abs_sqr(sdiag[n-2]);
+    R nu = d + gmm::sgn(d)*gmm::sqrt(d*d+e);
+    if (nu == R(0)) { sdiag[n-2] = T(0); return; }
+    R mu = diag[n-1] - e / nu;
+    T x = diag[0] - T(mu), z = sdiag[0], c, s;
+
+    T a01(0), a02(0);
+    T a10(0), a11(diag[0]), a12(gmm::conj(sdiag[0])), a13(0);
+    T a20(0), a21(sdiag[0]), a22(diag[1]), a23(gmm::conj(sdiag[1]));
+    T a31(0), a32(sdiag[1]);
+
+    for (size_type k = 1; k < n; ++k) {
+      Givens_rotation(x, z, c, s);
+
+      if (k > 1) Apply_Givens_rotation_left(a10, a20, c, s);
+      Apply_Givens_rotation_left(a11, a21, c, s);
+      Apply_Givens_rotation_left(a12, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_left(a13, a23, c, s);
+
+      if (k > 1) Apply_Givens_rotation_right(a01, a02, c, s);
+      Apply_Givens_rotation_right(a11, a12, c, s);
+      Apply_Givens_rotation_right(a21, a22, c, s);
+      if (k < n-1) Apply_Givens_rotation_right(a31, a32, c, s);
+
+      if (compute_z) col_rot(Z, c, s, k-1, k);
+
+      // Write the window back, averaging mirrored entries so the stored
+      // sub-diagonal stays consistent with a Hermitian matrix.
+      diag[k-1] = gmm::real(a11);
+      diag[k] = gmm::real(a22);
+      if (k > 1) sdiag[k-2] = (gmm::conj(a01) + a10) / R(2);
+      sdiag[k-1] = (gmm::conj(a12) + a21) / R(2);
+
+      x = sdiag[k-1]; z = (gmm::conj(a13) + a31) / R(2);
+
+      // Slide the window one position down the diagonal.
+      a01 = a12; a02 = a13;
+      a10 = a21; a11 = a22; a12 = a23; a13 = T(0);
+      a20 = a31; a21 = a32; a31 = T(0);
+
+      if (k < n-1) {
+        sdiag[k] = (gmm::conj(a23) + a32) / R(2);
+        a22 = T(diag[k+1]); a32 = sdiag[k+1]; a23 = gmm::conj(a32);
+      }
+    }
+  }
+
+  /* ********************************************************************* */
+  /*    Implicit QR algorithm for symmetric or hermitian matrices.         */
+  /* ********************************************************************* */
+
+  // implicit QR method for real square symmetric matrices or complex
+  // hermitian matrices.
+  // eigval has to be a complex vector if A has complex eigenvalues.
+  // complexity about 4n^3/3, 9n^3 if eigenvectors are computed
+  // Older variant working on the full n x n matrix. eigval_ and eigvect_
+  // are outputs, written through const_cast.
+  template
+  void symmetric_qr_algorithm_old(const MAT1 &A, const VECT &eigval_,
+                                  const MAT2 &eigvect_,
+                                  tol_type_for_qr tol = default_tol_for_qr,
+                                  bool compvect = true) {
+    VECT &eigval = const_cast(eigval_);
+    MAT2 &eigvect = const_cast(eigvect_);
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+    size_type n = mat_nrows(A), q = 0, p, ite = 0;
+    dense_matrix Tri(n, n);
+    gmm::copy(A, Tri);
+
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+
+    symmetric_qr_stop_criterion(Tri, p, q, tol);
+
+    while (q < n) {
+
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q);
+      // Without eigenvectors an empty column range is passed for eigvect.
+      if (!compvect) SUBK = sub_interval(0,0);
+      symmetric_Wilkinson_qr_step(sub_matrix(Tri, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      // Inside the loop the stop test uses twice the tolerance.
+      symmetric_qr_stop_criterion(Tri, p, q, tol*R(2));
+      ++ite;
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed. Probably, your matrix"
+                  " is not real symmetric or complex hermitian");
+    }
+
+    extract_eig(Tri, eigval, tol);
+  }
+
+  // Current variant: after Householder_tridiagonalization the iteration
+  // works only on the diagonal and sub-diagonal vectors. eigval_ and
+  // eigvect_ are outputs, written through const_cast. n == 0 and n == 1
+  // are handled up front.
+  template
+  void symmetric_qr_algorithm(const MAT1 &A, const VECT &eigval_,
+                              const MAT2 &eigvect_,
+                              tol_type_for_qr tol = default_tol_for_qr,
+                              bool compvect = true) {
+    VECT &eigval = const_cast(eigval_);
+    MAT2 &eigvect = const_cast(eigvect_);
+    typedef typename linalg_traits::value_type T;
+    typedef typename number_traits::magnitude_type R;
+
+    size_type n = mat_nrows(A), q = 0, p, ite = 0;
+    if (compvect) gmm::copy(identity_matrix(), eigvect);
+    if (n == 0) return;
+    if (n == 1) { eigval[0]=gmm::real(A(0,0)); return; }
+    dense_matrix Tri(n, n);
+    gmm::copy(A, Tri);
+
+    Householder_tridiagonalization(Tri, eigvect, compvect);
+
+    // sdiag has n entries although only the first n-1 are filled from Tri;
+    // the last one keeps its initial value.
+    std::vector diag(n);
+    std::vector sdiag(n);
+    for (size_type i = 0; i < n; ++i)
+      { diag[i] = gmm::real(Tri(i, i)); if (i+1 < n) sdiag[i] = Tri(i+1, i); }
+
+    symmetric_qr_stop_criterion(diag, sdiag, p, q, tol);
+
+    while (q < n) {
+      sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q);
+      if (!compvect) SUBK = sub_interval(0,0);
+
+      symmetric_Wilkinson_qr_step(sub_vector(diag, SUBI),
+                                  sub_vector(sdiag, SUBI),
+                                  sub_matrix(eigvect, SUBJ, SUBK), compvect);
+
+      // Inside the loop the stop test uses three times the tolerance.
+      symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3));
+      ++ite;
+      GMM_ASSERT1(ite < n*100, "QR algorithm failed.");
+    }
+
+    gmm::copy(diag, eigval);
+  }
+
+
+  // Eigenvalues only: forwards with a 0x0 placeholder matrix and
+  // compvect = false.
+  template
+  void symmetric_qr_algorithm(const MAT1 &a, VECT &eigval,
+                              tol_type_for_qr tol = default_tol_for_qr) {
+    dense_matrix::value_type> m(0,0);
+    symmetric_qr_algorithm(a, eigval, m, tol, false);
+  }
+
+
+}
+
+#endif
+
diff --git a/gmm/gmm_dense_sylvester.h b/gmm/gmm_dense_sylvester.h
new file mode 100644
index 000000000..3b184ccbf
--- /dev/null
+++ b/gmm/gmm_dense_sylvester.h
@@ -0,0 +1,174 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2003-2017 Yves Renard
+
+ This file is a part
of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/** @file gmm_dense_sylvester.h + @author Yves Renard + @date June 5, 2003. + @brief Sylvester equation solver. +*/ +#ifndef GMM_DENSE_SYLVESTER_H +#define GMM_DENSE_SYLVESTER_H + +#include "gmm_kernel.h" + +namespace gmm { + + /* ********************************************************************* */ + /* Kronecker system matrix. 
*/
+  /* ********************************************************************* */
+
+  /** Kronecker product of m1 and m2, written block-wise into m3_.
+   *
+   *  With m1 of size m x n and m2 of size l x k, the block of m3_ made of
+   *  rows [l*i, l*i+l) and columns [k*j, k*j+k) receives m1(i,j) * m2.
+   *
+   *  @param m1   left factor (m x n).
+   *  @param m2   right factor (l x k).
+   *  @param m3_  destination; must already have size (m*l) x (n*k) (checked
+   *              with GMM_ASSERT2). It is declared const and written
+   *              through a const_cast.
+   *  @param init when true each block overwrites the destination, when
+   *              false it is added to the current content.
+   */
+  template <typename MAT1, typename MAT2, typename MAT3>
+  void kron(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3_,
+            bool init = true) {
+    MAT3 &m3 = const_cast<MAT3 &>(m3_);
+    size_type m = mat_nrows(m1), n = mat_ncols(m1);
+    size_type l = mat_nrows(m2), k = mat_ncols(m2);
+
+    GMM_ASSERT2(mat_nrows(m3) == m*l && mat_ncols(m3) == n*k,
+                "dimensions mismatch");
+
+    for (size_type i = 0; i < m; ++i)
+      // j indexes the columns of m1, so it is bounded by n (the destination
+      // has n*k columns), not by the row count m.
+      for (size_type j = 0; j < n; ++j)
+        if (init)
+          gmm::copy(gmm::scaled(m2, m1(i,j)),
+                    gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                    sub_interval(k*j, k)));
+        else
+          gmm::add(gmm::scaled(m2, m1(i,j)),
+                   gmm::sub_matrix(m3, sub_interval(l*i, l),
+                                   sub_interval(k*j, k)));
+  }
+
+
+  /* ********************************************************************* */
+  /*    Copy a matrix into a vector.                                       */
+  /* ********************************************************************* */
+
+  /** Stack the columns of A into v: column i of A goes to v[i*m, i*m+m),
+   *  m being the number of rows of A. Column-access implementation.
+   */
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(mat_col(A, i), sub_vector(v, sub_interval(i*m, m)));
+  }
+
+  /** Same as the col_major version; used for the row_and_col tag. */
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_and_col)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  /** Same as the col_major version; used for the col_and_row tag. */
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, col_and_row)
+  { colmatrix_to_vector(A, v, col_major()); }
+
+  /** Row-access implementation producing the same column-stacked layout:
+   *  row i of A (n entries) is scattered to v[i], v[i+m], v[i+2m], ...
+   */
+  template <typename MAT, typename VECT>
+  void colmatrix_to_vector(const MAT &A, VECT &v, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(mat_row(A, i), sub_vector(v, sub_slice(i, n, m)));
+  }
+
+  /** Entry point: copy matrix A into vector v_ column after column,
+   *  dispatching on the sub_orientation tag of MAT. v_ is declared const
+   *  and written through a const_cast; its size must be
+   *  mat_nrows(A) * mat_ncols(A).
+   */
+  template <typename MAT, typename VECT> inline
+  void colmatrix_to_vector(const MAT &A, const VECT &v_) {
+    VECT &v = const_cast<VECT &>(v_);
+    colmatrix_to_vector(A, v, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+
+  /* ********************************************************************* */
+  /*    Copy a vector into a matrix.                                       
*/
+  /* ********************************************************************* */
+
+  /** Fill A from the column-stacked vector v: v[i*m, i*m+m) becomes column
+   *  i of A, m being the number of rows of A. Column-access implementation.
+   */
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < n; ++i)
+      gmm::copy(sub_vector(v, sub_interval(i*m, m)), mat_col(A, i));
+  }
+
+  /** Same as the col_major version; used for the row_and_col tag. */
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_and_col)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  /** Same as the col_major version; used for the col_and_row tag. */
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, col_and_row)
+  { vector_to_colmatrix(v, A, col_major()); }
+
+  /** Row-access implementation reading the same column-stacked layout:
+   *  row i of A is gathered from v[i], v[i+m], v[i+2m], ... (n entries).
+   */
+  template <typename MAT, typename VECT>
+  void vector_to_colmatrix(const VECT &v, MAT &A, row_major) {
+    size_type m = mat_nrows(A), n = mat_ncols(A);
+    GMM_ASSERT2(m*n == vect_size(v), "dimensions mismatch");
+    for (size_type i = 0; i < m; ++i)
+      gmm::copy(sub_vector(v, sub_slice(i, n, m)), mat_row(A, i));
+  }
+
+  /** Entry point: copy the column-stacked vector v into matrix A_,
+   *  dispatching on the sub_orientation tag of MAT. A_ is declared const
+   *  and written through a const_cast; it must already be sized so that
+   *  mat_nrows(A_) * mat_ncols(A_) == vect_size(v).
+   */
+  template <typename MAT, typename VECT> inline
+  void vector_to_colmatrix(const VECT &v, const MAT &A_) {
+    MAT &A = const_cast<MAT &>(A_);
+    vector_to_colmatrix(v, A, typename linalg_traits<MAT>::sub_orientation());
+  }
+
+  /* ********************************************************************* */
+  /*    Solve sylvester equation.                                          */
+  /* ********************************************************************* */
+
+  // very prohibitive solver, to be replaced ...
+ template + void sylvester(const MAT1 &m1, const MAT2 &m2, const MAT3 &m3, + const MAT4 &m4_) { + typedef typename linalg_traits::value_type T; + + MAT3 &m4 = const_cast(m4_); + size_type m = mat_nrows(m1), n = mat_ncols(m1); + size_type l = mat_nrows(m2), k = mat_ncols(m2); + + GMM_ASSERT2(m == n && l == k && m == mat_nrows(m3) && + l == mat_ncols(m3) && m == mat_nrows(m4) && l == mat_ncols(m4), + "dimensions mismatch"); + + gmm::dense_matrix akronb(m*l, m*l); + gmm::dense_matrix idm(m, m), idl(l,l); + gmm::copy(identity_matrix(), idm); + gmm::copy(identity_matrix(), idl); + std::vector x(m*l), c(m*l); + + kron(idl, m1, akronb); + kron(gmm::transposed(m2), idm, akronb, false); + + colmatrix_to_vector(m3, c); + lu_solve(akronb, c, x); + vector_to_colmatrix(x, m4); + + } +} + +#endif + diff --git a/gmm/gmm_domain_decomp.h b/gmm/gmm_domain_decomp.h new file mode 100644 index 000000000..89c1841cf --- /dev/null +++ b/gmm/gmm_domain_decomp.h @@ -0,0 +1,165 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2004-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/** @file gmm_domain_decomp.h + @author Yves Renard + @date May 21, 2004. + @brief Domain decomposition. +*/ +#ifndef GMM_DOMAIN_DECOMP_H__ +#define GMM_DOMAIN_DECOMP_H__ + +#include "gmm_kernel.h" +#include + + +namespace gmm { + + /** This function separates into small boxes of size msize with a ratio + * of overlap (in [0,1[) a set of points. The result is given into a + * vector of sparse matrices vB. 
+ */ + template + void rudimentary_regular_decomposition(std::vector pts, + double msize, + double overlap, + std::vector &vB) { + typedef typename linalg_traits::value_type value_type; + typedef abstract_null_type void_type; + typedef std::map map_type; + + size_type nbpts = pts.size(); + if (!nbpts || pts[0].size() == 0) { vB.resize(0); return; } + int dim = int(pts[0].size()); + + // computation of the global box and the number of sub-domains + Point pmin = pts[0], pmax = pts[0]; + for (size_type i = 1; i < nbpts; ++i) + for (int k = 0; k < dim; ++k) { + pmin[k] = std::min(pmin[k], pts[i][k]); + pmax[k] = std::max(pmax[k], pts[i][k]); + } + + std::vector nbsub(dim), mult(dim); + std::vector pts1(dim), pts2(dim); + size_type nbtotsub = 1; + for (int k = 0; k < dim; ++k) { + nbsub[k] = size_type((pmax[k] - pmin[k]) / msize)+1; + mult[k] = nbtotsub; nbtotsub *= nbsub[k]; + } + + std::vector subs(nbtotsub); + // points ventilation + std::vector ns(dim), na(dim), nu(dim); + for (size_type i = 0; i < nbpts; ++i) { + for (int k = 0; k < dim; ++k) { + double a = (pts[i][k] - pmin[k]) / msize; + ns[k] = size_type(a) - 1; na[k] = 0; + pts1[k] = int(a + overlap); pts2[k] = int(ceil(a-1.0-overlap)); + } + size_type sum = 0; + do { + bool ok = 1; + for (int k = 0; k < dim; ++k) + if ((ns[k] >= nbsub[k]) || (pts1[k] < int(ns[k])) + || (pts2[k] > int(ns[k]))) { ok = false; break; } + if (ok) { + size_type ind = ns[0]; + for (int k=1; k < dim; ++k) ind += ns[k]*mult[k]; + subs[ind][i] = void_type(); + } + for (int k = 0; k < dim; ++k) { + if (na[k] < 2) { na[k]++; ns[k]++; ++sum; break; } + na[k] = 0; ns[k] -= 2; sum -= 2; + } + } while (sum); + } + // delete too small domains. 
+ size_type nbmaxinsub = 0; + for (size_type i = 0; i < nbtotsub; ++i) + nbmaxinsub = std::max(nbmaxinsub, subs[i].size()); + + std::fill(ns.begin(), ns.end(), size_type(0)); + for (size_type i = 0; i < nbtotsub; ++i) { + if (subs[i].size() > 0 && subs[i].size() < nbmaxinsub / 10) { + + for (int k = 0; k < dim; ++k) nu[k] = ns[k]; + size_type nbmax = 0, imax = 0; + + for (int l = 0; l < dim; ++l) { + nu[l]--; + for (int m = 0; m < 2; ++m, nu[l]+=2) { + bool ok = true; + for (int k = 0; k < dim && ok; ++k) + if (nu[k] >= nbsub[k]) ok = false; + if (ok) { + size_type ind = ns[0]; + for (int k=1; k < dim; ++k) ind += ns[k]*mult[k]; + if (subs[ind].size() > nbmax) + { nbmax = subs[ind].size(); imax = ind; } + } + } + nu[l]--; + } + + if (nbmax > subs[i].size()) { + for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it) + subs[imax][it->first] = void_type(); + subs[i].clear(); + } + } + for (int k = 0; k < dim; ++k) + { ns[k]++; if (ns[k] < nbsub[k]) break; ns[k] = 0; } + } + + // delete empty domains. 
+ size_type effnb = 0; + for (size_type i = 0; i < nbtotsub; ++i) { + if (subs[i].size() > 0) + { if (i != effnb) std::swap(subs[i], subs[effnb]); ++effnb; } + } + + // build matrices + subs.resize(effnb); + vB.resize(effnb); + for (size_type i = 0; i < effnb; ++i) { + clear(vB[i]); resize(vB[i], nbpts, subs[i].size()); + size_type j = 0; + for (map_type::iterator it=subs[i].begin(); it!=subs[i].end(); ++it, ++j) + vB[i](it->first, j) = value_type(1); + } + } + + +} + + +#endif diff --git a/gmm/gmm_except.h b/gmm/gmm_except.h new file mode 100644 index 000000000..30b813a26 --- /dev/null +++ b/gmm/gmm_except.h @@ -0,0 +1,328 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/** @file gmm_except.h + @author Yves Renard + @author Julien Pommier + @date September 01, 2002. + @brief Definition of basic exceptions. +*/ + +#ifndef GMM_EXCEPT_H__ +#define GMM_EXCEPT_H__ + +#include "gmm_std.h" + +//provides external implementation of gmm_exception and logging. +#ifndef EXTERNAL_EXCEPT_ + +namespace gmm { + +/* *********************************************************************** */ +/* GetFEM++ generic errors. */ +/* *********************************************************************** */ + + class gmm_error: public std::logic_error { + public: + gmm_error(const std::string& what_arg): std::logic_error (what_arg) {} + }; + +#ifdef GETFEM_HAVE_PRETTY_FUNCTION +# define GMM_PRETTY_FUNCTION __PRETTY_FUNCTION__ +#else +# define GMM_PRETTY_FUNCTION "" +#endif + + // Errors : GMM_THROW should not be used on its own. + // GMM_ASSERT1 : Non-maskable errors. Typically for in/ouput and + // when the test do not significantly reduces the performance. + // GMM_ASSERT2 : All tests which are potentially performance + // consuming. Not hidden by default. Hidden when NDEBUG is + // defined. + // GMM_ASSERT3 : For internal checks. Hidden by default. Active + // only when DEBUG_MODE is defined. 
+ // __EXCEPTIONS is defined by gcc, _CPPUNWIND is defined by visual c++ +#if defined(__EXCEPTIONS) || defined(_CPPUNWIND) + inline void short_error_throw(const char *file, int line, const char *func, + const char *errormsg) { + std::stringstream msg__; + msg__ << "Error in " << file << ", line " << line << " " << func + << ": \n" << errormsg << std::ends; + throw gmm::gmm_error(msg__.str()); + } +# define GMM_THROW_(type, errormsg) { \ + std::stringstream msg__; \ + msg__ << "Error in " << __FILE__ << ", line " \ + << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n" \ + << errormsg << std::ends; \ + throw (type)(msg__.str()); \ + } +#else +#ifndef _MSC_VER +# define abort_no_return() ::abort() +#else +// apparently ::abort() on windows is not declared with __declspec(noreturn) so the compiler spits a lot of warnings when abort is used. +# define abort_no_return() { assert("GMM ABORT"==0); throw "GMM ABORT"; } +#endif + + inline void short_error_throw(const char *file, int line, const char *func, + const char *errormsg) { + std::stringstream msg__; + msg__ << "Error in " << file << ", line " << line << " " << func + << ": \n" << errormsg << std::ends; + std::cerr << msg__.str() << std::endl; + abort_no_return(); + } + +# define GMM_THROW_(type, errormsg) { \ + std::stringstream msg__; \ + msg__ << "Error in " << __FILE__ << ", line " \ + << __LINE__ << " " << GMM_PRETTY_FUNCTION << ": \n" \ + << errormsg; \ + std::cerr << msg__.str() << std::endl; \ + abort_no_return(); \ + } +#endif + +# define GMM_ASSERT1(test, errormsg) \ + { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); } + + inline void GMM_THROW() {} +#define GMM_THROW(a, b) { GMM_THROW_(a,b); gmm::GMM_THROW(); } + +#if defined(NDEBUG) +# define GMM_ASSERT2(test, errormsg) {} +# define GMM_ASSERT3(test, errormsg) {} +#elif !defined(GMM_FULL_NDEBUG) +# define GMM_ASSERT2(test, errormsg) \ + { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); } +# define GMM_ASSERT3(test, errormsg) \ + { if (!(test)) 
GMM_THROW_(gmm::gmm_error, errormsg); } +#else +# define GMM_ASSERT2(test, errormsg) \ + { if (!(test)) GMM_THROW_(gmm::gmm_error, errormsg); } +# define GMM_ASSERT3(test, errormsg) +#endif + +/* *********************************************************************** */ +/* GetFEM++ warnings. */ +/* *********************************************************************** */ + + // This allows to dynamically hide warnings + struct warning_level { + static int level(int l = -2) + { static int level_ = 3; return (l != -2) ? (level_ = l) : level_; } + }; + + inline void set_warning_level(int l) { warning_level::level(std::max(0,l)); } + inline int get_warning_level(void) { return warning_level::level(-2); } + + // This allows not to compile some Warnings +#ifndef GMM_WARNING_LEVEL +# define GMM_WARNING_LEVEL 4 +#endif + + // Warning levels : 0 always printed + // 1 very important : specify a possible error in the code. + // 2 important : specify a default of optimization for inst. + // 3 remark + // 4 ignored by default. 
+ +#define GMM_WARNING_MSG(level_, thestr) { \ + std::stringstream msg__; \ + msg__ << "Level " << level_ << " Warning in " << __FILE__ << ", line " \ + << __LINE__ << ": " << thestr; \ + std::cerr << msg__.str() << std::endl; \ + } + +#define GMM_WARNING0(thestr) GMM_WARNING_MSG(0, thestr) + +#if GMM_WARNING_LEVEL > 0 +# define GMM_WARNING1(thestr) \ + { if (1 <= gmm::warning_level::level()) GMM_WARNING_MSG(1, thestr) } +#else +# define GMM_WARNING1(thestr) {} +#endif + +#if GMM_WARNING_LEVEL > 1 +# define GMM_WARNING2(thestr) \ + { if (2 <= gmm::warning_level::level()) GMM_WARNING_MSG(2, thestr) } +#else +# define GMM_WARNING2(thestr) {} +#endif + +#if GMM_WARNING_LEVEL > 2 +# define GMM_WARNING3(thestr) \ + { if (3 <= gmm::warning_level::level()) GMM_WARNING_MSG(3, thestr) } +#else +# define GMM_WARNING3(thestr) {} +#endif + +#if GMM_WARNING_LEVEL > 3 +# define GMM_WARNING4(thestr) \ + { if (4 <= gmm::warning_level::level()) GMM_WARNING_MSG(4, thestr) } +#else +# define GMM_WARNING4(thestr) {} +#endif + +/* *********************************************************************** */ +/* GetFEM++ traces. */ +/* *********************************************************************** */ + + // This allows to dynamically hide traces + struct traces_level { + static int level(int l = -2) + { static int level_ = 3; return (l != -2) ? (level_ = l) : level_; } + }; + + inline void set_traces_level(int l) { traces_level::level(std::max(0,l)); } + + // This allow not too compile some Warnings +#ifndef GMM_TRACES_LEVEL +# define GMM_TRACES_LEVEL 4 +#endif + + // Traces levels : 0 always printed + // 1 Susceptible to occur once in a program. + // 2 Susceptible to occur occasionnaly in a program (10). + // 3 Susceptible to occur often (100). + // 4 Susceptible to occur very often (>1000). 
+ +#define GMM_TRACE_MSG_MPI // for Parallelized version +#define GMM_TRACE_MSG(level_, thestr) { \ + GMM_TRACE_MSG_MPI { \ + std::stringstream msg__; \ + msg__ << "Trace " << level_ << " in " << __FILE__ << ", line " \ + << __LINE__ << ": " << thestr; \ + std::cout << msg__.str() << std::endl; \ + } \ + } + +#define GMM_TRACE0(thestr) GMM_TRACE_MSG(0, thestr) + +#if GMM_TRACES_LEVEL > 0 +# define GMM_TRACE1(thestr) \ + { if (1 <= gmm::traces_level::level()) GMM_TRACE_MSG(1, thestr) } +#else +# define GMM_TRACE1(thestr) {} +#endif + +#if GMM_TRACES_LEVEL > 1 +# define GMM_TRACE2(thestr) \ + { if (2 <= gmm::traces_level::level()) GMM_TRACE_MSG(2, thestr) } +#else +# define GMM_TRACE2(thestr) {} +#endif + +#if GMM_TRACES_LEVEL > 2 +# define GMM_TRACE3(thestr) \ + { if (3 <= gmm::traces_level::level()) GMM_TRACE_MSG(3, thestr) } +#else +# define GMM_TRACE3(thestr) {} +#endif + +#if GMM_TRACES_LEVEL > 3 +# define GMM_TRACE4(thestr) \ + { if (4 <= gmm::traces_level::level()) GMM_TRACE_MSG(4, thestr) } +#else +# define GMM_TRACE4(thestr) {} +#endif + + + /* ********************************************************************* */ + /* Definitions for compatibility with old versions. */ + /* ********************************************************************* */ + +#define GMM_STANDARD_CATCH_ERROR catch(std::logic_error e) \ + { \ + std::cerr << "============================================\n"; \ + std::cerr << "| An error has been detected !!! |\n"; \ + std::cerr << "============================================\n"; \ + std::cerr << e.what() << std::endl << std::endl; \ + exit(1); \ + } \ + catch(const std::runtime_error &e) \ + { \ + std::cerr << "============================================\n"; \ + std::cerr << "| An error has been detected !!! 
|\n"; \ + std::cerr << "============================================\n"; \ + std::cerr << e.what() << std::endl << std::endl; \ + exit(1); \ + } \ + catch(const std::bad_alloc &) { \ + std::cerr << "============================================\n"; \ + std::cerr << "| A bad allocation has been detected !!! |\n"; \ + std::cerr << "============================================\n"; \ + exit(1); \ + } \ + catch(const std::bad_typeid &) { \ + std::cerr << "============================================\n"; \ + std::cerr << "| A bad typeid has been detected !!! |\n"; \ + std::cerr << "============================================\n"; \ + exit(1); \ + } \ + catch(const std::bad_exception &) { \ + std::cerr << "============================================\n"; \ + std::cerr << "| A bad exception has been detected !!! |\n"; \ + std::cerr << "============================================\n"; \ + exit(1); \ + } \ + catch(const std::bad_cast &) { \ + std::cerr << "============================================\n"; \ + std::cerr << "| A bad cast has been detected !!! |\n"; \ + std::cerr << "============================================\n"; \ + exit(1); \ + } \ + catch(...) { \ + std::cerr << "============================================\n"; \ + std::cerr << "| An unknown error has been detected !!! 
|\n"; \ + std::cerr << "============================================\n"; \ + exit(1); \ + } + // catch(ios_base::failure) { + // std::cerr << "============================================\n"; + // std::cerr << "| A ios_base::failure has been detected !!!|\n"; + // std::cerr << "============================================\n"; + // exit(1); + // } + +#if defined(__GNUC__) && (__GNUC__ > 3) +# define GMM_SET_EXCEPTION_DEBUG \ + std::set_terminate(__gnu_cxx::__verbose_terminate_handler); +#else +# define GMM_SET_EXCEPTION_DEBUG +#endif + +} +#else +#include +#endif /* EXTERNAL_EXCEPT_*/ +#endif /* GMM_EXCEPT_H__ */ diff --git a/gmm/gmm_inoutput.h b/gmm/gmm_inoutput.h new file mode 100644 index 000000000..0e27b17cc --- /dev/null +++ b/gmm/gmm_inoutput.h @@ -0,0 +1,1176 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard, Julien Pommier + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_inoutput.h + @author Yves Renard + @author Julien Pommier + @date July 8, 2003. + @brief Input/output on sparse matrices + + Support Harwell-Boeing and Matrix-Market formats. +*/ +#ifndef GMM_INOUTPUT_H +#define GMM_INOUTPUT_H + +#include +#include "gmm_kernel.h" +namespace gmm { + + /*************************************************************************/ + /* */ + /* Functions to read and write Harwell Boeing format. */ + /* */ + /*************************************************************************/ + + // Fri Aug 15 16:29:47 EDT 1997 + // + // Harwell-Boeing File I/O in C + // V. 1.0 + // + // National Institute of Standards and Technology, MD. + // K.A. Remington + // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // NOTICE + // + // Permission to use, copy, modify, and distribute this software and + // its documentation for any purpose and without fee is hereby granted + // provided that the above copyright notice appear in all copies and + // that both the copyright notice and this permission notice appear in + // supporting documentation. + // + // Neither the Author nor the Institution (National Institute of Standards + // and Technology) make any representations about the suitability of this + // software for any purpose. This software is provided "as is" without + // expressed or implied warranty. 
+ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + inline void IOHBTerminate(const char *a) { GMM_ASSERT1(false, a);} + + inline bool is_complex_double__(std::complex) { return true; } + inline bool is_complex_double__(double) { return false; } + + inline int ParseIfmt(const char *fmt, int* perline, int* width) { + if (SECURE_NONCHAR_SSCANF(fmt, " (%dI%d)", perline, width) != 2) { + *perline = 1; + int s = SECURE_NONCHAR_SSCANF(fmt, " (I%d)", width); + GMM_ASSERT1(s == 1, "invalid HB I-format: " << fmt); + } + return *width; + } + + inline int ParseRfmt(const char *fmt, int* perline, int* width, + int* prec, int* flag) { + char p; + *perline = *width = *flag = *prec = 0; +#ifdef GMM_SECURE_CRT + if (sscanf_s(fmt, " (%d%c%d.%d)", perline, &p, sizeof(char), width, prec) + < 3 || !strchr("PEDF", p)) +#else + if (sscanf(fmt, " (%d%c%d.%d)", perline, &p, width, prec) < 3 + || !strchr("PEDF", p)) +#endif + { + *perline = 1; +#ifdef GMM_SECURE_CRT + int s = sscanf_s(fmt, " (%c%d.%d)", &p, sizeof(char), width, prec); +#else + int s = sscanf(fmt, " (%c%d.%d)", &p, width, prec); +#endif + GMM_ASSERT1(s>=2 && strchr("PEDF",p), "invalid HB REAL format: " << fmt); + } + *flag = p; + return *width; + } + + /** matrix input/output for Harwell-Boeing format */ + struct HarwellBoeing_IO { + int nrows() const { return Nrow; } + int ncols() const { return Ncol; } + int nnz() const { return Nnzero; } + int is_complex() const { return Type[0] == 'C'; } + int is_symmetric() const { return Type[1] == 'S'; } + int is_hermitian() const { return Type[1] == 'H'; } + HarwellBoeing_IO() { clear(); } + HarwellBoeing_IO(const char *filename) { clear(); open(filename); } + ~HarwellBoeing_IO() { close(); } + /** open filename and reads header */ + void open(const char *filename); + /** read the opened file */ + template void read(csc_matrix& A); + template void read(MAT &M) IS_DEPRECATED; + template + static void write(const char *filename, const csc_matrix& A); + 
template + static void write(const char *filename, const csc_matrix& A, + const std::vector &rhs); + template + static void write(const char *filename, + const csc_matrix_ref& A); + template + static void write(const char *filename, + const csc_matrix_ref& A, + const std::vector &rhs); + + /** static method for saving the matrix */ + template static void write(const char *filename, + const MAT& A) IS_DEPRECATED; + private: + FILE *f; + char Title[73], Key[9], Rhstype[4], Type[4]; + int Nrow, Ncol, Nnzero, Nrhs; + char Ptrfmt[17], Indfmt[17], Valfmt[21], Rhsfmt[21]; + int Ptrcrd, Indcrd, Valcrd, Rhscrd; + int lcount; + + + void close() { if (f) fclose(f); clear(); } + void clear() { + Nrow = Ncol = Nnzero = Nrhs = 0; f = 0; lcount = 0; + memset(Type, 0, sizeof Type); + memset(Key, 0, sizeof Key); + memset(Title, 0, sizeof Title); + } + char *getline(char *buf) { + char *p = fgets(buf, BUFSIZ, f); ++lcount; + int s = SECURE_NONCHAR_SSCANF(buf,"%*s"); + GMM_ASSERT1(s >= 0 && p != 0, + "blank line in HB file at line " << lcount); + return buf; + } + + int substrtoi(const char *p, size_type len) { + char s[100]; len = std::min(len, sizeof s - 1); + SECURE_STRNCPY(s, 100, p, len); s[len] = 0; return atoi(s); + } + double substrtod(const char *p, size_type len, int Valflag) { + char s[100]; len = std::min(len, sizeof s - 1); + SECURE_STRNCPY(s, 100, p, len); s[len] = 0; + if ( Valflag != 'F' && !strchr(s,'E')) { + /* insert a char prefix for exp */ + int last = int(strlen(s)); + for (int j=last+1;j>=0;j--) { + s[j] = s[j-1]; + if ( s[j] == '+' || s[j] == '-' ) { + s[j-1] = char(Valflag); + break; + } + } + } + return atof(s); + } + template + int readHB_data(IND_TYPE colptr[], IND_TYPE rowind[], + double val[]) { + /***********************************************************************/ + /* This function opens and reads the specified file, interpreting its */ + /* contents as a sparse matrix stored in the Harwell/Boeing standard */ + /* format and creating compressed 
column storage scheme vectors to */ + /* hold the index and nonzero value information. */ + /* */ + /* ---------- */ + /* **CAVEAT** */ + /* ---------- */ + /* Parsing real formats from Fortran is tricky, and this file reader */ + /* does not claim to be foolproof. It has been tested for cases */ + /* when the real values are printed consistently and evenly spaced on */ + /* each line, with Fixed (F), and Exponential (E or D) formats. */ + /* */ + /* ** If the input file does not adhere to the H/B format, the ** */ + /* ** results will be unpredictable. ** */ + /* */ + /***********************************************************************/ + int i,ind,col,offset,count; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Valperline, Valwidth, Valprec, Nentries; + int Valflag = 'D'; /* Indicates 'E','D', or 'F' float format */ + char line[BUFSIZ]; + gmm::standard_locale sl; + + + /* Parse the array input formats from Line 3 of HB file */ + ParseIfmt(Ptrfmt,&Ptrperline,&Ptrwidth); + ParseIfmt(Indfmt,&Indperline,&Indwidth); + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + ParseRfmt(Valfmt,&Valperline,&Valwidth,&Valprec,&Valflag); + } + + /* Read column pointer array: */ + offset = 0; /* if base 0 storage is declared (via macro def), */ + /* then storage entries are offset by 1 */ + + for (count = 0, i=0;i Ncol) break; + colptr[count] = substrtoi(line+col,Ptrwidth)-offset; + count++; col += Ptrwidth; + } + } + + /* Read row index array: */ + for (count = 0, i=0;i(strchr(line,'D')) )) *p = 'E'; + } + for (col = 0, ind = 0;ind csc matrices */ + template void + HarwellBoeing_IO::read(csc_matrix& A) { + + // typedef typename csc_matrix::IND_TYPE IND_TYPE; + + GMM_ASSERT1(f, "no file opened!"); + GMM_ASSERT1(Type[0] != 'P', + "Bad HB matrix format (pattern matrices not supported)"); + GMM_ASSERT1(!is_complex_double__(T()) || Type[0] != 'R', + "Bad HB matrix format (file contains a REAL matrix)"); + GMM_ASSERT1(is_complex_double__(T()) || Type[0] != 'C', + "Bad HB 
matrix format (file contains a COMPLEX matrix)"); + A.nc = ncols(); A.nr = nrows(); + A.jc.resize(ncols()+1); + A.ir.resize(nnz()); + A.pr.resize(nnz()); + readHB_data(&A.jc[0], &A.ir[0], (double*)&A.pr[0]); + for (int i = 0; i <= ncols(); ++i) { A.jc[i] += shift; A.jc[i] -= 1; } + for (int i = 0; i < nnz(); ++i) { A.ir[i] += shift; A.ir[i] -= 1; } + } + + template void + HarwellBoeing_IO::read(MAT &M) { + csc_matrix::value_type> csc; + read(csc); + resize(M, mat_nrows(csc), mat_ncols(csc)); + copy(csc, M); + } + + template + inline int writeHB_mat_double(const char* filename, int M, int N, int nz, + const IND_TYPE colptr[], + const IND_TYPE rowind[], + const double val[], int Nrhs, + const double rhs[], const double guess[], + const double exact[], const char* Title, + const char* Key, const char* Type, + const char* Ptrfmt, const char* Indfmt, + const char* Valfmt, const char* Rhsfmt, + const char* Rhstype, int shift) { + /************************************************************************/ + /* The writeHB function opens the named file and writes the specified */ + /* matrix and optional right-hand-side(s) to that file in */ + /* Harwell-Boeing format. 
*/ + /* */ + /* For a description of the Harwell Boeing standard, see: */ + /* Duff, et al., ACM TOMS Vol.15, No.1, March 1989 */ + /* */ + /************************************************************************/ + FILE *out_file; + int i, entry, offset, j, acount, linemod; + int totcrd, ptrcrd, indcrd, valcrd, rhscrd; + int nvalentries, nrhsentries; + int Ptrperline, Ptrwidth, Indperline, Indwidth; + int Rhsperline, Rhswidth, Rhsprec, Rhsflag; + int Valperline, Valwidth, Valprec; + int Valflag; /* Indicates 'E','D', or 'F' float format */ + char pformat[16],iformat[16],vformat[19],rformat[19]; + // char *pValflag, *pRhsflag; + gmm::standard_locale sl; + + if ( Type[0] == 'C' ) + { nvalentries = 2*nz; nrhsentries = 2*M; } + else + { nvalentries = nz; nrhsentries = M; } + + if ( filename != NULL ) { + SECURE_FOPEN(&out_file, filename, "w"); + GMM_ASSERT1(out_file != NULL, "Error: Cannot open file: " << filename); + } else out_file = stdout; + + if ( Ptrfmt == NULL ) Ptrfmt = "(8I10)"; + ParseIfmt(Ptrfmt, &Ptrperline, &Ptrwidth); + SECURE_SPRINTF1(pformat,sizeof(pformat),"%%%dd",Ptrwidth); + ptrcrd = (N+1)/Ptrperline; + if ( (N+1)%Ptrperline != 0) ptrcrd++; + + if ( Indfmt == NULL ) Indfmt = Ptrfmt; + ParseIfmt(Indfmt, &Indperline, &Indwidth); + SECURE_SPRINTF1(iformat,sizeof(iformat), "%%%dd",Indwidth); + indcrd = nz/Indperline; + if ( nz%Indperline != 0) indcrd++; + + if ( Type[0] != 'P' ) { /* Skip if pattern only */ + if ( Valfmt == NULL ) Valfmt = "(4E21.13)"; + ParseRfmt(Valfmt, &Valperline, &Valwidth, &Valprec, &Valflag); +// if (Valflag == 'D') { +// pValflag = (char *) strchr(Valfmt,'D'); +// *pValflag = 'E'; +// } + if (Valflag == 'F') + SECURE_SPRINTF2(vformat, sizeof(vformat), "%% %d.%df", Valwidth, + Valprec); + else + SECURE_SPRINTF2(vformat, sizeof(vformat), "%% %d.%dE", Valwidth, + Valprec); + valcrd = nvalentries/Valperline; + if ( nvalentries%Valperline != 0) valcrd++; + } else valcrd = 0; + + if ( Nrhs > 0 ) { + if ( Rhsfmt == NULL ) Rhsfmt = 
Valfmt; + ParseRfmt(Rhsfmt,&Rhsperline,&Rhswidth,&Rhsprec, &Rhsflag); + if (Rhsflag == 'F') + SECURE_SPRINTF2(rformat,sizeof(rformat), "%% %d.%df",Rhswidth,Rhsprec); + else + SECURE_SPRINTF2(rformat,sizeof(rformat), "%% %d.%dE",Rhswidth,Rhsprec); +// if (Valflag == 'D') { +// pRhsflag = (char *) strchr(Rhsfmt,'D'); +// *pRhsflag = 'E'; +// } + rhscrd = nrhsentries/Rhsperline; + if ( nrhsentries%Rhsperline != 0) rhscrd++; + if ( Rhstype[1] == 'G' ) rhscrd+=rhscrd; + if ( Rhstype[2] == 'X' ) rhscrd+=rhscrd; + rhscrd*=Nrhs; + } else rhscrd = 0; + + totcrd = 4+ptrcrd+indcrd+valcrd+rhscrd; + + + /* Print header information: */ + + fprintf(out_file,"%-72s%-8s\n%14d%14d%14d%14d%14d\n",Title, Key, totcrd, + ptrcrd, indcrd, valcrd, rhscrd); + fprintf(out_file,"%3s%11s%14d%14d%14d%14d\n",Type," ", M, N, nz, 0); + fprintf(out_file,"%-16s%-16s%-20s", Ptrfmt, Indfmt, Valfmt); + if ( Nrhs != 0 ) { + /* Print Rhsfmt on fourth line and */ + /* optional fifth header line for auxillary vector information:*/ + fprintf(out_file,"%-20s\n%-14s%d\n",Rhsfmt,Rhstype,Nrhs); + } + else + fprintf(out_file,"\n"); + + offset = 1 - shift; /* if base 0 storage is declared (via macro def), */ + /* then storage entries are offset by 1 */ + + /* Print column pointers: */ + for (i = 0; i < N+1; i++) { + entry = colptr[i]+offset; + fprintf(out_file,pformat,entry); + if ( (i+1)%Ptrperline == 0 ) fprintf(out_file,"\n"); + } + + if ( (N+1) % Ptrperline != 0 ) fprintf(out_file,"\n"); + + /* Print row indices: */ + for (i=0;i 0 ) { + for (j=0;j void + HarwellBoeing_IO::write(const char *filename, + const csc_matrix& A) { + write(filename, csc_matrix_ref + (&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc)); + } + + template void + HarwellBoeing_IO::write(const char *filename, + const csc_matrix& A, + const std::vector &rhs) { + write(filename, csc_matrix_ref + (&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc), rhs); + } + + template void + HarwellBoeing_IO::write(const char *filename, + const csc_matrix_ref& A) { + const 
char *t = 0; + if (is_complex_double__(T())) + if (mat_nrows(A) == mat_ncols(A)) t = "CUA"; else t = "CRA"; + else + if (mat_nrows(A) == mat_ncols(A)) t = "RUA"; else t = "RRA"; + writeHB_mat_double(filename, int(mat_nrows(A)), int(mat_ncols(A)), + A.jc[mat_ncols(A)], A.jc, A.ir, + (const double *)A.pr, + 0, 0, 0, 0, "GETFEM++ CSC MATRIX", "CSCMAT", + t, 0, 0, 0, 0, "F", shift); + } + + template void + HarwellBoeing_IO::write(const char *filename, + const csc_matrix_ref& A, + const std::vector &rhs) { + const char *t = 0; + if (is_complex_double__(T())) + if (mat_nrows(A) == mat_ncols(A)) t = "CUA"; else t = "CRA"; + else + if (mat_nrows(A) == mat_ncols(A)) t = "RUA"; else t = "RRA"; + int Nrhs = gmm::vect_size(rhs) / mat_nrows(A); + writeHB_mat_double(filename, int(mat_nrows(A)), int(mat_ncols(A)), + A.jc[mat_ncols(A)], A.jc, A.ir, + (const double *)A.pr, + Nrhs, (const double *)(&rhs[0]), 0, 0, + "GETFEM++ CSC MATRIX", "CSCMAT", + t, 0, 0, 0, 0, "F ", shift); + } + + + template void + HarwellBoeing_IO::write(const char *filename, const MAT& A) { + gmm::csc_matrix::value_type> + tmp(gmm::mat_nrows(A), gmm::mat_ncols(A)); + gmm::copy(A,tmp); + HarwellBoeing_IO::write(filename, tmp); + } + + /** save a "double" or "std::complex" csc matrix into a + HarwellBoeing file + */ + template inline void + Harwell_Boeing_save(const std::string &filename, + const csc_matrix& A) + { HarwellBoeing_IO::write(filename.c_str(), A); } + + /** save a reference on "double" or "std::complex" csc matrix + into a HarwellBoeing file + */ + template inline void + Harwell_Boeing_save(const std::string &filename, + const csc_matrix_ref& A) + { HarwellBoeing_IO::write(filename.c_str(), A); } + + /** save a "double" or "std::complex" generic matrix + into a HarwellBoeing file making a copy in a csc matrix + */ + template inline void + Harwell_Boeing_save(const std::string &filename, const MAT& A) { + gmm::csc_matrix::value_type> + tmp(gmm::mat_nrows(A), gmm::mat_ncols(A)); + gmm::copy(A, tmp); 
+ HarwellBoeing_IO::write(filename.c_str(), tmp); + } + + template inline void + Harwell_Boeing_save(const std::string &filename, const MAT& A, + const VECT &RHS) { + typedef typename gmm::linalg_traits::value_type T; + gmm::csc_matrix tmp(gmm::mat_nrows(A), gmm::mat_ncols(A)); + gmm::copy(A, tmp); + std::vector tmprhs(gmm::vect_size(RHS)); + gmm::copy(RHS, tmprhs); + HarwellBoeing_IO::write(filename.c_str(), tmp, tmprhs); + } + + /** load a "double" or "std::complex" csc matrix from a + HarwellBoeing file + */ + template void + Harwell_Boeing_load(const std::string &filename, csc_matrix& A) { + HarwellBoeing_IO h(filename.c_str()); h.read(A); + } + + /** load a "double" or "std::complex" generic matrix from a + HarwellBoeing file + */ + template void + Harwell_Boeing_load(const std::string &filename, MAT& A) { + csc_matrix::value_type> csc; + Harwell_Boeing_load(filename, csc); + resize(A, mat_nrows(csc), mat_ncols(csc)); + copy(csc, A); + } + + /*************************************************************************/ + /* */ + /* Functions to read and write MatrixMarket format. */ + /* */ + /*************************************************************************/ + + /* + * Matrix Market I/O library for ANSI C + * + * See http://math.nist.gov/MatrixMarket for details. 
+ * + * + */ + +#define MM_MAX_LINE_LENGTH 1025 +#define MatrixMarketBanner "%%MatrixMarket" +#define MM_MAX_TOKEN_LENGTH 64 + + typedef char MM_typecode[4]; + + /******************* MM_typecode query functions *************************/ + +#define mm_is_matrix(typecode) ((typecode)[0]=='M') + +#define mm_is_sparse(typecode) ((typecode)[1]=='C') +#define mm_is_coordinate(typecode) ((typecode)[1]=='C') +#define mm_is_dense(typecode) ((typecode)[1]=='A') +#define mm_is_array(typecode) ((typecode)[1]=='A') + +#define mm_is_complex(typecode) ((typecode)[2]=='C') +#define mm_is_real(typecode) ((typecode)[2]=='R') +#define mm_is_pattern(typecode) ((typecode)[2]=='P') +#define mm_is_integer(typecode) ((typecode)[2]=='I') + +#define mm_is_symmetric(typecode) ((typecode)[3]=='S') +#define mm_is_general(typecode) ((typecode)[3]=='G') +#define mm_is_skew(typecode) ((typecode)[3]=='K') +#define mm_is_hermitian(typecode) ((typecode)[3]=='H') + + /******************* MM_typecode modify fucntions ************************/ + +#define mm_set_matrix(typecode) ((*typecode)[0]='M') +#define mm_set_coordinate(typecode) ((*typecode)[1]='C') +#define mm_set_array(typecode) ((*typecode)[1]='A') +#define mm_set_dense(typecode) mm_set_array(typecode) +#define mm_set_sparse(typecode) mm_set_coordinate(typecode) + +#define mm_set_complex(typecode) ((*typecode)[2]='C') +#define mm_set_real(typecode) ((*typecode)[2]='R') +#define mm_set_pattern(typecode) ((*typecode)[2]='P') +#define mm_set_integer(typecode) ((*typecode)[2]='I') + + +#define mm_set_symmetric(typecode) ((*typecode)[3]='S') +#define mm_set_general(typecode) ((*typecode)[3]='G') +#define mm_set_skew(typecode) ((*typecode)[3]='K') +#define mm_set_hermitian(typecode) ((*typecode)[3]='H') + +#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \ + (*typecode)[2]=' ',(*typecode)[3]='G') + +#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) + + + /******************* Matrix Market error codes 
***************************/ + + +#define MM_COULD_NOT_READ_FILE 11 +#define MM_PREMATURE_EOF 12 +#define MM_NOT_MTX 13 +#define MM_NO_HEADER 14 +#define MM_UNSUPPORTED_TYPE 15 +#define MM_LINE_TOO_LONG 16 +#define MM_COULD_NOT_WRITE_FILE 17 + + + /******************** Matrix Market internal definitions ***************** + + MM_matrix_typecode: 4-character sequence + + object sparse/ data storage + dense type scheme + + string position: [0] [1] [2] [3] + + Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) + A(array) C(omplex) H(ermitian) + P(attern) S(ymmetric) + I(nteger) K(kew) + + ***********************************************************************/ + +#define MM_MTX_STR "matrix" +#define MM_ARRAY_STR "array" +#define MM_DENSE_STR "array" +#define MM_COORDINATE_STR "coordinate" +#define MM_SPARSE_STR "coordinate" +#define MM_COMPLEX_STR "complex" +#define MM_REAL_STR "real" +#define MM_INT_STR "integer" +#define MM_GENERAL_STR "general" +#define MM_SYMM_STR "symmetric" +#define MM_HERM_STR "hermitian" +#define MM_SKEW_STR "skew-symmetric" +#define MM_PATTERN_STR "pattern" + + inline char *mm_typecode_to_str(MM_typecode matcode) { + char buffer[MM_MAX_LINE_LENGTH]; + const char *types[4] = {0,0,0,0}; + /* int error =0; */ + /* int i; */ + + /* check for MTX type */ + if (mm_is_matrix(matcode)) + types[0] = MM_MTX_STR; + /* + else + error=1; + */ + /* check for CRD or ARR matrix */ + if (mm_is_sparse(matcode)) + types[1] = MM_SPARSE_STR; + else + if (mm_is_dense(matcode)) + types[1] = MM_DENSE_STR; + else + return NULL; + + /* check for element data type */ + if (mm_is_real(matcode)) + types[2] = MM_REAL_STR; + else + if (mm_is_complex(matcode)) + types[2] = MM_COMPLEX_STR; + else + if (mm_is_pattern(matcode)) + types[2] = MM_PATTERN_STR; + else + if (mm_is_integer(matcode)) + types[2] = MM_INT_STR; + else + return NULL; + + + /* check for symmetry type */ + if (mm_is_general(matcode)) + types[3] = MM_GENERAL_STR; + else if (mm_is_symmetric(matcode)) + 
types[3] = MM_SYMM_STR; + else if (mm_is_hermitian(matcode)) + types[3] = MM_HERM_STR; + else if (mm_is_skew(matcode)) + types[3] = MM_SKEW_STR; + else + return NULL; + + SECURE_SPRINTF4(buffer, sizeof(buffer), "%s %s %s %s", types[0], types[1], + types[2], types[3]); + return SECURE_STRDUP(buffer); + + } + + inline int mm_read_banner(FILE *f, MM_typecode *matcode) { + char line[MM_MAX_LINE_LENGTH]; + char banner[MM_MAX_TOKEN_LENGTH]; + char mtx[MM_MAX_TOKEN_LENGTH]; + char crd[MM_MAX_TOKEN_LENGTH]; + char data_type[MM_MAX_TOKEN_LENGTH]; + char storage_scheme[MM_MAX_TOKEN_LENGTH]; + char *p; + gmm::standard_locale sl; + /* int ret_code; */ + + mm_clear_typecode(matcode); + + if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) + return MM_PREMATURE_EOF; + +#ifdef GMM_SECURE_CRT + if (sscanf_s(line, "%s %s %s %s %s", banner, sizeof(banner), + mtx, sizeof(mtx), crd, sizeof(crd), data_type, + sizeof(data_type), storage_scheme, + sizeof(storage_scheme)) != 5) +#else + if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, + data_type, storage_scheme) != 5) +#endif + return MM_PREMATURE_EOF; + + for (p=mtx; *p!='\0'; *p=char(tolower(*p)),p++) {}; /* convert to lower case */ + for (p=crd; *p!='\0'; *p=char(tolower(*p)),p++) {}; + for (p=data_type; *p!='\0'; *p=char(tolower(*p)),p++) {}; + for (p=storage_scheme; *p!='\0'; *p=char(tolower(*p)),p++) {}; + + /* check for banner */ + if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) + return MM_NO_HEADER; + + /* first field should be "mtx" */ + if (strcmp(mtx, MM_MTX_STR) != 0) + return MM_UNSUPPORTED_TYPE; + mm_set_matrix(matcode); + + + /* second field describes whether this is a sparse matrix (in coordinate + storgae) or a dense array */ + + + if (strcmp(crd, MM_SPARSE_STR) == 0) + mm_set_sparse(matcode); + else + if (strcmp(crd, MM_DENSE_STR) == 0) + mm_set_dense(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + /* third field */ + + if (strcmp(data_type, MM_REAL_STR) == 0) + mm_set_real(matcode); + 
else + if (strcmp(data_type, MM_COMPLEX_STR) == 0) + mm_set_complex(matcode); + else + if (strcmp(data_type, MM_PATTERN_STR) == 0) + mm_set_pattern(matcode); + else + if (strcmp(data_type, MM_INT_STR) == 0) + mm_set_integer(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + /* fourth field */ + + if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) + mm_set_general(matcode); + else + if (strcmp(storage_scheme, MM_SYMM_STR) == 0) + mm_set_symmetric(matcode); + else + if (strcmp(storage_scheme, MM_HERM_STR) == 0) + mm_set_hermitian(matcode); + else + if (strcmp(storage_scheme, MM_SKEW_STR) == 0) + mm_set_skew(matcode); + else + return MM_UNSUPPORTED_TYPE; + + return 0; + } + + inline int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz ) { + char line[MM_MAX_LINE_LENGTH]; + /* int ret_code;*/ + int num_items_read; + + /* set return null parameter values, in case we exit with errors */ + *M = *N = *nz = 0; + + /* now continue scanning until you reach the end-of-comments */ + do { + if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) + return MM_PREMATURE_EOF; + } while (line[0] == '%'); + + /* line[] is either blank or has M,N, nz */ + if (SECURE_NONCHAR_SSCANF(line, "%d %d %d", M, N, nz) == 3) return 0; + else + do { + num_items_read = SECURE_NONCHAR_FSCANF(f, "%d %d %d", M, N, nz); + if (num_items_read == EOF) return MM_PREMATURE_EOF; + } + while (num_items_read != 3); + + return 0; + } + + + inline int mm_read_mtx_crd_data(FILE *f, int, int, int nz, int II[], + int J[], double val[], MM_typecode matcode) { + int i; + if (mm_is_complex(matcode)) { + for (i=0; i void read(Matrix &A); + /* write a matrix */ + template static void + write(const char *filename, const csc_matrix& A); + template static void + write(const char *filename, + const csc_matrix_ref& A); + template static void + write(const char *filename, const MAT& A); + }; + + /** load a matrix-market file */ + template inline void + MatrixMarket_load(const char *filename, Matrix& A) { + MatrixMarket_IO mm; 
mm.open(filename); + mm.read(A); + } + /** write a matrix-market file */ + template void + MatrixMarket_save(const char *filename, const csc_matrix& A) { + MatrixMarket_IO mm; mm.write(filename, A); + } + + template inline void + MatrixMarket_save(const char *filename, + const csc_matrix_ref& A) { + MatrixMarket_IO mm; mm.write(filename, A); + } + + + inline void MatrixMarket_IO::open(const char *filename) { + gmm::standard_locale sl; + if (f) { fclose(f); } + SECURE_FOPEN(&f, filename, "r"); + GMM_ASSERT1(f, "Sorry, cannot open file " << filename); + int s1 = mm_read_banner(f, &matcode); + GMM_ASSERT1(s1 == 0, "Sorry, cannnot find the matrix market banner in " + << filename); + int s2 = mm_is_coordinate(matcode), s3 = mm_is_matrix(matcode); + GMM_ASSERT1(s2 > 0 && s3 > 0, + "file is not coordinate storage or is not a matrix"); + int s4 = mm_is_pattern(matcode); + GMM_ASSERT1(s4 == 0, + "the file does only contain the pattern of a sparse matrix"); + int s5 = mm_is_skew(matcode); + GMM_ASSERT1(s5 == 0, "not currently supporting skew symmetric"); + isSymmetric = mm_is_symmetric(matcode) || mm_is_hermitian(matcode); + isHermitian = mm_is_hermitian(matcode); + isComplex = mm_is_complex(matcode); + mm_read_mtx_crd_size(f, &row, &col, &nz); + } + + template void MatrixMarket_IO::read(Matrix &A) { + gmm::standard_locale sl; + typedef typename linalg_traits::value_type T; + GMM_ASSERT1(f, "no file opened!"); + GMM_ASSERT1(!is_complex_double__(T()) || isComplex, + "Bad MM matrix format (complex matrix expected)"); + GMM_ASSERT1(is_complex_double__(T()) || !isComplex, + "Bad MM matrix format (real matrix expected)"); + A = Matrix(row, col); + gmm::clear(A); + + std::vector II(nz), J(nz); + std::vector PR(nz); + mm_read_mtx_crd_data(f, row, col, nz, &II[0], &J[0], + (double*)&PR[0], matcode); + + for (size_type i = 0; i < size_type(nz); ++i) { + A(II[i]-1, J[i]-1) = PR[i]; + + // FIXED MM Format + if (mm_is_hermitian(matcode) && (II[i] != J[i]) ) { + A(J[i]-1, II[i]-1) = 
gmm::conj(PR[i]); + } + + if (mm_is_symmetric(matcode) && (II[i] != J[i]) ) { + A(J[i]-1, II[i]-1) = PR[i]; + } + + if (mm_is_skew(matcode) && (II[i] != J[i]) ) { + A(J[i]-1, II[i]-1) = -PR[i]; + } + } + } + + template void + MatrixMarket_IO::write(const char *filename, const csc_matrix& A) { + write(filename, csc_matrix_ref + (&A.pr[0], &A.ir[0], &A.jc[0], A.nr, A.nc)); + } + + template void + MatrixMarket_IO::write(const char *filename, + const csc_matrix_ref& A) { + gmm::standard_locale sl; + static MM_typecode t1 = {'M', 'C', 'R', 'G'}; + static MM_typecode t2 = {'M', 'C', 'C', 'G'}; + MM_typecode t; + + if (is_complex_double__(T())) std::copy(&(t2[0]), &(t2[0])+4, &(t[0])); + else std::copy(&(t1[0]), &(t1[0])+4, &(t[0])); + size_type nz = A.jc[mat_ncols(A)]; + std::vector II(nz), J(nz); + for (size_type j=0; j < mat_ncols(A); ++j) { + for (size_type i = A.jc[j]; i < A.jc[j+1]; ++i) { + II[i] = A.ir[i] + 1 - shift; + J[i] = int(j + 1); + } + } + mm_write_mtx_crd(filename, int(mat_nrows(A)), int(mat_ncols(A)), + int(nz), &II[0], &J[0], (const double *)A.pr, t); + } + + + template void + MatrixMarket_IO::write(const char *filename, const MAT& A) { + gmm::csc_matrix::value_type> + tmp(gmm::mat_nrows(A), gmm::mat_ncols(A)); + gmm::copy(A,tmp); + MatrixMarket_IO::write(filename, tmp); + } + + template static void vecsave(std::string fname, const VEC& V, + bool binary=false) { + if (binary) { + std::ofstream f(fname.c_str(), std::ofstream::binary); + for (size_type i=0; i < gmm::vect_size(V); ++i) + f.write(reinterpret_cast(&V[i]), sizeof(V[i])); + } + else { + std::ofstream f(fname.c_str()); f.precision(16); f.imbue(std::locale("C")); + for (size_type i=0; i < gmm::vect_size(V); ++i) f << V[i] << "\n"; + } + } + + template static void vecload(std::string fname, const VEC& V_, + bool binary=false) { + VEC &V(const_cast(V_)); + if (binary) { + std::ifstream f(fname.c_str(), std::ifstream::binary); + for (size_type i=0; i < gmm::vect_size(V); ++i) + 
f.read(reinterpret_cast(&V[i]), sizeof(V[i])); + } + else { + std::ifstream f(fname.c_str()); f.imbue(std::locale("C")); + for (size_type i=0; i < gmm::vect_size(V); ++i) f >> V[i]; + } + } +} + + +#endif // GMM_INOUTPUT_H diff --git a/gmm/gmm_interface.h b/gmm/gmm_interface.h new file mode 100644 index 000000000..a3c66cd1b --- /dev/null +++ b/gmm/gmm_interface.h @@ -0,0 +1,1068 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + + +/**@file gmm_interface.h + @author Yves Renard + @date October 13, 2002. + @brief gmm interface for STL vectors. +*/ + +#ifndef GMM_INTERFACE_H__ +#define GMM_INTERFACE_H__ + +#include "gmm_blas.h" +#include "gmm_sub_index.h" + +namespace gmm { + + /* ********************************************************************* */ + /* */ + /* What is needed for a Vector type : */ + /* Vector v(n) defines a vector with n components. */ + /* v[i] allows to access to the ith component of v. */ + /* linalg_traits should be filled with appropriate definitions */ + /* */ + /* for a dense vector : the minimum is two random iterators (begin and */ + /* end) and a pointer to a valid origin. */ + /* for a sparse vector : the minimum is two forward iterators, with */ + /* a method it.index() which gives the index of */ + /* a non zero element, an interface object */ + /* should describe the method to add new non */ + /* zero element, and a pointer to a valid */ + /* origin. */ + /* */ + /* What is needed for a Matrix type : */ + /* Matrix m(n, m) defines a matrix with n rows and m columns. */ + /* m(i, j) allows to access to the element at row i and column j. */ + /* linalg_traits should be filled with appropriate definitions */ + /* */ + /* What is needed for an iterator on dense vector */ + /* to be standard random access iterator */ + /* */ + /* What is needed for an iterator on a sparse vector */ + /* to be a standard bidirectional iterator */ + /* elt should be sorted with increasing indices. */ + /* it.index() gives the index of the non-zero element. */ + /* */ + /* Remark : If original iterators are not convenient, they could be */ + /* redefined and interfaced in linalg_traits without changing */ + /* the original Vector type. 
*/ + /* */ + /* ********************************************************************* */ + + /* ********************************************************************* */ + /* Simple references on vectors */ + /* ********************************************************************* */ + + template struct simple_vector_ref { + typedef simple_vector_ref this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * CPT; + typedef typename std::iterator_traits::reference ref_V; + typedef typename linalg_traits::iterator iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type size_; + + simple_vector_ref(ref_V v) : begin_(vect_begin(const_cast(v))), + end_(vect_end(const_cast(v))), + origin(linalg_origin(const_cast(v))), + size_(vect_size(v)) {} + + simple_vector_ref(const simple_vector_ref &cr) + : begin_(cr.begin_),end_(cr.end_),origin(cr.origin),size_(cr.size_) {} + + simple_vector_ref(void) {} + + reference operator[](size_type i) const + { return linalg_traits::access(origin, begin_, end_, i); } + }; + + template inline + void set_to_begin(IT &it, ORG o, simple_vector_ref *,linalg_modifiable) { + typedef typename linalg_traits >::V_reference ref_t; + set_to_begin(it, o, PT(), ref_t()); + } + + template inline + void set_to_begin(IT &it, ORG o, const simple_vector_ref *, + linalg_modifiable) { + typedef typename linalg_traits >::V_reference ref_t; + set_to_begin(it, o, PT(), ref_t()); + } + + template inline + void set_to_end(IT &it, ORG o, simple_vector_ref *, linalg_modifiable) { + typedef typename linalg_traits >::V_reference ref_t; + set_to_end(it, o, PT(), ref_t()); + } + + template inline + void set_to_end(IT &it, ORG o, const simple_vector_ref *, + linalg_modifiable) { + typedef typename linalg_traits >::V_reference ref_t; + set_to_end(it, o, PT(), ref_t()); + } + + + template struct linalg_traits > { + typedef 
simple_vector_ref this_type; + typedef this_type *pthis_type; + typedef typename std::iterator_traits::value_type V; + typedef typename linalg_traits::origin_type origin_type; + typedef V *pV; + typedef typename linalg_traits::is_reference V_reference; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef typename select_ref::ref_type porigin_type; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type iterator; + typedef typename linalg_traits::const_iterator const_iterator; + typedef typename linalg_traits::storage_type storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size_; } + static inline iterator begin(this_type &v) { + iterator it = v.begin_; + set_to_begin(it, v.origin, pthis_type(), is_reference()); + return it; + } + static inline const_iterator begin(const this_type &v) { + const_iterator it = v.begin_; + set_to_begin(it, v.origin, pthis_type(), is_reference()); + return it; + } + static inline iterator end(this_type &v) { + iterator it = v.end_; + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static inline const_iterator end(const this_type &v) { + const_iterator it = v.end_; + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type* o, const iterator &it, const iterator &ite) + { linalg_traits::clear(o, it, ite); } + static void do_clear(this_type &v) { clear(v.origin, v.begin_, v.end_); } + static value_type access(const origin_type *o, const const_iterator &it, + const const_iterator &ite, size_type i) + { return linalg_traits::access(o, it, ite, i); } + static 
reference access(origin_type *o, const iterator &it, + const iterator &ite, size_type i) + { return linalg_traits::access(o, it, ite, i); } + }; + + template + std::ostream &operator << (std::ostream &o, const simple_vector_ref& v) + { gmm::write(o,v); return o; } + + template + simple_vector_ref *> + vref(const std::vector &vv) + { return simple_vector_ref *>(vv); } + + + /* ********************************************************************* */ + /* */ + /* Traits for S.T.L. object */ + /* */ + /* ********************************************************************* */ + + template + struct linalg_traits > { + typedef std::vector this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef T& reference; + typedef typename this_type::iterator iterator; + typedef typename this_type::const_iterator const_iterator; + typedef abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) { std::fill(v.begin(), v.end(), T(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + + + + template + inline size_type nnz(const 
std::vector& l) { return l.size(); } + + /* ********************************************************************* */ + /* */ + /* Traits for ref objects */ + /* */ + /* ********************************************************************* */ + + template + struct tab_ref_with_origin : public gmm::tab_ref { + typedef tab_ref_with_origin this_type; + // next line replaced by the 4 following lines in order to please aCC + //typedef typename linalg_traits::porigin_type porigin_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename std::iterator_traits::pointer PT; + typedef typename select_ref::ref_type porigin_type; + + + porigin_type origin; + + tab_ref_with_origin(void) {} + template tab_ref_with_origin(const IT &b, const IT &e, PT p) + : gmm::tab_ref(b,e), origin(porigin_type(p)) {} + tab_ref_with_origin(const IT &b, const IT &e, porigin_type p) + : gmm::tab_ref(b,e), origin(p) {} + + tab_ref_with_origin(const V &v, const sub_interval &si) + : gmm::tab_ref(vect_begin(const_cast(v))+si.min, + vect_begin(const_cast(v))+si.max), + origin(linalg_origin(const_cast(v))) {} + tab_ref_with_origin(V &v, const sub_interval &si) + : gmm::tab_ref(vect_begin(const_cast(v))+si.min, + vect_begin(const_cast(v))+si.max), + origin(linalg_origin(const_cast(v))) {} + }; + + template + struct linalg_traits > { + typedef typename std::iterator_traits::pointer PT; + typedef typename linalg_traits::origin_type origin_type; + typedef tab_ref_with_origin this_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef typename this_type::iterator iterator; + typedef typename this_type::iterator const_iterator; + typedef abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { 
return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static inline void do_clear(this_type &v) + { std::fill(v.begin(), v.end(), value_type(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + }; + + template std::ostream &operator << + (std::ostream &o, const tab_ref_with_origin& m) + { gmm::write(o,m); return o; } + + + template + struct tab_ref_reg_spaced_with_origin : public gmm::tab_ref_reg_spaced { + typedef tab_ref_reg_spaced_with_origin this_type; + typedef typename linalg_traits::porigin_type porigin_type; + + porigin_type origin; + + tab_ref_reg_spaced_with_origin(void) {} + tab_ref_reg_spaced_with_origin(const IT &b, size_type n, size_type s, + const porigin_type p) + : gmm::tab_ref_reg_spaced(b,n,s), origin(p) {} + tab_ref_reg_spaced_with_origin(const V &v, const sub_slice &si) + : gmm::tab_ref_reg_spaced(vect_begin(const_cast(v)) + si.min, + si.N, (si.max - si.min)/si.N), + origin(linalg_origin(const_cast(v))) {} + tab_ref_reg_spaced_with_origin(V &v, const sub_slice &si) + : gmm::tab_ref_reg_spaced(vect_begin(const_cast(v)) + si.min, + si.N, (si.max - si.min)/si.N), + origin(linalg_origin(const_cast(v))) {} + }; + + template + struct linalg_traits > { + typedef typename std::iterator_traits::pointer PT; + typedef tab_ref_reg_spaced_with_origin this_type; + typedef typename 
linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef typename this_type::iterator iterator; + typedef typename this_type::iterator const_iterator; + typedef abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) + { std::fill(v.begin(), v.end(), value_type(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + }; + + template std::ostream &operator << + (std::ostream &o, const tab_ref_reg_spaced_with_origin& m) + { gmm::write(o,m); return o; } + + + template + struct tab_ref_index_ref_with_origin + : public gmm::tab_ref_index_ref { + typedef tab_ref_index_ref_with_origin this_type; + typedef typename linalg_traits::porigin_type porigin_type; + + porigin_type origin; + + tab_ref_index_ref_with_origin(void) {} + tab_ref_index_ref_with_origin(const IT &b, const ITINDEX &bi, + const ITINDEX &ei, porigin_type p) + : gmm::tab_ref_index_ref(b, bi, ei), origin(p) {} + + 
tab_ref_index_ref_with_origin(const V &v, const sub_index &si) + : gmm::tab_ref_index_ref(vect_begin(const_cast(v)), + si.begin(), si.end()), + origin(linalg_origin(const_cast(v))) {} + tab_ref_index_ref_with_origin(V &v, const sub_index &si) + : gmm::tab_ref_index_ref(vect_begin(const_cast(v)), + si.begin(), si.end()), + origin(linalg_origin(const_cast(v))) {} + }; + + template + struct linalg_traits > { + typedef typename std::iterator_traits::pointer PT; + typedef tab_ref_index_ref_with_origin this_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef typename this_type::iterator iterator; + typedef typename this_type::iterator const_iterator; + typedef abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) + { std::fill(v.begin(), v.end(), value_type(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + }; + + template + std::ostream &operator << + 
(std::ostream &o, const tab_ref_index_ref_with_origin& m) + { gmm::write(o,m); return o; } + + + template + struct dense_compressed_iterator { + typedef ITER value_type; + typedef ITER *pointer; + typedef ITER &reference; + typedef ptrdiff_t difference_type; + typedef std::random_access_iterator_tag iterator_category; + typedef size_t size_type; + typedef dense_compressed_iterator iterator; + typedef typename std::iterator_traits::value_type *MPT; + + ITER it; + size_type N, nrows, ncols, i; + PT origin; + + iterator operator ++(int) { iterator tmp = *this; i++; return tmp; } + iterator operator --(int) { iterator tmp = *this; i--; return tmp; } + iterator &operator ++() { ++i; return *this; } + iterator &operator --() { --i; return *this; } + iterator &operator +=(difference_type ii) { i += ii; return *this; } + iterator &operator -=(difference_type ii) { i -= ii; return *this; } + iterator operator +(difference_type ii) const + { iterator itt = *this; return (itt += ii); } + iterator operator -(difference_type ii) const + { iterator itt = *this; return (itt -= ii); } + difference_type operator -(const iterator &ii) const + { return (N ? 
(it - ii.it) / N : 0) + i - ii.i; } + + ITER operator *() const { return it+i*N; } + ITER operator [](int ii) const { return it + (i+ii) * N; } + + bool operator ==(const iterator &ii) const + { return (*this - ii) == difference_type(0); } + bool operator !=(const iterator &ii) const { return !(ii == *this); } + bool operator < (const iterator &ii) const + { return (*this - ii) < difference_type(0); } + + dense_compressed_iterator(void) {} + dense_compressed_iterator(const dense_compressed_iterator &ii) + : it(ii.it), N(ii.N), nrows(ii.nrows), ncols(ii.ncols), i(ii.i), + origin(ii.origin) {} + dense_compressed_iterator(const ITER &iter, size_type n, size_type r, + size_type c, size_type ii, PT o) + : it(iter), N(n), nrows(r), ncols(c), i(ii), origin(o) { } + + }; + + /* ******************************************************************** */ + /* Read only reference on a compressed sparse vector */ + /* ******************************************************************** */ + + template + struct cs_vector_ref_iterator { + PT1 pr; + PT2 ir; + + typedef typename std::iterator_traits::value_type value_type; + typedef PT1 pointer; + typedef typename std::iterator_traits::reference reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef cs_vector_ref_iterator iterator; + + cs_vector_ref_iterator(void) {} + cs_vector_ref_iterator(PT1 p1, PT2 p2) : pr(p1), ir(p2) {} + + inline size_type index(void) const { return (*ir) - shift; } + iterator &operator ++() { ++pr; ++ir; return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator &operator --() { --pr; --ir; return *this; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + reference operator *() const { return *pr; } + pointer operator ->() const { return pr; } + + bool operator ==(const iterator &i) const { return (i.pr==pr);} + bool operator !=(const iterator 
&i) const { return (i.pr!=pr);} + }; + + template struct cs_vector_ref { + PT1 pr; + PT2 ir; + size_type n, size_; + + typedef cs_vector_ref this_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename linalg_traits::const_iterator const_iterator; + + cs_vector_ref(PT1 pt1, PT2 pt2, size_type nnz, size_type ns) + : pr(pt1), ir(pt2), n(nnz), size_(ns) {} + cs_vector_ref(void) {} + + size_type size(void) const { return size_; } + + const_iterator begin(void) const { return const_iterator(pr, ir); } + const_iterator end(void) const { return const_iterator(pr+n, ir+n); } + + value_type operator[](size_type i) const + { return linalg_traits::access(pr, begin(), end(),i); } + }; + + template + struct linalg_traits > { + typedef cs_vector_ref this_type; + typedef linalg_const is_reference; + typedef abstract_vector linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef value_type origin_type; + typedef typename std::iterator_traits::value_type reference; + typedef cs_vector_ref_iterator::pointer, + typename const_pointer::pointer, shift> const_iterator; + typedef abstract_null_type iterator; + typedef abstract_sparse storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static const origin_type* origin(const this_type &v) { return v.pr; } + /* Look up logical index i by binary search over the index range + [b.ir, e.ir); an index that is not stored reads as value_type(0). */ + static value_type access(const origin_type *, const const_iterator &b, + const const_iterator &e, size_type i) { + if (b.ir == e.ir) return value_type(0); + PT2 p = std::lower_bound(b.ir, e.ir, i+shift); + /* lower_bound returns e.ir when no stored index is >= i+shift, so + compare p with e.ir before dereferencing it. */ + return (p != e.ir && *p == i+shift) ? 
b.pr[p-b.ir] : value_type(0); + } + }; + + template + std::ostream &operator << + (std::ostream &o, const cs_vector_ref& m) + { gmm::write(o,m); return o; } + + template + inline size_type nnz(const cs_vector_ref& l) { return l.n; } + + /* ******************************************************************** */ + /* Read only reference on a compressed sparse column matrix */ + /* ******************************************************************** */ + + template + struct sparse_compressed_iterator { + typedef typename std::iterator_traits::value_type value_type; + typedef const value_type *pointer; + typedef const value_type &reference; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef std::random_access_iterator_tag iterator_category; + typedef sparse_compressed_iterator iterator; + + PT1 pr; + PT2 ir; + PT3 jc; + size_type n; + const value_type *origin; + + iterator operator ++(int) { iterator tmp = *this; jc++; return tmp; } + iterator operator --(int) { iterator tmp = *this; jc--; return tmp; } + iterator &operator ++() { jc++; return *this; } + iterator &operator --() { jc--; return *this; } + iterator &operator +=(difference_type i) { jc += i; return *this; } + iterator &operator -=(difference_type i) { jc -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const { return jc - i.jc; } + + reference operator *() const { return pr + *jc - shift; } + reference operator [](int ii) { return pr + *(jc+ii) - shift; } + + bool operator ==(const iterator &i) const { return (jc == i.jc); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (jc < i.jc); } + + sparse_compressed_iterator(void) {} + sparse_compressed_iterator(PT1 p1, PT2 p2, PT3 p3, size_type nn, + const 
value_type *o) + : pr(p1), ir(p2), jc(p3), n(nn), origin(o) { } + + }; + + template + struct csc_matrix_ref { + PT1 pr; // values. + PT2 ir; // row indexes. + PT3 jc; // column repartition on pr and ir. + size_type nc, nr; + + typedef typename std::iterator_traits::value_type value_type; + csc_matrix_ref(PT1 pt1, PT2 pt2, PT3 pt3, size_type nrr, size_type ncc) + : pr(pt1), ir(pt2), jc(pt3), nc(ncc), nr(nrr) {} + csc_matrix_ref(void) {} + + size_type nrows(void) const { return nr; } + size_type ncols(void) const { return nc; } + + value_type operator()(size_type i, size_type j) const + { return mat_col(*this, j)[i]; } + }; + + template + struct linalg_traits > { + typedef csc_matrix_ref this_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::value_type reference; + typedef value_type origin_type; + typedef abstract_sparse storage_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type sub_col_type; + typedef cs_vector_ref::pointer, + typename const_pointer::pointer, shift> const_sub_col_type; + typedef sparse_compressed_iterator::pointer, + typename const_pointer::pointer, + typename const_pointer::pointer, + shift> const_col_iterator; + typedef abstract_null_type col_iterator; + typedef col_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.pr, m.ir, m.jc, m.nr, m.pr); } + static const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m.pr, m.ir, m.jc + m.nc, m.nr, m.pr); } + static const_sub_col_type col(const 
const_col_iterator &it) { + return const_sub_col_type(it.pr + *(it.jc) - shift, + it.ir + *(it.jc) - shift, *(it.jc + 1) - *(it.jc), it.n); + } + static const origin_type* origin(const this_type &m) { return m.pr; } + static value_type access(const const_col_iterator &itcol, size_type j) + { return col(itcol)[j]; } + }; + + + template + std::ostream &operator << + (std::ostream &o, const csc_matrix_ref& m) + { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* Read only reference on a compressed sparse row matrix */ + /* ******************************************************************** */ + + template + struct csr_matrix_ref { + PT1 pr; // values. + PT2 ir; // column indexes. + PT3 jc; // row repartition on pr and ir. + size_type nc, nr; + + typedef typename std::iterator_traits::value_type value_type; + csr_matrix_ref(PT1 pt1, PT2 pt2, PT3 pt3, size_type nrr, size_type ncc) + : pr(pt1), ir(pt2), jc(pt3), nc(ncc), nr(nrr) {} + csr_matrix_ref(void) {} + + size_type nrows(void) const { return nr; } + size_type ncols(void) const { return nc; } + + value_type operator()(size_type i, size_type j) const + { return mat_row(*this, i)[j]; } + }; + + template + struct linalg_traits > { + typedef csr_matrix_ref this_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::value_type reference; + typedef value_type origin_type; + typedef abstract_sparse storage_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type sub_row_type; + typedef cs_vector_ref::pointer, + typename const_pointer::pointer, shift> + const_sub_row_type; + typedef sparse_compressed_iterator::pointer, + typename const_pointer::pointer, + typename 
const_pointer::pointer, + shift> const_row_iterator; + typedef abstract_null_type row_iterator; + typedef row_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.pr, m.ir, m.jc, m.nc, m.pr); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m.pr, m.ir, m.jc + m.nr, m.nc, m.pr); } + static const_sub_row_type row(const const_row_iterator &it) { + return const_sub_row_type(it.pr + *(it.jc) - shift, + it.ir + *(it.jc) - shift, *(it.jc + 1) - *(it.jc), it.n); + } + static const origin_type* origin(const this_type &m) { return m.pr; } + static value_type access(const const_row_iterator &itrow, size_type j) + { return row(itrow)[j]; } + }; + + template + std::ostream &operator << + (std::ostream &o, const csr_matrix_ref& m) + { gmm::write(o,m); return o; } + + /* ********************************************************************* */ + /* */ + /* Simple interface for C arrays */ + /* */ + /* ********************************************************************* */ + + template struct array1D_reference { + + typedef typename std::iterator_traits::value_type value_type; + + PT begin, end; + + const value_type &operator[](size_type i) const { return *(begin+i); } + value_type &operator[](size_type i) { return *(begin+i); } + + array1D_reference(PT begin_, size_type s) : begin(begin_), end(begin_+s) {} + }; + + template + struct linalg_traits > { + typedef array1D_reference this_type; + typedef this_type origin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef PT iterator; + typedef PT const_iterator; + typedef 
abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.end - v.begin; } + static iterator begin(this_type &v) { return v.begin; } + static const_iterator begin(const this_type &v) { return v.begin; } + static iterator end(this_type &v) { return v.end; } + static const_iterator end(const this_type &v) { return v.end; } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) + { std::fill(v.begin, v.end, value_type(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + static void resize(this_type &, size_type ) + { GMM_ASSERT1(false, "Not resizable vector"); } + }; + + template std::ostream &operator << + (std::ostream &o, const array1D_reference& v) + { gmm::write(o,v); return o; } + + template struct array2D_col_reference { + + typedef typename std::iterator_traits::value_type T; + typedef typename std::iterator_traits::reference reference; + typedef typename const_reference::reference const_reference; + typedef PT iterator; + typedef typename const_pointer::pointer const_iterator; + + PT begin_; + size_type nbl, nbc; + + inline const_reference operator ()(size_type l, size_type c) const { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(begin_ + c*nbl+l); + } + inline reference operator ()(size_type l, size_type c) { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(begin_ + c*nbl+l); + } + + void resize(size_type, size_type); + void reshape(size_type m, size_type n) { + GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch"); + nbl = m; nbc = n; + } + + void 
fill(T a, T b = T(0)) { + std::fill(begin_, begin_+nbc*nbl, b); + iterator p = begin_, e = begin_+nbc*nbl; + while (p < e) { *p = a; p += nbl+1; } + } + inline size_type nrows(void) const { return nbl; } + inline size_type ncols(void) const { return nbc; } + + iterator begin(void) { return begin_; } + const_iterator begin(void) const { return begin_; } + iterator end(void) { return begin_+nbl*nbc; } + const_iterator end(void) const { return begin_+nbl*nbc; } + + array2D_col_reference(PT begin__, size_type nrows_, size_type ncols_) + : begin_(begin__), nbl(nrows_), nbc(ncols_) {} + }; + + template struct linalg_traits > { + typedef array2D_col_reference this_type; + typedef this_type origin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef abstract_dense storage_type; + typedef tab_ref_reg_spaced_with_origin sub_row_type; + typedef tab_ref_reg_spaced_with_origin const_sub_row_type; + typedef dense_compressed_iterator row_iterator; + typedef dense_compressed_iterator const_row_iterator; + typedef tab_ref_with_origin sub_col_type; + typedef tab_ref_with_origin const_sub_col_type; + typedef dense_compressed_iterator col_iterator; + typedef dense_compressed_iterator const_col_iterator; + typedef col_and_row sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(*it, it.nrows, it.ncols, it.origin); } + static const_sub_col_type col(const const_col_iterator &it) + { return const_sub_col_type(*it, *it + it.nrows, it.origin); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(*it, it.nrows, it.ncols, it.origin); } + static sub_col_type 
col(const col_iterator &it) + { return sub_col_type(*it, *it + it.nrows, it.origin); } + static row_iterator row_begin(this_type &m) + { return row_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); } + static row_iterator row_end(this_type &m) + { return row_iterator(m.begin(), 1, m.nrows(), m.ncols(), m.nrows(), &m); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.begin(), 1, m.nrows(), m.ncols(), 0, &m); } + static const_row_iterator row_end(const this_type &m) { + return const_row_iterator(m.begin(), 1, m.nrows(), + m.ncols(), m.nrows(), &m); + } + static col_iterator col_begin(this_type &m) + { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); } + static col_iterator col_end(this_type &m) { + return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), + m.ncols(), &m); + } + static const_col_iterator col_begin(const this_type &m) { + return const_col_iterator(m.begin(), m.nrows(), m.nrows(), + m.ncols(), 0, &m); + } + static const_col_iterator col_end(const this_type &m) { + return const_col_iterator(m.begin(), m.nrows(),m.nrows(),m.ncols(), + m.ncols(), &m); + } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type &m) { m.fill(value_type(0)); } + static value_type access(const const_col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static reference access(const col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static void resize(this_type &v, size_type m, size_type n) + { v.resize(m,n); } + static void reshape(this_type &v, size_type m, size_type n) + { v.reshape(m, n); } + }; + + template std::ostream &operator << + (std::ostream &o, const array2D_col_reference& m) + { gmm::write(o,m); return o; } + + + + template struct array2D_row_reference { + + typedef typename std::iterator_traits::value_type T; + typedef typename std::iterator_traits::reference 
reference; + typedef typename const_reference::reference const_reference; + typedef PT iterator; + typedef typename const_pointer::pointer const_iterator; + + PT begin_; + size_type nbl, nbc; + + inline const_reference operator ()(size_type l, size_type c) const { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(begin_ + l*nbc+c); + } + inline reference operator ()(size_type l, size_type c) { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(begin_ + l*nbc+c); + } + + void resize(size_type, size_type); + void reshape(size_type m, size_type n) { + GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch"); + nbl = m; nbc = n; + } + + void fill(T a, T b = T(0)) { + std::fill(begin_, begin_+nbc*nbl, b); + iterator p = begin_, e = begin_+nbc*nbl; + while (p < e) { *p = a; p += nbc+1; } + } + inline size_type nrows(void) const { return nbl; } + inline size_type ncols(void) const { return nbc; } + + iterator begin(void) { return begin_; } + const_iterator begin(void) const { return begin_; } + iterator end(void) { return begin_+nbl*nbc; } + const_iterator end(void) const { return begin_+nbl*nbc; } + + array2D_row_reference(PT begin__, size_type nrows_, size_type ncols_) + : begin_(begin__), nbl(nrows_), nbc(ncols_) {} + }; + + template struct linalg_traits > { + typedef array2D_row_reference this_type; + typedef this_type origin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::reference reference; + typedef abstract_dense storage_type; + typedef tab_ref_reg_spaced_with_origin sub_col_type; + typedef tab_ref_reg_spaced_with_origin const_sub_col_type; + typedef dense_compressed_iterator col_iterator; + typedef dense_compressed_iterator const_col_iterator; + typedef tab_ref_with_origin sub_row_type; + typedef tab_ref_with_origin const_sub_row_type; + typedef dense_compressed_iterator 
row_iterator; + typedef dense_compressed_iterator const_row_iterator; + typedef col_and_row sub_orientation; + typedef linalg_true index_sorted; + static size_type ncols(const this_type &m) { return m.ncols(); } + static size_type nrows(const this_type &m) { return m.nrows(); } + static const_sub_col_type col(const const_col_iterator &it) + { return const_sub_col_type(*it, it.ncols, it.nrows, it.origin); } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(*it, *it + it.ncols, it.origin); } + /* Mutable column view: takes the same four arguments as the const + overload (the first argument used to be passed twice). + NOTE(review): argument order simply mirrors the const overload; + confirm it against the strided-reference constructor. */ + static sub_col_type col(const col_iterator &it) + { return sub_col_type(*it, it.ncols, it.nrows, it.origin); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(*it, *it + it.ncols, it.origin); } + static col_iterator col_begin(this_type &m) + { return col_iterator(m.begin(), 1, m.ncols(), m.nrows(), 0, &m); } + static col_iterator col_end(this_type &m) + { return col_iterator(m.begin(), 1, m.ncols(), m.nrows(), m.ncols(), &m); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.begin(), 1, m.ncols(), m.nrows(), 0, &m); } + static const_col_iterator col_end(const this_type &m) { + return const_col_iterator(m.begin(), 1, m.ncols(), + m.nrows(), m.ncols(), &m); + } + static row_iterator row_begin(this_type &m) + { return row_iterator(m.begin(), m.ncols(), m.ncols(), m.nrows(), 0, &m); } + static row_iterator row_end(this_type &m) { + return row_iterator(m.begin(), m.ncols(), m.ncols(), m.nrows(), + m.nrows(), &m); + } + static const_row_iterator row_begin(const this_type &m) { + return const_row_iterator(m.begin(), m.ncols(), m.ncols(), m.nrows(), + 0, &m); + } + static const_row_iterator row_end(const this_type &m) { + return const_row_iterator(m.begin(), m.ncols(), m.ncols(), m.nrows(), + m.nrows(), &m); + } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type 
&m) { m.fill(value_type(0)); } + static value_type access(const const_row_iterator &itrow, size_type j) + { return (*itrow)[j]; } + static reference access(const row_iterator &itrow, size_type j) + { return (*itrow)[j]; } + static void resize(this_type &v, size_type m, size_type n) + { v.resize(m,n); } + static void reshape(this_type &v, size_type m, size_type n) + { v.reshape(m, n); } + }; + + template std::ostream &operator << + (std::ostream &o, const array2D_row_reference& m) + { gmm::write(o,m); return o; } + + + + + + +} + + +#endif // GMM_INTERFACE_H__ diff --git a/gmm/gmm_interface_bgeot.h b/gmm/gmm_interface_bgeot.h new file mode 100644 index 000000000..d1d0ae3ab --- /dev/null +++ b/gmm/gmm_interface_bgeot.h @@ -0,0 +1,83 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_interface_bgeot.h + @author Yves Renard + @date October 13, 2002. + @brief interface for bgeot::small_vector +*/ +#ifndef GMM_INTERFACE_BGEOT_H__ +#define GMM_INTERFACE_BGEOT_H__ + + +namespace gmm { + + /* ********************************************************************* */ + /* */ + /* Traits for bgeot objects */ + /* */ + /* ********************************************************************* */ + + template struct linalg_traits > { + typedef bgeot::small_vector this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef T& reference; + typedef typename this_type::iterator iterator; + typedef typename this_type::const_iterator const_iterator; + typedef abstract_dense storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type* o, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) + { 
std::fill(v.begin(), v.end(), value_type(0)); } + static value_type access(const origin_type *, const const_iterator &it, + const const_iterator &, size_type i) + { return it[i]; } + static reference access(origin_type *, const iterator &it, + const iterator &, size_type i) + { return it[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + +} + + +#endif // GMM_INTERFACE_BGEOT_H__ diff --git a/gmm/gmm_iter.h b/gmm/gmm_iter.h new file mode 100644 index 000000000..e82d270f4 --- /dev/null +++ b/gmm/gmm_iter.h @@ -0,0 +1,162 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_iter.h + @author Yves Renard + @date February 10, 2003. + @brief Iteration object. +*/ + +#ifndef GMM_ITER_H__ +#define GMM_ITER_H__ + +#include "gmm_kernel.h" +#include + +namespace gmm { + + /** The Iteration object calculates whether the solution has reached the + desired accuracy, or whether the maximum number of iterations has + been reached. + + The method finished() checks the convergence. The first() + method is used to determine the first iteration of the loop. + */ + class iteration { + protected : + double rhsn; /* Right hand side norm. */ + size_type maxiter; /* Max. number of iterations. */ + int noise; /* if noise > 0 iterations are printed. */ + double resmax; /* maximum residu. */ + double resminreach, resadd; + double diverged_res; /* Threshold beyond which the iterative */ + /* is considered to diverge. */ + size_type nit; /* iteration number. */ + double res; /* last computed residu. */ + std::string name; /* eventually, name of the method. 
*/ + bool written; + void (*callback)(const gmm::iteration&); + public : + + void init(void) { + nit = 0; res = 0.0; written = false; + resminreach = 1E200; resadd = 0.0; + callback = 0; + } + + iteration(double r = 1.0E-8, int noi = 0, size_type mit = size_type(-1), + double div_res = 1E200) + : rhsn(1.0), maxiter(mit), noise(noi), resmax(r), diverged_res(div_res) + { init(); } + + void operator ++(int) { nit++; written = false; resadd += res; } + void operator ++() { (*this)++; } + + bool first(void) { return nit == 0; } + + /* get/set the "noisyness" (verbosity) of the solvers */ + int get_noisy(void) const { return noise; } + void set_noisy(int n) { noise = n; } + void reduce_noisy(void) { if (noise > 0) noise--; } + + double get_resmax(void) const { return resmax; } + void set_resmax(double r) { resmax = r; } + + double get_res() const { return res; } + void enforce_converged(bool c = true) + { if (c) res = double(0); else res = rhsn * resmax + double(1); } + + /* change the user-definable callback, called after each iteration */ + void set_callback(void (*t)(const gmm::iteration&)) { + callback = t; + } + + double get_diverged_residual(void) const { return diverged_res; } + void set_diverged_residual(double r) { diverged_res = r; } + + size_type get_iteration(void) const { return nit; } + void set_iteration(size_type i) { nit = i; } + + size_type get_maxiter(void) const { return maxiter; } + void set_maxiter(size_type i) { maxiter = i; } + + double get_rhsnorm(void) const { return rhsn; } + void set_rhsnorm(double r) { rhsn = r; } + + bool converged(void) { + return !isnan(res) && res <= rhsn * resmax; + } + bool converged(double nr) { + res = gmm::abs(nr); + resminreach = std::min(resminreach, res); + return converged(); + } + template bool converged(const VECT &v) + { return converged(gmm::vect_norm2(v)); } + bool diverged(void) { + return isnan(res) || (nit>=maxiter) + || (res>=rhsn*diverged_res && nit > 4); + } + bool diverged(double nr) { + res = 
gmm::abs(nr); + resminreach = std::min(resminreach, res); + return diverged(); + } + + bool finished(double nr) { + if (callback) callback(*this); + if (noise > 0 && !written) { + double a = (rhsn == 0) ? 1.0 : rhsn; + converged(nr); + cout << name << " iter " << std::setw(3) << nit << " residual " + << std::setw(12) << gmm::abs(nr) / a; +// if (nit % 100 == 0 && nit > 0) { +// cout << " (residual min " << resminreach / a << " mean val " +// << resadd / (100.0 * a) << " )"; +// resadd = 0.0; +// } + cout << endl; + written = true; + } + return (converged(nr) || diverged(nr)); + } + template bool finished_vect(const VECT &v) + { return finished(double(gmm::vect_norm2(v))); } + + + void set_name(const std::string &n) { name = n; } + const std::string &get_name(void) const { return name; } + + }; + +} + +#endif /* GMM_ITER_H__ */ diff --git a/gmm/gmm_iter_solvers.h b/gmm/gmm_iter_solvers.h new file mode 100644 index 000000000..cb34ef088 --- /dev/null +++ b/gmm/gmm_iter_solvers.h @@ -0,0 +1,111 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_iter_solvers.h + @author Yves Renard + @date October 13, 2002. + @brief Include standard gmm iterative solvers (cg, gmres, ...) +*/ +#ifndef GMM_ITER_SOLVERS_H__ +#define GMM_ITER_SOLVERS_H__ + +#include "gmm_iter.h" + + +namespace gmm { + + /** mixed method to find a zero of a real function G, a priori + * between a and b. If the zero is not between a and b, iterations + * of secant are applied. When a convenient interval is found, + * iterations of dichotomie and regula falsi are applied. + */ + template + T find_root(const FUNC &G, T a = T(0), T b = T(1), + T tol = gmm::default_tol(T())) { + T c, Ga = G(a), Gb = G(b), Gc, d; + d = gmm::abs(b - a); +#if 0 + for (int i = 0; i < 4; i++) { /* secant iterations. */ + if (d < tol) return (b + a) / 2.0; + c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c); + a = b; b = c; Ga = Gb; Gb = Gc; + d = gmm::abs(b - a); + } +#endif + while (Ga * Gb > 0.0) { /* secant iterations. 
*/ + if (d < tol) return (b + a) / 2.0; + c = b - Gb * (b - a) / (Gb - Ga); Gc = G(c); + a = b; b = c; Ga = Gb; Gb = Gc; + d = gmm::abs(b - a); + } + + c = std::max(a, b); a = std::min(a, b); b = c; + while (d > tol) { + c = b - (b - a) * (Gb / (Gb - Ga)); /* regula falsi. */ + if (c > b) c = b; + if (c < a) c = a; + Gc = G(c); + if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; } + c = (b + a) / 2.0 ; Gc = G(c); /* Dichotomie. */ + if (Gc*Gb > 0) { b = c; Gb = Gc; } else { a = c; Ga = Gc; } + d = gmm::abs(b - a); c = (b + a) / 2.0; if ((c == a) || (c == b)) d = 0.0; + } + return (b + a) / 2.0; + } + +} + +#include "gmm_precond_diagonal.h" +#include "gmm_precond_ildlt.h" +#include "gmm_precond_ildltt.h" +#include "gmm_precond_mr_approx_inverse.h" +#include "gmm_precond_ilu.h" +#include "gmm_precond_ilut.h" +#include "gmm_precond_ilutp.h" + + + +#include "gmm_solver_cg.h" +#include "gmm_solver_bicgstab.h" +#include "gmm_solver_qmr.h" +#include "gmm_solver_constrained_cg.h" +#include "gmm_solver_Schwarz_additive.h" +#include "gmm_modified_gram_schmidt.h" +#include "gmm_tri_solve.h" +#include "gmm_solver_gmres.h" +#include "gmm_solver_bfgs.h" +#include "gmm_least_squares_cg.h" + +// #include "gmm_solver_idgmres.h" + + + +#endif // GMM_ITER_SOLVERS_H__ diff --git a/gmm/gmm_kernel.h b/gmm/gmm_kernel.h new file mode 100644 index 000000000..ebd217610 --- /dev/null +++ b/gmm/gmm_kernel.h @@ -0,0 +1,55 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. 
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_kernel.h + @author Yves Renard + @date November 15, 2003. + @brief Include the base gmm files. 
+ */ + +#ifndef GMM_KERNEL_H__ +#define GMM_KERNEL_H__ + +#include "gmm_def.h" +#include "gmm_blas.h" +#include "gmm_real_part.h" +#include "gmm_interface.h" +#include "gmm_sub_vector.h" +#include "gmm_sub_matrix.h" +#include "gmm_vector_to_matrix.h" +#include "gmm_vector.h" +#include "gmm_matrix.h" +#include "gmm_tri_solve.h" +#include "gmm_blas_interface.h" +#include "gmm_lapack_interface.h" + + +#endif // GMM_KERNEL_H__ diff --git a/gmm/gmm_lapack_interface.h b/gmm/gmm_lapack_interface.h new file mode 100644 index 000000000..7888aea05 --- /dev/null +++ b/gmm/gmm_lapack_interface.h @@ -0,0 +1,470 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_lapack_interface.h + @author Yves Renard + @date October 7, 2003. + @brief gmm interface for LAPACK +*/ + +#ifndef GMM_LAPACK_INTERFACE_H +#define GMM_LAPACK_INTERFACE_H + +#include "gmm_blas_interface.h" +#include "gmm_dense_lu.h" +#include "gmm_dense_qr.h" + + +#if defined(GMM_USES_LAPACK) + +namespace gmm { + + /* ********************************************************************* */ + /* Operations interfaced for T = float, double, std::complex */ + /* or std::complex : */ + /* */ + /* lu_factor(dense_matrix, std::vector) */ + /* lu_solve(dense_matrix, std::vector, std::vector) */ + /* lu_solve(dense_matrix, std::vector, std::vector, */ + /* std::vector) */ + /* lu_solve_transposed(dense_matrix, std::vector, std::vector,*/ + /* std::vector) */ + /* lu_inverse(dense_matrix) */ + /* lu_inverse(dense_matrix, std::vector, dense_matrix) */ + /* */ + /* qr_factor(dense_matrix, dense_matrix, dense_matrix) */ + /* */ + /* implicit_qr_algorithm(dense_matrix, std::vector) */ + /* implicit_qr_algorithm(dense_matrix, std::vector, */ + /* dense_matrix) */ + /* implicit_qr_algorithm(dense_matrix, std::vector >) */ + /* implicit_qr_algorithm(dense_matrix, std::vector >, */ + /* dense_matrix) */ + /* */ + /* geev_interface_right */ + /* geev_interface_left */ + /* */ + /* schur(dense_matrix, dense_matrix, dense_matrix) */ + /* */ + /* svd(dense_matrix, dense_matrix, dense_matrix, std::vector)*/ + /* svd(dense_matrix, dense_matrix, 
dense_matrix, */ + /* std::vector >) */ + /* */ + /* ********************************************************************* */ + + /* ********************************************************************* */ + /* LAPACK functions used. */ + /* ********************************************************************* */ + + extern "C" { + void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...); + void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...); + void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...); + void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...); + void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...); + void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...); + void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...); + void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...); + void sgeesx_(...); void dgeesx_(...); void cgeesx_(...); void zgeesx_(...); + void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...); + } + + /* ********************************************************************* */ + /* LU decomposition. */ + /* ********************************************************************* */ + +# define getrf_interface(lapack_name, base_type) inline \ + size_type lu_factor(dense_matrix &A, std::vector &ipvt){\ + GMMLAPACK_TRACE("getrf_interface"); \ + int m = int(mat_nrows(A)), n = int(mat_ncols(A)), lda(m), info(0); \ + if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info); \ + return size_type(info); \ + } + + getrf_interface(sgetrf_, BLAS_S) + getrf_interface(dgetrf_, BLAS_D) + getrf_interface(cgetrf_, BLAS_C) + getrf_interface(zgetrf_, BLAS_Z) + + /* ********************************************************************* */ + /* LU solve. 
*/ + /* ********************************************************************* */ + +# define getrs_interface(f_name, trans1, lapack_name, base_type) inline \ + void f_name(const dense_matrix &A, \ + const std::vector &ipvt, std::vector &x, \ + const std::vector &b) { \ + GMMLAPACK_TRACE("getrs_interface"); \ + int n = int(mat_nrows(A)), info, nrhs(1); \ + gmm::copy(b, x); trans1; \ + if (n) \ + lapack_name(&t, &n, &nrhs, &(A(0,0)),&n,&ipvt[0], &x[0], &n, &info); \ + } + +# define getrs_trans_n const char t = 'N' +# define getrs_trans_t const char t = 'T' + + getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S) + getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D) + getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C) + getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z) + getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S) + getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D) + getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C) + getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z) + + /* ********************************************************************* */ + /* LU inverse. */ + /* ********************************************************************* */ + +# define getri_interface(lapack_name, base_type) inline \ + void lu_inverse(const dense_matrix &LU, \ + std::vector &ipvt, const dense_matrix &A_) { \ + GMMLAPACK_TRACE("getri_interface"); \ + dense_matrix &A \ + = const_cast &>(A_); \ + int n = int(mat_nrows(A)), info, lwork(10000); base_type work[10000]; \ + if (n) { \ + std::copy(LU.begin(), LU.end(), A.begin()); \ + lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info); \ + } \ + } + + getri_interface(sgetri_, BLAS_S) + getri_interface(dgetri_, BLAS_D) + getri_interface(cgetri_, BLAS_C) + getri_interface(zgetri_, BLAS_Z) + + + /* ********************************************************************* */ + /* QR factorization. 
*/ + /* ********************************************************************* */ + +# define geqrf_interface(lapack_name1, base_type) inline \ + void qr_factor(dense_matrix &A){ \ + GMMLAPACK_TRACE("geqrf_interface"); \ + int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1); \ + base_type work1; \ + if (m && n) { \ + std::vector tau(n); \ + lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1 , &lwork, &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \ + GMM_ASSERT1(!info, "QR factorization failed"); \ + } \ + } + + geqrf_interface(sgeqrf_, BLAS_S) + geqrf_interface(dgeqrf_, BLAS_D) + // For complex values, housholder vectors are not the same as in + // gmm::lu_factor. Impossible to interface for the moment. + // geqrf_interface(cgeqrf_, BLAS_C) + // geqrf_interface(zgeqrf_, BLAS_Z) + +# define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline \ + void qr_factor(const dense_matrix &A, \ + dense_matrix &Q, dense_matrix &R) { \ + GMMLAPACK_TRACE("geqrf_interface2"); \ + int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1); \ + base_type work1; \ + if (m && n) { \ + std::copy(A.begin(), A.end(), Q.begin()); \ + std::vector tau(n); \ + lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1 , &lwork, &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \ + GMM_ASSERT1(!info, "QR factorization failed"); \ + base_type *p = &R(0,0), *q = &Q(0,0); \ + for (int j = 0; j < n; ++j, q += m-n) \ + for (int i = 0; i < n; ++i, ++p, ++q) \ + *p = (j < i) ? 
base_type(0) : *q; \ + lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \ + } \ + else gmm::clear(Q); \ + } + + geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S) + geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D) + geqrf_interface2(cgeqrf_, cungqr_, BLAS_C) + geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z) + + /* ********************************************************************* */ + /* QR algorithm for eigenvalues search. */ + /* ********************************************************************* */ + +# define gees_interface(lapack_name, base_type) \ + template inline void implicit_qr_algorithm( \ + const dense_matrix &A, const VECT &eigval_, \ + dense_matrix &Q, \ + double tol=gmm::default_tol(base_type()), bool compvect = true) { \ + GMMLAPACK_TRACE("gees_interface"); \ + typedef bool (*L_fp)(...); L_fp p = 0; \ + int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \ + if (!n) return; \ + dense_matrix H(n,n); gmm::copy(A, H); \ + char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \ + std::vector rwork(n), eigv1(n), eigv2(n); \ + lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \ + &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \ + &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\ + GMM_ASSERT1(!info, "QR algorithm failed"); \ + extract_eig(H, const_cast(eigval_), tol); \ + } + +# define gees_interface2(lapack_name, base_type) \ + template inline void implicit_qr_algorithm( \ + const dense_matrix &A, const VECT &eigval_, \ + dense_matrix &Q, \ + double tol=gmm::default_tol(base_type()), bool compvect = true) { \ + GMMLAPACK_TRACE("gees_interface2"); \ + typedef bool (*L_fp)(...); L_fp p = 0; \ + int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \ + if (!n) return; \ + dense_matrix H(n,n); gmm::copy(A, H); \ + char jobvs = (compvect ? 
'V' : 'N'), sort = 'N'; \ + std::vector rwork(n), eigvv(n*2); \ + lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \ + &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \ + &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\ + GMM_ASSERT1(!info, "QR algorithm failed"); \ + extract_eig(H, const_cast(eigval_), tol); \ + } + + gees_interface(sgees_, BLAS_S) + gees_interface(dgees_, BLAS_D) + gees_interface2(cgees_, BLAS_C) + gees_interface2(zgees_, BLAS_Z) + + +# define jobv_right char jobvl = 'N', jobvr = 'V'; +# define jobv_left char jobvl = 'V', jobvr = 'N'; + +# define geev_interface(lapack_name, base_type, side) \ + template inline void geev_interface_ ## side( \ + const dense_matrix &A, const VECT &eigval_, \ + dense_matrix &Q) { \ + GMMLAPACK_TRACE("geev_interface"); \ + int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \ + if (!n) return; \ + dense_matrix H(n,n); gmm::copy(A, H); \ + jobv_ ## side \ + std::vector eigvr(n), eigvi(n); \ + lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ + &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ + &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \ + GMM_ASSERT1(!info, "QR algorithm failed"); \ + gmm::copy(eigvr, gmm::real_part(const_cast(eigval_))); \ + gmm::copy(eigvi, gmm::imag_part(const_cast(eigval_))); \ + } + +# define geev_interface2(lapack_name, base_type, side) \ + template inline void geev_interface_ ## side( \ + const dense_matrix &A, const VECT &eigval_, \ + dense_matrix &Q) { \ + GMMLAPACK_TRACE("geev_interface"); \ + int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \ + if (!n) return; \ + dense_matrix H(n,n); gmm::copy(A, H); \ + jobv_ ## side \ + std::vector 
rwork(2*n); \ + std::vector eigv(n); \ + lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ + &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \ + lwork = int(gmm::real(work1)); \ + std::vector work(lwork); \ + lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ + &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \ + GMM_ASSERT1(!info, "QR algorithm failed"); \ + gmm::copy(eigv, const_cast(eigval_)); \ + } + + geev_interface(sgeev_, BLAS_S, right) + geev_interface(dgeev_, BLAS_D, right) + geev_interface2(cgeev_, BLAS_C, right) + geev_interface2(zgeev_, BLAS_Z, right) + + geev_interface(sgeev_, BLAS_S, left) + geev_interface(dgeev_, BLAS_D, left) + geev_interface2(cgeev_, BLAS_C, left) + geev_interface2(zgeev_, BLAS_Z, left) + + + /* ********************************************************************* */ + /* SCHUR algorithm: */ + /* A = Q*S*(Q^T), with Q orthogonal and S upper quasi-triangula */ + /* ********************************************************************* */ + +# define geesx_interface(lapack_name, base_type) inline \ + void schur(dense_matrix &A, \ + dense_matrix &S, \ + dense_matrix &Q) { \ + GMMLAPACK_TRACE("geesx_interface"); \ + int m = int(mat_nrows(A)), n = int(mat_ncols(A)); \ + GMM_ASSERT1(m == n, "Schur decomposition requires square matrix"); \ + char jobvs = 'V', sort = 'N', sense = 'N'; \ + bool select = false; \ + int lwork = 8*n, sdim = 0, liwork = 1; \ + std::vector work(lwork), wr(n), wi(n); \ + std::vector iwork(liwork); \ + std::vector bwork(1); \ + resize(S, n, n); copy(A, S); \ + resize(Q, n, n); \ + base_type rconde(0), rcondv(0); \ + int info = -1; \ + lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n, \ + &sdim, &wr[0], &wi[0], &Q(0,0), &n, &rconde, &rcondv, \ + &work[0], &lwork, &iwork[0], &liwork, &bwork[0], &info);\ + GMM_ASSERT1(!info, "SCHUR algorithm failed"); \ + } + +# define geesx_interface2(lapack_name, base_type) inline \ + void schur(dense_matrix &A, \ + dense_matrix &S, 
\ + dense_matrix &Q) { \ + GMMLAPACK_TRACE("geesx_interface"); \ + int m = int(mat_nrows(A)), n = int(mat_ncols(A)); \ + GMM_ASSERT1(m == n, "Schur decomposition requires square matrix"); \ + char jobvs = 'V', sort = 'N', sense = 'N'; \ + bool select = false; \ + int lwork = 8*n, sdim = 0; \ + std::vector rwork(lwork); \ + std::vector work(lwork), w(n); \ + std::vector bwork(1); \ + resize(S, n, n); copy(A, S); \ + resize(Q, n, n); \ + base_type rconde(0), rcondv(0); \ + int info = -1; \ + lapack_name(&jobvs, &sort, &select, &sense, &n, &S(0,0), &n, \ + &sdim, &w[0], &Q(0,0), &n, &rconde, &rcondv, \ + &work[0], &lwork, &rwork[0], &bwork[0], &info); \ + GMM_ASSERT1(!info, "SCHUR algorithm failed"); \ + } + + geesx_interface(sgeesx_, BLAS_S) + geesx_interface(dgeesx_, BLAS_D) + geesx_interface2(cgeesx_, BLAS_C) + geesx_interface2(zgeesx_, BLAS_Z) + + template + void schur(const MAT &A_, MAT &S, MAT &Q) { + MAT A(A_); + schur(A, S, Q); + } + + + /* ********************************************************************* */ + /* Interface to SVD. Does not correspond to a Gmm++ functionnality. */ + /* Author : Sebastian Nowozin */ + /* ********************************************************************* */ + +# define gesvd_interface(lapack_name, base_type) inline \ + void svd(dense_matrix &X, \ + dense_matrix &U, \ + dense_matrix &Vtransposed, \ + std::vector &sigma) { \ + GMMLAPACK_TRACE("gesvd_interface"); \ + int m = int(mat_nrows(X)), n = int(mat_ncols(X)); \ + int mn_min = m < n ? 
m : n; \ + sigma.resize(mn_min); \ + std::vector work(15 * mn_min); \ + int lwork = int(work.size()); \ + resize(U, m, m); \ + resize(Vtransposed, n, n); \ + char job = 'A'; \ + int info = -1; \ + lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \ + &m, &Vtransposed(0,0), &n, &work[0], &lwork, &info); \ + } + +# define cgesvd_interface(lapack_name, base_type, base_type2) inline \ + void svd(dense_matrix &X, \ + dense_matrix &U, \ + dense_matrix &Vtransposed, \ + std::vector &sigma) { \ + GMMLAPACK_TRACE("gesvd_interface"); \ + int m = int(mat_nrows(X)), n = int(mat_ncols(X)); \ + int mn_min = m < n ? m : n; \ + sigma.resize(mn_min); \ + std::vector work(15 * mn_min); \ + std::vector rwork(5 * mn_min); \ + int lwork = int(work.size()); \ + resize(U, m, m); \ + resize(Vtransposed, n, n); \ + char job = 'A'; \ + int info = -1; \ + lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \ + &m, &Vtransposed(0,0), &n, &work[0], &lwork, \ + &rwork[0], &info); \ + } + + gesvd_interface(sgesvd_, BLAS_S) + gesvd_interface(dgesvd_, BLAS_D) + cgesvd_interface(cgesvd_, BLAS_C, BLAS_S) + cgesvd_interface(zgesvd_, BLAS_Z, BLAS_D) + + template + void svd(const MAT &X_, MAT &U, MAT &Vtransposed, VEC &sigma) { + MAT X(X_); + svd(X, U, Vtransposed, sigma); + } + + + + +} + +#else + +namespace gmm +{ +template +void schur(const MAT &A_, MAT &S, MAT &Q) +{ + GMM_ASSERT1(false, "Use of function schur(A,S,Q) requires GetFEM++ " + "to be built with Lapack"); +} + +}// namespace gmm + +#endif // GMM_USES_LAPACK + +#endif // GMM_LAPACK_INTERFACE_H diff --git a/gmm/gmm_least_squares_cg.h b/gmm/gmm_least_squares_cg.h new file mode 100644 index 000000000..71e446658 --- /dev/null +++ b/gmm/gmm_least_squares_cg.h @@ -0,0 +1,96 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard, Benjamin Schleimer + + This file is a part of GetFEM++ + + GetFEM++ is free 
software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_leastsquares_cg.h + @author Benjamin Schleimer + @date January 23, 2007. + @brief Conjugate gradient least squares algorithm. 
+ Algorithm taken from http://www.stat.washington.edu/wxs/Stat538-w05/Notes/conjugate-gradients.pdf page 6 +*/ +#ifndef GMM_LEAST_SQUARES_CG_H__ +#define GMM_LEAST_SQUARES_CG_H__ + +#include "gmm_kernel.h" +#include "gmm_iter.h" +#include "gmm_conjugated.h" + +namespace gmm { + + template + void least_squares_cg(const Matrix& C, Vector1& x, const Vector2& y, + iteration &iter) { + + typedef typename temporary_dense_vector::vector_type temp_vector; + typedef typename linalg_traits::value_type T; + + T rho, rho_1(0), a; + temp_vector p(vect_size(x)), q(vect_size(y)), g(vect_size(x)); + temp_vector r(vect_size(y)); + iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(y, y)))); + + if (iter.get_rhsnorm() == 0.0) + clear(x); + else { + mult(C, scaled(x, T(-1)), y, r); + mult(conjugated(C), r, g); + rho = vect_hp(g, g); + copy(g, p); + + while (!iter.finished_vect(g)) { + + if (!iter.first()) { + rho = vect_hp(g, g); + add(g, scaled(p, rho / rho_1), p); + } + + mult(C, p, q); + + a = rho / vect_hp(q, q); + add(scaled(p, a), x); + add(scaled(q, -a), r); + // NOTE: how do we minimize the impact to the transpose? 
+ mult(conjugated(C), r, g); + rho_1 = rho; + + ++iter; + } + } + } + + template inline + void least_squares_cg(const Matrix& C, const Vector1& x, const Vector2& y, + iteration &iter) + { least_squares_cg(C, linalg_const_cast(x), y, iter); } +} + + +#endif // GMM_SOLVER_CG_H__ diff --git a/gmm/gmm_matrix.h b/gmm/gmm_matrix.h new file mode 100644 index 000000000..23fb9d267 --- /dev/null +++ b/gmm/gmm_matrix.h @@ -0,0 +1,1199 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/** @file gmm_matrix.h + @author Yves Renard + @date October 13, 2002. + @brief Declaration of some matrix types (gmm::dense_matrix, + gmm::row_matrix, gmm::col_matrix, gmm::csc_matrix, etc.) +*/ + +#ifndef GMM_MATRIX_H__ +#define GMM_MATRIX_H__ + +#include "gmm_vector.h" +#include "gmm_sub_vector.h" +#include "gmm_sub_matrix.h" +#include "gmm_transposed.h" + +namespace gmm +{ + + /* ******************************************************************** */ + /* */ + /* Identity matrix */ + /* */ + /* ******************************************************************** */ + + struct identity_matrix { + template void build_with(const MAT &) {} + }; + + template inline + void add(const identity_matrix&, M &v1) { + size_type n = std::min(gmm::mat_nrows(v1), gmm::mat_ncols(v1)); + for (size_type i = 0; i < n; ++i) + v1(i,i) += typename linalg_traits::value_type(1); + } + template inline + void add(const identity_matrix &II, const M &v1) + { add(II, linalg_const_cast(v1)); } + + template inline + void mult(const identity_matrix&, const V1 &v1, V2 &v2) + { copy(v1, v2); } + template inline + void mult(const identity_matrix&, const V1 &v1, const V2 &v2) + { copy(v1, v2); } + template inline + void mult(const identity_matrix&, const V1 &v1, const V2 &v2, V3 &v3) + { add(v1, v2, v3); } + template inline + void mult(const identity_matrix&, const V1 &v1, const V2 &v2, const V3 &v3) + { add(v1, v2, v3); } + template inline + void left_mult(const identity_matrix&, const V1 &v1, V2 &v2) + { copy(v1, v2); } + template inline + void left_mult(const identity_matrix&, const V1 &v1, const V2 &v2) + { copy(v1, v2); } + template inline + void right_mult(const identity_matrix&, const V1 &v1, V2 &v2) + { copy(v1, v2); } + template inline + void right_mult(const 
identity_matrix&, const V1 &v1, const V2 &v2) + { copy(v1, v2); } + template inline + void transposed_left_mult(const identity_matrix&, const V1 &v1, V2 &v2) + { copy(v1, v2); } + template inline + void transposed_left_mult(const identity_matrix&, const V1 &v1,const V2 &v2) + { copy(v1, v2); } + template inline + void transposed_right_mult(const identity_matrix&, const V1 &v1, V2 &v2) + { copy(v1, v2); } + template inline + void transposed_right_mult(const identity_matrix&,const V1 &v1,const V2 &v2) + { copy(v1, v2); } + template void copy_ident(const identity_matrix&, M &m) { + size_type i = 0, n = std::min(mat_nrows(m), mat_ncols(m)); + clear(m); + for (; i < n; ++i) m(i,i) = typename linalg_traits::value_type(1); + } + template inline void copy(const identity_matrix&, M &m) + { copy_ident(identity_matrix(), m); } + template inline void copy(const identity_matrix &, const M &m) + { copy_ident(identity_matrix(), linalg_const_cast(m)); } + template inline + typename linalg_traits::value_type + vect_sp(const identity_matrix &, const V1 &v1, const V2 &v2) + { return vect_sp(v1, v2); } + template inline + typename linalg_traits::value_type + vect_hp(const identity_matrix &, const V1 &v1, const V2 &v2) + { return vect_hp(v1, v2); } + template inline bool is_identity(const M&) { return false; } + inline bool is_identity(const identity_matrix&) { return true; } + + /* ******************************************************************** */ + /* */ + /* Row matrix */ + /* */ + /* ******************************************************************** */ + + template class row_matrix { + protected : + std::vector li; /* array of rows. 
*/ + size_type nc; + + public : + + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::value_type value_type; + + row_matrix(size_type r, size_type c) : li(r, V(c)), nc(c) {} + row_matrix(void) : nc(0) {} + reference operator ()(size_type l, size_type c) + { return li[l][c]; } + value_type operator ()(size_type l, size_type c) const + { return li[l][c]; } + + void clear_mat(); + void resize(size_type m, size_type n); + + typename std::vector::iterator begin(void) + { return li.begin(); } + typename std::vector::iterator end(void) + { return li.end(); } + typename std::vector::const_iterator begin(void) const + { return li.begin(); } + typename std::vector::const_iterator end(void) const + { return li.end(); } + + + V& row(size_type i) { return li[i]; } + const V& row(size_type i) const { return li[i]; } + V& operator[](size_type i) { return li[i]; } + const V& operator[](size_type i) const { return li[i]; } + + inline size_type nrows(void) const { return li.size(); } + inline size_type ncols(void) const { return nc; } + + void swap(row_matrix &m) { std::swap(li, m.li); std::swap(nc, m.nc); } + void swap_row(size_type i, size_type j) { std::swap(li[i], li[j]); } + }; + + template void row_matrix::resize(size_type m, size_type n) { + size_type nr = std::min(nrows(), m); + li.resize(m); + for (size_type i=nr; i < m; ++i) gmm::resize(li[i], n); + if (n != nc) { + for (size_type i=0; i < nr; ++i) gmm::resize(li[i], n); + nc = n; + } + } + + + template void row_matrix::clear_mat() + { for (size_type i=0; i < nrows(); ++i) clear(li[i]); } + + template struct linalg_traits > { + typedef row_matrix this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::storage_type storage_type; + typedef V & sub_row_type; + typedef const V & 
const_sub_row_type; + typedef typename std::vector::iterator row_iterator; + typedef typename std::vector::const_iterator const_row_iterator; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef row_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static row_iterator row_begin(this_type &m) { return m.begin(); } + static row_iterator row_end(this_type &m) { return m.end(); } + static const_row_iterator row_begin(const this_type &m) + { return m.begin(); } + static const_row_iterator row_end(const this_type &m) + { return m.end(); } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(*it); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(*it); } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type &m) { m.clear_mat(); } + static value_type access(const const_row_iterator &itrow, size_type j) + { return (*itrow)[j]; } + static reference access(const row_iterator &itrow, size_type j) + { return (*itrow)[j]; } + static void resize(this_type &v, size_type m, size_type n) + { v.resize(m, n); } + static void reshape(this_type &, size_type, size_type) + { GMM_ASSERT1(false, "Sorry, to be done"); } + }; + + template std::ostream &operator << + (std::ostream &o, const row_matrix& m) { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* */ + /* Column matrix */ + /* */ + /* ******************************************************************** */ + + template class col_matrix { + protected : + std::vector li; /* array of columns. 
*/ + size_type nr; + + public : + + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::value_type value_type; + + col_matrix(size_type r, size_type c) : li(c, V(r)), nr(r) { } + col_matrix(void) : nr(0) {} + reference operator ()(size_type l, size_type c) + { return li[c][l]; } + value_type operator ()(size_type l, size_type c) const + { return li[c][l]; } + + void clear_mat(); + void resize(size_type, size_type); + + V& col(size_type i) { return li[i]; } + const V& col(size_type i) const { return li[i]; } + V& operator[](size_type i) { return li[i]; } + const V& operator[](size_type i) const { return li[i]; } + + typename std::vector::iterator begin(void) + { return li.begin(); } + typename std::vector::iterator end(void) + { return li.end(); } + typename std::vector::const_iterator begin(void) const + { return li.begin(); } + typename std::vector::const_iterator end(void) const + { return li.end(); } + + inline size_type ncols(void) const { return li.size(); } + inline size_type nrows(void) const { return nr; } + + void swap(col_matrix &m) { std::swap(li, m.li); std::swap(nr, m.nr); } + void swap_col(size_type i, size_type j) { std::swap(li[i], li[j]); } + }; + + template void col_matrix::resize(size_type m, size_type n) { + size_type nc = std::min(ncols(), n); + li.resize(n); + for (size_type i=nc; i < n; ++i) gmm::resize(li[i], m); + if (m != nr) { + for (size_type i=0; i < nc; ++i) gmm::resize(li[i], m); + nr = m; + } + } + + template void col_matrix::clear_mat() + { for (size_type i=0; i < ncols(); ++i) clear(li[i]); } + + template struct linalg_traits > { + typedef col_matrix this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::storage_type storage_type; + typedef V &sub_col_type; + typedef const V 
&const_sub_col_type; + typedef typename std::vector::iterator col_iterator; + typedef typename std::vector::const_iterator const_col_iterator; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_row_iterator; + typedef col_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static col_iterator col_begin(this_type &m) { return m.begin(); } + static col_iterator col_end(this_type &m) { return m.end(); } + static const_col_iterator col_begin(const this_type &m) + { return m.begin(); } + static const_col_iterator col_end(const this_type &m) + { return m.end(); } + static const_sub_col_type col(const const_col_iterator &it) + { return *it; } + static sub_col_type col(const col_iterator &it) + { return *it; } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type &m) { m.clear_mat(); } + static value_type access(const const_col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static reference access(const col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static void resize(this_type &v, size_type m, size_type n) + { v.resize(m,n); } + static void reshape(this_type &, size_type, size_type) + { GMM_ASSERT1(false, "Sorry, to be done"); } + }; + + template std::ostream &operator << + (std::ostream &o, const col_matrix& m) { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* */ + /* Dense matrix */ + /* */ + /* ******************************************************************** */ + + template class dense_matrix : public std::vector { + public: + typedef typename std::vector::size_type size_type; + typedef typename 
std::vector::iterator iterator; + typedef typename std::vector::const_iterator const_iterator; + typedef typename std::vector::reference reference; + typedef typename std::vector::const_reference const_reference; + + protected: + size_type nbc, nbl; + + public: + + inline const_reference operator ()(size_type l, size_type c) const { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(this->begin() + c*nbl+l); + } + inline reference operator ()(size_type l, size_type c) { + GMM_ASSERT2(l < nbl && c < nbc, "out of range"); + return *(this->begin() + c*nbl+l); + } + + std::vector &as_vector(void) { return *this; } + const std::vector &as_vector(void) const { return *this; } + + void resize(size_type, size_type); + void base_resize(size_type, size_type); + void reshape(size_type, size_type); + + void fill(T a, T b = T(0)); + inline size_type nrows(void) const { return nbl; } + inline size_type ncols(void) const { return nbc; } + void swap(dense_matrix &m) + { std::vector::swap(m); std::swap(nbc, m.nbc); std::swap(nbl, m.nbl); } + + dense_matrix(size_type l, size_type c) + : std::vector(c*l), nbc(c), nbl(l) {} + dense_matrix(void) { nbl = nbc = 0; } + }; + + template void dense_matrix::reshape(size_type m,size_type n) { + GMM_ASSERT2(n*m == nbl*nbc, "dimensions mismatch"); + nbl = m; nbc = n; + } + + template void dense_matrix::base_resize(size_type m, + size_type n) + { std::vector::resize(n*m); nbl = m; nbc = n; } + + template void dense_matrix::resize(size_type m, size_type n) { + if (n*m > nbc*nbl) std::vector::resize(n*m); + if (m < nbl) { + for (size_type i = 1; i < std::min(nbc, n); ++i) + std::copy(this->begin()+i*nbl, this->begin()+(i*nbl+m), + this->begin()+i*m); + for (size_type i = std::min(nbc, n); i < n; ++i) + std::fill(this->begin()+(i*m), this->begin()+(i+1)*m, T(0)); + } + else if (m > nbl) { /* do nothing when the nb of rows does not change */ + for (size_type i = std::min(nbc, n); i > 1; --i) + std::copy(this->begin()+(i-1)*nbl, 
this->begin()+i*nbl, + this->begin()+(i-1)*m); + for (size_type i = 0; i < std::min(nbc, n); ++i) + std::fill(this->begin()+(i*m+nbl), this->begin()+(i+1)*m, T(0)); + } + if (n*m < nbc*nbl) std::vector::resize(n*m); + nbl = m; nbc = n; + } + + template void dense_matrix::fill(T a, T b) { + std::fill(this->begin(), this->end(), b); + size_type n = std::min(nbl, nbc); + if (a != b) for (size_type i = 0; i < n; ++i) (*this)(i,i) = a; + } + + template struct linalg_traits > { + typedef dense_matrix this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_matrix linalg_type; + typedef T value_type; + typedef T& reference; + typedef abstract_dense storage_type; + typedef tab_ref_reg_spaced_with_origin sub_row_type; + typedef tab_ref_reg_spaced_with_origin const_sub_row_type; + typedef dense_compressed_iterator row_iterator; + typedef dense_compressed_iterator const_row_iterator; + typedef tab_ref_with_origin sub_col_type; + typedef tab_ref_with_origin const_sub_col_type; + typedef dense_compressed_iterator col_iterator; + typedef dense_compressed_iterator const_col_iterator; + typedef col_and_row sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(*it, it.nrows, it.ncols, it.origin); } + static const_sub_col_type col(const const_col_iterator &it) + { return const_sub_col_type(*it, *it + it.nrows, it.origin); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(*it, it.nrows, it.ncols, it.origin); } + static sub_col_type col(const col_iterator &it) + { return sub_col_type(*it, *it + it.nrows, it.origin); } + static row_iterator row_begin(this_type &m) + { return row_iterator(m.begin(), m.size() ? 
1 : 0, m.nrows(), m.ncols(), 0, &m); } + static row_iterator row_end(this_type &m) + { return row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), m.nrows(), &m); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), 0, &m); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m.begin(), m.size() ? 1 : 0, m.nrows(), m.ncols(), m.nrows(), &m); } + static col_iterator col_begin(this_type &m) + { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); } + static col_iterator col_end(this_type &m) + { return col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), m.ncols(), &m); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.begin(), m.nrows(), m.nrows(), m.ncols(), 0, &m); } + static const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m.begin(),m.nrows(),m.nrows(),m.ncols(),m.ncols(), &m); } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type &m) { m.fill(value_type(0)); } + static value_type access(const const_col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static reference access(const col_iterator &itcol, size_type j) + { return (*itcol)[j]; } + static void resize(this_type &v, size_type m, size_type n) + { v.resize(m,n); } + static void reshape(this_type &v, size_type m, size_type n) + { v.reshape(m, n); } + }; + + template std::ostream &operator << + (std::ostream &o, const dense_matrix& m) { gmm::write(o,m); return o; } + + + /* ******************************************************************** */ + /* */ + /* Read only compressed sparse column matrix */ + /* */ + /* ******************************************************************** */ + + template + struct csc_matrix { + typedef unsigned int IND_TYPE; + + std::vector pr; 
+ std::vector ir; + std::vector jc; + size_type nc, nr; + + typedef T value_type; + typedef T& access_type; + + template void init_with_good_format(const Matrix &B); + template void init_with(const Matrix &A); + void init_with(const col_matrix > &B) + { init_with_good_format(B); } + void init_with(const col_matrix > &B) + { init_with_good_format(B); } + template + void init_with(const csc_matrix_ref& B) + { init_with_good_format(B); } + template + void init_with(const csc_matrix& B) + { init_with_good_format(B); } + + void init_with_identity(size_type n); + + csc_matrix(void) : nc(0), nr(0) {} + csc_matrix(size_type nnr, size_type nnc); + + size_type nrows(void) const { return nr; } + size_type ncols(void) const { return nc; } + void swap(csc_matrix &m) { + std::swap(pr, m.pr); + std::swap(ir, m.ir); std::swap(jc, m.jc); + std::swap(nc, m.nc); std::swap(nr, m.nr); + } + value_type operator()(size_type i, size_type j) const + { return mat_col(*this, j)[i]; } + }; + + template template + void csc_matrix::init_with_good_format(const Matrix &B) { + typedef typename linalg_traits::const_sub_col_type col_type; + nc = mat_ncols(B); nr = mat_nrows(B); + jc.resize(nc+1); + jc[0] = shift; + for (size_type j = 0; j < nc; ++j) { + jc[j+1] = IND_TYPE(jc[j] + nnz(mat_const_col(B, j))); + } + pr.resize(jc[nc]); + ir.resize(jc[nc]); + for (size_type j = 0; j < nc; ++j) { + col_type col = mat_const_col(B, j); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(col), ite = vect_const_end(col); + for (size_type k = 0; it != ite; ++it, ++k) { + pr[jc[j]-shift+k] = *it; + ir[jc[j]-shift+k] = IND_TYPE(it.index() + shift); + } + } + } + + template template + void csc_matrix::init_with(const Matrix &A) { + col_matrix > B(mat_nrows(A), mat_ncols(A)); + copy(A, B); + init_with_good_format(B); + } + + template + void csc_matrix::init_with_identity(size_type n) { + nc = nr = n; + pr.resize(nc); ir.resize(nc); jc.resize(nc+1); + for (size_type j = 0; j < nc; ++j) + { ir[j] = 
jc[j] = shift + j; pr[j] = T(1); } + jc[nc] = shift + nc; + } + + template + csc_matrix::csc_matrix(size_type nnr, size_type nnc) + : nc(nnc), nr(nnr) { + pr.resize(1); ir.resize(1); jc.resize(nc+1); + for (size_type j = 0; j <= nc; ++j) jc[j] = shift; + } + + template + struct linalg_traits > { + typedef csc_matrix this_type; + typedef typename this_type::IND_TYPE IND_TYPE; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef T value_type; + typedef T origin_type; + typedef T reference; + typedef abstract_sparse storage_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type sub_col_type; + typedef cs_vector_ref + const_sub_col_type; + typedef sparse_compressed_iterator + const_col_iterator; + typedef abstract_null_type col_iterator; + typedef col_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(&m.pr[0],&m.ir[0],&m.jc[0], m.nr, &m.pr[0]); } + static const_col_iterator col_end(const this_type &m) { + return const_col_iterator(&m.pr[0],&m.ir[0],&m.jc[0]+m.nc, + m.nr,&m.pr[0]); + } + static const_sub_col_type col(const const_col_iterator &it) { + return const_sub_col_type(it.pr + *(it.jc) - shift, + it.ir + *(it.jc) - shift, + *(it.jc + 1) - *(it.jc), it.n); + } + static const origin_type* origin(const this_type &m) { return &m.pr[0]; } + static void do_clear(this_type &m) { m.do_clear(); } + static value_type access(const const_col_iterator &itcol, size_type j) + { return col(itcol)[j]; } + }; + + template + std::ostream &operator << + (std::ostream &o, const csc_matrix& m) + { gmm::write(o,m); return o; } + + template + inline void copy(const 
identity_matrix &, csc_matrix& M) + { M.init_with_identity(mat_nrows(M)); } + + template + inline void copy(const Matrix &A, csc_matrix& M) + { M.init_with(A); } + + /* ******************************************************************** */ + /* */ + /* Read only compressed sparse row matrix */ + /* */ + /* ******************************************************************** */ + + template + struct csr_matrix { + + typedef unsigned int IND_TYPE; + + std::vector pr; // values. + std::vector ir; // col indices. + std::vector jc; // row repartition on pr and ir. + size_type nc, nr; + + typedef T value_type; + typedef T& access_type; + + + template void init_with_good_format(const Matrix &B); + void init_with(const row_matrix > &B) + { init_with_good_format(B); } + void init_with(const row_matrix > &B) + { init_with_good_format(B); } + template + void init_with(const csr_matrix_ref& B) + { init_with_good_format(B); } + template + void init_with(const csr_matrix& B) + { init_with_good_format(B); } + + template void init_with(const Matrix &A); + void init_with_identity(size_type n); + + csr_matrix(void) : nc(0), nr(0) {} + csr_matrix(size_type nnr, size_type nnc); + + size_type nrows(void) const { return nr; } + size_type ncols(void) const { return nc; } + void swap(csr_matrix &m) { + std::swap(pr, m.pr); + std::swap(ir,m.ir); std::swap(jc, m.jc); + std::swap(nc, m.nc); std::swap(nr,m.nr); + } + + value_type operator()(size_type i, size_type j) const + { return mat_row(*this, i)[j]; } + }; + + template template + void csr_matrix::init_with_good_format(const Matrix &B) { + typedef typename linalg_traits::const_sub_row_type row_type; + nc = mat_ncols(B); nr = mat_nrows(B); + jc.resize(nr+1); + jc[0] = shift; + for (size_type j = 0; j < nr; ++j) { + jc[j+1] = IND_TYPE(jc[j] + nnz(mat_const_row(B, j))); + } + pr.resize(jc[nr]); + ir.resize(jc[nr]); + for (size_type j = 0; j < nr; ++j) { + row_type row = mat_const_row(B, j); + typename linalg_traits::t>::const_iterator + it 
= vect_const_begin(row), ite = vect_const_end(row); + for (size_type k = 0; it != ite; ++it, ++k) { + pr[jc[j]-shift+k] = *it; + ir[jc[j]-shift+k] = IND_TYPE(it.index()+shift); + } + } + } + + template template + void csr_matrix::init_with(const Matrix &A) { + row_matrix > B(mat_nrows(A), mat_ncols(A)); + copy(A, B); + init_with_good_format(B); + } + + template + void csr_matrix::init_with_identity(size_type n) { + nc = nr = n; + pr.resize(nr); ir.resize(nr); jc.resize(nr+1); + for (size_type j = 0; j < nr; ++j) + { ir[j] = jc[j] = shift + j; pr[j] = T(1); } + jc[nr] = shift + nr; + } + + template + csr_matrix::csr_matrix(size_type nnr, size_type nnc) + : nc(nnc), nr(nnr) { + pr.resize(1); ir.resize(1); jc.resize(nr+1); + for (size_type j = 0; j < nr; ++j) jc[j] = shift; + jc[nr] = shift; + } + + + template + struct linalg_traits > { + typedef csr_matrix this_type; + typedef typename this_type::IND_TYPE IND_TYPE; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef T value_type; + typedef T origin_type; + typedef T reference; + typedef abstract_sparse storage_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type sub_row_type; + typedef cs_vector_ref + const_sub_row_type; + typedef sparse_compressed_iterator + const_row_iterator; + typedef abstract_null_type row_iterator; + typedef row_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(&m.pr[0], &m.ir[0], &m.jc[0], m.nc, &m.pr[0]); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(&m.pr[0], &m.ir[0], &m.jc[0] + m.nr, m.nc, &m.pr[0]); } + static 
const_sub_row_type row(const const_row_iterator &it) { + return const_sub_row_type(it.pr + *(it.jc) - shift, + it.ir + *(it.jc) - shift, + *(it.jc + 1) - *(it.jc), it.n); + } + static const origin_type* origin(const this_type &m) { return &m.pr[0]; } + static void do_clear(this_type &m) { m.do_clear(); } + static value_type access(const const_row_iterator &itrow, size_type j) + { return row(itrow)[j]; } + }; + + template + std::ostream &operator << + (std::ostream &o, const csr_matrix& m) + { gmm::write(o,m); return o; } + + template + inline void copy(const identity_matrix &, csr_matrix& M) + { M.init_with_identity(mat_nrows(M)); } + + template + inline void copy(const Matrix &A, csr_matrix& M) + { M.init_with(A); } + + /* ******************************************************************** */ + /* */ + /* Block matrix */ + /* */ + /* ******************************************************************** */ + + template class block_matrix { + protected : + std::vector blocks; + size_type nrowblocks_; + size_type ncolblocks_; + std::vector introw, intcol; + + public : + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + + size_type nrows(void) const { return introw[nrowblocks_-1].max; } + size_type ncols(void) const { return intcol[ncolblocks_-1].max; } + size_type nrowblocks(void) const { return nrowblocks_; } + size_type ncolblocks(void) const { return ncolblocks_; } + const sub_interval &subrowinterval(size_type i) const { return introw[i]; } + const sub_interval &subcolinterval(size_type i) const { return intcol[i]; } + const MAT &block(size_type i, size_type j) const + { return blocks[j*ncolblocks_+i]; } + MAT &block(size_type i, size_type j) + { return blocks[j*ncolblocks_+i]; } + void do_clear(void); + // to be done : read and write access to a component + value_type operator() (size_type i, size_type j) const { + size_type k, l; + for (k = 0; k < nrowblocks_; ++k) + if (i >= introw[k].min && i < 
introw[k].max) break; + for (l = 0; l < nrowblocks_; ++l) + if (j >= introw[l].min && j < introw[l].max) break; + return (block(k, l))(i - introw[k].min, j - introw[l].min); + } + reference operator() (size_type i, size_type j) { + size_type k, l; + for (k = 0; k < nrowblocks_; ++k) + if (i >= introw[k].min && i < introw[k].max) break; + for (l = 0; l < nrowblocks_; ++l) + if (j >= introw[l].min && j < introw[l].max) break; + return (block(k, l))(i - introw[k].min, j - introw[l].min); + } + + template void resize(const CONT &c1, const CONT &c2); + template block_matrix(const CONT &c1, const CONT &c2) + { resize(c1, c2); } + block_matrix(void) {} + + }; + + template struct linalg_traits > { + typedef block_matrix this_type; + typedef linalg_false is_reference; + typedef abstract_matrix linalg_type; + typedef this_type origin_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_row_type; // to be done ... + typedef abstract_null_type const_sub_row_type; // to be done ... + typedef abstract_null_type row_iterator; // to be done ... + typedef abstract_null_type const_row_iterator; // to be done ... + typedef abstract_null_type sub_col_type; // to be done ... + typedef abstract_null_type const_sub_col_type; // to be done ... + typedef abstract_null_type col_iterator; // to be done ... + typedef abstract_null_type const_col_iterator; // to be done ... + typedef abstract_null_type sub_orientation; // to be done ... + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static origin_type* origin(this_type &m) { return &m; } + static const origin_type* origin(const this_type &m) { return &m; } + static void do_clear(this_type &m) { m.do_clear(); } + // access to be done ... 
+ static void resize(this_type &, size_type , size_type) + { GMM_ASSERT1(false, "Sorry, to be done"); } + static void reshape(this_type &, size_type , size_type) + { GMM_ASSERT1(false, "Sorry, to be done"); } + }; + + template void block_matrix::do_clear(void) { + for (size_type j = 0, l = 0; j < ncolblocks_; ++j) + for (size_type i = 0, k = 0; i < nrowblocks_; ++i) + clear(block(i,j)); + } + + template template + void block_matrix::resize(const CONT &c1, const CONT &c2) { + nrowblocks_ = c1.size(); ncolblocks_ = c2.size(); + blocks.resize(nrowblocks_ * ncolblocks_); + intcol.resize(ncolblocks_); + introw.resize(nrowblocks_); + for (size_type j = 0, l = 0; j < ncolblocks_; ++j) { + intcol[j] = sub_interval(l, c2[j]); l += c2[j]; + for (size_type i = 0, k = 0; i < nrowblocks_; ++i) { + if (j == 0) { introw[i] = sub_interval(k, c1[i]); k += c1[i]; } + block(i, j) = MAT(c1[i], c2[j]); + } + } + } + + template + void copy(const block_matrix &m1, M2 &m2) { + for (size_type j = 0; j < m1.ncolblocks(); ++j) + for (size_type i = 0; i < m1.nrowblocks(); ++i) + copy(m1.block(i,j), sub_matrix(m2, m1.subrowinterval(i), + m1.subcolinterval(j))); + } + + template + void copy(const block_matrix &m1, const M2 &m2) + { copy(m1, linalg_const_cast(m2)); } + + + template + void mult(const block_matrix &m, const V1 &v1, V2 &v2) { + clear(v2); + typename sub_vector_type::vector_type sv; + for (size_type i = 0; i < m.nrowblocks() ; ++i) + for (size_type j = 0; j < m.ncolblocks() ; ++j) { + sv = sub_vector(v2, m.subrowinterval(i)); + mult(m.block(i,j), + sub_vector(v1, m.subcolinterval(j)), sv, sv); + } + } + + template + void mult(const block_matrix &m, const V1 &v1, const V2 &v2, V3 &v3) { + typename sub_vector_type::vector_type sv; + for (size_type i = 0; i < m.nrowblocks() ; ++i) + for (size_type j = 0; j < m.ncolblocks() ; ++j) { + sv = sub_vector(v3, m.subrowinterval(i)); + if (j == 0) + mult(m.block(i,j), + sub_vector(v1, m.subcolinterval(j)), + sub_vector(v2, m.subrowinterval(i)), 
sv); + else + mult(m.block(i,j), + sub_vector(v1, m.subcolinterval(j)), sv, sv); + } + + } + + template + void mult(const block_matrix &m, const V1 &v1, const V2 &v2) + { mult(m, v1, linalg_const_cast(v2)); } + + template + void mult(const block_matrix &m, const V1 &v1, const V2 &v2, + const V3 &v3) + { mult_const(m, v1, v2, linalg_const_cast(v3)); } + +} + /* ******************************************************************** */ + /* */ + /* Distributed matrices */ + /* */ + /* ******************************************************************** */ + +#ifdef GMM_USES_MPI +# include + +namespace gmm { + + + + template inline MPI_Datatype mpi_type(T) + { GMM_ASSERT1(false, "Sorry unsupported type"); return MPI_FLOAT; } + inline MPI_Datatype mpi_type(double) { return MPI_DOUBLE; } + inline MPI_Datatype mpi_type(float) { return MPI_FLOAT; } + inline MPI_Datatype mpi_type(long double) { return MPI_LONG_DOUBLE; } +#ifndef LAM_MPI + inline MPI_Datatype mpi_type(std::complex) { return MPI_COMPLEX; } + inline MPI_Datatype mpi_type(std::complex) { return MPI_DOUBLE_COMPLEX; } +#endif + inline MPI_Datatype mpi_type(int) { return MPI_INT; } + inline MPI_Datatype mpi_type(unsigned int) { return MPI_UNSIGNED; } + inline MPI_Datatype mpi_type(long) { return MPI_LONG; } + inline MPI_Datatype mpi_type(unsigned long) { return MPI_UNSIGNED_LONG; } + + template struct mpi_distributed_matrix { + MAT M; + + mpi_distributed_matrix(size_type n, size_type m) : M(n, m) {} + mpi_distributed_matrix() {} + + const MAT &local_matrix(void) const { return M; } + MAT &local_matrix(void) { return M; } + }; + + template inline MAT &eff_matrix(MAT &m) { return m; } + template inline + const MAT &eff_matrix(const MAT &m) { return m; } + template inline + MAT &eff_matrix(mpi_distributed_matrix &m) { return m.M; } + template inline + const MAT &eff_matrix(const mpi_distributed_matrix &m) { return m.M; } + + + template + inline void copy(const mpi_distributed_matrix &m1, + mpi_distributed_matrix &m2) + 
{ copy(eff_matrix(m1), eff_matrix(m2)); } + template + inline void copy(const mpi_distributed_matrix &m1, + const mpi_distributed_matrix &m2) + { copy(m1.M, m2.M); } + + template + inline void copy(const mpi_distributed_matrix &m1, MAT2 &m2) + { copy(m1.M, m2); } + template + inline void copy(const mpi_distributed_matrix &m1, const MAT2 &m2) + { copy(m1.M, m2); } + + + template inline + typename strongest_value_type3::value_type + vect_sp(const mpi_distributed_matrix &ps, const V1 &v1, + const V2 &v2) { + typedef typename strongest_value_type3::value_type T; + T res = vect_sp(ps.M, v1, v2), rest; + MPI_Allreduce(&res, &rest, 1, mpi_type(T()), MPI_SUM,MPI_COMM_WORLD); + return rest; + } + + template + inline void mult_add(const mpi_distributed_matrix &m, const V1 &v1, + V2 &v2) { + typedef typename linalg_traits::value_type T; + std::vector v3(vect_size(v2)), v4(vect_size(v2)); + static double tmult_tot = 0.0; + static double tmult_tot2 = 0.0; + double t_ref = MPI_Wtime(); + gmm::mult(m.M, v1, v3); + if (is_sparse(v2)) GMM_WARNING2("Using a plain temporary, here."); + double t_ref2 = MPI_Wtime(); + MPI_Allreduce(&(v3[0]), &(v4[0]),gmm::vect_size(v2), mpi_type(T()), + MPI_SUM,MPI_COMM_WORLD); + tmult_tot2 = MPI_Wtime()-t_ref2; + cout << "reduce mult mpi = " << tmult_tot2 << endl; + gmm::add(v4, v2); + tmult_tot = MPI_Wtime()-t_ref; + cout << "tmult mpi = " << tmult_tot << endl; + } + + template + void mult_add(const mpi_distributed_matrix &m, const V1 &v1, + const V2 &v2_) + { mult_add(m, v1, const_cast(v2_)); } + + template + inline void mult(const mpi_distributed_matrix &m, const V1 &v1, + const V2 &v2_) + { V2 &v2 = const_cast(v2_); clear(v2); mult_add(m, v1, v2); } + + template + inline void mult(const mpi_distributed_matrix &m, const V1 &v1, + V2 &v2) + { clear(v2); mult_add(m, v1, v2); } + + template + inline void mult(const mpi_distributed_matrix &m, const V1 &v1, + const V2 &v2, const V3 &v3_) + { V3 &v3 = const_cast(v3_); gmm::copy(v2, v3); mult_add(m, v1, 
v3); } + + template + inline void mult(const mpi_distributed_matrix &m, const V1 &v1, + const V2 &v2, V3 &v3) + { gmm::copy(v2, v3); mult_add(m, v1, v3); } + + + template inline + size_type mat_nrows(const mpi_distributed_matrix &M) + { return mat_nrows(M.M); } + template inline + size_type mat_ncols(const mpi_distributed_matrix &M) + { return mat_ncols(M.M); } /* column count of the wrapped local matrix M.M */ + template inline + void resize(mpi_distributed_matrix &M, size_type m, size_type n) + { resize(M.M, m, n); } + template inline void clear(mpi_distributed_matrix &M) + { clear(M.M); } + + + // For compute reduced system + template inline + void mult(const MAT1 &M1, const mpi_distributed_matrix &M2, + mpi_distributed_matrix &M3) + { mult(M1, M2.M, M3.M); } + template inline + void mult(const mpi_distributed_matrix &M2, + const MAT1 &M1, mpi_distributed_matrix &M3) + { mult(M2.M, M1, M3.M); } + template inline + void mult(const MAT1 &M1, const mpi_distributed_matrix &M2, + MAT3 &M3) + { mult(M1, M2.M, M3); } + template inline + void mult(const MAT1 &M1, const mpi_distributed_matrix &M2, + const MAT3 &M3) + { mult(M1, M2.M, M3); } + + template + struct sub_matrix_type *, SUBI1, SUBI2> + { typedef abstract_null_type matrix_type; }; + + template + struct sub_matrix_type *, SUBI1, SUBI2> + { typedef abstract_null_type matrix_type; }; + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + M *>::return_type + sub_matrix(mpi_distributed_matrix &m, const SUBI1 &si1, const SUBI2 &si2) + { return sub_matrix(m.M, si1, si2); } + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + const MAT *>::return_type + sub_matrix(const mpi_distributed_matrix &m, const SUBI1 &si1, + const SUBI2 &si2) + { return sub_matrix(m.M, si1, si2); } + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + M *>::return_type + sub_matrix(mpi_distributed_matrix &m, const SUBI1 &si1) + { return 
sub_matrix(m.M, si1, si1); } + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + const M *>::return_type + sub_matrix(const mpi_distributed_matrix &m, const SUBI1 &si1) + { return sub_matrix(m.M, si1, si1); } + + + template struct transposed_return *> + { typedef abstract_null_type return_type; }; + template struct transposed_return *> + { typedef abstract_null_type return_type; }; + + template inline typename transposed_return::return_type + transposed(const mpi_distributed_matrix &l) + { return transposed(l.M); } + + template inline typename transposed_return::return_type + transposed(mpi_distributed_matrix &l) + { return transposed(l.M); } + + + template + struct linalg_traits > { + typedef mpi_distributed_matrix this_type; + typedef MAT origin_type; + typedef linalg_false is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type sub_orientation; + typedef abstract_null_type index_sorted; + static size_type nrows(const this_type &m) { return mat_nrows(m.M); } /* forward to the local matrix; an unqualified nrows() would name this member itself */ + static size_type ncols(const this_type &m) { return mat_ncols(m.M); } /* same forwarding for the column count */ + static void do_clear(this_type &m) { clear(m.M); } + }; + +} + + +#endif // GMM_USES_MPI + +namespace std { + template + void swap(gmm::row_matrix &m1, gmm::row_matrix &m2) + { m1.swap(m2); } + template + void swap(gmm::col_matrix &m1, gmm::col_matrix &m2) + { m1.swap(m2); } + template + void swap(gmm::dense_matrix &m1, gmm::dense_matrix 
&m2) + { m1.swap(m2); } + template void + swap(gmm::csc_matrix &m1, gmm::csc_matrix &m2) + { m1.swap(m2); } + template void + swap(gmm::csr_matrix &m1, gmm::csr_matrix &m2) + { m1.swap(m2); } +} + + + + +#endif /* GMM_MATRIX_H__ */ diff --git a/gmm/gmm_modified_gram_schmidt.h b/gmm/gmm_modified_gram_schmidt.h new file mode 100644 index 000000000..34d54ae3f --- /dev/null +++ b/gmm/gmm_modified_gram_schmidt.h @@ -0,0 +1,127 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +//=========================================================================== + +/**@file gmm_modified_gram_schmidt.h + @author Andrew Lumsdaine , Lie-Quan Lee + @date October 13, 2002. + @brief Modified Gram-Schmidt orthogonalization +*/ + +#ifndef GMM_MODIFIED_GRAM_SCHMIDT_H +#define GMM_MODIFIED_GRAM_SCHMIDT_H + +#include "gmm_kernel.h" + +namespace gmm { + + template + class modified_gram_schmidt { + protected: + typedef dense_matrix MAT; + MAT M; + + public: + + modified_gram_schmidt(int restart, size_t s) : M(s, restart+1) {} + + typename linalg_traits::const_sub_col_type + operator[](size_t i) const { return mat_const_col(M, i); } + + typename linalg_traits::sub_col_type + operator[](size_t i) { return mat_col(M, i); } + + inline size_type nrows(void) const { return M.nrows(); } + inline size_type ncols(void) const { return M.ncols(); } + MAT &mat(void) { return M; } + const MAT &mat(void) const { return M; } + + }; + + template inline + void orthogonalize(modified_gram_schmidt& V, const VecHi& Hi_, size_t i) { + VecHi& Hi = const_cast(Hi_); + + for (size_t k = 0; k <= i; k++) { + Hi[k] = gmm::vect_hp(V[i+1], V[k]); + gmm::add(gmm::scaled(V[k], -Hi[k]), V[i+1]); + } + } + + template + void orthogonalize_with_refinment(modified_gram_schmidt& V, + const VecHi& Hi_, size_t i) { + VecHi& Hi = const_cast(Hi_); + orthogonalize(V, Hi_, i); + + sub_interval SUBI(0, V.nrows()), SUBJ(0, i+1); + std::vector corr(i+1); + gmm::mult(conjugated(sub_matrix(V.mat(), SUBI, SUBJ)), + V[i+1], corr); + gmm::mult(sub_matrix(V.mat(), SUBI, SUBJ), + scaled(corr, T(-1)), V[i+1],V[i+1]); + gmm::add(corr, sub_vector(Hi, SUBJ)); + } + + template + void combine(modified_gram_schmidt& V, const VecS& s, VecX& x, size_t i) + { for (size_t j = 0; j < i; ++j) gmm::add(gmm::scaled(V[j], s[j]), x); } +} + +#endif diff --git a/gmm/gmm_opt.h b/gmm/gmm_opt.h new file mode 100644 index 000000000..e73af4153 --- /dev/null +++ b/gmm/gmm_opt.h @@ -0,0 +1,128 @@ +/* -*- c++ -*- (enables emacs c++ 
mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_opt.h + @author Yves Renard + @date July 9, 2003. + @brief Optimization for some small cases (inversion of 2x2 matrices etc.) 
+*/ +#ifndef GMM_OPT_H__ +#define GMM_OPT_H__ + +namespace gmm { + + /* ********************************************************************* */ + /* Optimized determinant and inverse for small matrices (2x2 and 3x3) */ + /* with dense_matrix. */ + /* ********************************************************************* */ + + template T lu_det(const dense_matrix &A) { + size_type n(mat_nrows(A)); + if (n) { + const T *p = &(A(0,0)); + switch (n) { + case 1 : return (*p); + case 2 : return (*p) * (*(p+3)) - (*(p+1)) * (*(p+2)); +// Not stable for nearly singular matrices +// case 3 : return (*p) * ((*(p+4)) * (*(p+8)) - (*(p+5)) * (*(p+7))) +// - (*(p+1)) * ((*(p+3)) * (*(p+8)) - (*(p+5)) * (*(p+6))) +// + (*(p+2)) * ((*(p+3)) * (*(p+7)) - (*(p+4)) * (*(p+6))); + default : + { + dense_matrix B(mat_nrows(A), mat_ncols(A)); + std::vector ipvt(mat_nrows(A)); + gmm::copy(A, B); + lu_factor(B, ipvt); + return lu_det(B, ipvt); + } + } + } + return T(1); + } + + + template T lu_inverse(const dense_matrix &A_, bool doassert = true) { + dense_matrix& A = const_cast &>(A_); + size_type N = mat_nrows(A); + T det(1); + if (N) { + T *p = &(A(0,0)); + if (N <= 2) { + switch (N) { + case 1 : { + det = *p; + if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix"); + if (det == T(0)) break; + *p = T(1) / det; + } break; + case 2 : { + det = (*p) * (*(p+3)) - (*(p+1)) * (*(p+2)); + if (doassert) GMM_ASSERT1(det!=T(0), "non invertible matrix"); + if (det == T(0)) break; + std::swap(*p, *(p+3)); + *p++ /= det; *p++ /= -det; *p++ /= -det; *p++ /= det; + } break; +// case 3 : { // not stable for nearly singular matrices +// T a, b, c, d, e, f, g, h, i; +// a = (*(p+4)) * (*(p+8)) - (*(p+5)) * (*(p+7)); +// b = - (*(p+1)) * (*(p+8)) + (*(p+2)) * (*(p+7)); +// c = (*(p+1)) * (*(p+5)) - (*(p+2)) * (*(p+4)); +// d = - (*(p+3)) * (*(p+8)) + (*(p+5)) * (*(p+6)); +// e = (*(p+0)) * (*(p+8)) - (*(p+2)) * (*(p+6)); +// f = - (*(p+0)) * (*(p+5)) + (*(p+2)) * (*(p+3)); +// g = (*(p+3)) * 
(*(p+7)) - (*(p+4)) * (*(p+6)); +// h = - (*(p+0)) * (*(p+7)) + (*(p+1)) * (*(p+6)); +// i = (*(p+0)) * (*(p+4)) - (*(p+1)) * (*(p+3)); +// det = (*p) * a + (*(p+1)) * d + (*(p+2)) * g; +// GMM_ASSERT1(det!=T(0), "non invertible matrix"); +// *p++ = a / det; *p++ = b / det; *p++ = c / det; +// *p++ = d / det; *p++ = e / det; *p++ = f / det; +// *p++ = g / det; *p++ = h / det; *p++ = i / det; +// } break; + } + } + else { + dense_matrix B(mat_nrows(A), mat_ncols(A)); + std::vector ipvt(mat_nrows(A)); + gmm::copy(A, B); + size_type info = lu_factor(B, ipvt); + if (doassert) GMM_ASSERT1(!info, "non invertible matrix"); /* honour doassert like the 1x1 and 2x2 cases */ + if (!info) lu_inverse(B, ipvt, A); /* factorization failed: leave A untouched, as the small cases do */ + return lu_det(B, ipvt); + } + } + return det; + } + + +} + +#endif // GMM_OPT_H__ diff --git a/gmm/gmm_precond.h b/gmm/gmm_precond.h new file mode 100644 index 000000000..fca4f35d4 --- /dev/null +++ b/gmm/gmm_precond.h @@ -0,0 +1,65 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2004-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ +#ifndef GMM_PRECOND_H +#define GMM_PRECOND_H + +#include "gmm_kernel.h" + +/** @file gmm_precond.h + @author Yves Renard + @date March 29, 2004. + @brief gmm preconditioners. + */ + +/* Preconditioner concept : */ +/* */ +/* A the matrix, P the preconditioner PA well conditioned. */ +/* PRECOND preconditioner type. */ +/* mult(P, v, w) : w <- P v */ +/* transposed_mult(P, v, w) : w <- transposed(P) v */ +/* left_mult(P, v, w) : see qmr solver */ +/* right_mult(P, v, w) : see qmr solver */ +/* transposed_left_mult(P, v, w) : see qmr solver */ +/* transposed_right_mult(P, v, w) : see qmr solver */ +/* */ +/* PRECOND P() : empty preconditioner. */ +/* PRECOND P(A, ...) : preconditioner for the matrix A, with optional */ +/* parameters */ +/* PRECOND(...) : empty preconditioner with parameters set. */ +/* P.build_with(A) : build a preconditioner for A. 
*/ +/* */ +/* *********************************************************************** */ + + + + +#endif + diff --git a/gmm/gmm_precond_diagonal.h b/gmm/gmm_precond_diagonal.h new file mode 100644 index 000000000..19d46095b --- /dev/null +++ b/gmm/gmm_precond_diagonal.h @@ -0,0 +1,132 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. 
+ +===========================================================================*/ + +/**@file gmm_precond_diagonal.h + @author Yves Renard + @date June 5, 2003. + @brief Diagonal matrix preconditioner. +*/ + +#ifndef GMM_PRECOND_DIAGONAL_H +#define GMM_PRECOND_DIAGONAL_H + +#include "gmm_precond.h" + +namespace gmm { + + /** Diagonal preconditioner. */ + template struct diagonal_precond { + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + + std::vector diag; + + void build_with(const Matrix &M) { + diag.resize(mat_nrows(M)); + for (size_type i = 0; i < mat_nrows(M); ++i) { + magnitude_type x = gmm::abs(M(i, i)); + if (x == magnitude_type(0)) { + x = magnitude_type(1); + GMM_WARNING2("The matrix has a zero on its diagonal"); + } + diag[i] = magnitude_type(1) / x; + } + } + size_type memsize() const { return sizeof(*this) + diag.size() * sizeof(magnitude_type); } /* diag entries are assigned magnitude_type values in build_with */ + diagonal_precond(const Matrix &M) { build_with(M); } + diagonal_precond(void) {} + }; + + template inline + void mult_diag_p(const diagonal_precond& P, V2 &v2, abstract_sparse){ + typename linalg_traits::iterator it = vect_begin(v2), + ite = vect_end(v2); + for (; it != ite; ++it) *it *= P.diag[it.index()]; + } + + template inline + void mult_diag_p(const diagonal_precond& P,V2 &v2, abstract_skyline) + { mult_diag_p(P, v2, abstract_sparse()); } + + template inline + void mult_diag_p(const diagonal_precond& P, V2 &v2, abstract_dense){ + for (size_type i = 0; i < P.diag.size(); ++i) v2[i] *= P.diag[i]; + } + + template inline + void mult(const diagonal_precond& P, const V1 &v1, V2 &v2) { + GMM_ASSERT2(P.diag.size() == vect_size(v2),"dimensions mismatch"); + copy(v1, v2); + mult_diag_p(P, v2, typename linalg_traits::storage_type()); + } + + template inline + void transposed_mult(const diagonal_precond& P,const V1 &v1,V2 &v2) { + mult(P, v1, v2); + } + + // # define DIAG_LEFT_MULT_SQRT + + template inline + void left_mult(const 
diagonal_precond& P, const V1 &v1, V2 &v2) { + GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch"); + copy(v1, v2); +# ifdef DIAG_LEFT_MULT_SQRT + for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= gmm::sqrt(P.diag[i]); +# else + for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= P.diag[i]; +# endif + } + + template inline + void transposed_left_mult(const diagonal_precond& P, + const V1 &v1, V2 &v2) + { left_mult(P, v1, v2); } + + template inline + void right_mult(const diagonal_precond& P, const V1 &v1, V2 &v2) { + // typedef typename linalg_traits::value_type T; + GMM_ASSERT2(P.diag.size() == vect_size(v2), "dimensions mismatch"); + copy(v1, v2); +# ifdef DIAG_LEFT_MULT_SQRT + for (size_type i= 0; i < P.diag.size(); ++i) v2[i] *= gmm::sqrt(P.diag[i]); +# endif + } + + template inline + void transposed_right_mult(const diagonal_precond& P, + const V1 &v1, V2 &v2) + { right_mult(P, v1, v2); } + +} + +#endif + diff --git a/gmm/gmm_precond_ildlt.h b/gmm/gmm_precond_ildlt.h new file mode 100644 index 000000000..22484df73 --- /dev/null +++ b/gmm/gmm_precond_ildlt.h @@ -0,0 +1,241 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of cholesky.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +#ifndef GMM_PRECOND_ILDLT_H +#define GMM_PRECOND_ILDLT_H + +/**@file gmm_precond_ildlt.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date June 5, 2003. + @brief Incomplete Level 0 ILDLT Preconditioner. +*/ + +#include "gmm_precond.h" + +namespace gmm { + + /** Incomplete Level 0 LDLT Preconditioner. + + For use with symmetric real or hermitian complex sparse matrices. + + Notes: The idea under a concrete Preconditioner such as Incomplete + Cholesky is to create a Preconditioner object to use in iterative + methods. + + + Y. Renard : Transformed in LDLT for stability reason. + + U=LT is stored in csr format. D is stored on the diagonal of U. 
+ */ + template + class ildlt_precond { + + public : + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + typedef csr_matrix_ref tm_type; + + tm_type U; + + protected : + std::vector Tri_val; + std::vector Tri_ind, Tri_ptr; + + template void do_ildlt(const M& A, row_major); + void do_ildlt(const Matrix& A, col_major); + + public: + + size_type nrows(void) const { return mat_nrows(U); } + size_type ncols(void) const { return mat_ncols(U); } + value_type &D(size_type i) { return Tri_val[Tri_ptr[i]]; } + const value_type &D(size_type i) const { return Tri_val[Tri_ptr[i]]; } + ildlt_precond(void) {} + void build_with(const Matrix& A) { + Tri_ptr.resize(mat_nrows(A)+1); + do_ildlt(A, typename principal_orientation_type::sub_orientation>::potype()); + } + ildlt_precond(const Matrix& A) { build_with(A); } + size_type memsize() const { + return sizeof(*this) + + Tri_val.size() * sizeof(value_type) + + (Tri_ind.size()+Tri_ptr.size()) * sizeof(size_type); + } + }; + + template template + void ildlt_precond::do_ildlt(const M& A, row_major) { + typedef typename linalg_traits::storage_type store_type; + typedef value_type T; + typedef typename number_traits::magnitude_type R; + + size_type Tri_loc = 0, n = mat_nrows(A), d, g, h, i, j, k; + if (n == 0) return; + T z, zz; + Tri_ptr[0] = 0; + R prec = default_tol(R()); + R max_pivot = gmm::abs(A(0,0)) * prec; + + for (int count = 0; count < 2; ++count) { + if (count) { Tri_val.resize(Tri_loc); Tri_ind.resize(Tri_loc); } + for (Tri_loc = 0, i = 0; i < n; ++i) { + typedef typename linalg_traits::const_sub_row_type row_type; + row_type row = mat_const_row(A, i); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(row), ite = vect_const_end(row); + + if (count) { Tri_val[Tri_loc] = T(0); Tri_ind[Tri_loc] = i; } + ++Tri_loc; // diagonal element + + for (k = 0; it != ite; ++it, ++k) { + j = index_of_it(it, k, store_type()); + if (i == j) { + if 
(count) Tri_val[Tri_loc-1] = *it; + } + else if (j > i) { + if (count) { Tri_val[Tri_loc] = *it; Tri_ind[Tri_loc]=j; } + ++Tri_loc; + } + } + Tri_ptr[i+1] = Tri_loc; + } + } + + if (A(0,0) == T(0)) { + Tri_val[Tri_ptr[0]] = T(1); + GMM_WARNING2("pivot 0 is too small"); + } + + for (k = 0; k < n; k++) { + d = Tri_ptr[k]; + z = T(gmm::real(Tri_val[d])); Tri_val[d] = z; + if (gmm::abs(z) <= max_pivot) { + GMM_WARNING2("pivot " << k << " is too small [" << gmm::abs(z) << "]"); /* warn before z is overwritten so the logged magnitude is the rejected pivot */ + Tri_val[d] = z = T(1); + } + max_pivot = std::max(max_pivot, std::min(gmm::abs(z) * prec, R(1))); + + for (i = d + 1; i < Tri_ptr[k+1]; ++i) Tri_val[i] /= z; + for (i = d + 1; i < Tri_ptr[k+1]; ++i) { + zz = gmm::conj(Tri_val[i] * z); + h = Tri_ind[i]; + g = i; + + for (j = Tri_ptr[h] ; j < Tri_ptr[h+1]; ++j) + for ( ; g < Tri_ptr[k+1] && Tri_ind[g] <= Tri_ind[j]; ++g) + if (Tri_ind[g] == Tri_ind[j]) + Tri_val[j] -= zz * Tri_val[g]; + } + } + U = tm_type(&(Tri_val[0]), &(Tri_ind[0]), &(Tri_ptr[0]), + n, mat_ncols(A)); + } + + template + void ildlt_precond::do_ildlt(const Matrix& A, col_major) + { do_ildlt(gmm::conjugated(A), row_major()); } + + template inline + void mult(const ildlt_precond& P, const V1 &v1, V2 &v2) { + gmm::copy(v1, v2); + gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); + for (size_type i = 0; i < mat_nrows(P.U); ++i) v2[i] /= P.D(i); + gmm::upper_tri_solve(P.U, v2, true); + } + + template inline + void transposed_mult(const ildlt_precond& P,const V1 &v1,V2 &v2) + { mult(P, v1, v2); } + + template inline + void left_mult(const ildlt_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); + for (size_type i = 0; i < mat_nrows(P.U); ++i) v2[i] /= P.D(i); + } + + template inline + void right_mult(const ildlt_precond& P, const V1 &v1, V2 &v2) + { copy(v1, v2); gmm::upper_tri_solve(P.U, v2, true); } + + template inline + void transposed_left_mult(const ildlt_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + 
gmm::upper_tri_solve(P.U, v2, true); + for (size_type i = 0; i < mat_nrows(P.U); ++i) v2[i] /= P.D(i); + } + + template inline + void transposed_right_mult(const ildlt_precond& P, const V1 &v1, + V2 &v2) + { copy(v1, v2); gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); } + + +} + +#endif + diff --git a/gmm/gmm_precond_ildltt.h b/gmm/gmm_precond_ildltt.h new file mode 100644 index 000000000..380106a40 --- /dev/null +++ b/gmm/gmm_precond_ildltt.h @@ -0,0 +1,174 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_precond_ildltt.h + @author Yves Renard + @date June 30, 2003. + @brief incomplete LDL^t (cholesky) preconditioner with fill-in and threshold. +*/ + +#ifndef GMM_PRECOND_ILDLTT_H +#define GMM_PRECOND_ILDLTT_H + +// Store U = LT and D in indiag. On each line, the fill-in is the number +// of non-zero elements on the line of the original matrix plus K, except if +// the matrix is dense. In this case the fill-in is K on each line. + +#include "gmm_precond_ilut.h" + +namespace gmm { + /** incomplete LDL^t (cholesky) preconditioner with fill-in and + threshold. */ + template + class ildltt_precond { + public : + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + + typedef rsvector svector; + + row_matrix U; + std::vector indiag; + + protected: + size_type K; + double eps; + + template void do_ildltt(const M&, row_major); + void do_ildltt(const Matrix&, col_major); + + public: + void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) { + if (k_ >= 0) K = k_; + if (eps_ >= double(0)) eps = eps_; + gmm::resize(U, mat_nrows(A), mat_ncols(A)); + indiag.resize(std::min(mat_nrows(A), mat_ncols(A))); + do_ildltt(A, typename principal_orientation_type::sub_orientation>::potype()); + } + ildltt_precond(const Matrix& A, int k_, double eps_) + : U(mat_nrows(A),mat_ncols(A)), K(k_), eps(eps_) { build_with(A); } + ildltt_precond(void) { K=10; eps = 1E-7; } + ildltt_precond(size_type k_, double eps_) : K(k_), eps(eps_) {} + size_type memsize() const { + return sizeof(*this) + nnz(U)*sizeof(value_type) + indiag.size() * sizeof(magnitude_type); + } + }; + + template template + void ildltt_precond::do_ildltt(const M& A,row_major) { + typedef value_type T; + 
typedef typename number_traits::magnitude_type R; + + size_type n = mat_nrows(A); + if (n == 0) return; + svector w(n); + T tmp; + R prec = default_tol(R()), max_pivot = gmm::abs(A(0,0)) * prec; + + gmm::clear(U); + for (size_type i = 0; i < n; ++i) { + gmm::copy(mat_const_row(A, i), w); + double norm_row = gmm::vect_norm2(w); + + for (size_type krow = 0, k; krow < w.nb_stored(); ++krow) { + typename svector::iterator wk = w.begin() + krow; + if ((k = wk->c) >= i) break; + if (gmm::is_complex(wk->e)) { + tmp = gmm::conj(U(k, i))/indiag[k]; // not completely satisfactory .. + gmm::add(scaled(mat_row(U, k), -tmp), w); + } + else { + tmp = wk->e; + if (gmm::abs(tmp) < eps * norm_row) { w.sup(k); --krow; } + else { wk->e += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); } + } + } + tmp = w[i]; + + if (gmm::abs(gmm::real(tmp)) <= max_pivot) + { GMM_WARNING2("pivot " << i << " is too small"); tmp = T(1); } + + max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1))); + indiag[i] = R(1) / gmm::real(tmp); + gmm::clean(w, eps * norm_row); + gmm::scale(w, T(indiag[i])); + std::sort(w.begin(), w.end(), elt_rsvector_value_less_()); + typename svector::const_iterator wit = w.begin(), wite = w.end(); + for (size_type nnu = 0; wit != wite; ++wit) // copy to be optimized ... 
+ if (wit->c > i) { if (nnu < K) { U(i, wit->c) = wit->e; ++nnu; } } + } + } + + template + void ildltt_precond::do_ildltt(const Matrix& A, col_major) + { do_ildltt(gmm::conjugated(A), row_major()); } + + template inline + void mult(const ildltt_precond& P, const V1 &v1, V2 &v2) { + gmm::copy(v1, v2); + gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); + for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; + gmm::upper_tri_solve(P.U, v2, true); + } + + template inline + void transposed_mult(const ildltt_precond& P,const V1 &v1, V2 &v2) + { mult(P, v1, v2); } + + template inline + void left_mult(const ildltt_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); + for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; + } + + template inline + void right_mult(const ildltt_precond& P, const V1 &v1, V2 &v2) + { copy(v1, v2); gmm::upper_tri_solve(P.U, v2, true); } + + template inline + void transposed_left_mult(const ildltt_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + gmm::upper_tri_solve(P.U, v2, true); + for (size_type i = 0; i < P.indiag.size(); ++i) v2[i] *= P.indiag[i]; + } + + template inline + void transposed_right_mult(const ildltt_precond& P, const V1 &v1, + V2 &v2) + { copy(v1, v2); gmm::lower_tri_solve(gmm::conjugated(P.U), v2, true); } + +} + +#endif + diff --git a/gmm/gmm_precond_ilu.h b/gmm/gmm_precond_ilu.h new file mode 100644 index 000000000..9256b86a2 --- /dev/null +++ b/gmm/gmm_precond_ilu.h @@ -0,0 +1,280 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version 
along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of ilu.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +/**@file gmm_precond_ilu.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date June 5, 2003. + @brief Incomplete LU without fill-in Preconditioner. +*/ + +#ifndef GMM_PRECOND_ILU_H +#define GMM_PRECOND_ILU_H + +// +// Notes: The idea under a concrete Preconditioner such +// as Incomplete LU is to create a Preconditioner +// object to use in iterative methods. +// + +#include "gmm_precond.h" + +namespace gmm { + /** Incomplete LU without fill-in Preconditioner. 
*/ + template + class ilu_precond { + + public : + typedef typename linalg_traits::value_type value_type; + typedef csr_matrix_ref tm_type; + + tm_type U, L; + bool invert; + protected : + std::vector L_val, U_val; + std::vector L_ind, U_ind, L_ptr, U_ptr; + + template void do_ilu(const M& A, row_major); + void do_ilu(const Matrix& A, col_major); + + public: + + size_type nrows(void) const { return mat_nrows(L); } + size_type ncols(void) const { return mat_ncols(U); } + + void build_with(const Matrix& A) { + invert = false; + L_ptr.resize(mat_nrows(A)+1); + U_ptr.resize(mat_nrows(A)+1); + do_ilu(A, typename principal_orientation_type::sub_orientation>::potype()); + } + ilu_precond(const Matrix& A) { build_with(A); } + ilu_precond(void) {} + size_type memsize() const { + return sizeof(*this) + + (L_val.size()+U_val.size()) * sizeof(value_type) + + (L_ind.size()+L_ptr.size()) * sizeof(size_type) + + (U_ind.size()+U_ptr.size()) * sizeof(size_type); + } + }; + + template template + void ilu_precond::do_ilu(const M& A, row_major) { + typedef typename linalg_traits::storage_type store_type; + typedef value_type T; + typedef typename number_traits::magnitude_type R; + + size_type L_loc = 0, U_loc = 0, n = mat_nrows(A), i, j, k; + if (n == 0) return; + L_ptr[0] = 0; U_ptr[0] = 0; + R prec = default_tol(R()); + R max_pivot = gmm::abs(A(0,0)) * prec; + + + for (int count = 0; count < 2; ++count) { + if (count) { + L_val.resize(L_loc); L_ind.resize(L_loc); + U_val.resize(U_loc); U_ind.resize(U_loc); + } + L_loc = U_loc = 0; + for (i = 0; i < n; ++i) { + typedef typename linalg_traits::const_sub_row_type row_type; + row_type row = mat_const_row(A, i); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(row), ite = vect_const_end(row); + + if (count) { U_val[U_loc] = T(0); U_ind[U_loc] = i; } + ++U_loc; // diagonal element + + for (k = 0; it != ite && k < 1000; ++it, ++k) { + // if a plain row is present, retains only the 1000 firsts + // nonzero elements. 
---> a sort should be done. + j = index_of_it(it, k, store_type()); + if (j < i) { + if (count) { L_val[L_loc] = *it; L_ind[L_loc] = j; } + L_loc++; + } + else if (i == j) { + if (count) U_val[U_loc-1] = *it; + } + else { + if (count) { U_val[U_loc] = *it; U_ind[U_loc] = j; } + U_loc++; + } + } + L_ptr[i+1] = L_loc; U_ptr[i+1] = U_loc; + } + } + + if (A(0,0) == T(0)) { + U_val[U_ptr[0]] = T(1); + GMM_WARNING2("pivot 0 is too small"); + } + + size_type qn, pn, rn; + for (i = 1; i < n; i++) { + + pn = U_ptr[i]; + if (gmm::abs(U_val[pn]) <= max_pivot) { + U_val[pn] = T(1); + GMM_WARNING2("pivot " << i << " is too small"); + } + max_pivot = std::max(max_pivot, + std::min(gmm::abs(U_val[pn]) * prec, R(1))); + + for (j = L_ptr[i]; j < L_ptr[i+1]; j++) { + pn = U_ptr[L_ind[j]]; + + T multiplier = (L_val[j] /= U_val[pn]); + + qn = j + 1; + rn = U_ptr[i]; + + for (pn++; pn < U_ptr[L_ind[j]+1] && U_ind[pn] < i; pn++) { + while (qn < L_ptr[i+1] && L_ind[qn] < U_ind[pn]) + qn++; + if (qn < L_ptr[i+1] && U_ind[pn] == L_ind[qn]) + L_val[qn] -= multiplier * U_val[pn]; + } + for (; pn < U_ptr[L_ind[j]+1]; pn++) { + while (rn < U_ptr[i+1] && U_ind[rn] < U_ind[pn]) + rn++; + if (rn < U_ptr[i+1] && U_ind[pn] == U_ind[rn]) + U_val[rn] -= multiplier * U_val[pn]; + } + } + } + + L = tm_type(&(L_val[0]), &(L_ind[0]), &(L_ptr[0]), n, mat_ncols(A)); + U = tm_type(&(U_val[0]), &(U_ind[0]), &(U_ptr[0]), n, mat_ncols(A)); + } + + template + void ilu_precond::do_ilu(const Matrix& A, col_major) { + do_ilu(gmm::transposed(A), row_major()); + invert = true; + } + + template inline + void mult(const ilu_precond& P, const V1 &v1, V2 &v2) { + gmm::copy(v1, v2); + if (P.invert) { + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + else { + gmm::lower_tri_solve(P.L, v2, true); + gmm::upper_tri_solve(P.U, v2, false); + } + } + + template inline + void transposed_mult(const ilu_precond& P,const V1 &v1,V2 &v2) { + gmm::copy(v1, v2); + if 
(P.invert) { + gmm::lower_tri_solve(P.L, v2, true); + gmm::upper_tri_solve(P.U, v2, false); + } + else { + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + } + + template inline + void left_mult(const ilu_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + else gmm::lower_tri_solve(P.L, v2, true); + } + + template inline + void right_mult(const ilu_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + else gmm::upper_tri_solve(P.U, v2, false); + } + + template inline + void transposed_left_mult(const ilu_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::upper_tri_solve(P.U, v2, false); + else gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + + template inline + void transposed_right_mult(const ilu_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::lower_tri_solve(P.L, v2, true); + else gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + } + + +} + +#endif + diff --git a/gmm/gmm_precond_ilut.h b/gmm/gmm_precond_ilut.h new file mode 100644 index 000000000..0860324f0 --- /dev/null +++ b/gmm/gmm_precond_ilut.h @@ -0,0 +1,263 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. 
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of ilut.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +#ifndef GMM_PRECOND_ILUT_H +#define GMM_PRECOND_ILUT_H + +/**@file gmm_precond_ilut.h + @author Andrew Lumsdaine , Lie-Quan Lee + @date June 5, 2003. + @brief ILUT: Incomplete LU with threshold and K fill-in Preconditioner. 
+*/ + +/* + Performance comparison for SSOR, ILU and ILUT based on sherman 5 matrix + in Harwell-Boeing collection on Sun Ultra 30 UPA/PCI (UltraSPARC-II 296MHz) + Preconditioner & Factorization time & Number of Iteration \\ \hline + SSOR & 0.010577 & 41 \\ + ILU & 0.019336 & 32 \\ + ILUT with 0 fill-in and threshold of 1.0e-6 & 0.343612 & 23 \\ + ILUT with 5 fill-in and threshold of 1.0e-6 & 0.343612 & 18 \\ \hline +*/ + +#include "gmm_precond.h" + +namespace gmm { + + template struct elt_rsvector_value_less_ { + inline bool operator()(const elt_rsvector_& a, + const elt_rsvector_& b) const + { return (gmm::abs(a.e) > gmm::abs(b.e)); } + }; + + /** Incomplete LU with threshold and K fill-in Preconditioner. + + The algorithm of ILUT(A, 0, 1.0e-6) is slower than ILU(A). If no + fill-in is allowed, you can use ILU instead of ILUT. + + Notes: The idea under a concrete Preconditioner such as ilut is to + create a Preconditioner object to use in iterative methods. + */ + template + class ilut_precond { + public : + typedef typename linalg_traits::value_type value_type; + typedef wsvector _wsvector; + typedef rsvector _rsvector; + typedef row_matrix<_rsvector> LU_Matrix; + + bool invert; + LU_Matrix L, U; + + protected: + size_type K; + double eps; + + template void do_ilut(const M&, row_major); + void do_ilut(const Matrix&, col_major); + + public: + void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) { + if (k_ >= 0) K = k_; + if (eps_ >= double(0)) eps = eps_; + invert = false; + gmm::resize(L, mat_nrows(A), mat_ncols(A)); + gmm::resize(U, mat_nrows(A), mat_ncols(A)); + do_ilut(A, typename principal_orientation_type::sub_orientation>::potype()); + } + ilut_precond(const Matrix& A, int k_, double eps_) + : L(mat_nrows(A), mat_ncols(A)), U(mat_nrows(A), mat_ncols(A)), + K(k_), eps(eps_) { build_with(A); } + ilut_precond(size_type k_, double eps_) : K(k_), eps(eps_) {} + ilut_precond(void) { K = 10; eps = 1E-7; } + size_type memsize() const { + return 
sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type); + } + }; + + template template + void ilut_precond::do_ilut(const M& A, row_major) { + typedef value_type T; + typedef typename number_traits::magnitude_type R; + + size_type n = mat_nrows(A); + if (n == 0) return; + std::vector indiag(n); + _wsvector w(mat_ncols(A)); + _rsvector ww(mat_ncols(A)), wL(mat_ncols(A)), wU(mat_ncols(A)); + T tmp; + gmm::clear(U); gmm::clear(L); + R prec = default_tol(R()); + R max_pivot = gmm::abs(A(0,0)) * prec; + + for (size_type i = 0; i < n; ++i) { + gmm::copy(mat_const_row(A, i), w); + double norm_row = gmm::vect_norm2(w); + + typename _wsvector::iterator wkold = w.end(); + for (typename _wsvector::iterator wk = w.begin(); + wk != w.end() && wk->first < i; ) { + size_type k = wk->first; + tmp = (wk->second) * indiag[k]; + if (gmm::abs(tmp) < eps * norm_row) w.erase(k); + else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); } + if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; } + if (wk != w.end() && wk->first == k) + { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; } + } + tmp = w[i]; + + if (gmm::abs(tmp) <= max_pivot) { + GMM_WARNING2("pivot " << i << " too small. 
try with ilutp ?"); + w[i] = tmp = T(1); + } + + max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1))); + indiag[i] = T(1) / tmp; + gmm::clean(w, eps * norm_row); + gmm::copy(w, ww); + std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_()); + typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end(); + + size_type nnl = 0, nnu = 0; + wL.base_resize(K); wU.base_resize(K+1); + typename _rsvector::iterator witL = wL.begin(), witU = wU.begin(); + for (; wit != wite; ++wit) + if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } } + else { if (nnu < K || wit->c == i) { *witU++ = *wit; ++nnu; } } + wL.base_resize(nnl); wU.base_resize(nnu); + std::sort(wL.begin(), wL.end()); + std::sort(wU.begin(), wU.end()); + gmm::copy(wL, L.row(i)); + gmm::copy(wU, U.row(i)); + } + + } + + template + void ilut_precond::do_ilut(const Matrix& A, col_major) { + do_ilut(gmm::transposed(A), row_major()); + invert = true; + } + + template inline + void mult(const ilut_precond& P, const V1 &v1, V2 &v2) { + gmm::copy(v1, v2); + if (P.invert) { + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + else { + gmm::lower_tri_solve(P.L, v2, true); + gmm::upper_tri_solve(P.U, v2, false); + } + } + + template inline + void transposed_mult(const ilut_precond& P,const V1 &v1,V2 &v2) { + gmm::copy(v1, v2); + if (P.invert) { + gmm::lower_tri_solve(P.L, v2, true); + gmm::upper_tri_solve(P.U, v2, false); + } + else { + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + } + + template inline + void left_mult(const ilut_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + else gmm::lower_tri_solve(P.L, v2, true); + } + + template inline + void right_mult(const ilut_precond& P, const V1 &v1, V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::upper_tri_solve(gmm::transposed(P.L), 
v2, true); + else gmm::upper_tri_solve(P.U, v2, false); + } + + template inline + void transposed_left_mult(const ilut_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::upper_tri_solve(P.U, v2, false); + else gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + + template inline + void transposed_right_mult(const ilut_precond& P, const V1 &v1, + V2 &v2) { + copy(v1, v2); + if (P.invert) gmm::lower_tri_solve(P.L, v2, true); + else gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + } + +} + +#endif + diff --git a/gmm/gmm_precond_ilutp.h b/gmm/gmm_precond_ilutp.h new file mode 100644 index 000000000..d867d6053 --- /dev/null +++ b/gmm/gmm_precond_ilutp.h @@ -0,0 +1,284 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2004-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_precond_ilutp.h + @author Yves Renard + @date October 14, 2004. + @brief ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and + column pivoting. + + +*/ +#ifndef GMM_PRECOND_ILUTP_H +#define GMM_PRECOND_ILUTP_H + +#include "gmm_precond_ilut.h" + +namespace gmm { + + /** + ILUTP: Incomplete LU with threshold and K fill-in Preconditioner and + column pivoting. + + See Yousef Saad, Iterative Methods for + sparse linear systems, PWS Publishing Company, section 10.4.4 + + TODO : store the permutation by cycles to avoid the temporary vector + */ + template + class ilutp_precond { + public : + typedef typename linalg_traits::value_type value_type; + typedef wsvector _wsvector; + typedef rsvector _rsvector; + typedef row_matrix<_rsvector> LU_Matrix; + typedef col_matrix<_wsvector> CLU_Matrix; + + bool invert; + LU_Matrix L, U; + gmm::unsorted_sub_index indperm; + gmm::unsorted_sub_index indperminv; + mutable std::vector temporary; + + protected: + size_type K; + double eps; + + template void do_ilutp(const M&, row_major); + void do_ilutp(const Matrix&, col_major); + + public: + void build_with(const Matrix& A, int k_ = -1, double eps_ = double(-1)) { + if (k_ >= 0) K = k_; + if (eps_ >= double(0)) eps = eps_; + invert = false; + gmm::resize(L, mat_nrows(A), mat_ncols(A)); + gmm::resize(U, mat_nrows(A), mat_ncols(A)); + do_ilutp(A, typename principal_orientation_type::sub_orientation>::potype()); + } + 
ilutp_precond(const Matrix& A, size_type k_, double eps_) + : L(mat_nrows(A), mat_ncols(A)), U(mat_nrows(A), mat_ncols(A)), + K(k_), eps(eps_) { build_with(A); } + ilutp_precond(int k_, double eps_) : K(k_), eps(eps_) {} + ilutp_precond(void) { K = 10; eps = 1E-7; } + size_type memsize() const { + return sizeof(*this) + (nnz(U)+nnz(L))*sizeof(value_type); + } + }; + + + template template + void ilutp_precond::do_ilutp(const M& A, row_major) { + typedef value_type T; + typedef typename number_traits::magnitude_type R; + + size_type n = mat_nrows(A); + CLU_Matrix CU(n,n); + if (n == 0) return; + std::vector indiag(n); + temporary.resize(n); + std::vector ipvt(n), ipvtinv(n); + for (size_type i = 0; i < n; ++i) ipvt[i] = ipvtinv[i] = i; + indperm = unsorted_sub_index(ipvt); + indperminv = unsorted_sub_index(ipvtinv); + _wsvector w(mat_ncols(A)); + _rsvector ww(mat_ncols(A)); + + T tmp = T(0); + gmm::clear(L); gmm::clear(U); + R prec = default_tol(R()); + R max_pivot = gmm::abs(A(0,0)) * prec; + + for (size_type i = 0; i < n; ++i) { + + copy(sub_vector(mat_const_row(A, i), indperm), w); + double norm_row = gmm::vect_norm2(mat_const_row(A, i)); + + typename _wsvector::iterator wkold = w.end(); + for (typename _wsvector::iterator wk = w.begin(); + wk != w.end() && wk->first < i; ) { + size_type k = wk->first; + tmp = (wk->second) * indiag[k]; + if (gmm::abs(tmp) < eps * norm_row) w.erase(k); + else { wk->second += tmp; gmm::add(scaled(mat_row(U, k), -tmp), w); } + if (wkold == w.end()) wk = w.begin(); else { wk = wkold; ++wk; } + if (wk != w.end() && wk->first == k) + { if (wkold == w.end()) wkold = w.begin(); else ++wkold; ++wk; } + } + + gmm::clean(w, eps * norm_row); + gmm::copy(w, ww); + + std::sort(ww.begin(), ww.end(), elt_rsvector_value_less_()); + typename _rsvector::const_iterator wit = ww.begin(), wite = ww.end(); + size_type ip = size_type(-1); + + for (; wit != wite; ++wit) + if (wit->c >= i) { ip = wit->c; tmp = wit->e; break; } + if (ip == size_type(-1) || 
gmm::abs(tmp) <= max_pivot) + { GMM_WARNING2("pivot " << i << " too small"); ip=i; ww[i]=tmp=T(1); } + max_pivot = std::max(max_pivot, std::min(gmm::abs(tmp) * prec, R(1))); + indiag[i] = T(1) / tmp; + wit = ww.begin(); + + size_type nnl = 0, nnu = 0; + L[i].base_resize(K); U[i].base_resize(K+1); + typename _rsvector::iterator witL = L[i].begin(), witU = U[i].begin(); + for (; wit != wite; ++wit) { + if (wit->c < i) { if (nnl < K) { *witL++ = *wit; ++nnl; } } + else if (nnu < K || wit->c == i) + { CU(i, wit->c) = wit->e; *witU++ = *wit; ++nnu; } + } + L[i].base_resize(nnl); U[i].base_resize(nnu); + std::sort(L[i].begin(), L[i].end()); + std::sort(U[i].begin(), U[i].end()); + + if (ip != i) { + typename _wsvector::const_iterator iti = CU.col(i).begin(); + typename _wsvector::const_iterator itie = CU.col(i).end(); + typename _wsvector::const_iterator itp = CU.col(ip).begin(); + typename _wsvector::const_iterator itpe = CU.col(ip).end(); + + while (iti != itie && itp != itpe) { + if (iti->first < itp->first) + { U.row(iti->first).swap_indices(i, ip); ++iti; } + else if (iti->first > itp->first) + { U.row(itp->first).swap_indices(i,ip);++itp; } + else + { U.row(iti->first).swap_indices(i, ip); ++iti; ++itp; } + } + + for( ; iti != itie; ++iti) U.row(iti->first).swap_indices(i, ip); + for( ; itp != itpe; ++itp) U.row(itp->first).swap_indices(i, ip); + + CU.swap_col(i, ip); + + indperm.swap(i, ip); + indperminv.swap(ipvt[i], ipvt[ip]); + std::swap(ipvtinv[ipvt[i]], ipvtinv[ipvt[ip]]); + std::swap(ipvt[i], ipvt[ip]); + } + } + } + + template + void ilutp_precond::do_ilutp(const Matrix& A, col_major) { + do_ilutp(gmm::transposed(A), row_major()); + invert = true; + } + + template inline + void mult(const ilutp_precond& P, const V1 &v1, V2 &v2) { + if (P.invert) { + gmm::copy(gmm::sub_vector(v1, P.indperm), v2); + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + else { + gmm::copy(v1, P.temporary); + 
gmm::lower_tri_solve(P.L, P.temporary, true); + gmm::upper_tri_solve(P.U, P.temporary, false); + gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2); + } + } + + template inline + void transposed_mult(const ilutp_precond& P,const V1 &v1,V2 &v2) { + if (P.invert) { + gmm::copy(v1, P.temporary); + gmm::lower_tri_solve(P.L, P.temporary, true); + gmm::upper_tri_solve(P.U, P.temporary, false); + gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2); + } + else { + gmm::copy(gmm::sub_vector(v1, P.indperm), v2); + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + } + + template inline + void left_mult(const ilutp_precond& P, const V1 &v1, V2 &v2) { + if (P.invert) { + gmm::copy(gmm::sub_vector(v1, P.indperm), v2); + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + } + else { + copy(v1, v2); + gmm::lower_tri_solve(P.L, v2, true); + } + } + + template inline + void right_mult(const ilutp_precond& P, const V1 &v1, V2 &v2) { + if (P.invert) { + copy(v1, v2); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + else { + copy(v1, P.temporary); + gmm::upper_tri_solve(P.U, P.temporary, false); + gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2); + } + } + + template inline + void transposed_left_mult(const ilutp_precond& P, const V1 &v1, + V2 &v2) { + if (P.invert) { + copy(v1, P.temporary); + gmm::upper_tri_solve(P.U, P.temporary, false); + gmm::copy(gmm::sub_vector(P.temporary, P.indperminv), v2); + } + else { + copy(v1, v2); + gmm::upper_tri_solve(gmm::transposed(P.L), v2, true); + } + } + + template inline + void transposed_right_mult(const ilutp_precond& P, const V1 &v1, + V2 &v2) { + if (P.invert) { + copy(v1, v2); + gmm::lower_tri_solve(P.L, v2, true); + } + else { + gmm::copy(gmm::sub_vector(v1, P.indperm), v2); + gmm::lower_tri_solve(gmm::transposed(P.U), v2, false); + } + } + +} + +#endif + diff --git a/gmm/gmm_precond_mr_approx_inverse.h 
b/gmm/gmm_precond_mr_approx_inverse.h new file mode 100644 index 000000000..7504f48fb --- /dev/null +++ b/gmm/gmm_precond_mr_approx_inverse.h @@ -0,0 +1,149 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + + +// This file is a modified version of approximate_inverse.h from ITL. 
+// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +//=========================================================================== + +/**@file gmm_precond_mr_approx_inverse.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date June 5, 2003. + @brief Approximate inverse via MR iteration. +*/ + +#ifndef GMM_PRECOND_MR_APPROX_INVERSE_H +#define GMM_PRECOND_MR_APPROX_INVERSE_H + + +#include "gmm_precond.h" + +namespace gmm { + + /** Approximate inverse via MR iteration (see P301 of Saad book). + */ + template + struct mr_approx_inverse_precond { + + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type magnitude_type; + typedef typename principal_orientation_type::sub_orientation>::potype sub_orientation; + typedef wsvector VVector; + typedef col_matrix MMatrix; + + MMatrix M; + size_type nb_it; + magnitude_type threshold; + + void build_with(const Matrix& A); + mr_approx_inverse_precond(const Matrix& A, size_type nb_it_, + magnitude_type threshold_) + : M(mat_nrows(A), mat_ncols(A)) + { threshold = threshold_; nb_it = nb_it_; build_with(A); } + mr_approx_inverse_precond(void) + { threshold = magnitude_type(1E-7); nb_it = 5; } + mr_approx_inverse_precond(size_type nb_it_, magnitude_type threshold_) + { threshold = threshold_; nb_it = nb_it_; } + const MMatrix &approx_inverse(void) const { return M; } + }; + + template inline + void mult(const mr_approx_inverse_precond& P, const V1 &v1, V2 &v2) + { mult(P.M, v1, v2); } + + template inline + void transposed_mult(const mr_approx_inverse_precond& P, + const V1 &v1,V2 &v2) + { mult(gmm::conjugated(P.M), v1, v2); } + + template + void mr_approx_inverse_precond::build_with(const Matrix& A) { + gmm::resize(M, mat_nrows(A), mat_ncols(A)); + typedef value_type T; + typedef magnitude_type R; + VVector m(mat_ncols(A)),r(mat_ncols(A)),ei(mat_ncols(A)),Ar(mat_ncols(A)); + T alpha = mat_trace(A)/ mat_euclidean_norm_sqr(A); + if (alpha == T(0)) alpha = T(1); + + for (size_type i = 0; i < mat_nrows(A); 
++i) { + gmm::clear(m); gmm::clear(ei); + m[i] = alpha; + ei[i] = T(1); + + for (size_type j = 0; j < nb_it; ++j) { + gmm::mult(A, gmm::scaled(m, T(-1)), r); + gmm::add(ei, r); + gmm::mult(A, r, Ar); + T nAr = vect_sp(Ar,Ar); + if (gmm::abs(nAr) > R(0)) { + gmm::add(gmm::scaled(r, gmm::safe_divide(vect_sp(r, Ar), vect_sp(Ar, Ar))), m); + gmm::clean(m, threshold * gmm::vect_norm2(m)); + } else gmm::clear(m); + } + if (gmm::vect_norm2(m) == R(0)) m[i] = alpha; + gmm::copy(m, M.col(i)); + } + } +} + +#endif + diff --git a/gmm/gmm_range_basis.h b/gmm/gmm_range_basis.h new file mode 100644 index 000000000..05a71a0c8 --- /dev/null +++ b/gmm/gmm_range_basis.h @@ -0,0 +1,499 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2009-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_range_basis.h + @author Yves Renard + @date March 10, 2009. + @brief Extract a basis of the range of a (large sparse) matrix from the + columns of this matrix. +*/ +#ifndef GMM_RANGE_BASIS_H +#define GMM_RANGE_BASIS_H +#include "gmm_dense_qr.h" +#include "gmm_dense_lu.h" + +#include "gmm_kernel.h" +#include "gmm_iter.h" +#include +#include + + +namespace gmm { + + + template + void tridiag_qr_algorithm + (std::vector::magnitude_type> diag, + std::vector sdiag, const VECT &eigval_, const MAT1 &eigvect_, + bool compvect, tol_type_for_qr tol = default_tol_for_qr) { + VECT &eigval = const_cast(eigval_); + MAT1 &eigvect = const_cast(eigvect_); + typedef typename number_traits::magnitude_type R; + + if (compvect) gmm::copy(identity_matrix(), eigvect); + + size_type n = diag.size(), q = 0, p, ite = 0; + if (n == 0) return; + if (n == 1) { eigval[0] = gmm::real(diag[0]); return; } + + symmetric_qr_stop_criterion(diag, sdiag, p, q, tol); + + while (q < n) { + sub_interval SUBI(p, n-p-q), SUBJ(0, mat_ncols(eigvect)), SUBK(p, n-p-q); + if (!compvect) SUBK = sub_interval(0,0); + + symmetric_Wilkinson_qr_step(sub_vector(diag, SUBI), + sub_vector(sdiag, SUBI), + sub_matrix(eigvect, SUBJ, SUBK), compvect); + + symmetric_qr_stop_criterion(diag, sdiag, p, q, tol*R(3)); + ++ite; + GMM_ASSERT1(ite < n*100, "QR algorithm failed."); + } + + gmm::copy(diag, eigval); + } + + // Range basis with a restarted Lanczos method + template + void 
range_basis_eff_Lanczos(const Mat &BB, std::set &columns, + double EPS=1E-12) { + typedef std::set TAB; + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type nc_r = columns.size(), k; + col_matrix< rsvector > B(mat_nrows(BB), mat_ncols(BB)); + + k = 0; + for (TAB::iterator it = columns.begin(); it!=columns.end(); ++it, ++k){ + gmm::copy(scaled(mat_col(BB, *it), T(1)/vect_norm2(mat_col(BB, *it))), + mat_col(B, *it)); + } + std::vector w(mat_nrows(B)); + size_type restart = 120; + std::vector sdiag(restart); + std::vector eigval(restart), diag(restart); + dense_matrix eigvect(restart, restart); + + R rho = R(-1), rho2; + while (nc_r) { + + std::vector v(nc_r), v0(nc_r), wl(nc_r); + dense_matrix lv(nc_r, restart); + + if (rho < R(0)) { // Estimate of the spectral radius of B^* B + gmm::fill_random(v); + for (size_type i = 0; i < 100; ++i) { + gmm::scale(v, T(1)/vect_norm2(v)); + gmm::copy(v, v0); + k = 0; gmm::clear(w); + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k) + add(scaled(mat_col(B, *it), v[k]), w); + k = 0; + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k) + v[k] = vect_hp(w, mat_col(B, *it)); + rho = gmm::abs(vect_hp(v, v0) / vect_hp(v0, v0)); + } + rho *= R(2); + } + + // Computing vectors of the null space of de B^* B with restarted Lanczos + rho2 = 0; + gmm::fill_random(v); + size_type iter = 0; + for(;;++iter) { + R rho_old = rho2; + R beta = R(0), alpha; + gmm::scale(v, T(1)/vect_norm2(v)); + size_type eff_restart = restart; + if (sdiag.size() != restart) { + sdiag.resize(restart); eigval.resize(restart); diag.resize(restart); gmm::resize(eigvect, restart, restart); + gmm::resize(lv, nc_r, restart); + } + + for (size_type i = 0; i < restart; ++i) { // Lanczos iterations + gmm::copy(v, mat_col(lv, i)); + gmm::clear(w); + k = 0; + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k) + add(scaled(mat_col(B, *it), v[k]), w); + + k = 0; + 
for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++k) + wl[k] = v[k]*rho - vect_hp(w, mat_col(B, *it)) - beta*v0[k]; + alpha = gmm::real(vect_hp(wl, v)); + diag[i] = alpha; + gmm::add(gmm::scaled(v, -alpha), wl); + sdiag[i] = beta = vect_norm2(wl); + gmm::copy(v, v0); + if (beta < EPS) { eff_restart = i+1; break; } + gmm::copy(gmm::scaled(wl, T(1) / beta), v); + } + if (eff_restart != restart) { + sdiag.resize(eff_restart); eigval.resize(eff_restart); diag.resize(eff_restart); + gmm::resize(eigvect, eff_restart, eff_restart); gmm::resize(lv, nc_r, eff_restart); + } + tridiag_qr_algorithm(diag, sdiag, eigval, eigvect, true); + + size_type num = size_type(-1); + rho2 = R(0); + for (size_type j = 0; j < eff_restart; ++j) + { R nvp=gmm::abs(eigval[j]); if (nvp > rho2) { rho2=nvp; num=j; }} + + GMM_ASSERT1(num != size_type(-1), "Internal error"); + + gmm::mult(lv, mat_col(eigvect, num), v); + + if (gmm::abs(rho2-rho_old) < rho_old*R(EPS)) break; + // if (gmm::abs(rho-rho2) < rho*R(gmm::sqrt(EPS))) break; + if (gmm::abs(rho-rho2) < rho*R(EPS)*R(100)) break; + } + + if (gmm::abs(rho-rho2) < rho*R(EPS*10.)) { + size_type j_max = size_type(-1), j = 0; + R val_max = R(0); + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it, ++j) + if (gmm::abs(v[j]) > val_max) + { val_max = gmm::abs(v[j]); j_max = *it; } + columns.erase(j_max); nc_r = columns.size(); + } + else break; + } + } + + // Range basis with LU decomposition. Not stable from a numerical viewpoint. 
+ // Complex version not verified + template + void range_basis_eff_lu(const Mat &B, std::set &columns, + std::vector &c_ortho, double EPS) { + + typedef std::set TAB; + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type nc_r = 0, nc_o = 0, nc = mat_ncols(B), nr = mat_nrows(B), i, j; + + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it) + if (!(c_ortho[*it])) ++nc_r; else nc_o++; + + if (nc_r > 0) { + + gmm::row_matrix< gmm::rsvector > Hr(nc, nc_r), Ho(nc, nc_o); + gmm::row_matrix< gmm::rsvector > BBr(nr, nc_r), BBo(nr, nc_o); + + i = j = 0; + for (TAB::iterator it=columns.begin(); it!=columns.end(); ++it) + if (!(c_ortho[*it])) + { Hr(*it, i) = T(1)/ vect_norminf(mat_col(B, *it)); ++i; } + else + { Ho(*it, j) = T(1)/ vect_norm2(mat_col(B, *it)); ++j; } + + gmm::mult(B, Hr, BBr); + gmm::mult(B, Ho, BBo); + gmm::dense_matrix M(nc_r, nc_r), BBB(nc_r, nc_o), MM(nc_r, nc_r); + gmm::mult(gmm::conjugated(BBr), BBr, M); + gmm::mult(gmm::conjugated(BBr), BBo, BBB); + gmm::mult(BBB, gmm::conjugated(BBB), MM); + gmm::add(gmm::scaled(MM, T(-1)), M); + + std::vector ipvt(nc_r); + gmm::lu_factor(M, ipvt); + + R emax = R(0); + for (i = 0; i < nc_r; ++i) emax = std::max(emax, gmm::abs(M(i,i))); + + i = 0; + std::set c = columns; + for (TAB::iterator it = c.begin(); it != c.end(); ++it) + if (!(c_ortho[*it])) { + if (gmm::abs(M(i,i)) <= R(EPS)*emax) columns.erase(*it); + ++i; + } + } + } + + + // Range basis with Gram-Schmidt orthogonalization (sparse version) + // The sparse version is better when the sparsity is high and less efficient + // than the dense version for high degree elements (P3, P4 ...) 
+ // Complex version not verified + template + void range_basis_eff_Gram_Schmidt_sparse(const Mat &BB, + std::set &columns, + std::vector &c_ortho, + double EPS) { + + typedef std::set TAB; + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type nc = mat_ncols(BB), nr = mat_nrows(BB); + std::set c = columns, rc = columns; + + gmm::col_matrix< rsvector > B(nr, nc); + for (std::set::iterator it = columns.begin(); + it != columns.end(); ++it) { + gmm::copy(mat_col(BB, *it), mat_col(B, *it)); + gmm::scale(mat_col(B, *it), T(1)/vect_norm2(mat_col(B, *it))); + } + + for (std::set::iterator it = c.begin(); it != c.end(); ++it) + if (c_ortho[*it]) { + for (std::set::iterator it2 = rc.begin(); + it2 != rc.end(); ++it2) + if (!(c_ortho[*it2])) { + T r = -vect_hp(mat_col(B, *it2), mat_col(B, *it)); + if (r != T(0)) add(scaled(mat_col(B, *it), r), mat_col(B, *it2)); + } + rc.erase(*it); + } + + while (rc.size()) { + R nmax = R(0); size_type cmax = size_type(-1); + for (std::set::iterator it=rc.begin(); it != rc.end();) { + TAB::iterator itnext = it; ++itnext; + R n = vect_norm2(mat_col(B, *it)); + if (nmax < n) { nmax = n; cmax = *it; } + if (n < R(EPS)) { columns.erase(*it); rc.erase(*it); } + it = itnext; + } + + if (nmax < R(EPS)) break; + + gmm::scale(mat_col(B, cmax), T(1)/vect_norm2(mat_col(B, cmax))); + rc.erase(cmax); + for (std::set::iterator it=rc.begin(); it!=rc.end(); ++it) { + T r = -vect_hp(mat_col(B, *it), mat_col(B, cmax)); + if (r != T(0)) add(scaled(mat_col(B, cmax), r), mat_col(B, *it)); + } + } + for (std::set::iterator it=rc.begin(); it!=rc.end(); ++it) + columns.erase(*it); + } + + + // Range basis with Gram-Schmidt orthogonalization (dense version) + template + void range_basis_eff_Gram_Schmidt_dense(const Mat &B, + std::set &columns, + std::vector &c_ortho, + double EPS) { + + typedef std::set TAB; + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; 
+ + size_type nc_r = columns.size(), nc = mat_ncols(B), nr = mat_nrows(B), i; + std::set rc; + + row_matrix< gmm::rsvector > H(nc, nc_r), BB(nr, nc_r); + std::vector v(nc_r); + std::vector ind(nc_r); + + i = 0; + for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i) + H(*it, i) = T(1) / vect_norm2(mat_col(B, *it)); + + mult(B, H, BB); + dense_matrix M(nc_r, nc_r); + mult(gmm::conjugated(BB), BB, M); + + i = 0; + for (TAB::iterator it = columns.begin(); it != columns.end(); ++it, ++i) + if (c_ortho[*it]) { + gmm::copy(mat_row(M, i), v); + rank_one_update(M, scaled(v, T(-1)), v); + M(i, i) = T(1); + } + else { rc.insert(i); ind[i] = *it; } + + while (rc.size() > 0) { + + // Next pivot + R nmax = R(0); size_type imax = size_type(-1); + for (TAB::iterator it = rc.begin(); it != rc.end();) { + TAB::iterator itnext = it; ++itnext; + R a = gmm::abs(M(*it, *it)); + if (a > nmax) { nmax = a; imax = *it; } + if (a < R(EPS)) { columns.erase(ind[*it]); rc.erase(*it); } + it = itnext; + } + + if (nmax < R(EPS)) break; + + // Normalization + gmm::scale(mat_row(M, imax), T(1) / sqrt(nmax)); + gmm::scale(mat_col(M, imax), T(1) / sqrt(nmax)); + + // orthogonalization + copy(mat_row(M, imax), v); + rank_one_update(M, scaled(v, T(-1)), v); + M(imax, imax) = T(1); + + rc.erase(imax); + } + for (std::set::iterator it=rc.begin(); it!=rc.end(); ++it) + columns.erase(ind[*it]); + } + + template size_type nnz_eps(const L& l, double eps) { + typename linalg_traits::const_iterator it = vect_const_begin(l), + ite = vect_const_end(l); + size_type res(0); + for (; it != ite; ++it) if (gmm::abs(*it) >= eps) ++res; + return res; + } + + template + bool reserve__rb(const L& l, std::vector &b, double eps) { + typename linalg_traits::const_iterator it = vect_const_begin(l), + ite = vect_const_end(l); + bool ok = true; + for (; it != ite; ++it) + if (gmm::abs(*it) >= eps && b[it.index()]) ok = false; + if (ok) { + for (it = vect_const_begin(l); it != ite; ++it) + if (gmm::abs(*it) 
>= eps) b[it.index()] = true; + } + return ok; + } + + template + void range_basis(const Mat &B, std::set &columns, + double EPS, col_major, bool skip_init=false) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + size_type nc = mat_ncols(B), nr = mat_nrows(B); + + std::vector norms(nc); + std::vector c_ortho(nc), booked(nr); + std::vector< std::set > nnzs(mat_nrows(B)); + + if (!skip_init) { + + R norm_max = R(0); + for (size_type i = 0; i < nc; ++i) { + norms[i] = vect_norminf(mat_col(B, i)); + norm_max = std::max(norm_max, norms[i]); + } + + columns.clear(); + for (size_type i = 0; i < nc; ++i) + if (norms[i] > norm_max*R(EPS)) { + columns.insert(i); + nnzs[nnz_eps(mat_col(B, i), R(EPS) * norms[i])].insert(i); + } + + for (size_type i = 1; i < nr; ++i) + for (std::set::iterator it = nnzs[i].begin(); + it != nnzs[i].end(); ++it) + if (reserve__rb(mat_col(B, *it), booked, R(EPS) * norms[*it])) + c_ortho[*it] = true; + } + + size_type sizesm[7] = {125, 200, 350, 550, 800, 1100, 1500}, actsize; + for (int k = 0; k < 7; ++k) { + size_type nc_r = columns.size(); + std::set c1, cres; + actsize = sizesm[k]; + for (std::set::iterator it = columns.begin(); + it != columns.end(); ++it) { + c1.insert(*it); + if (c1.size() >= actsize) { + range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS); + for (std::set::iterator it2=c1.begin(); it2 != c1.end(); + ++it2) cres.insert(*it2); + c1.clear(); + } + } + if (c1.size() > 1) + range_basis_eff_Gram_Schmidt_dense(B, c1, c_ortho, EPS); + for (std::set::iterator it = c1.begin(); it != c1.end(); ++it) + cres.insert(*it); + columns = cres; + if (nc_r <= actsize) return; + if (columns.size() == nc_r) break; + if (sizesm[k] >= 350 && columns.size() > (nc_r*19)/20) break; + } + if (columns.size() > std::max(size_type(10), actsize)) + range_basis_eff_Lanczos(B, columns, EPS); + else + range_basis_eff_Gram_Schmidt_dense(B, columns, c_ortho, EPS); + } + + + template + void 
range_basis(const Mat &B, std::set &columns, + double EPS, row_major) { + typedef typename linalg_traits::value_type T; + gmm::col_matrix< rsvector > BB(mat_nrows(B), mat_ncols(B)); + GMM_WARNING3("A copy of a row matrix is done into a column matrix " + "for range basis algorithm."); + gmm::copy(B, BB); + range_basis(BB, columns, EPS); + } + + /** Range Basis : + Extract a basis of the range of a (large sparse) matrix selecting some + column vectors of this matrix. This is in particular useful to select + an independent set of linear constraints. + + The algorithm is optimized for two cases : + - when the (non trivial) kernel is small. An iterativ algorithm + based on Lanczos method is applied + - when the (non trivial) kernel is large and most of the dependencies + can be detected locally. A block Gram-Schmidt is applied first then + a restarted Lanczos method when the remaining kernel is greatly + smaller. + The restarted Lanczos method could be improved or replaced by a block + Lanczos method, a block Wiedelann method (in order to be parallelized for + instance) or simply could compute more than one vector of the null + space at each iteration. + The LU decomposition has been tested for local elimination but gives bad + results : the algorithm is unstable and do not permit to give the right + number of vector at the end of the process. Moreover, the number of final + vectors depends greatly on the number of vectors in a block of the local + analysis. 
+ */ + template + void range_basis(const Mat &B, std::set &columns, + double EPS=1E-12) { + range_basis(B, columns, EPS, + typename principal_orientation_type + ::sub_orientation>::potype()); +} + +} + +#endif diff --git a/gmm/gmm_real_part.h b/gmm/gmm_real_part.h new file mode 100644 index 000000000..c4e61d815 --- /dev/null +++ b/gmm/gmm_real_part.h @@ -0,0 +1,605 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. 
+ +===========================================================================*/ + +/**@file gmm_real_part.h + @author Yves Renard + @date September 18, 2003. + @brief extract the real/imaginary part of vectors/matrices +*/ +#ifndef GMM_REAL_PART_H +#define GMM_REAL_PART_H + +#include "gmm_def.h" +#include "gmm_vector.h" + +namespace gmm { + + struct linalg_real_part {}; + struct linalg_imag_part {}; + template struct which_part {}; + + template typename number_traits::magnitude_type + real_or_imag_part(C x, linalg_real_part) { return gmm::real(x); } + template typename number_traits::magnitude_type + real_or_imag_part(C x, linalg_imag_part) { return gmm::imag(x); } + template C + complex_from(T x, C y, OP op, linalg_real_part) { return std::complex(op(std::real(y), x), std::imag(y)); } + template C + complex_from(T x, C y, OP op,linalg_imag_part) { return std::complex(std::real(y), op(std::imag(y), x)); } + + template struct project2nd { + T operator()(T , T b) const { return b; } + }; + + template class ref_elt_vector > { + + R r; + + public : + + operator T() const { return real_or_imag_part(std::complex(r), PART()); } + ref_elt_vector(R r_) : r(r_) {} + inline ref_elt_vector &operator =(T v) + { r = complex_from(v, std::complex(r), gmm::project2nd(), PART()); return *this; } + inline bool operator ==(T v) const { return (r == v); } + inline bool operator !=(T v) const { return (r != v); } + inline ref_elt_vector &operator +=(T v) + { r = complex_from(v, std::complex(r), std::plus(), PART()); return *this; } + inline ref_elt_vector &operator -=(T v) + { r = complex_from(v, std::complex(r), std::minus(), PART()); return *this; } + inline ref_elt_vector &operator /=(T v) + { r = complex_from(v, std::complex(r), std::divides(), PART()); return *this; } + inline ref_elt_vector &operator *=(T v) + { r = complex_from(v, std::complex(r), std::multiplies(), PART()); return *this; } + inline ref_elt_vector &operator =(const ref_elt_vector &re) + { *this = T(re); return 
*this; } + T operator +() { return T(*this); } // necessary for unknow reason + T operator -() { return -T(*this); } // necessary for unknow reason + T operator +(T v) { return T(*this)+ v; } // necessary for unknow reason + T operator -(T v) { return T(*this)- v; } // necessary for unknow reason + T operator *(T v) { return T(*this)* v; } // necessary for unknow reason + T operator /(T v) { return T(*this)/ v; } // necessary for unknow reason + }; + + template struct ref_or_value_type { + template + static W r(const T &x, linalg_real_part, W) { + return gmm::real(x); + } + template + static W r(const T &x, linalg_imag_part, W) { + return gmm::imag(x); + } + }; + + template + struct ref_or_value_type > > { + template + static const T &r(const T &x, linalg_real_part, W) + { return x; } + template + static const T &r(const T &x, linalg_imag_part, W) { + return x; + } + template + static T &r(T &x, linalg_real_part, W) + { return x; } + template + static T &r(T &x, linalg_imag_part, W) { + return x; + } + }; + + + /* ********************************************************************* */ + /* Reference to the real part of (complex) vectors */ + /* ********************************************************************* */ + + template + struct part_vector_iterator { + typedef typename std::iterator_traits::value_type vtype; + typedef typename gmm::number_traits::magnitude_type value_type; + typedef value_type *pointer; + typedef ref_elt_vector::reference, PART> > reference; + typedef typename std::iterator_traits::difference_type difference_type; + typedef typename std::iterator_traits::iterator_category + iterator_category; + + IT it; + + part_vector_iterator(void) {} + explicit part_vector_iterator(const IT &i) : it(i) {} + part_vector_iterator(const part_vector_iterator &i) : it(i.it) {} + + + size_type index(void) const { return it.index(); } + part_vector_iterator operator ++(int) + { part_vector_iterator tmp = *this; ++it; return tmp; } + part_vector_iterator 
operator --(int) + { part_vector_iterator tmp = *this; --it; return tmp; } + part_vector_iterator &operator ++() { ++it; return *this; } + part_vector_iterator &operator --() { --it; return *this; } + part_vector_iterator &operator +=(difference_type i) + { it += i; return *this; } + part_vector_iterator &operator -=(difference_type i) + { it -= i; return *this; } + part_vector_iterator operator +(difference_type i) const + { part_vector_iterator itb = *this; return (itb += i); } + part_vector_iterator operator -(difference_type i) const + { part_vector_iterator itb = *this; return (itb -= i); } + difference_type operator -(const part_vector_iterator &i) const + { return difference_type(it - i.it); } + + reference operator *() const { return reference(*it); } + reference operator [](size_type ii) const { return reference(it[ii]); } + + bool operator ==(const part_vector_iterator &i) const + { return (i.it == it); } + bool operator !=(const part_vector_iterator &i) const + { return (i.it != it); } + bool operator < (const part_vector_iterator &i) const + { return (it < i.it); } + }; + + + template struct part_vector { + typedef part_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * CPT; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type size_; + + size_type size(void) const { return size_; } + + reference operator[](size_type i) const { + return reference(ref_or_value_type::r( + linalg_traits::access(origin, begin_, end_, i), + PART(), value_type())); + } + + part_vector(V &v) + : begin_(vect_begin(v)), end_(vect_end(v)), + origin(linalg_origin(v)), size_(gmm::vect_size(v)) {} + part_vector(const V &v) + : begin_(vect_begin(const_cast(v))), + 
end_(vect_end(const_cast(v))), + origin(linalg_origin(const_cast(v))), size_(gmm::vect_size(v)) {} + part_vector() {} + part_vector(const part_vector &cr) + : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), size_(cr.size_) {} + }; + + template inline + void set_to_begin(part_vector_iterator &it, + ORG o, part_vector *, linalg_modifiable) { + typedef part_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_begin(it.it, o, typename linalg_traits::pV(), ref_t()); + } + template inline + void set_to_begin(part_vector_iterator &it, + ORG o, const part_vector *, linalg_modifiable) { + typedef part_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_begin(it.it, o, typename linalg_traits::pV(), ref_t()); + } + template inline + void set_to_end(part_vector_iterator &it, + ORG o, part_vector *, linalg_modifiable) { + typedef part_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_end(it.it, o, typename linalg_traits::pV(), ref_t()); + } + template inline + void set_to_end(part_vector_iterator &it, + ORG o, const part_vector *, + linalg_modifiable) { + typedef part_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_end(it.it, o, typename linalg_traits::pV(), ref_t()); + } + + template std::ostream &operator << + (std::ostream &o, const part_vector& m) + { gmm::write(o,m); return o; } + + + /* ********************************************************************* */ + /* Reference to the real or imaginary part of (complex) matrices */ + /* ********************************************************************* */ + + + template struct part_row_ref { + + typedef part_row_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_row_iterator, typename linalg_traits + ::row_iterator, PT>::ref_type iterator; + typedef typename linalg_traits::value_type 
value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type nr, nc; + + part_row_ref(ref_M m) + : begin_(mat_row_begin(m)), end_(mat_row_end(m)), + origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {} + + part_row_ref(const part_row_ref &cr) : + begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {} + + reference operator()(size_type i, size_type j) const { + return reference(ref_or_value_type::r( + linalg_traits::access(begin_+i, j), + PART(), value_type())); + } + }; + + template std::ostream &operator << + (std::ostream &o, const part_row_ref& m) + { gmm::write(o,m); return o; } + + template struct part_col_ref { + + typedef part_col_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_col_iterator, typename linalg_traits + ::col_iterator, PT>::ref_type iterator; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type nr, nc; + + part_col_ref(ref_M m) + : begin_(mat_col_begin(m)), end_(mat_col_end(m)), + origin(linalg_origin(m)), nr(mat_nrows(m)), nc(mat_ncols(m)) {} + + part_col_ref(const part_col_ref &cr) : + begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {} + + reference operator()(size_type i, size_type j) const { + return reference(ref_or_value_type::r( + linalg_traits::access(begin_+j, i), + PART(), value_type())); + } + }; + + + + template std::ostream &operator << + (std::ostream &o, const part_col_ref& m) + { gmm::write(o,m); return o; } + + + + + + +template + struct part_return_ { + typedef abstract_null_type return_type; + }; + template + struct part_return_ { + 
typedef typename std::iterator_traits::value_type L; + typedef typename select_return, + part_row_ref< L *, PART>, PT>::return_type return_type; + }; + template + struct part_return_ { + typedef typename std::iterator_traits::value_type L; + typedef typename select_return, + part_col_ref, PT>::return_type return_type; + }; + + template struct part_return__{ + typedef abstract_null_type return_type; + }; + + template + struct part_return__ { + typedef typename std::iterator_traits::value_type L; + typedef typename part_return_::sub_orientation>::potype, PART, + PT>::return_type return_type; + }; + + template + struct part_return__ { + typedef typename std::iterator_traits::value_type L; + typedef typename select_return, + part_vector, PT>::return_type return_type; + }; + + template struct part_return { + typedef typename std::iterator_traits::value_type L; + typedef typename part_return__::linalg_type>::return_type return_type; + }; + + template inline + typename part_return::return_type + real_part(const L &l) { + return typename part_return::return_type + (linalg_cast(const_cast(l))); + } + + template inline + typename part_return::return_type + real_part(L &l) { + return typename part_return::return_type(linalg_cast(l)); + } + + template inline + typename part_return::return_type + imag_part(const L &l) { + return typename part_return::return_type + (linalg_cast(const_cast(l))); + } + + template inline + typename part_return::return_type + imag_part(L &l) { + return typename part_return::return_type(linalg_cast(l)); + } + + + template + struct linalg_traits > { + typedef part_vector this_type; + typedef this_type * pthis_type; + typedef PT pV; + typedef typename std::iterator_traits::value_type V; + typedef typename linalg_traits::index_sorted index_sorted; + typedef typename linalg_traits::is_reference V_reference; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename 
which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename linalg_traits::value_type vtype; + typedef typename number_traits::magnitude_type value_type; + typedef typename select_ref::reference, + PART> >, PT>::ref_type reference; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type pre_iterator; + typedef typename select_ref, + PT>::ref_type iterator; + typedef part_vector_iterator::const_iterator, + pre_iterator, PART> const_iterator; + typedef typename linalg_traits::storage_type storage_type; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { + iterator it; it.it = v.begin_; + if (!is_const_reference(is_reference()) && is_sparse(storage_type())) + set_to_begin(it, v.origin, pthis_type(), is_reference()); + return it; + } + static const_iterator begin(const this_type &v) { + const_iterator it(v.begin_); + if (!is_const_reference(is_reference()) && is_sparse(storage_type())) + { set_to_begin(it, v.origin, pthis_type(), is_reference()); } + return it; + } + static iterator end(this_type &v) { + iterator it(v.end_); + if (!is_const_reference(is_reference()) && is_sparse(storage_type())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static const_iterator end(const this_type &v) { + const_iterator it(v.end_); + if (!is_const_reference(is_reference()) && is_sparse(storage_type())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + + static void clear(origin_type* o, const iterator &begin_, + const iterator &end_, abstract_sparse) { + std::deque ind; + iterator it = begin_; + for (; it != end_; ++it) ind.push_front(it.index()); + for (; !(ind.empty()); ind.pop_back()) + access(o, begin_, end_, ind.back()) = value_type(0); + } + static 
void clear(origin_type* o, const iterator &begin_, + const iterator &end_, abstract_skyline) { + clear(o, begin_, end_, abstract_sparse()); + } + static void clear(origin_type* o, const iterator &begin_, + const iterator &end_, abstract_dense) { + for (iterator it = begin_; it != end_; ++it) *it = value_type(0); + } + + static void clear(origin_type* o, const iterator &begin_, + const iterator &end_) + { clear(o, begin_, end_, storage_type()); } + static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); } + static value_type access(const origin_type *o, const const_iterator &it, + const const_iterator &ite, size_type i) { + return real_or_imag_part(linalg_traits::access(o, it.it, ite.it,i), + PART()); + } + static reference access(origin_type *o, const iterator &it, + const iterator &ite, size_type i) + { return reference(linalg_traits::access(o, it.it, ite.it,i)); } + }; + + template + struct linalg_traits > { + typedef part_row_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type vtype; + typedef typename number_traits::magnitude_type value_type; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef typename org_type::const_sub_row_type>::t + pre_const_sub_row_type; + typedef typename org_type::sub_row_type>::t pre_sub_row_type; + typedef part_vector + const_sub_row_type; + typedef typename select_ref, PT>::ref_type sub_row_type; + typedef typename linalg_traits::const_row_iterator const_row_iterator; + typedef typename select_ref::row_iterator, PT>::ref_type row_iterator; + 
typedef typename select_ref< + typename linalg_traits::reference, + typename linalg_traits::reference, + PT>::ref_type reference; + typedef row_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type ncols(const this_type &v) { return v.nc; } + static size_type nrows(const this_type &v) { return v.nr; } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(linalg_traits::row(it)); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(linalg_traits::row(it)); } + static row_iterator row_begin(this_type &m) { return m.begin_; } + static row_iterator row_end(this_type &m) { return m.end_; } + static const_row_iterator row_begin(const this_type &m) + { return m.begin_; } + static const_row_iterator row_end(const this_type &m) { return m.end_; } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &v); + static value_type access(const const_row_iterator &itrow, size_type i) + { return real_or_imag_part(linalg_traits::access(itrow, i), PART()); } + static reference access(const row_iterator &itrow, size_type i) { + return reference(ref_or_value_type::r( + linalg_traits::access(itrow, i), + PART(), value_type())); + } + }; + + template + struct linalg_traits > { + typedef part_col_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type vtype; + typedef typename number_traits::magnitude_type value_type; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type 
row_iterator; + typedef abstract_null_type const_row_iterator; + typedef typename org_type::const_sub_col_type>::t + pre_const_sub_col_type; + typedef typename org_type::sub_col_type>::t pre_sub_col_type; + typedef part_vector + const_sub_col_type; + typedef typename select_ref, PT>::ref_type sub_col_type; + typedef typename linalg_traits::const_col_iterator const_col_iterator; + typedef typename select_ref::col_iterator, PT>::ref_type col_iterator; + typedef typename select_ref< + typename linalg_traits::reference, + typename linalg_traits::reference, + PT>::ref_type reference; + typedef col_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type nrows(const this_type &v) { return v.nr; } + static size_type ncols(const this_type &v) { return v.nc; } + static const_sub_col_type col(const const_col_iterator &it) + { return const_sub_col_type(linalg_traits::col(it)); } + static sub_col_type col(const col_iterator &it) + { return sub_col_type(linalg_traits::col(it)); } + static col_iterator col_begin(this_type &m) { return m.begin_; } + static col_iterator col_end(this_type &m) { return m.end_; } + static const_col_iterator col_begin(const this_type &m) + { return m.begin_; } + static const_col_iterator col_end(const this_type &m) { return m.end_; } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &v); + static value_type access(const const_col_iterator &itcol, size_type i) + { return real_or_imag_part(linalg_traits::access(itcol, i), PART()); } + static reference access(const col_iterator &itcol, size_type i) { + return reference(ref_or_value_type::r( + linalg_traits::access(itcol, i), + PART(), value_type())); + } + }; + + template + void linalg_traits >::do_clear(this_type &v) { + col_iterator it = mat_col_begin(v), ite = mat_col_end(v); + for (; it != ite; ++it) clear(col(it)); + } + + template + 
void linalg_traits >::do_clear(this_type &v) { + row_iterator it = mat_row_begin(v), ite = mat_row_end(v); + for (; it != ite; ++it) clear(row(it)); + } +} + +#endif // GMM_REAL_PART_H diff --git a/gmm/gmm_ref.h b/gmm/gmm_ref.h new file mode 100644 index 000000000..67af37739 --- /dev/null +++ b/gmm/gmm_ref.h @@ -0,0 +1,526 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2000-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. 
+ +===========================================================================*/ + + +#ifndef GMM_REF_H__ +#define GMM_REF_H__ + +/** @file gmm_ref.h + @author Yves Renard + @date August 26, 2000. + * @brief Provide some simple pseudo-containers. + * + * WARNING : modifiying the container infirm the validity of references. + */ + + +#include +#include "gmm_except.h" + +namespace gmm { + + /* ********************************************************************* */ + /* Simple reference. */ + /* ********************************************************************* */ + + template class tab_ref { + + protected : + + ITER begin_, end_; + + public : + + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::pointer const_pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::reference const_reference; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef ITER iterator; + typedef ITER const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef size_t size_type; + + bool empty(void) const { return begin_ == end_; } + size_type size(void) const { return end_ - begin_; } + + const iterator &begin(void) { return begin_; } + const const_iterator &begin(void) const { return begin_; } + const iterator &end(void) { return end_; } + const const_iterator &end(void) const { return end_; } + reverse_iterator rbegin(void) { return reverse_iterator(end()); } + const_reverse_iterator rbegin(void) const + { return const_reverse_iterator(end()); } + reverse_iterator rend(void) { return reverse_iterator(begin()); } + const_reverse_iterator rend(void) const + { return const_reverse_iterator(begin()); } + + reference front(void) { return *begin(); } + const_reference front(void) const { return *begin(); } + reference back(void) 
{ return *(--(end())); } + const_reference back(void) const { return *(--(end())); } + void pop_front(void) { ++begin_; } + + const_reference operator [](size_type ii) const { return *(begin_ + ii);} + reference operator [](size_type ii) { return *(begin_ + ii); } + + tab_ref(void) {} + tab_ref(const ITER &b, const ITER &e) : begin_(b), end_(e) {} + }; + + + /* ********************************************************************* */ + /* Reference with index. */ + /* ********************************************************************* */ + +// template struct tab_ref_index_iterator_ +// : public dynamic_array::const_iterator +// { +// typedef typename std::iterator_traits::value_type value_type; +// typedef typename std::iterator_traits::pointer pointer; +// typedef typename std::iterator_traits::reference reference; +// typedef typename std::iterator_traits::difference_type +// difference_type; +// typedef std::random_access_iterator_tag iterator_category; +// typedef size_t size_type; +// typedef dynamic_array::const_iterator dnas_iterator_; +// typedef tab_ref_index_iterator_ iterator; + + +// ITER piter; + +// iterator operator ++(int) +// { iterator tmp = *this; ++(*((dnas_iterator_ *)(this))); return tmp; } +// iterator operator --(int) +// { iterator tmp = *this; --(*((dnas_iterator_ *)(this))); return tmp; } +// iterator &operator ++() +// { ++(*((dnas_iterator_ *)(this))); return *this; } +// iterator &operator --() +// { --(*((dnas_iterator_ *)(this))); return *this; } +// iterator &operator +=(difference_type i) +// { (*((dnas_iterator_ *)(this))) += i; return *this; } +// iterator &operator -=(difference_type i) +// { (*((dnas_iterator_ *)(this))) -= i; return *this; } +// iterator operator +(difference_type i) const +// { iterator it = *this; return (it += i); } +// iterator operator -(difference_type i) const +// { iterator it = *this; return (it -= i); } +// difference_type operator -(const iterator &i) const +// { return *((dnas_iterator_ *)(this)) 
- *((dnas_iterator_ *)(&i)); } + +// reference operator *() const +// { return *(piter + *((*((dnas_iterator_ *)(this))))); } +// reference operator [](int ii) +// { return *(piter + *((*((dnas_iterator_ *)(this+ii))))); } + +// bool operator ==(const iterator &i) const +// { +// return ((piter) == ((i.piter)) +// && *((dnas_iterator_ *)(this)) == *((*((dnas_iterator_ *)(this))))); +// } +// bool operator !=(const iterator &i) const +// { return !(i == *this); } +// bool operator < (const iterator &i) const +// { +// return ((piter) == ((i.piter)) +// && *((dnas_iterator_ *)(this)) < *((*((dnas_iterator_ *)(this))))); +// } + +// tab_ref_index_iterator_(void) {} +// tab_ref_index_iterator_(const ITER &iter, const dnas_iterator_ &dnas_iter) +// : dnas_iterator_(dnas_iter), piter(iter) {} +// }; + + +// template class tab_ref_index +// { +// public : + +// typedef typename std::iterator_traits::value_type value_type; +// typedef typename std::iterator_traits::pointer pointer; +// typedef typename std::iterator_traits::pointer const_pointer; +// typedef typename std::iterator_traits::reference reference; +// typedef typename std::iterator_traits::reference const_reference; +// typedef typename std::iterator_traits::difference_type +// difference_type; +// typedef size_t size_type; +// typedef tab_ref_index_iterator_ iterator; +// typedef iterator const_iterator; +// typedef std::reverse_iterator const_reverse_iterator; +// typedef std::reverse_iterator reverse_iterator; + +// protected : + +// ITER begin_; +// dynamic_array index_; + +// public : + +// bool empty(void) const { return index_.empty(); } +// size_type size(void) const { return index_.size(); } + + +// iterator begin(void) { return iterator(begin_, index_.begin()); } +// const_iterator begin(void) const +// { return iterator(begin_, index_.begin()); } +// iterator end(void) { return iterator(begin_, index_.end()); } +// const_iterator end(void) const { return iterator(begin_, index_.end()); } +// 
reverse_iterator rbegin(void) { return reverse_iterator(end()); } +// const_reverse_iterator rbegin(void) const +// { return const_reverse_iterator(end()); } +// reverse_iterator rend(void) { return reverse_iterator(begin()); } +// const_reverse_iterator rend(void) const +// { return const_reverse_iterator(begin()); } + + +// reference front(void) { return *(begin_ +index_[0]); } +// const_reference front(void) const { return *(begin_ +index_[0]); } +// reference back(void) { return *(--(end())); } +// const_reference back(void) const { return *(--(end())); } + +// tab_ref_index(void) {} +// tab_ref_index(const ITER &b, const dynamic_array &ind) +// { begin_ = b; index_ = ind; } + +// // to be changed in a const_reference ? +// value_type operator [](size_type ii) const +// { return *(begin_ + index_[ii]);} +// reference operator [](size_type ii) { return *(begin_ + index_[ii]); } + +// }; + + + /// iterator over a gmm::tab_ref_index_ref + template + struct tab_ref_index_ref_iterator_ + { + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef std::random_access_iterator_tag iterator_category; + typedef tab_ref_index_ref_iterator_ iterator; + typedef size_t size_type; + + ITER piter; + ITER_INDEX iter_index; + + iterator operator ++(int) + { iterator tmp = *this; ++iter_index; return tmp; } + iterator operator --(int) + { iterator tmp = *this; --iter_index; return tmp; } + iterator &operator ++() { ++iter_index; return *this; } + iterator &operator --() { --iter_index; return *this; } + iterator &operator +=(difference_type i) + { iter_index += i; return *this; } + iterator &operator -=(difference_type i) + { iter_index -= i; return *this; } + iterator operator +(difference_type i) const + { iterator it = *this; return (it += i); } + iterator operator 
-(difference_type i) const + { iterator it = *this; return (it -= i); } + difference_type operator -(const iterator &i) const + { return iter_index - i.iter_index; } + + reference operator *() const + { return *(piter + *iter_index); } + reference operator [](size_type ii) const + { return *(piter + *(iter_index+ii)); } + + bool operator ==(const iterator &i) const + { return ((piter) == ((i.piter)) && iter_index == i.iter_index); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const + { return ((piter) == ((i.piter)) && iter_index < i.iter_index); } + + tab_ref_index_ref_iterator_(void) {} + tab_ref_index_ref_iterator_(const ITER &iter, + const ITER_INDEX &dnas_iter) + : piter(iter), iter_index(dnas_iter) {} + + }; + + /** + convenience template function for quick obtention of a indexed iterator + without having to specify its (long) typename + */ + template + tab_ref_index_ref_iterator_ + index_ref_iterator(ITER it, ITER_INDEX it_i) { + return tab_ref_index_ref_iterator_(it, it_i); + } + + /** indexed array reference (given a container X, and a set of indexes I, + this class provides a pseudo-container Y such that + @code Y[i] = X[I[i]] @endcode + */ + template class tab_ref_index_ref { + public : + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::pointer pointer; + typedef typename traits_type::pointer const_pointer; + typedef typename traits_type::reference reference; + typedef typename traits_type::reference const_reference; + typedef typename traits_type::difference_type difference_type; + typedef size_t size_type; + typedef tab_ref_index_ref_iterator_ iterator; + typedef iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + protected : + + ITER begin_; + ITER_INDEX index_begin_, index_end_; + + public : + + bool empty(void) const { 
return index_begin_ == index_end_; } + size_type size(void) const { return index_end_ - index_begin_; } + + iterator begin(void) { return iterator(begin_, index_begin_); } + const_iterator begin(void) const + { return iterator(begin_, index_begin_); } + iterator end(void) { return iterator(begin_, index_end_); } + const_iterator end(void) const { return iterator(begin_, index_end_); } + reverse_iterator rbegin(void) { return reverse_iterator(end()); } + const_reverse_iterator rbegin(void) const + { return const_reverse_iterator(end()); } + reverse_iterator rend(void) { return reverse_iterator(begin()); } + const_reverse_iterator rend(void) const + { return const_reverse_iterator(begin()); } + + reference front(void) { return *(begin_ + *index_begin_); } + const_reference front(void) const { return *(begin_ + *index_begin_); } + reference back(void) { return *(--(end())); } + const_reference back(void) const { return *(--(end())); } + void pop_front(void) { ++index_begin_; } + + tab_ref_index_ref(void) {} + tab_ref_index_ref(const ITER &b, const ITER_INDEX &bi, + const ITER_INDEX &ei) + : begin_(b), index_begin_(bi), index_end_(ei) {} + + // to be changed in a const_reference ? + const_reference operator [](size_type ii) const + { return *(begin_ + index_begin_[ii]);} + reference operator [](size_type ii) + { return *(begin_ + index_begin_[ii]); } + + }; + + + /* ********************************************************************* */ + /* Reference on regularly spaced elements. 
*/ + /* ********************************************************************* */ + + template struct tab_ref_reg_spaced_iterator_ { + + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef typename std::iterator_traits::iterator_category + iterator_category; + typedef size_t size_type; + typedef tab_ref_reg_spaced_iterator_ iterator; + + ITER it; + size_type N, i; + + iterator operator ++(int) { iterator tmp = *this; i++; return tmp; } + iterator operator --(int) { iterator tmp = *this; i--; return tmp; } + iterator &operator ++() { i++; return *this; } + iterator &operator --() { i--; return *this; } + iterator &operator +=(difference_type ii) { i+=ii; return *this; } + iterator &operator -=(difference_type ii) { i-=ii; return *this; } + iterator operator +(difference_type ii) const + { iterator itt = *this; return (itt += ii); } + iterator operator -(difference_type ii) const + { iterator itt = *this; return (itt -= ii); } + difference_type operator -(const iterator &ii) const + { return (N ? 
(it - ii.it) / N : 0) + i - ii.i; } + + reference operator *() const { return *(it + i*N); } + reference operator [](size_type ii) const { return *(it + (i+ii)*N); } + + bool operator ==(const iterator &ii) const + { return (*this - ii) == difference_type(0); } + bool operator !=(const iterator &ii) const + { return (*this - ii) != difference_type(0); } + bool operator < (const iterator &ii) const + { return (*this - ii) < difference_type(0); } + + tab_ref_reg_spaced_iterator_(void) {} + tab_ref_reg_spaced_iterator_(const ITER &iter, size_type n, size_type ii) + : it(iter), N(n), i(ii) { } + + }; + + /** + convenience template function for quick obtention of a strided iterator + without having to specify its (long) typename + */ + template tab_ref_reg_spaced_iterator_ + reg_spaced_iterator(ITER it, size_t stride) { + return tab_ref_reg_spaced_iterator_(it, stride); + } + + /** + provide a "strided" view a of container + */ + template class tab_ref_reg_spaced { + public : + + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::pointer const_pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::reference const_reference; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef size_t size_type; + typedef tab_ref_reg_spaced_iterator_ iterator; + typedef iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + protected : + + ITER begin_; + size_type N, size_; + + public : + + bool empty(void) const { return size_ == 0; } + size_type size(void) const { return size_; } + + iterator begin(void) { return iterator(begin_, N, 0); } + const_iterator begin(void) const { return iterator(begin_, N, 0); } + iterator end(void) { return iterator(begin_, N, size_); } + const_iterator end(void) const { return 
iterator(begin_, N, size_); } + reverse_iterator rbegin(void) { return reverse_iterator(end()); } + const_reverse_iterator rbegin(void) const + { return const_reverse_iterator(end()); } + reverse_iterator rend(void) { return reverse_iterator(begin()); } + const_reverse_iterator rend(void) const + { return const_reverse_iterator(begin()); } + + reference front(void) { return *begin_; } + const_reference front(void) const { return *begin_; } + reference back(void) { return *(begin_ + N * (size_-1)); } + const_reference back(void) const { return *(begin_ + N * (size_-1)); } + void pop_front(void) { begin_ += N; } + + tab_ref_reg_spaced(void) {} + tab_ref_reg_spaced(const ITER &b, size_type n, size_type s) + : begin_(b), N(n), size_(s) {} + + + const_reference operator [](size_type ii) const + { return *(begin_ + ii * N);} + reference operator [](size_type ii) { return *(begin_ + ii * N); } + + }; + + /// iterator over a tab_ref_with_selection + template + struct tab_ref_with_selection_iterator_ : public ITER { + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::difference_type + difference_type; + typedef std::forward_iterator_tag iterator_category; + typedef tab_ref_with_selection_iterator_ iterator; + const COND cond; + + void forward(void) { while (!(cond)(*this)) ITER::operator ++(); } + iterator &operator ++() + { ITER::operator ++(); forward(); return *this; } + iterator operator ++(int) + { iterator tmp = *this; ++(*this); return tmp; } + + tab_ref_with_selection_iterator_(void) {} + tab_ref_with_selection_iterator_(const ITER &iter, const COND c) + : ITER(iter), cond(c) {} + + }; + + /** + given a container X and a predicate P, provide pseudo-container Y + of all elements of X such that P(X[i]). 
+ */ + /* Holds an iterator pair (begin_, end_) and a predicate object (cond). begin() builds an iterator at begin_ and calls forward() on it before returning it; the constructor and pop_front() store that result back into begin_. empty() only compares begin_ with end_. NOTE(review): the angle-bracket template parameter/argument lists look stripped in this copy (e.g. 'template class', 'std::iterator_traits::value_type') - confirm against upstream gmm_ref.h. */ template class tab_ref_with_selection { + + protected : + + ITER begin_, end_; + COND cond; + + public : + + typedef typename std::iterator_traits::value_type value_type; + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::pointer const_pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::reference const_reference; + typedef size_t size_type; + typedef tab_ref_with_selection_iterator_ iterator; + typedef iterator const_iterator; + + iterator begin(void) const + { iterator it(begin_, cond); it.forward(); return it; } + iterator end(void) const { return iterator(end_, cond); } + bool empty(void) const { return begin_ == end_; } + + value_type front(void) const { return *begin(); } + void pop_front(void) { ++begin_; begin_ = begin(); } + + COND &condition(void) { return cond; } + const COND &condition(void) const { return cond; } + + tab_ref_with_selection(void) {} + tab_ref_with_selection(const ITER &b, const ITER &e, const COND &c) + : begin_(b), end_(e), cond(c) { begin_ = begin(); } + + }; + +} + +#endif /* GMM_REF_H__ */ diff --git a/gmm/gmm_scaled.h b/gmm/gmm_scaled.h new file mode 100644 index 000000000..485af32a1 --- /dev/null +++ b/gmm/gmm_scaled.h @@ -0,0 +1,434 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_scaled.h + @author Yves Renard + @date November 10, 2002. + @brief get a scaled view of a vector/matrix. 
+*/ +#ifndef GMM_SCALED_H__ +#define GMM_SCALED_H__ + +#include "gmm_def.h" + +namespace gmm { + + /* ********************************************************************* */ + /* Scaled references on vectors */ + /* ********************************************************************* */ + + template struct scaled_const_iterator { + typedef typename strongest_numeric_type::value_type, + S>::T value_type; + + typedef typename std::iterator_traits::pointer pointer; + typedef typename std::iterator_traits::reference reference; + typedef typename std::iterator_traits::difference_type difference_type; + typedef typename std::iterator_traits::iterator_category + iterator_category; + + IT it; + S r; + + scaled_const_iterator(void) {} + scaled_const_iterator(const IT &i, S x) : it(i), r(x) {} + + inline size_type index(void) const { return it.index(); } + inline scaled_const_iterator operator ++(int) + { scaled_const_iterator tmp = *this; ++it; return tmp; } + inline scaled_const_iterator operator --(int) + { scaled_const_iterator tmp = *this; --it; return tmp; } + inline scaled_const_iterator &operator ++() { ++it; return *this; } + inline scaled_const_iterator &operator --() { --it; return *this; } + inline scaled_const_iterator &operator +=(difference_type i) + { it += i; return *this; } + inline scaled_const_iterator &operator -=(difference_type i) + { it -= i; return *this; } + inline scaled_const_iterator operator +(difference_type i) const + { scaled_const_iterator itb = *this; return (itb += i); } + inline scaled_const_iterator operator -(difference_type i) const + { scaled_const_iterator itb = *this; return (itb -= i); } + inline difference_type operator -(const scaled_const_iterator &i) const + { return difference_type(it - i.it); } + + inline value_type operator *() const { return (*it) * value_type(r); } + inline value_type operator [](size_type ii) const { return it[ii] * r; } + + inline bool operator ==(const scaled_const_iterator &i) const + { return (i.it 
== it); } + inline bool operator !=(const scaled_const_iterator &i) const + { return (i.it != it); } + inline bool operator < (const scaled_const_iterator &i) const + { return (it < i.it); } + }; + + template struct scaled_vector_const_ref { + typedef scaled_vector_const_ref this_type; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::const_iterator iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + size_type size_; + S r; + + scaled_vector_const_ref(const V &v, S rr) + : begin_(vect_const_begin(v)), end_(vect_const_end(v)), + origin(linalg_origin(v)), size_(vect_size(v)), r(rr) {} + + reference operator[](size_type i) const + { return value_type(r) * linalg_traits::access(origin, begin_, end_, i); } + }; + + + template std::ostream &operator << + (std::ostream &o, const scaled_vector_const_ref& m) + { gmm::write(o,m); return o; } + + /* ********************************************************************* */ + /* Scaled references on matrices */ + /* ********************************************************************* */ + + template struct scaled_row_const_iterator { + typedef scaled_row_const_iterator iterator; + typedef typename linalg_traits::const_row_iterator ITER; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + ITER it; + S r; + + inline iterator operator ++(int) { iterator tmp=*this; it++; return tmp; } + inline iterator operator --(int) { iterator tmp=*this; it--; return tmp; } + inline iterator &operator ++() { it++; return *this; } + inline iterator &operator --() { it--; return *this; } + iterator &operator +=(difference_type i) { it += i; return *this; } + iterator &operator -=(difference_type i) { it -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const 
+ { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const + { return it - i.it; } + + inline ITER operator *() const { return it; } + inline ITER operator [](int i) { return it + i; } + + inline bool operator ==(const iterator &i) const { return (it == i.it); } + inline bool operator !=(const iterator &i) const { return !(i == *this); } + inline bool operator < (const iterator &i) const { return (it < i.it); } + + scaled_row_const_iterator(void) {} + scaled_row_const_iterator(const ITER &i, S rr) + : it(i), r(rr) { } + + }; + + template struct scaled_row_matrix_const_ref { + + typedef scaled_row_matrix_const_ref this_type; + typedef typename linalg_traits::const_row_iterator iterator; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + S r; + size_type nr, nc; + + scaled_row_matrix_const_ref(const M &m, S rr) + : begin_(mat_row_begin(m)), end_(mat_row_end(m)), + origin(linalg_origin(m)), r(rr), nr(mat_nrows(m)), nc(mat_ncols(m)) {} + + value_type operator()(size_type i, size_type j) const + { return r * linalg_traits::access(begin_+i, j); } + }; + + + template std::ostream &operator << + (std::ostream &o, const scaled_row_matrix_const_ref& m) + { gmm::write(o,m); return o; } + + + template struct scaled_col_const_iterator { + typedef scaled_col_const_iterator iterator; + typedef typename linalg_traits::const_col_iterator ITER; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + ITER it; + S r; + + iterator operator ++(int) { iterator tmp = *this; it++; return tmp; } + iterator operator --(int) { iterator tmp = *this; it--; return tmp; } + iterator &operator ++() { it++; return *this; } + iterator &operator --() { it--; return *this; } + iterator &operator +=(difference_type i) { it += i; return *this; } + iterator &operator -=(difference_type i) { it -= i; return *this; } + iterator 
operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const + { return it - i.it; } + + ITER operator *() const { return it; } + ITER operator [](int i) { return it + i; } + + bool operator ==(const iterator &i) const { return (it == i.it); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (it < i.it); } + + scaled_col_const_iterator(void) {} + scaled_col_const_iterator(const ITER &i, S rr) + : it(i), r(rr) { } + + }; + + template struct scaled_col_matrix_const_ref { + + typedef scaled_col_matrix_const_ref this_type; + typedef typename linalg_traits::const_col_iterator iterator; + typedef typename linalg_traits::value_type value_type; + typedef typename linalg_traits::origin_type origin_type; + + iterator begin_, end_; + const origin_type *origin; + S r; + size_type nr, nc; + + scaled_col_matrix_const_ref(const M &m, S rr) + : begin_(mat_col_begin(m)), end_(mat_col_end(m)), + origin(linalg_origin(m)), r(rr), nr(mat_nrows(m)), nc(mat_ncols(m)) {} + + value_type operator()(size_type i, size_type j) const + { return r * linalg_traits::access(begin_+j, i); } + }; + + + + template std::ostream &operator << + (std::ostream &o, const scaled_col_matrix_const_ref& m) + { gmm::write(o,m); return o; } + + + template struct scaled_return__ { + typedef abstract_null_type return_type; + }; + template struct scaled_return__ + { typedef scaled_row_matrix_const_ref return_type; }; + template struct scaled_return__ + { typedef scaled_col_matrix_const_ref return_type; }; + + + template struct scaled_return_ { + typedef abstract_null_type return_type; + }; + template struct scaled_return_ + { typedef scaled_vector_const_ref return_type; }; + template struct scaled_return_ { + typedef typename 
scaled_return__::sub_orientation>::potype>::return_type return_type; + }; + + template struct scaled_return { + typedef typename scaled_return_::linalg_type>::return_type return_type; + }; + + template inline + typename scaled_return::return_type + scaled(const L &v, S x) + { return scaled(v, x, typename linalg_traits::linalg_type()); } + + template inline + typename scaled_return::return_type + scaled(const V &v, S x, abstract_vector) + { return scaled_vector_const_ref(v, x); } + + template inline + typename scaled_return::return_type + scaled(const M &m, S x,abstract_matrix) { + return scaled(m, x, typename principal_orientation_type::sub_orientation>::potype()); + } + + template inline + typename scaled_return::return_type + scaled(const M &m, S x, row_major) { + return scaled_row_matrix_const_ref(m, x); + } + + template inline + typename scaled_return::return_type + scaled(const M &m, S x, col_major) { + return scaled_col_matrix_const_ref(m, x); + } + + + /* ******************************************************************** */ + /* matrix or vector scale */ + /* ******************************************************************** */ + + template inline + void scale(L& l, typename linalg_traits::value_type a) + { scale(l, a, typename linalg_traits::linalg_type()); } + + template inline + void scale(const L& l, typename linalg_traits::value_type a) + { scale(linalg_const_cast(l), a); } + + template inline + void scale(L& l, typename linalg_traits::value_type a, abstract_vector) { + typename linalg_traits::iterator it = vect_begin(l), ite = vect_end(l); + for ( ; it != ite; ++it) *it *= a; + } + + template + void scale(L& l, typename linalg_traits::value_type a, abstract_matrix) { + scale(l, a, typename principal_orientation_type::sub_orientation>::potype()); + } + + template + void scale(L& l, typename linalg_traits::value_type a, row_major) { + typename linalg_traits::row_iterator it = mat_row_begin(l), + ite = mat_row_end(l); + for ( ; it != ite; ++it) 
scale(linalg_traits::row(it), a); + } + + template + void scale(L& l, typename linalg_traits::value_type a, col_major) { + typename linalg_traits::col_iterator it = mat_col_begin(l), + ite = mat_col_end(l); + for ( ; it != ite; ++it) scale(linalg_traits::col(it), a); + } + + template struct linalg_traits > { + typedef scaled_vector_const_ref this_type; + typedef linalg_const is_reference; + typedef abstract_vector linalg_type; + typedef typename strongest_numeric_type::value_type>::T value_type; + typedef typename linalg_traits::origin_type origin_type; + typedef value_type reference; + typedef abstract_null_type iterator; + typedef scaled_const_iterator::const_iterator, S> + const_iterator; + typedef typename linalg_traits::storage_type storage_type; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type size(const this_type &v) { return v.size_; } + static const_iterator begin(const this_type &v) + { return const_iterator(v.begin_, v.r); } + static const_iterator end(const this_type &v) + { return const_iterator(v.end_, v.r); } + static const origin_type* origin(const this_type &v) { return v.origin; } + static value_type access(const origin_type *o, const const_iterator &it, + const const_iterator &ite, size_type i) + { return it.r * (linalg_traits::access(o, it.it, ite.it, i)); } + + }; + + + template struct linalg_traits > { + typedef scaled_row_matrix_const_ref this_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename strongest_numeric_type::value_type>::T value_type; + typedef value_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef typename org_type::const_sub_row_type>::t vector_type; + typedef scaled_vector_const_ref sub_row_type; + typedef scaled_vector_const_ref const_sub_row_type; + typedef scaled_row_const_iterator row_iterator; + typedef scaled_row_const_iterator const_row_iterator; + 
typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_col_iterator; + typedef abstract_null_type col_iterator; + typedef row_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type nrows(const this_type &m) + { return m.nr; } + static size_type ncols(const this_type &m) + { return m.nc; } + static const_sub_row_type row(const const_row_iterator &it) + { return scaled(linalg_traits::row(it.it), it.r); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.begin_, m.r); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m.end_, m.r); } + static const origin_type* origin(const this_type &m) { return m.origin; } + static value_type access(const const_row_iterator &it, size_type i) + { return it.r * (linalg_traits::access(it.it, i)); } + }; + + template struct linalg_traits > { + typedef scaled_col_matrix_const_ref this_type; + typedef linalg_const is_reference; + typedef abstract_matrix linalg_type; + typedef typename strongest_numeric_type::value_type>::T value_type; + typedef typename linalg_traits::origin_type origin_type; + typedef value_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef typename org_type::const_sub_col_type>::t vector_type; + typedef abstract_null_type sub_col_type; + typedef scaled_vector_const_ref const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef scaled_col_const_iterator const_col_iterator; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_row_iterator; + typedef abstract_null_type row_iterator; + typedef col_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type ncols(const this_type &m) + { return m.nc; } + static size_type nrows(const this_type &m) + { return m.nr; } + 
static const_sub_col_type col(const const_col_iterator &it) + { return scaled(linalg_traits::col(it.it), it.r); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.begin_, m.r); } + static const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m.end_, m.r); } + static const origin_type* origin(const this_type &m) { return m.origin; } + static value_type access(const const_col_iterator &it, size_type i) + { return it.r * (linalg_traits::access(it.it, i)); } + }; + + +} + +#endif // GMM_SCALED_H__ diff --git a/gmm/gmm_solver_Schwarz_additive.h b/gmm/gmm_solver_Schwarz_additive.h new file mode 100644 index 000000000..7f8554b5a --- /dev/null +++ b/gmm/gmm_solver_Schwarz_additive.h @@ -0,0 +1,805 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. 
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_solver_Schwarz_additive.h + @author Yves Renard + @author Michel Fournie + @date October 13, 2002. +*/ + +#ifndef GMM_SOLVERS_SCHWARZ_ADDITIVE_H__ +#define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__ + +#include "gmm_kernel.h" +#include "gmm_superlu_interface.h" +#include "gmm_solver_cg.h" +#include "gmm_solver_gmres.h" +#include "gmm_solver_bicgstab.h" +#include "gmm_solver_qmr.h" + +namespace gmm { + + /* ******************************************************************** */ + /* Additive Schwarz interfaced local solvers */ + /* ******************************************************************** */ + + struct using_cg {}; + struct using_gmres {}; + struct using_bicgstab {}; + struct using_qmr {}; + + template + struct actual_precond { + typedef P APrecond; + static const APrecond &transform(const P &PP) { return PP; } + }; + + template + void AS_local_solve(using_cg, const Matrix1 &A, Vector &x, const Vector &b, + const Precond &P, iteration &iter) + { cg(A, x, b, P, iter); } + + template + void AS_local_solve(using_gmres, const Matrix1 &A, Vector &x, + const Vector &b, const Precond &P, iteration &iter) + { gmres(A, x, b, P, 100, iter); } + + template + void AS_local_solve(using_bicgstab, const Matrix1 &A, Vector &x, + const Vector &b, const Precond &P, iteration &iter) + { bicgstab(A, x, b, P, iter); } + + template + void AS_local_solve(using_qmr, const Matrix1 &A, Vector &x, + const Vector &b, const Precond &P, iteration &iter) + 
{ qmr(A, x, b, P, iter); } + +#if defined(GMM_USES_SUPERLU) + struct using_superlu {}; + + template + struct actual_precond { + typedef typename linalg_traits::value_type value_type; + typedef SuperLU_factor APrecond; + template + static APrecond transform(const PR &) { return APrecond(); } + static const APrecond &transform(const APrecond &PP) { return PP; } + }; + + template + void AS_local_solve(using_superlu, const Matrix1 &, Vector &x, + const Vector &b, const Precond &P, iteration &iter) + { P.solve(x, b); iter.set_iteration(1); } +#endif + + /* ******************************************************************** */ + /* Additive Schwarz Linear system */ + /* ******************************************************************** */ + + template + struct add_schwarz_mat{ + typedef typename linalg_traits::value_type value_type; + + const Matrix1 *A; + const std::vector *vB; + std::vector vAloc; + mutable iteration iter; + double residual; + mutable size_type itebilan; + mutable std::vector > gi, fi; + std::vector::APrecond> precond1; + + void init(const Matrix1 &A_, const std::vector &vB_, + iteration iter_, const Precond &P, double residual_); + + add_schwarz_mat(void) {} + add_schwarz_mat(const Matrix1 &A_, const std::vector &vB_, + iteration iter_, const Precond &P, double residual_) + { init(A_, vB_, iter_, P, residual_); } + }; + + template + void add_schwarz_mat::init( + const Matrix1 &A_, const std::vector &vB_, + iteration iter_, const Precond &P, double residual_) { + + vB = &vB_; A = &A_; iter = iter_; + residual = residual_; + + size_type nb_sub = vB->size(); + vAloc.resize(nb_sub); + gi.resize(nb_sub); fi.resize(nb_sub); + precond1.resize(nb_sub); + std::fill(precond1.begin(), precond1.end(), + actual_precond::transform(P)); + itebilan = 0; + + if (iter.get_noisy()) cout << "Init pour sub dom "; +#ifdef GMM_USES_MPI + int size,tranche,borne_sup,borne_inf,rank,tag1=11,tag2=12,tag3=13,sizepr = 0; + // int tab[4]; + double t_ref,t_final; + MPI_Status 
status; + t_ref=MPI_Wtime(); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + tranche=nb_sub/size; + borne_inf=rank*tranche; + borne_sup=(rank+1)*tranche; + // if (rank==size-1) borne_sup = nb_sub; + + cout << "Nombre de sous domaines " << borne_sup - borne_inf << endl; + + int sizeA = mat_nrows(*A); + gmm::csr_matrix Acsr(sizeA, sizeA), Acsrtemp(sizeA, sizeA); + gmm::copy(gmm::eff_matrix(*A), Acsr); + int next = (rank + 1) % size; + int previous = (rank + size - 1) % size; + //communication of local information on ring pattern + //Each process receive Nproc-1 contributions + + for (int nproc = 0; nproc < size; ++nproc) { + for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) { +// for (size_type i = 0; i < nb_sub/size; ++i) { +// for (size_type i = 0; i < nb_sub; ++i) { + // size_type i=(rank+size*(j-1)+nb_sub)%nb_sub; + + cout << "Sous domaines " << i << " : " << mat_ncols((*vB)[i]) << endl; +#else + for (size_type i = 0; i < nb_sub; ++i) { +#endif + + if (iter.get_noisy()) cout << i << " " << std::flush; + Matrix2 Maux(mat_ncols((*vB)[i]), mat_nrows((*vB)[i])); + +#ifdef GMM_USES_MPI + Matrix2 Maux2(mat_ncols((*vB)[i]), mat_ncols((*vB)[i])); + if (nproc == 0) { + gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i])); + gmm::clear(vAloc[i]); + } + gmm::mult(gmm::transposed((*vB)[i]), Acsr, Maux); + gmm::mult(Maux, (*vB)[i], Maux2); + gmm::add(Maux2, vAloc[i]); +#else + gmm::resize(vAloc[i], mat_ncols((*vB)[i]), mat_ncols((*vB)[i])); + gmm::mult(gmm::transposed((*vB)[i]), *A, Maux); + gmm::mult(Maux, (*vB)[i], vAloc[i]); +#endif + +#ifdef GMM_USES_MPI + if (nproc == size - 1 ) { +#endif + precond1[i].build_with(vAloc[i]); + gmm::resize(fi[i], mat_ncols((*vB)[i])); + gmm::resize(gi[i], mat_ncols((*vB)[i])); +#ifdef GMM_USES_MPI + } +#else + } +#endif +#ifdef GMM_USES_MPI + } + if (nproc != size - 1) { + MPI_Sendrecv(&(Acsr.jc[0]), sizeA+1, MPI_INT, next, tag2, + &(Acsrtemp.jc[0]), sizeA+1, MPI_INT, 
previous, tag2, + MPI_COMM_WORLD, &status); + if (Acsrtemp.jc[sizeA] > size_type(sizepr)) { + sizepr = Acsrtemp.jc[sizeA]; + gmm::resize(Acsrtemp.pr, sizepr); + gmm::resize(Acsrtemp.ir, sizepr); + } + MPI_Sendrecv(&(Acsr.ir[0]), Acsr.jc[sizeA], MPI_INT, next, tag1, + &(Acsrtemp.ir[0]), Acsrtemp.jc[sizeA], MPI_INT, previous, tag1, + MPI_COMM_WORLD, &status); + + MPI_Sendrecv(&(Acsr.pr[0]), Acsr.jc[sizeA], mpi_type(value_type()), next, tag3, + &(Acsrtemp.pr[0]), Acsrtemp.jc[sizeA], mpi_type(value_type()), previous, tag3, + MPI_COMM_WORLD, &status); + gmm::copy(Acsrtemp, Acsr); + } + } + t_final=MPI_Wtime(); + cout<<"temps boucle precond "<< t_final-t_ref< + void mult(const add_schwarz_mat &M, + const Vector2 &p, Vector3 &q) { + size_type itebilan = 0; +#ifdef GMM_USES_MPI + static double tmult_tot = 0.0; + double t_ref = MPI_Wtime(); +#endif + // cout << "tmult AS begin " << endl; + mult(*(M.A), p, q); +#ifdef GMM_USES_MPI + tmult_tot += MPI_Wtime()-t_ref; + cout << "tmult_tot = " << tmult_tot << endl; +#endif + std::vector qbis(gmm::vect_size(q)); + std::vector qter(gmm::vect_size(q)); +#ifdef GMM_USES_MPI + // MPI_Status status; + // MPI_Request request,request1; + // int tag=111; + int size,tranche,borne_sup,borne_inf,rank; + size_type nb_sub=M.fi.size(); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + tranche=nb_sub/size; + borne_inf=rank*tranche; + borne_sup=(rank+1)*tranche; + // if (rank==size-1) borne_sup=nb_sub; + // int next = (rank + 1) % size; + // int previous = (rank + size - 1) % size; + t_ref = MPI_Wtime(); + for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) +// for (size_type i = 0; i < nb_sub/size; ++i) + // for (size_type j = 0; j < nb_sub; ++j) +#else + for (size_type i = 0; i < M.fi.size(); ++i) +#endif + { +#ifdef GMM_USES_MPI + // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub; +#endif + gmm::mult(gmm::transposed((*(M.vB))[i]), q, M.fi[i]); + M.iter.init(); + 
AS_local_solve(local_solver(), (M.vAloc)[i], (M.gi)[i], + (M.fi)[i],(M.precond1)[i],M.iter); + itebilan = std::max(itebilan, M.iter.get_iteration()); + } + +#ifdef GMM_USES_MPI + cout << "First AS loop time " << MPI_Wtime() - t_ref << endl; +#endif + + gmm::clear(q); +#ifdef GMM_USES_MPI + t_ref = MPI_Wtime(); + // for (size_type j = 0; j < nb_sub; ++j) + for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) + +#else + for (size_type i = 0; i < M.gi.size(); ++i) +#endif + { + +#ifdef GMM_USES_MPI + // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub; +// gmm::mult((*(M.vB))[i], M.gi[i], qbis,qbis); + gmm::mult((*(M.vB))[i], M.gi[i], qter); + add(qter,qbis,qbis); +#else + gmm::mult((*(M.vB))[i], M.gi[i], q, q); +#endif + } +#ifdef GMM_USES_MPI + //WARNING this add only if you use the ring pattern below + // need to do this below if using a n explicit ring pattern communication + +// add(qbis,q,q); + cout << "Second AS loop time " << MPI_Wtime() - t_ref << endl; +#endif + + +#ifdef GMM_USES_MPI + // int tag1=11; + static double t_tot = 0.0; + double t_final; + t_ref=MPI_Wtime(); +// int next = (rank + 1) % size; +// int previous = (rank + size - 1) % size; + //communication of local information on ring pattern + //Each process receive Nproc-1 contributions + +// if (size > 1) { +// for (int nproc = 0; nproc < size-1; ++nproc) +// { + +// MPI_Sendrecv(&(qbis[0]), gmm::vect_size(q), MPI_DOUBLE, next, tag1, +// &(qter[0]), gmm::vect_size(q),MPI_DOUBLE,previous,tag1, +// MPI_COMM_WORLD,&status); +// gmm::copy(qter, qbis); +// add(qbis,q,q); +// } +// } + MPI_Allreduce(&(qbis[0]), &(q[0]),gmm::vect_size(q), MPI_DOUBLE, + MPI_SUM,MPI_COMM_WORLD); + t_final=MPI_Wtime(); + t_tot += t_final-t_ref; + cout<<"["<< rank<<"] temps reduce Resol "<< t_final-t_ref << " t_tot = " << t_tot << endl; +#endif + + if (M.iter.get_noisy() > 0) cout << "itebloc = " << itebilan << endl; + M.itebilan += itebilan; + M.iter.set_resmax((M.iter.get_resmax() + M.residual) * 0.5); 
+ } + + template + void mult(const add_schwarz_mat &M, + const Vector2 &p, const Vector3 &q) { + mult(M, p, const_cast(q)); + } + + template + void mult(const add_schwarz_mat &M, + const Vector2 &p, const Vector3 &p2, Vector4 &q) + { mult(M, p, q); add(p2, q); } + + template + void mult(const add_schwarz_mat &M, + const Vector2 &p, const Vector3 &p2, const Vector4 &q) + { mult(M, p, const_cast(q)); add(p2, q); } + + /* ******************************************************************** */ + /* Additive Schwarz interfaced global solvers */ + /* ******************************************************************** */ + + template + void AS_global_solve(using_cg, const ASM_type &ASM, Vect &x, + const Vect &b, iteration &iter) + { cg(ASM, x, b, *(ASM.A), identity_matrix(), iter); } + + template + void AS_global_solve(using_gmres, const ASM_type &ASM, Vect &x, + const Vect &b, iteration &iter) + { gmres(ASM, x, b, identity_matrix(), 100, iter); } + + template + void AS_global_solve(using_bicgstab, const ASM_type &ASM, Vect &x, + const Vect &b, iteration &iter) + { bicgstab(ASM, x, b, identity_matrix(), iter); } + + template + void AS_global_solve(using_qmr,const ASM_type &ASM, Vect &x, + const Vect &b, iteration &iter) + { qmr(ASM, x, b, identity_matrix(), iter); } + +#if defined(GMM_USES_SUPERLU) + template + void AS_global_solve(using_superlu, const ASM_type &, Vect &, + const Vect &, iteration &) { + GMM_ASSERT1(false, "You cannot use SuperLU as " + "global solver in additive Schwarz meethod"); + } +#endif + + /* ******************************************************************** */ + /* Linear Additive Schwarz method */ + /* ******************************************************************** */ + /* ref : Domain decomposition algorithms for the p-version finite */ + /* element method for elliptic problems, Luca F. Pavarino, */ + /* PhD thesis, Courant Institute of Mathematical Sciences, 1992. 
*/ + /* ******************************************************************** */ + + /** Function to call if the ASM matrix is precomputed for successive solve + * with the same system. + */ + template + void additive_schwarz( + add_schwarz_mat &ASM, Vector3 &u, + const Vector2 &f, iteration &iter, const global_solver&) { + + typedef typename linalg_traits::value_type value_type; + + size_type nb_sub = ASM.vB->size(), nb_dof = gmm::vect_size(f); + ASM.itebilan = 0; + std::vector g(nb_dof); + std::vector gbis(nb_dof); +#ifdef GMM_USES_MPI + double t_init=MPI_Wtime(); + int size,tranche,borne_sup,borne_inf,rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + tranche=nb_sub/size; + borne_inf=rank*tranche; + borne_sup=(rank+1)*tranche; + // if (rank==size-1) borne_sup=nb_sub*size; + for (size_type i = size_type(borne_inf); i < size_type(borne_sup); ++i) +// for (size_type i = 0; i < nb_sub/size; ++i) + // for (size_type j = 0; j < nb_sub; ++j) + // for (size_type i = rank; i < nb_sub; i+=size) +#else + for (size_type i = 0; i < nb_sub; ++i) +#endif + { + +#ifdef GMM_USES_MPI + // size_type i=j; // (rank+size*(j-1)+nb_sub)%nb_sub; +#endif + gmm::mult(gmm::transposed((*(ASM.vB))[i]), f, ASM.fi[i]); + ASM.iter.init(); + AS_local_solve(local_solver(), ASM.vAloc[i], ASM.gi[i], ASM.fi[i], + ASM.precond1[i], ASM.iter); + ASM.itebilan = std::max(ASM.itebilan, ASM.iter.get_iteration()); +#ifdef GMM_USES_MPI + gmm::mult((*(ASM.vB))[i], ASM.gi[i], gbis,gbis); +#else + gmm::mult((*(ASM.vB))[i], ASM.gi[i], g, g); +#endif + } +#ifdef GMM_USES_MPI + cout<<"temps boucle init "<< MPI_Wtime()-t_init< + void additive_schwarz(const Matrix1 &A, Vector3 &u, + const Vector2 &f, const Precond &P, + const std::vector &vB, + iteration &iter, local_solver, + global_solver) { + iter.set_rhsnorm(vect_norm2(f)); + if (iter.get_rhsnorm() == 0.0) { gmm::clear(u); return; } + iteration iter2 = iter; iter2.reduce_noisy(); + iter2.set_maxiter(size_type(-1)); + 
add_schwarz_mat + ASM(A, vB, iter2, P, iter.get_resmax()); + additive_schwarz(ASM, u, f, iter, global_solver()); + } + + /* ******************************************************************** */ + /* Sequential Non-Linear Additive Schwarz method */ + /* ******************************************************************** */ + /* ref : Nonlinearly Preconditionned Inexact Newton Algorithms, */ + /* Xiao-Chuan Cai, David E. Keyes, */ + /* SIAM J. Sci. Comp. 24: p183-200. l */ + /* ******************************************************************** */ + + template + class NewtonAS_struct { + + public : + typedef Matrixt tangent_matrix_type; + typedef MatrixBi B_matrix_type; + typedef typename linalg_traits::value_type value_type; + typedef std::vector Vector; + + virtual size_type size(void) = 0; + virtual const std::vector &get_vB() = 0; + + virtual void compute_F(Vector &f, Vector &x) = 0; + virtual void compute_tangent_matrix(Matrixt &M, Vector &x) = 0; + // compute Bi^T grad(F(X)) Bi + virtual void compute_sub_tangent_matrix(Matrixt &Mloc, Vector &x, + size_type i) = 0; + // compute Bi^T F(X) + virtual void compute_sub_F(Vector &fi, Vector &x, size_type i) = 0; + + virtual ~NewtonAS_struct() {} + }; + + template + struct AS_exact_gradient { + const std::vector &vB; + std::vector vM; + std::vector vMloc; + + void init(void) { + for (size_type i = 0; i < vB.size(); ++i) { + Matrixt aux(gmm::mat_ncols(vB[i]), gmm::mat_ncols(vM[i])); + gmm::resize(vMloc[i], gmm::mat_ncols(vB[i]), gmm::mat_ncols(vB[i])); + gmm::mult(gmm::transposed(vB[i]), vM[i], aux); + gmm::mult(aux, vB[i], vMloc[i]); + } + } + AS_exact_gradient(const std::vector &vB_) : vB(vB_) { + vM.resize(vB.size()); vMloc.resize(vB.size()); + for (size_type i = 0; i < vB.size(); ++i) { + gmm::resize(vM[i], gmm::mat_nrows(vB[i]), gmm::mat_nrows(vB[i])); + } + } + }; + + template + void mult(const AS_exact_gradient &M, + const Vector2 &p, Vector3 &q) { + gmm::clear(q); + typedef typename 
gmm::linalg_traits::value_type T; + std::vector v(gmm::vect_size(p)), w, x; + for (size_type i = 0; i < M.vB.size(); ++i) { + w.resize(gmm::mat_ncols(M.vB[i])); + x.resize(gmm::mat_ncols(M.vB[i])); + gmm::mult(M.vM[i], p, v); + gmm::mult(gmm::transposed(M.vB[i]), v, w); + double rcond; + SuperLU_solve(M.vMloc[i], x, w, rcond); + // gmm::iteration iter(1E-10, 0, 100000); + //gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter); + gmm::mult_add(M.vB[i], x, q); + } + } + + template + void mult(const AS_exact_gradient &M, + const Vector2 &p, const Vector3 &q) { + mult(M, p, const_cast(q)); + } + + template + void mult(const AS_exact_gradient &M, + const Vector2 &p, const Vector3 &p2, Vector4 &q) + { mult(M, p, q); add(p2, q); } + + template + void mult(const AS_exact_gradient &M, + const Vector2 &p, const Vector3 &p2, const Vector4 &q) + { mult(M, p, const_cast(q)); add(p2, q); } + + struct S_default_newton_line_search { + + double conv_alpha, conv_r; + size_t it, itmax, glob_it; + + double alpha, alpha_old, alpha_mult, first_res, alpha_max_ratio; + double alpha_min_ratio, alpha_min; + size_type count, count_pat; + bool max_ratio_reached; + double alpha_max_ratio_reached, r_max_ratio_reached; + size_type it_max_ratio_reached; + + + double converged_value(void) { return conv_alpha; }; + double converged_residual(void) { return conv_r; }; + + virtual void init_search(double r, size_t git, double = 0.0) { + alpha_min_ratio = 0.9; + alpha_min = 1e-10; + alpha_max_ratio = 10.0; + alpha_mult = 0.25; + itmax = size_type(-1); + glob_it = git; if (git <= 1) count_pat = 0; + conv_alpha = alpha = alpha_old = 1.; + conv_r = first_res = r; it = 0; + count = 0; + max_ratio_reached = false; + } + virtual double next_try(void) { + alpha_old = alpha; + if (alpha >= 0.4) alpha *= 0.5; else alpha *= alpha_mult; ++it; + return alpha_old; + } + virtual bool is_converged(double r, double = 0.0) { + // cout << "r = " << r << " alpha = " << alpha / alpha_mult << " count_pat = " << 
count_pat << endl; + if (!max_ratio_reached && r < first_res * alpha_max_ratio) { + alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r; + it_max_ratio_reached = it; max_ratio_reached = true; + } + if (max_ratio_reached && r < r_max_ratio_reached * 0.5 + && r > first_res * 1.1 && it <= it_max_ratio_reached+1) { + alpha_max_ratio_reached = alpha_old; r_max_ratio_reached = r; + it_max_ratio_reached = it; + } + if (count == 0 || r < conv_r) + { conv_r = r; conv_alpha = alpha_old; count = 1; } + if (conv_r < first_res) ++count; + + if (r < first_res * alpha_min_ratio) + { count_pat = 0; return true; } + if (count >= 5 || (alpha < alpha_min && max_ratio_reached)) { + if (conv_r < first_res * 0.99) count_pat = 0; + if (/*gmm::random() * 50. < -log(conv_alpha)-4.0 ||*/ count_pat >= 3) + { conv_r=r_max_ratio_reached; conv_alpha=alpha_max_ratio_reached; } + if (conv_r >= first_res * 0.9999) count_pat++; + return true; + } + return false; + } + S_default_newton_line_search(void) { count_pat = 0; } + }; + + + + template + void Newton_additive_Schwarz(NewtonAS_struct &NS, + const Vector &u_, + iteration &iter, const Precond &P, + local_solver, global_solver) { + Vector &u = const_cast(u_); + typedef typename linalg_traits::value_type value_type; + typedef typename number_traits::magnitude_type mtype; + typedef actual_precond chgt_precond; + + double residual = iter.get_resmax(); + + S_default_newton_line_search internal_ls; + S_default_newton_line_search external_ls; + + typename chgt_precond::APrecond PP = chgt_precond::transform(P); + iter.set_rhsnorm(mtype(1)); + iteration iternc(iter); + iternc.reduce_noisy(); iternc.set_maxiter(size_type(-1)); + iteration iter2(iternc); + iteration iter3(iter2); iter3.reduce_noisy(); + iteration iter4(iter3); + iternc.set_name("Local Newton"); + iter2.set_name("Linear System for Global Newton"); + iternc.set_resmax(residual/100.0); + iter3.set_resmax(residual/10000.0); + iter2.set_resmax(residual/1000.0); + 
iter4.set_resmax(residual/1000.0); + std::vector rhs(NS.size()), x(NS.size()), d(NS.size()); + std::vector xi, xii, fi, di; + + std::vector< std::vector > vx(NS.get_vB().size()); + for (size_type i = 0; i < NS.get_vB().size(); ++i) // for exact gradient + vx[i].resize(NS.size()); // for exact gradient + + Matrixt Mloc, M(NS.size(), NS.size()); + NS.compute_F(rhs, u); + mtype act_res=gmm::vect_norm2(rhs), act_res_new(0), precond_res = act_res; + mtype alpha; + + while(!iter.finished(std::min(act_res, precond_res))) { + for (int SOR_step = 0; SOR_step >= 0; --SOR_step) { + gmm::clear(rhs); + for (size_type isd = 0; isd < NS.get_vB().size(); ++isd) { + const MatrixBi &Bi = (NS.get_vB())[isd]; + size_type si = mat_ncols(Bi); + gmm::resize(Mloc, si, si); + xi.resize(si); xii.resize(si); fi.resize(si); di.resize(si); + + iternc.init(); + iternc.set_maxiter(30); // ? + if (iternc.get_noisy()) + cout << "Non-linear local problem " << isd << endl; + gmm::clear(xi); + gmm::copy(u, x); + NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1)); + mtype r = gmm::vect_norm2(fi), r_t(r); + if (r > value_type(0)) { + iternc.set_rhsnorm(std::max(r, mtype(1))); + while(!iternc.finished(r)) { + NS.compute_sub_tangent_matrix(Mloc, x, isd); + + PP.build_with(Mloc); + iter3.init(); + AS_local_solve(local_solver(), Mloc, di, fi, PP, iter3); + + internal_ls.init_search(r, iternc.get_iteration()); + do { + alpha = internal_ls.next_try(); + gmm::add(xi, gmm::scaled(di, -alpha), xii); + gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x); + NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1)); + r_t = gmm::vect_norm2(fi); + } while (!internal_ls.is_converged(r_t)); + + if (alpha != internal_ls.converged_value()) { + alpha = internal_ls.converged_value(); + gmm::add(xi, gmm::scaled(di, -alpha), xii); + gmm::mult(Bi, gmm::scaled(xii, -1.0), u, x); + NS.compute_sub_F(fi, x, isd); gmm::scale(fi, value_type(-1)); + r_t = gmm::vect_norm2(fi); + } + gmm::copy(x, vx[isd]); // for exact gradient 
+ + if (iternc.get_noisy()) cout << "(step=" << alpha << ")\t"; + ++iternc; r = r_t; gmm::copy(xii, xi); + } + if (SOR_step) gmm::mult(Bi, gmm::scaled(xii, -1.0), u, u); + gmm::mult(Bi, gmm::scaled(xii, -1.0), rhs, rhs); + } + } + precond_res = gmm::vect_norm2(rhs); + if (SOR_step) cout << "SOR step residual = " << precond_res << endl; + if (precond_res < residual) break; + cout << "Precond residual = " << precond_res << endl; + } + + iter2.init(); + // solving linear system for the global Newton method + if (0) { + NS.compute_tangent_matrix(M, u); + add_schwarz_mat + ASM(M, NS.get_vB(), iter4, P, iter.get_resmax()); + AS_global_solve(global_solver(), ASM, d, rhs, iter2); + } + else { // for exact gradient + AS_exact_gradient eg(NS.get_vB()); + for (size_type i = 0; i < NS.get_vB().size(); ++i) { + NS.compute_tangent_matrix(eg.vM[i], vx[i]); + } + eg.init(); + gmres(eg, d, rhs, gmm::identity_matrix(), 50, iter2); + } + + // gmm::add(gmm::scaled(rhs, 0.1), u); ++iter; + external_ls.init_search(act_res, iter.get_iteration()); + do { + alpha = external_ls.next_try(); + gmm::add(gmm::scaled(d, alpha), u, x); + NS.compute_F(rhs, x); + act_res_new = gmm::vect_norm2(rhs); + } while (!external_ls.is_converged(act_res_new)); + + if (alpha != external_ls.converged_value()) { + alpha = external_ls.converged_value(); + gmm::add(gmm::scaled(d, alpha), u, x); + NS.compute_F(rhs, x); + act_res_new = gmm::vect_norm2(rhs); + } + + if (iter.get_noisy() > 1) cout << endl; + act_res = act_res_new; + if (iter.get_noisy()) cout << "(step=" << alpha << ")\t unprecond res = " << act_res << " "; + + + ++iter; gmm::copy(x, u); + } + } + +} + + +#endif // GMM_SOLVERS_SCHWARZ_ADDITIVE_H__ diff --git a/gmm/gmm_solver_bfgs.h b/gmm/gmm_solver_bfgs.h new file mode 100644 index 000000000..28a1bc01f --- /dev/null +++ b/gmm/gmm_solver_bfgs.h @@ -0,0 +1,210 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 
2004-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_solver_bfgs.h + @author Yves Renard + @date October 14 2004. + @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm. + */ +#ifndef GMM_BFGS_H +#define GMM_BFGS_H + +#include "gmm_kernel.h" +#include "gmm_iter.h" + +namespace gmm { + + // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno) + // Quasi Newton method for optimization problems. + // with Wolfe Line search. 
+
+
+  // delta[k] = x[k+1] - x[k]
+  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
+  // H[0] = I
+  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
+  // DFP  : zeta[k] = H[k] gamma[k]
+  // tau[k] = gamma[k]^T zeta[k]
+  // rho[k] = 1 / gamma[k]^T delta[k]
+  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
+  //                 - rho[k]^2 tau[k] delta[k] delta[k]^T
+  // DFP  : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
+  //                 - (1/tau[k])zeta[k] zeta[k]^T
+
+  /** Object representing the inverse of the Hessian.
+   *
+   *  The approximation is never stored as a matrix: only the update
+   *  vectors delta[k], gamma[k], zeta[k] and the scalars tau[k], rho[k]
+   *  defined above are kept, and hmult() replays them on demand.
+   *  version == 0 selects the BFGS formula, version == 1 the DFP one.
+   */
+  template <typename VECTOR> struct bfgs_invhessian {
+
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    std::vector<VECTOR> delta, gamma, zeta;
+    std::vector<T> tau, rho;
+    int version;
+
+    /** Y = H X: starts from Y = X (H[0] = I) and adds the correction of
+     *  every stored update, in the order they were recorded. */
+    template <typename VEC1, typename VEC2>
+    void hmult(const VEC1 &X, VEC2 &Y) {
+      copy(X, Y);
+      for (size_type k = 0 ; k < delta.size(); ++k) {
+        T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
+        switch (version) {
+        case 0 : // BFGS
+          add(scaled(zeta[k], rho[k]*xdelta), Y);
+          add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
+          break;
+        case 1 : // DFP
+          add(scaled(delta[k], rho[k]*xdelta), Y);
+          add(scaled(zeta[k], -xzeta/tau[k]), Y);
+          break;
+        }
+      }
+    }
+
+    /** Forget every stored update, i.e. go back to H = I. */
+    void restart(void) {
+      delta.resize(0); gamma.resize(0); zeta.resize(0);
+      tau.resize(0); rho.resize(0);
+    }
+
+    /** Record one more update from the step deltak and the gradient
+     *  variation gammak. */
+    template <typename VECT1, typename VECT2>
+    void update(const VECT1 &deltak, const VECT2 &gammak) {
+      T vsp = vect_sp(deltak, gammak);
+      // rho[k] = 1 / vsp below: a zero scalar product cannot be used.
+      if (vsp == T(0)) return;
+      size_type N = vect_size(deltak), k = delta.size();
+      VECTOR Y(N);
+      // Y = H[k] gamma[k] must be evaluated before the new slot is appended.
+      hmult(gammak, Y);
+      delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
+      tau.resize(k+1); rho.resize(k+1);
+      resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
+      gmm::copy(deltak, delta[k]);
+      gmm::copy(gammak, gamma[k]);
+      rho[k] = R(1) / vsp;
+      if (version == 0)
+        add(delta[k], scaled(Y, -1), zeta[k]);
+      else
+        gmm::copy(Y, zeta[k]);
+      tau[k] = vect_sp(gammak, zeta[k]);
+    }
+
+    bfgs_invhessian(int v = 0) { version = v; }
+  };
+
+
+  /** Quasi-Newton minimisation of f with a Wolfe line search.
+   *
+   *  @param f           callable, f(x) returns the value to minimise.
+   *  @param grad        callable, grad(x, g) writes the gradient at x into g.
+   *  @param x           starting point on entry, last accepted iterate on exit.
+   *  @param restart     the stored updates are dropped every 'restart'
+   *                     iterations; 0 disables the periodic restart.
+   *  @param iter        iteration object: stopping test (on the gradient),
+   *                     iteration counter and verbosity.
+   *  @param version     0 for BFGS, 1 for DFP (see bfgs_invhessian).
+   *  @param lambda_init first step tried by the line search, and the value
+   *                     the step is reset to when the search is blocked.
+   *  @param print_norm  printed values are divided by this number.
+   */
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
+  void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+            int restart, iteration& iter, int version = 0,
+            double lambda_init=0.001, double print_norm=1.0) {
+
+    typedef typename linalg_traits<VECTOR>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+
+    bfgs_invhessian<VECTOR> invhessian(version);
+    VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
+    grad(x, r);
+    R lambda = lambda_init, valx = f(x), valy;
+    int nb_restart(0);
+
+    if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
+    while (! iter.finished_vect(r)) {
+
+      // Descent direction d = - H r.
+      invhessian.hmult(r, d); gmm::scale(d, T(-1));
+
+      // Wolfe Line search
+      R derivative = gmm::vect_sp(r, d);
+      R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
+      bool unbounded = true, blocked = false, grad_computed = false;
+
+      for(;;) {
+        add(x, scaled(d, lambda), y);
+        valy = f(y);
+        if (iter.get_noisy() >= 2) {
+          cout.precision(15);
+          cout << "Wolfe line search, lambda = " << lambda
+               << " value = " << valy /print_norm << endl;
+//             << " derivative = " << derivative
+//             << " lambda min = " << lambda_min << " lambda max = "
+//             << lambda_max << endl; getchar();
+        }
+        if (valy <= valx + m1 * lambda * derivative) {
+          // Sufficient decrease holds: check the curvature condition.
+          grad(y, r2); grad_computed = true;
+          T derivative2 = gmm::vect_sp(r2, d);
+          if (derivative2 >= m2*derivative) break;
+          lambda_min = lambda;
+        }
+        else {
+          lambda_max = lambda;
+          unbounded = false;
+        }
+        // Expand while no upper bound is known, bisect afterwards.
+        if (unbounded) lambda *= R(10);
+        else lambda = (lambda_max + lambda_min) / R(2);
+        if (lambda == lambda_max || lambda == lambda_min) break;
+        // valy <= R(2)*valx replaced by
+        // valy <= valx + gmm::abs(derivative)*lambda_init
+        // for compatibility with negative values (08.24.07).
+        if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
+            (lambda < R(lambda_init*1E-8) ||
+             (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
+        { blocked = true; lambda = lambda_init; break; }
+      }
+
+      // Rank two update
+      ++iter;
+      if (!grad_computed) grad(y, r2);
+      gmm::add(scaled(r2, -1), r);   // r = r - r2 = - gamma[k]
+      // restart == 0 used to be a modulo by zero; it now simply disables
+      // the periodic restart. A blocked line search still forces one.
+      if ((restart != 0 && (iter.get_iteration() % restart) == 0)
+          || blocked) {
+        if (iter.get_noisy() >= 1) cout << "Restart\n";
+        invhessian.restart();
+        if (++nb_restart > 10) {
+          if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
+          return;
+        }
+      }
+      else {
+        invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
+        nb_restart = 0;
+      }
+      copy(r2, r); copy(y, x); valx = valy;
+      if (iter.get_noisy() >= 1)
+        cout << "BFGS value " << valx/print_norm << "\t";
+    }
+
+  }
+
+
+  /** Same driver as bfgs(), with the DFP update selected by default.
+   *  lambda_init and print_norm keep the defaults of bfgs(). */
+  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
+  inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
+                  int restart, iteration& iter, int version = 1) {
+    bfgs(f, grad, x, restart, iter, version);
+
+  }
+
+
+}
+
+#endif
+
+
diff --git a/gmm/gmm_solver_bicgstab.h b/gmm/gmm_solver_bicgstab.h
new file mode 100644
index 000000000..858478fbe
--- /dev/null
+++ b/gmm/gmm_solver_bicgstab.h
@@ -0,0 +1,160 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++ is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of bicgstab.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +/**@file gmm_solver_bicgstab.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date October 13, 2002. + @brief BiCGStab iterative solver. 
+*/
+
+#ifndef GMM_SOLVER_BICGSTAB_H__
+#define GMM_SOLVER_BICGSTAB_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*            BiConjugate Gradient Stabilized                           */
+  /* (preconditioned, with parametrable scalar product)                   */
+  /* ******************************************************************** */
+
+  /** Solve A x = b with the preconditioned BiCGStab iteration.
+   *
+   *  @param A    system matrix.
+   *  @param x    initial guess on entry, computed approximation on exit.
+   *  @param b    right-hand side; when its norm is zero x is cleared and
+   *              the function returns immediately.
+   *  @param M    preconditioner, applied through gmm::mult(M, in, out).
+   *  @param iter iteration object: stopping test, counter, max iterations.
+   *
+   *  A breakdown (rho_1 == 0 or omega == 0) raises GMM_ASSERT1 when no
+   *  iteration limit is set (maxiter == size_type(-1)); otherwise it emits
+   *  GMM_WARNING1 and returns with the current x.
+   */
+  template <typename Matrix, typename Vector, typename VectorB,
+            typename Preconditioner>
+  void bicgstab(const Matrix& A, Vector& x, const VectorB& b,
+                const Preconditioner& M, iteration &iter) {
+
+    typedef typename linalg_traits<Vector>::value_type T;
+    typedef typename number_traits<T>::magnitude_type R;
+    typedef typename temporary_dense_vector<Vector>::vector_type temp_vector;
+
+    T rho_1, rho_2(0), alpha(0), beta, omega(0);
+    temp_vector p(vect_size(x)), phat(vect_size(x)), s(vect_size(x)),
+      shat(vect_size(x)),
+      t(vect_size(x)), v(vect_size(x)), r(vect_size(x)), rtilde(vect_size(x));
+
+    // r = b - A x; rtilde keeps the initial residual as shadow vector.
+    gmm::mult(A, gmm::scaled(x, -T(1)), b, r);
+    gmm::copy(r, rtilde);
+    R norm_r = gmm::vect_norm2(r);
+    iter.set_rhsnorm(gmm::vect_norm2(b));
+
+    if (iter.get_rhsnorm() == 0.0) { clear(x); return; }
+
+    while (!iter.finished(norm_r)) {
+
+      rho_1 = gmm::vect_sp(rtilde, r);
+      if (rho_1 == T(0)) {
+        if (iter.get_maxiter() == size_type(-1))
+          { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+        else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+      }
+
+      if (iter.first())
+        gmm::copy(r, p);
+      else {
+        // beta divides by omega: stop before the division on breakdown.
+        if (omega == T(0)) {
+          if (iter.get_maxiter() == size_type(-1))
+            { GMM_ASSERT1(false, "Bicgstab failed to converge"); }
+          else { GMM_WARNING1("Bicgstab failed to converge"); return; }
+        }
+
+        beta = (rho_1 / rho_2) * (alpha / omega);
+
+        // p = r + beta * (p - omega * v)
+        gmm::add(gmm::scaled(v, -omega), p);
+        gmm::add(r, gmm::scaled(p, beta), p);
+      }
+      gmm::mult(M, p, phat);
+      gmm::mult(A, phat, v);
+      alpha = rho_1 / gmm::vect_sp(v, rtilde);
+      gmm::add(r, gmm::scaled(v, -alpha), s);
+
+      // s is the residual after the first half step: if it already meets
+      // the stopping test, apply that half step only and leave.
+      if (iter.finished_vect(s))
+        { gmm::add(gmm::scaled(phat, alpha), x); break; }
+
+      gmm::mult(M, s, shat);
+      gmm::mult(A, shat, t);
+      omega = gmm::vect_sp(t, s) / gmm::vect_norm2_sqr(t);
+
+      gmm::add(gmm::scaled(phat, alpha), x);
+      gmm::add(gmm::scaled(shat, omega), x);
+      gmm::add(s, gmm::scaled(t, -omega), r);
+      norm_r = gmm::vect_norm2(r);
+      rho_2 = rho_1;
+
+      ++iter;
+    }
+  }
+
+  /** Overload accepting x through a const reference; forwards to the
+   *  version above after linalg_const_cast. */
+  template <typename Matrix, typename Vector, typename VectorB,
+            typename Preconditioner>
+  void bicgstab(const Matrix& A, const Vector& x, const VectorB& b,
+                const Preconditioner& M, iteration &iter)
+  { bicgstab(A, linalg_const_cast(x), b, M, iter); }
+
+}
+
+
+#endif // GMM_SOLVER_BICGSTAB_H__
diff --git a/gmm/gmm_solver_cg.h b/gmm/gmm_solver_cg.h
new file mode 100644
index 000000000..a2876786a
--- /dev/null
+++ b/gmm/gmm_solver_cg.h
@@ -0,0 +1,180 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++ is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ As a special exception, you may use this file as it is a part of a free
+ software library without restriction.
Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of cg.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE TRUSTEES
+// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//===========================================================================
+
+/**@file gmm_solver_cg.h
+   @author Andrew Lumsdaine
+   @author Lie-Quan Lee
+   @author Yves Renard
+   @date October 13, 2002.
+   @brief Conjugate gradient iterative solver.
+*/
+#ifndef GMM_SOLVER_CG_H__
+#define GMM_SOLVER_CG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /* ******************************************************************** */
+  /*            conjugate gradient                                        */
+  /* (preconditioned, with parametrable additional scalar product)        */
+  /* ******************************************************************** */
+
+  /** Solve A x = b with the preconditioned conjugate gradient.
+   *
+   *  @param A    system matrix.
+   *  @param x    initial guess on entry, computed approximation on exit.
+   *  @param b    right-hand side; when its PS-norm is zero x is cleared.
+   *  @param PS   matrix defining the scalar product, used via vect_hp.
+   *  @param P    preconditioner, applied through mult(P, r, z).
+   *  @param iter iteration object: stopping test (on the residual r),
+   *              counter and verbosity.
+   */
+  template <typename Matrix, typename Matps, typename Precond,
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+          const Precond &P, iteration &iter) {
+
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x)),
+      z(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      // r = b - A x, z = P r, first direction p = z.
+      mult(A, scaled(x, T(-1)), b, r);
+      mult(P, r, z);
+      rho = vect_hp(PS, z, r);
+      copy(z, p);
+
+      while (!iter.finished_vect(r)) {
+
+        // The first pass reuses the z, rho and p computed above.
+        if (!iter.first()) {
+          mult(P, r, z);
+          rho = vect_hp(PS, z, r);
+          add(z, scaled(p, rho / rho_1), p);
+        }
+        mult(A, p, q);
+
+        a = rho / vect_hp(PS, q, p);
+        add(scaled(p, a), x);
+        add(scaled(q, -a), r);
+        rho_1 = rho;
+
+        ++iter;
+      }
+    }
+  }
+
+  /** Same algorithm without preconditioner: selected when an
+   *  identity_matrix is passed as preconditioner, so that no z vector
+   *  and no mult(P, r, z) are needed. */
+  template <typename Matrix, typename Matps,
+            typename Vector1, typename Vector2>
+  void cg(const Matrix& A, Vector1& x, const Vector2& b, const Matps& PS,
+          const gmm::identity_matrix &, iteration &iter) {
+
+    typedef typename temporary_dense_vector<Vector1>::vector_type temp_vector;
+    typedef typename linalg_traits<Vector1>::value_type T;
+
+    T rho, rho_1(0), a;
+    temp_vector p(vect_size(x)), q(vect_size(x)), r(vect_size(x));
+    iter.set_rhsnorm(gmm::sqrt(gmm::abs(vect_hp(PS, b, b))));
+
+    if (iter.get_rhsnorm() == 0.0)
+      clear(x);
+    else {
+      mult(A, scaled(x, T(-1)), b, r);
+      rho = vect_hp(PS, r, r);
+      copy(r, p);
+
+      while (!iter.finished_vect(r)) {
+
+        if (!iter.first()) {
+          rho = vect_hp(PS, r, r);
+          add(r, scaled(p, rho / rho_1), p);
+        }
+        mult(A, p, q);
+        a = rho / vect_hp(PS, q, p);
+        add(scaled(p, a), x);
+        add(scaled(q, -a), r);
+        rho_1 = rho;
+        ++iter;
+      }
+    }
+  }
+
+  /** Overload accepting x through a const reference. */
+  template <typename Matrix, typename Matps, typename Precond,
+            typename Vector1, typename Vector2> inline
+  void cg(const Matrix& A, const Vector1& x, const Vector2& b, const Matps& PS,
+          const Precond &P, iteration &iter)
+  { cg(A, linalg_const_cast(x), b, PS, P, iter); }
+
+  /** Overload without scalar-product matrix: identity_matrix is used. */
+  template <typename Matrix, typename Precond,
+            typename Vector1, typename Vector2> inline
+  void cg(const Matrix& A, Vector1& x, const Vector2& b,
+          const Precond &P, iteration &iter)
+  { cg(A, x , b, identity_matrix(), P, iter); }
+
+  /** Overload without scalar-product matrix, x passed by const reference. */
+  template <typename Matrix, typename Precond,
+            typename Vector1, typename Vector2> inline
+  void cg(const Matrix& A, const Vector1& x, const Vector2& b,
+          const Precond &P, iteration &iter)
+  { cg(A, x , b , identity_matrix(), P , iter); }
+
+}
+
+
+#endif // GMM_SOLVER_CG_H__
diff --git a/gmm/gmm_solver_constrained_cg.h b/gmm/gmm_solver_constrained_cg.h
new file mode 100644
index 000000000..44716bffe
--- /dev/null
+++ b/gmm/gmm_solver_constrained_cg.h
@@ -0,0 +1,165 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++ is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of
the License, or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License and GCC Runtime Library Exception for more details.
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ As a special exception, you may use this file as it is a part of a free
+ software library without restriction. Specifically, if other files
+ instantiate templates or use macros or inline functions from this file,
+ or you compile this file and link it with other files to produce an
+ executable, this file does not by itself cause the resulting executable
+ to be covered by the GNU Lesser General Public License. This exception
+ does not however invalidate any other reasons why the executable file
+ might be covered by the GNU Lesser General Public License.
+
+===========================================================================*/
+
+/**@file gmm_solver_constrained_cg.h
+   @author Yves Renard
+   @date October 13, 2002.
+   @brief Constrained conjugate gradient. */
+// preconditioning does not work
+
+#ifndef GMM_SOLVER_CCG_H__
+#define GMM_SOLVER_CCG_H__
+
+#include "gmm_kernel.h"
+#include "gmm_iter.h"
+
+namespace gmm {
+
+  /** Fill CINV, row by row, with inv(C * trans(C)) * C.
+   *
+   *  @param C    non-square constraint matrix.
+   *  @param CINV result; cleared first, then written one row at a time.
+   *  The scalar-product argument is unused and the last argument only
+   *  provides the vector type (VectorX) of the temporaries.
+   */
+  template <typename CMatrix, typename CINVMatrix, typename Matps,
+            typename VectorX>
+  void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV,
+                      const Matps& /* PS */, VectorX&) {
+    // compute the pseudo inverse of the non-square matrix C such
+    // CINV = inv(C * trans(C)) * C.
+    // based on a conjugate gradient method.
+
+    // could be optimised: copy of the row, precomputation of C * trans(C).
+
+    typedef VectorX TmpVec;
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+
+    size_type nr = mat_nrows(C), nc = mat_ncols(C);
+
+    TmpVec d(nr), e(nr), l(nc), p(nr), q(nr), r(nr);
+    value_type rho, rho_1, alpha;
+    clear(d);
+    clear(CINV);
+
+    for (size_type i = 0; i < nr; ++i) {
+      // d is the i-th unit vector for the duration of this pass.
+      d[i] = 1.0; rho = 1.0;
+      clear(e);
+      copy(d, r);
+      copy(d, p);
+
+      while (rho >= 1E-38) { /* conjugate gradient to compute e            */
+                             /* which is the i-th row of inv(C * trans(C)) */
+        mult(gmm::transposed(C), p, l);
+        mult(C, l, q);
+        alpha = rho / vect_sp(p, q);
+        add(scaled(p, alpha), e);
+        add(scaled(q, -alpha), r);
+        rho_1 = rho;
+        rho = vect_sp(r, r);
+        add(r, scaled(p, rho / rho_1), p);
+      }
+
+      mult(transposed(C), e, l); /* l is the i-th row of CINV */
+      // cout << "l = " << l << endl;
+      clean(l, 1E-15);
+      copy(l, mat_row(CINV, i));
+
+      d[i] = 0.0;
+    }
+  }
+
+  /** Compute the minimum of @f$ 1/2((Ax).x) - bx @f$ under the constraint @f$ Cx <= f @f$
+   *
+   *  @param A    matrix of the quadratic form.
+   *  @param C    constraint matrix, one constraint per row.
+   *  @param x    starting point on entry, last iterate on exit.
+   *  @param b    linear term.
+   *  @param f    constraint bounds, indexed like the rows of C.
+   *  @param PS   matrix of the scalar product used by vect_sp(PS, ., .).
+   *  @param M    preconditioner (see the remark at the top of this file).
+   *  @param iter iteration object: stopping test on rho, counter, verbosity.
+   */
+  template < typename Matrix,  typename CMatrix, typename Matps,
+             typename VectorX, typename VectorB, typename VectorF,
+             typename Preconditioner >
+  void constrained_cg(const Matrix& A, const CMatrix& C, VectorX& x,
+                      const VectorB& b, const VectorF& f,const Matps& PS,
+                      const Preconditioner& M, iteration &iter) {
+    typedef typename temporary_dense_vector<VectorX>::vector_type TmpVec;
+    typedef typename temporary_vector<CMatrix>::vector_type TmpCVec;
+    typedef row_matrix<TmpCVec> TmpCmat;
+
+    typedef typename linalg_traits<VectorX>::value_type value_type;
+    value_type rho = 1.0, rho_1, lambda, gamma;
+    TmpVec p(vect_size(x)), q(vect_size(x)), q2(vect_size(x)),
+      r(vect_size(x)), old_z(vect_size(x)), z(vect_size(x)),
+      memox(vect_size(x));
+    // satured[i] is true while constraint i is treated as active.
+    std::vector<bool> satured(mat_nrows(C));
+    clear(p);
+    iter.set_rhsnorm(sqrt(vect_sp(PS, b, b)));
+    if (iter.get_rhsnorm() == 0.0) iter.set_rhsnorm(1.0);
+
+    TmpCmat CINV(mat_nrows(C), mat_ncols(C));
+    pseudo_inverse(C, CINV, PS, x);
+
+    while(true) {
+      // computation of the residual
+      copy(z, old_z);
+      copy(x, memox);
+      mult(A, scaled(x, -1.0), b, r);
+      mult(M, r, z); // preconditioner not coherent
+      bool transition = false;
+      for (size_type i = 0; i < mat_nrows(C); ++i) {
+        value_type al = vect_sp(mat_row(C, i), x) - f[i];
+        if (al >= -1.0E-15) {
+          // Constraint i is reached: remove from z its component along it.
+          if (!satured[i]) { satured[i] = true; transition = true; }
+          value_type bb = vect_sp(mat_row(CINV, i), z);
+          if (bb > 0.0) add(scaled(mat_row(C, i), -bb), z);
+        }
+        else
+          satured[i] = false;
+      }
+
+      // descent direction
+      rho_1 = rho; rho = vect_sp(PS, r, z); // ...
+
+      if (iter.finished(rho)) break;
+
+      if (iter.get_noisy() > 0 && transition) std::cout << "transition\n";
+      // The direction is reset whenever the active set has just grown.
+      if (transition || iter.first()) gamma = 0.0;
+      else gamma = std::max(0.0, (rho - vect_sp(PS, old_z, z) ) / rho_1);
+      // std::cout << "gamma = " << gamma << endl;
+      // itl::add(r, itl::scaled(p, gamma), p);
+      add(z, scaled(p, gamma), p); // ...
+
+      ++iter;
+      // one-dimensional optimization
+      mult(A, p, q);
+      lambda = rho / vect_sp(PS, q, p);
+      for (size_type i = 0; i < mat_nrows(C); ++i)
+        if (!satured[i]) {
+          // NOTE(review): the largest step keeping C_i.(x + lambda p) <= f[i]
+          // is (f[i] - C_i.x) / (C_i.p), so the "- f[i]" in this denominator
+          // looks unintended. Kept as found - confirm before changing.
+          value_type bb = vect_sp(mat_row(C, i), p) - f[i];
+          if (bb > 0.0)
+            lambda = std::min(lambda, (f[i]-vect_sp(mat_row(C, i), x)) / bb);
+        }
+      add(x, scaled(p, lambda), x);
+      // NOTE(review): memox is overwritten by copy(x, memox) at the top of
+      // the next pass and is not read in between inside this function.
+      add(memox, scaled(x, -1.0), memox);
+
+    }
+  }
+
+}
+
+#endif // GMM_SOLVER_CCG_H__
diff --git a/gmm/gmm_solver_gmres.h b/gmm/gmm_solver_gmres.h
new file mode 100644
index 000000000..b124905e2
--- /dev/null
+++ b/gmm/gmm_solver_gmres.h
@@ -0,0 +1,173 @@
+/* -*- c++ -*- (enables emacs c++ mode) */
+/*===========================================================================
+
+ Copyright (C) 2002-2017 Yves Renard
+
+ This file is a part of GetFEM++
+
+ GetFEM++ is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version along with the GCC Runtime Library
+ Exception either version 3.1 or (at your option) any later version.
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of gmres.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1998-2001, University of Notre Dame. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +/**@file gmm_solver_gmres.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date October 13, 2002. + @brief GMRES (Generalized Minimum Residual) iterative solver. +*/ +#ifndef GMM_KRYLOV_GMRES_H +#define GMM_KRYLOV_GMRES_H + +#include "gmm_kernel.h" +#include "gmm_iter.h" +#include "gmm_modified_gram_schmidt.h" + +namespace gmm { + + /** Generalized Minimum Residual + + This solve the unsymmetric linear system Ax = b using restarted GMRES. + + See: Y. Saad and M. Schulter. GMRES: A generalized minimum residual + algorithm for solving nonsysmmetric linear systems, SIAM + J. Sci. Statist. Comp. 
7(1986), pp, 856-869 + */ + template + void gmres(const Mat &A, Vec &x, const VecB &b, const Precond &M, + int restart, iteration &outer, Basis& KS) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + std::vector w(vect_size(x)), r(vect_size(x)), u(vect_size(x)); + std::vector c_rot(restart+1), s_rot(restart+1), s(restart+1); + gmm::dense_matrix H(restart+1, restart); +#ifdef GMM_USES_MPI + double t_ref, t_prec = MPI_Wtime(), t_tot = 0; + static double tmult_tot = 0.0; +t_ref = MPI_Wtime(); + cout << "GMRES " << endl; +#endif + mult(M,b,r); + outer.set_rhsnorm(gmm::vect_norm2(r)); + if (outer.get_rhsnorm() == 0.0) { clear(x); return; } + + mult(A, scaled(x, T(-1)), b, w); + mult(M, w, r); + R beta = gmm::vect_norm2(r), beta_old = beta; + int blocked = 0; + + iteration inner = outer; + inner.reduce_noisy(); + inner.set_maxiter(restart); + inner.set_name("GMRes inner"); + + while (! outer.finished(beta)) { + + gmm::copy(gmm::scaled(r, R(1)/beta), KS[0]); + gmm::clear(s); + s[0] = beta; + + size_type i = 0; inner.init(); + + do { + mult(A, KS[i], u); + mult(M, u, KS[i+1]); + orthogonalize(KS, mat_col(H, i), i); + R a = gmm::vect_norm2(KS[i+1]); + H(i+1, i) = T(a); + gmm::scale(KS[i+1], T(1) / a); + for (size_type k = 0; k < i; ++k) + Apply_Givens_rotation_left(H(k,i), H(k+1,i), c_rot[k], s_rot[k]); + + Givens_rotation(H(i,i), H(i+1,i), c_rot[i], s_rot[i]); + Apply_Givens_rotation_left(H(i,i), H(i+1,i), c_rot[i], s_rot[i]); + Apply_Givens_rotation_left(s[i], s[i+1], c_rot[i], s_rot[i]); + + ++inner, ++outer, ++i; + } while (! 
inner.finished(gmm::abs(s[i]))); + + upper_tri_solve(H, s, i, false); + combine(KS, s, x, i); + mult(A, gmm::scaled(x, T(-1)), b, w); + mult(M, w, r); + beta_old = std::min(beta, beta_old); beta = gmm::vect_norm2(r); + if (int(inner.get_iteration()) < restart -1 || beta_old <= beta) + ++blocked; else blocked = 0; + if (blocked > 10) { + if (outer.get_noisy()) cout << "Gmres is blocked, exiting\n"; + break; + } +#ifdef GMM_USES_MPI + t_tot = MPI_Wtime() - t_ref; + cout << "temps GMRES : " << t_tot << endl; +#endif + } + } + + + template + void gmres(const Mat &A, Vec &x, const VecB &b, + const Precond &M, int restart, iteration& outer) { + typedef typename linalg_traits::value_type T; + modified_gram_schmidt orth(restart, vect_size(x)); + gmres(A, x, b, M, restart, outer, orth); + } + +} + +#endif diff --git a/gmm/gmm_solver_idgmres.h b/gmm/gmm_solver_idgmres.h new file mode 100644 index 000000000..79bb9064d --- /dev/null +++ b/gmm/gmm_solver_idgmres.h @@ -0,0 +1,805 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard, Caroline Lecalvez + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_solver_idgmres.h + @author Caroline Lecalvez + @author Yves Renard + @date October 6, 2003. + @brief Implicitly restarted and deflated Generalized Minimum Residual. 
+*/ +#ifndef GMM_IDGMRES_H +#define GMM_IDGMRES_H + +#include "gmm_kernel.h" +#include "gmm_iter.h" +#include "gmm_dense_sylvester.h" + +namespace gmm { + + template compare_vp { + bool operator()(const std::pair &a, + const std::pair &b) const + { return (gmm::abs(a.first) > gmm::abs(b.first)); } + } + + struct idgmres_state { + size_type m, tb_deb, tb_def, p, k, nb_want, nb_unwant; + size_type nb_nolong, tb_deftot, tb_defwant, conv, nb_un, fin; + bool ok; + + idgmres_state(size_type mm, size_type pp, size_type kk) + : m(mm), tb_deb(1), tb_def(0), p(pp), k(kk), nb_want(0), + nb_unwant(0), nb_nolong(0), tb_deftot(0), tb_defwant(0), + conv(0), nb_un(0), fin(0), ok(false); {} + } + + idgmres_state(size_type mm, size_type pp, size_type kk) + : m(mm), tb_deb(1), tb_def(0), p(pp), k(kk), nb_want(0), + nb_unwant(0), nb_nolong(0), tb_deftot(0), tb_defwant(0), + conv(0), nb_un(0), fin(0), ok(false); {} + + + template + apply_permutation(CONT &cont, const IND &ind) { + size_type m = ind.end() - ind.begin(); + std::vector sorted(m, false); + + for (size_type l = 0; l < m; ++l) + if (!sorted[l] && ind[l] != l) { + + typeid(cont[0]) aux = cont[l]; + k = ind[l]; + cont[l] = cont[k]; + sorted[l] = true; + + for(k2 = ind[k]; k2 != l; k2 = ind[k]) { + cont[k] = cont[k2]; + sorted[k] = true; + k = k2; + } + cont[k] = aux; + } + } + + + /** Implicitly restarted and deflated Generalized Minimum Residual + + See: C. Le Calvez, B. Molina, Implicitly restarted and deflated + FOM and GMRES, numerical applied mathematics, + (30) 2-3 (1999) pp191-212. + + @param A Real or complex unsymmetric matrix. + @param x initial guess vector and final result. + @param b right hand side + @param M preconditionner + @param m size of the subspace between two restarts + @param p number of converged ritz values seeked + @param k size of the remaining Krylov subspace when the p ritz values + have not yet converged 0 <= p <= k < m. + @param tol_vp : tolerance on the ritz values. 
+ @param outer + @param KS + */ + template < typename Mat, typename Vec, typename VecB, typename Precond, + typename Basis > + void idgmres(const Mat &A, Vec &x, const VecB &b, const Precond &M, + size_type m, size_type p, size_type k, double tol_vp, + iteration &outer, Basis& KS) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + R a, beta; + idgmres_state st(m, p, k); + + std::vector w(vect_size(x)), r(vect_size(x)), u(vect_size(x)); + std::vector c_rot(m+1), s_rot(m+1), s(m+1); + std::vector y(m+1), ztest(m+1), gam(m+1); + std::vector gamma(m+1); + gmm::dense_matrix H(m+1, m), Hess(m+1, m), + Hobl(m+1, m), W(vect_size(x), m+1); + + gmm::clear(H); + + outer.set_rhsnorm(gmm::vect_norm2(b)); + if (outer.get_rhsnorm() == 0.0) { clear(x); return; } + + mult(A, scaled(x, -1.0), b, w); + mult(M, w, r); + beta = gmm::vect_norm2(r); + + iteration inner = outer; + inner.reduce_noisy(); + inner.set_maxiter(m); + inner.set_name("GMRes inner iter"); + + while (! outer.finished(beta)) { + + gmm::copy(gmm::scaled(r, 1.0/beta), KS[0]); + gmm::clear(s); + s[0] = beta; + gmm::copy(s, gamma); + + inner.set_maxiter(m - st.tb_deb + 1); + size_type i = st.tb_deb - 1; inner.init(); + + do { + mult(A, KS[i], u); + mult(M, u, KS[i+1]); + orthogonalize_with_refinment(KS, mat_col(H, i), i); + H(i+1, i) = a = gmm::vect_norm2(KS[i+1]); + gmm::scale(KS[i+1], R(1) / a); + + gmm::copy(mat_col(H, i), mat_col(Hess, i)); + gmm::copy(mat_col(H, i), mat_col(Hobl, i)); + + + for (size_type l = 0; l < i; ++l) + Apply_Givens_rotation_left(H(l,i), H(l+1,i), c_rot[l], s_rot[l]); + + Givens_rotation(H(i,i), H(i+1,i), c_rot[i], s_rot[i]); + Apply_Givens_rotation_left(H(i,i), H(i+1,i), c_rot[i], s_rot[i]); + H(i+1, i) = T(0); + Apply_Givens_rotation_left(s[i], s[i+1], c_rot[i], s_rot[i]); + + ++inner, ++outer, ++i; + } while (! 
inner.finished(gmm::abs(s[i]))); + + if (inner.converged()) { + gmm::copy(s, y); + upper_tri_solve(H, y, i, false); + combine(KS, y, x, i); + mult(A, gmm::scaled(x, T(-1)), b, w); + mult(M, w, r); + beta = gmm::vect_norm2(r); // + verif sur beta ... à faire + break; + } + + gmm::clear(gam); gam[m] = s[i]; + for (size_type l = m; l > 0; --l) + Apply_Givens_rotation_left(gam[l-1], gam[l], gmm::conj(c_rot[l-1]), + -s_rot[l-1]); + + mult(KS.mat(), gam, r); + beta = gmm::vect_norm2(r); + + mult(Hess, scaled(y, T(-1)), gamma, ztest); + // En fait, d'après Caroline qui s'y connait ztest et gam devrait + // être confondus + // Quand on aura vérifié que ça marche, il faudra utiliser gam à la + // place de ztest. + if (st.tb_def < p) { + T nss = H(m,m-1) / ztest[m]; + nss /= gmm::abs(nss); // ns à calculer plus tard aussi + gmm::copy(KS.mat(), W); gmm::copy(scaled(r, nss /beta), mat_col(W, m)); + + // Computation of the oblique matrix + sub_interval SUBI(0, m); + add(scaled(sub_vector(ztest, SUBI), -Hobl(m, m-1) / ztest[m]), + sub_vector(mat_col(Hobl, m-1), SUBI)); + Hobl(m, m-1) *= nss * beta / ztest[m]; + + /* **************************************************************** */ + /* Locking */ + /* **************************************************************** */ + + // Computation of the Ritz eigenpairs. + std::vector > eval(m); + dense_matrix YB(m-st.tb_def, m-st.tb_def); + std::vector pure(m-st.tb_def, 0); + gmm::clear(YB); + + select_eval(Hobl, eval, YB, pure, st); + + if (st.conv != 0) { + // DEFLATION using the QR Factorization of YB + + T alpha = Lock(W, Hobl, + sub_matrix(YB, sub_interval(0, m-st.tb_def)), + sub_interval(st.tb_def, m-st.tb_def), + (st.tb_defwant < p)); + // ns *= alpha; // à calculer plus tard ?? + // V(:,m+1) = alpha*V(:, m+1); ça devait servir à qlq chose ... 
+ + + // Clean the portions below the diagonal corresponding + // to the lock Schur vectors + + for (size_type j = st.tb_def; j < st.tb_deftot; ++j) { + if ( pure[j-st.tb_def] == 0) + gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j))); + else if (pure[j-st.tb_def] == 1) { + gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1), + sub_interval(j, 2))); + ++j; + } + else GMM_ASSERT3(false, "internal error"); + } + + if (!st.ok) { + + // attention si m = 0; + size_type mm = std::min(k+st.nb_unwant+st.nb_nolong, m-1); + + if (eval_sort[m-mm-1].second != R(0) + && eval_sort[m-mm-1].second == -eval_sort[m-mm].second) ++mm; + + std::vector > shifts(m-mm); + for (size_type i = 0; i < m-mm; ++i) + shifts[i] = eval_sort[i].second; + + apply_shift_to_Arnoldi_factorization(W, Hobl, shifts, mm, + m-mm, true); + + st.fin = mm; + } + else + st.fin = st.tb_deftot; + + + /* ************************************************************** */ + /* Purge */ + /* ************************************************************** */ + + if (st.nb_nolong + st.nb_unwant > 0) { + + std::vector > eval(m); + dense_matrix YB(st.fin, st.tb_deftot); + std::vector pure(st.tb_deftot, 0); + gmm::clear(YB); + st.nb_un = st.nb_nolong + st.nb_unwant; + + select_eval_for_purging(Hobl, eval, YB, pure, st); + + T alpha = Lock(W, Hobl, YB, sub_interval(0, st.fin), ok); + + // Clean the portions below the diagonal corresponding + // to the unwanted lock Schur vectors + + for (size_type j = 0; j < st.tb_deftot; ++j) { + if ( pure[j] == 0) + gmm::clear(sub_vector(mat_col(Hobl,j), sub_interval(j+1,m-j))); + else if (pure[j] == 1) { + gmm::clear(sub_matrix(Hobl, sub_interval(j+2,m-j-1), + sub_interval(j, 2))); + ++j; + } + else GMM_ASSERT3(false, "internal error"); + } + + gmm::dense_matrix z(st.nb_un, st.fin - st.nb_un); + sub_interval SUBI(0, st.nb_un), SUBJ(st.nb_un, st.fin - st.nb_un); + sylvester(sub_matrix(Hobl, SUBI), + sub_matrix(Hobl, SUBJ), + sub_matrix(gmm::scaled(Hobl, -T(1)), SUBI, SUBJ), z); 
+ + } + + } + + } + } + } + + + template < typename Mat, typename Vec, typename VecB, typename Precond > + void idgmres(const Mat &A, Vec &x, const VecB &b, + const Precond &M, size_type m, iteration& outer) { + typedef typename linalg_traits::value_type T; + modified_gram_schmidt orth(m, vect_size(x)); + gmres(A, x, b, M, m, outer, orth); + } + + + // Lock stage of an implicit restarted Arnoldi process. + // 1- QR factorization of YB through Householder matrices + // Q(Rl) = YB + // (0 ) + // 2- Update of the Arnoldi factorization. + // H <- Q*HQ, W <- WQ + // 3- Restore the Hessemberg form of H. + + template + void Lock(gmm::dense_matrix &W, gmm::dense_matrix &H, + const MATYB &YB, const sub_interval SUB, + bool restore, T &ns) { + + size_type n = mat_nrows(W), m = mat_ncols(W) - 1; + size_type ncols = mat_ncols(YB), nrows = mat_nrows(YB); + size_type begin = min(SUB); end = max(SUB) - 1; + sub_interval SUBR(0, nrows), SUBC(0, ncols); + T alpha(1); + + GMM_ASSERT2(((end-begin) == ncols) && (m == mat_nrows(H)) + && (m+1 == mat_ncols(H)), "dimensions mismatch"); + + // DEFLATION using the QR Factorization of YB + + dense_matrix QR(n_rows, n_rows); + gmmm::copy(YB, sub_matrix(QR, SUBR, SUBC)); + gmm::clear(submatrix(QR, SUBR, sub_interval(ncols, nrows-ncols))); + qr_factor(QR); + + + apply_house_left(QR, sub_matrix(H, SUB)); + apply_house_right(QR, sub_matrix(H, SUBR, SUB)); + apply_house_right(QR, sub_matrix(W, sub_interval(0, n), SUB)); + + // Restore to the initial block hessenberg form + + if (restore) { + + // verifier quand m = 0 ... 
+ gmm::dense_matrix tab_p(end - st.tb_deftot, end - st.tb_deftot); + gmm::copy(identity_matrix(), tab_p); + + for (size_type j = end-1; j >= st.tb_deftot+2; --j) { + + size_type jm = j-1; + std::vector v(jm - st.tb_deftot); + sub_interval SUBtot(st.tb_deftot, jm - st.tb_deftot); + sub_interval SUBtot2(st.tb_deftot, end - st.tb_deftot); + gmm::copy(sub_vector(mat_row(H, j), SUBtot), v); + house_vector_last(v); + w.resize(end); + col_house_update(sub_matrix(H, SUBI, SUBtot), v, w); + w.resize(end - st.tb_deftot); + row_house_update(sub_matrix(H, SUBtot, SUBtot2), v, w); + gmm::clear(sub_vector(mat_row(H, j), + sub_interval(st.tb_deftot, j-1-st.tb_deftot))); + w.resize(end - st.tb_deftot); + col_house_update(sub_matrix(tab_p, sub_interval(0, end-st.tb_deftot), + sub_interval(0, jm-st.tb_deftot)), v, w); + w.resize(n); + col_house_update(sub_matrix(W, sub_interval(0, n), SUBtot), v, w); + } + + // restore positive subdiagonal elements + + std::vector d(fin-st.tb_deftot); d[0] = T(1); + + // We compute d[i+1] in order + // (d[i+1] * H(st.tb_deftot+i+1,st.tb_deftoti)) / d[i] + // be equal to |H(st.tb_deftot+i+1,st.tb_deftot+i))|. + for (size_type j = 0; j+1 < end-st.tb_deftot; ++j) { + T e = H(st.tb_deftot+j, st.tb_deftot+j-1); + d[j+1] = (e == T(0)) ? T(1) : d[j] * gmm::abs(e) / e; + scale(sub_vector(mat_row(H, st.tb_deftot+j+1), + sub_interval(st.tb_deftot, m-st.tb_deftot)), d[j+1]); + scale(mat_col(H, st.tb_deftot+j+1), T(1) / d[j+1]); + scale(mat_col(W, st.tb_deftot+j+1), T(1) / d[j+1]); + } + + alpha = tab_p(end-st.tb_deftot-1, end-st.tb_deftot-1) / d[end-st.tb_deftot-1]; + alpha /= gmm::abs(alpha); + scale(mat_col(W, m), alpha); + + } + + return alpha; + } + + + + + + + + + // Apply p implicit shifts to the Arnoldi factorization + // AV = VH+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}* + // and produces the following new Arnoldi factorization + // A(VQ) = (VQ)(Q*HQ)+H(k+p+1,k+p) V(:,k+p+1) e_{k+p}* Q + // where only the first k columns are relevant. 
+ // + // Dan Sorensen and Richard J. Radke, 11/95 + template + apply_shift_to_Arnoldi_factorization(dense_matrix V, dense_matrix H, + std::vector Lambda, size_type &k, + size_type p, bool true_shift = false) { + + + size_type k1 = 0, num = 0, kend = k+p, kp1 = k + 1; + bool mark = false; + T c, s, x, y, z; + + dense_matrix q(1, kend); + gmm::clear(q); q(0,kend-1) = T(1); + std::vector hv(3), w(std::max(kend, mat_nrows(V))); + + for(size_type jj = 0; jj < p; ++jj) { + // compute and apply a bulge chase sweep initiated by the + // implicit shift held in w(jj) + + if (abs(Lambda[jj].real()) == 0.0) { + // apply a real shift using 2 by 2 Givens rotations + + for (size_type k1 = 0, k2 = 0; k2 != kend-1; k1 = k2+1) { + k2 = k1; + while (h(k2+1, k2) != T(0) && k2 < kend-1) ++k2; + + Givens_rotation(H(k1, k1) - Lambda[jj], H(k1+1, k1), c, s); + + for (i = k1; i <= k2; ++i) { + if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s); + + // Ne pas oublier de nettoyer H(i+1,i-1) (le mettre à zéro). + // Vérifier qu'au final H(i+1,i) est bien un réel positif. + + // apply rotation from left to rows of H + row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)), + c, s, 0, 0); + + // apply rotation from right to columns of H + size_type ip2 = std::min(i+2, kend); + col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2)) + c, s, 0, 0); + + // apply rotation from right to columns of V + col_rot(V, c, s, i, i+1); + + // accumulate e' Q so residual can be updated k+p + Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s); + // peut être que nous utilisons G au lieu de G* et que + // nous allons trop loin en k2. + } + } + + num = num + 1; + } + else { + + // Apply a double complex shift using 3 by 3 Householder + // transformations + + if (jj == p || mark) + mark = false; // skip application of conjugate shift + else { + num = num + 2; // mark that a complex conjugate + mark = true; // pair has been applied + + // Indices de fin de boucle à surveiller... 
de près ! + for (size_type k1 = 0, k3 = 0; k3 != kend-2; k1 = k3+1) { + k3 = k1; + while (h(k3+1, k3) != T(0) && k3 < kend-2) ++k3; + size_type k2 = k1+1; + + + x = H(k1,k1) * H(k1,k1) + H(k1,k2) * H(k2,k1) + - 2.0*Lambda[jj].real() * H(k1,k1) + gmm::abs_sqr(Lambda[jj]); + y = H(k2,k1) * (H(k1,k1) + H(k2,k2) - 2.0*Lambda[jj].real()); + z = H(k2+1,k2) * H(k2,k1); + + for (size_type i = k1; i <= k3; ++i) { + if (i > k1) { + x = H(i, i-1); + y = H(i+1, i-1); + z = H(i+2, i-1); + // Ne pas oublier de nettoyer H(i+1,i-1) et H(i+2,i-1) + // (les mettre à zéro). + } + + hv[0] = x; hv[1] = y; hv[2] = z; + house_vector(v); + + // Vérifier qu'au final H(i+1,i) est bien un réel positif + + // apply transformation from left to rows of H + w.resize(kend-i); + row_house_update(sub_matrix(H, sub_interval(i, 2), + sub_interval(i, kend-i)), v, w); + + // apply transformation from right to columns of H + + size_type ip3 = std::min(kend, i + 3); + w.resize(ip3); + col_house_update(sub_matrix(H, sub_interval(0, ip3), + sub_interval(i, 2)), v, w); + + // apply transformation from right to columns of V + + w.resize(mat_nrows(V)); + col_house_update(sub_matrix(V, sub_interval(0, mat_nrows(V)), + sub_interval(i, 2)), v, w); + + // accumulate e' Q so residual can be updated k+p + + w.resize(1); + col_house_update(sub_matrix(q, sub_interval(0,1), + sub_interval(i,2)), v, w); + + } + } + + // clean up step with Givens rotation + + i = kend-2; + c = x; s = y; + if (i > k1) Givens_rotation(H(i, i-1), H(i+1, i-1), c, s); + + // Ne pas oublier de nettoyer H(i+1,i-1) (le mettre à zéro). + // Vérifier qu'au final H(i+1,i) est bien un réel positif. 
+ + // apply rotation from left to rows of H + row_rot(sub_matrix(H, sub_interval(i,2), sub_interval(i, kend-i)), + c, s, 0, 0); + + // apply rotation from right to columns of H + size_type ip2 = std::min(i+2, kend); + col_rot(sub_matrix(H, sub_interval(0, ip2), sub_interval(i, 2)) + c, s, 0, 0); + + // apply rotation from right to columns of V + col_rot(V, c, s, i, i+1); + + // accumulate e' Q so residual can be updated k+p + Apply_Givens_rotation_left(q(0,i), q(0,i+1), c, s); + + } + } + } + + // update residual and store in the k+1 -st column of v + + k = kend - num; + scale(mat_col(V, kend), q(0, k)); + + if (k < mat_nrows(H)) { + if (true_shift) + gmm::copy(mat_col(V, kend), mat_col(V, k)); + else + // v(:,k+1) = v(:,kend+1) + v(:,k+1)*h(k+1,k); + // v(:,k+1) = v(:,kend+1) ; + gmm::add(scaled(mat_col(V, kend), H(kend, kend-1)), + scaled(mat_col(V, k), H(k, k-1)), mat_col(V, k)); + } + + H(k, k-1) = vect_norm2(mat_col(V, k)); + scale(mat_col(V, kend), T(1) / H(k, k-1)); + } + + + + template + void select_eval(const MAT &Hobl, EVAL &eval, MAT &YB, PURE &pure, + idgmres_state &st) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + size_type m = st.m; + + // Computation of the Ritz eigenpairs. 
+ + col_matrix< std::vector > evect(m-st.tb_def, m-st.tb_def); + // std::vector > eval(m); + std::vector ritznew(m, T(-1)); + + // dense_matrix evect_lock(st.tb_def, st.tb_def); + + sub_interval SUB1(st.tb_def, m-st.tb_def); + implicit_qr_algorithm(sub_matrix(Hobl, SUB1), + sub_vector(eval, SUB1), evect); + sub_interval SUB2(0, st.tb_def); + implicit_qr_algorithm(sub_matrix(Hobl, SUB2), + sub_vector(eval, SUB2), /* evect_lock */); + + for (size_type l = st.tb_def; l < m; ++l) + ritznew[l] = gmm::abs(evect(m-st.tb_def-1, l-st.tb_def) * Hobl(m, m-1)); + + std::vector< std::pair > eval_sort(m); + for (size_type l = 0; l < m; ++l) + eval_sort[l] = std::pair(eval[l], l); + std::sort(eval_sort.begin(), eval_sort.end(), compare_vp()); + + std::vector index(m); + for (size_type l = 0; l < m; ++l) index[l] = eval_sort[l].second; + + std::vector kept(m, false); + std::fill(kept.begin(), kept.begin()+st.tb_def, true); + + apply_permutation(eval, index); + apply_permutation(evect, index); + apply_permutation(ritznew, index); + apply_permutation(kept, index); + + // Which are the eigenvalues that converged ? + // + // nb_want is the number of eigenvalues of + // Hess(tb_def+1:n,tb_def+1:n) that converged and are WANTED + // + // nb_unwant is the number of eigenvalues of + // Hess(tb_def+1:n,tb_def+1:n) that converged and are UNWANTED + // + // nb_nolong is the number of eigenvalues of + // Hess(1:tb_def,1:tb_def) that are NO LONGER WANTED. + // + // tb_deftot is the number of the deflated eigenvalues + // that is tb_def + nb_want + nb_unwant + // + // tb_defwant is the number of the wanted deflated eigenvalues + // that is tb_def + nb_want - nb_nolong + + st.nb_want = 0, st.nb_unwant = 0, st.nb_nolong = 0; + size_type j, ind; + + for (j = 0, ind = 0; j < m-p; ++j) { + if (ritznew[j] == R(-1)) { + if (std::imag(eval[j]) != R(0)) { + st.nb_nolong += 2; ++j; // à adapter dans le cas complexe ... 
+ } + else st.nb_nolong++; + } + else { + if (ritznew[j] + < tol_vp * gmm::abs(eval[j])) { + + for (size_type l = 0, l < m-st.tb_def; ++l) + YB(l, ind) = std::real(evect(l, j)); + kept[j] = true; + ++j; ++st.nb_unwant; ind++; + + if (std::imag(eval[j]) != R(0)) { + for (size_type l = 0, l < m-st.tb_def; ++l) + YB(l, ind) = std::imag(evect(l, j)); + pure[ind-1] = 1; + pure[ind] = 2; + + kept[j] = true; + + st.nb_unwant++; + ++ind; + } + } + } + } + + + for (; j < m; ++j) { + if (ritznew[j] != R(-1)) { + + for (size_type l = 0, l < m-st.tb_def; ++l) + YB(l, ind) = std::real(evect(l, j)); + pure[ind] = 1; + ++ind; + kept[j] = true; + ++st.nb_want; + + if (ritznew[j] + < tol_vp * gmm::abs(eval[j])) { + for (size_type l = 0, l < m-st.tb_def; ++l) + YB(l, ind) = std::imag(evect(l, j)); + pure[ind] = 2; + + j++; + kept[j] = true; + + st.nb_want++; + ++ind; + } + } + } + + std::vector shift(m - st.tb_def - st.nb_want - st.nb_unwant); + for (size_type j = 0, i = 0; j < m; ++j) + if (!kept[j]) shift[i++] = eval[j]; + + // st.conv (st.nb_want+st.nb_unwant) is the number of eigenpairs that + // have just converged. + // st.tb_deftot is the total number of eigenpairs that have converged. + + size_type st.conv = ind; + size_type st.tb_deftot = st.tb_def + st.conv; + size_type st.tb_defwant = st.tb_def + st.nb_want - st.nb_nolong; + + sub_interval SUBYB(0, st.conv); + + if ( st.tb_defwant >= p ) { // An invariant subspace has been found. + + st.nb_unwant = 0; + st.nb_want = p + st.nb_nolong - st.tb_def; + st.tb_defwant = p; + + if ( pure[st.conv - st.nb_want + 1] == 2 ) { + ++st.nb_want; st.tb_defwant = ++p;// il faudrait que ce soit un p local + } + + SUBYB = sub_interval(st.conv - st.nb_want, st.nb_want); + // YB = YB(:, st.conv-st.nb_want+1 : st.conv); // On laisse en suspend .. + // pure = pure(st.conv-st.nb_want+1 : st.conv,1); // On laisse suspend .. 
+ st.conv = st.nb_want; + st.tb_deftot = st.tb_def + st.conv; + st.ok = true; + } + + } + + + + template + void select_eval_for_purging(const MAT &Hobl, EVAL &eval, MAT &YB, + PURE &pure, idgmres_state &st) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + size_type m = st.m; + + // Computation of the Ritz eigenpairs. + + col_matrix< std::vector > evect(st.tb_deftot, st.tb_deftot); + + sub_interval SUB1(0, st.tb_deftot); + implicit_qr_algorithm(sub_matrix(Hobl, SUB1), + sub_vector(eval, SUB1), evect); + std::fill(eval.begin() + st.tb_deftot, eval.end(), std::complex(0)); + + std::vector< std::pair > eval_sort(m); + for (size_type l = 0; l < m; ++l) + eval_sort[l] = std::pair(eval[l], l); + std::sort(eval_sort.begin(), eval_sort.end(), compare_vp()); + + std::vector sorted(m); + std::fill(sorted.begin(), sorted.end(), false); + + std::vector ind(m); + for (size_type l = 0; l < m; ++l) ind[l] = eval_sort[l].second; + + std::vector kept(m, false); + std::fill(kept.begin(), kept.begin()+st.tb_def, true); + + apply_permutation(eval, ind); + apply_permutation(evect, ind); + + size_type j; + for (j = 0; j < st.tb_deftot; ++j) { + + for (size_type l = 0, l < st.tb_deftot; ++l) + YB(l, j) = std::real(evect(l, j)); + + if (std::imag(eval[j]) != R(0)) { + for (size_type l = 0, l < m-st.tb_def; ++l) + YB(l, j+1) = std::imag(evect(l, j)); + pure[j] = 1; + pure[j+1] = 2; + + j += 2; + } + else ++j; + } + } + + + + + + +} + +#endif diff --git a/gmm/gmm_solver_qmr.h b/gmm/gmm_solver_qmr.h new file mode 100644 index 000000000..ca6b8e075 --- /dev/null +++ b/gmm/gmm_solver_qmr.h @@ -0,0 +1,210 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as 
published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +// This file is a modified version of qmr.h from ITL. +// See http://osl.iu.edu/research/itl/ +// Following the corresponding Copyright notice. +//=========================================================================== +// +// Copyright (c) 1997-2001, The Trustees of Indiana University. +// All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the University of Notre Dame nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE TRUSTEES OF INDIANA UNIVERSITY AND +// CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TRUSTEES +// OF INDIANA UNIVERSITY AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//=========================================================================== + +/**@file gmm_solver_qmr.h + @author Andrew Lumsdaine + @author Lie-Quan Lee + @author Yves Renard + @date October 13, 2002. + @brief Quasi-Minimal Residual iterative solver. +*/ +#ifndef GMM_QMR_H +#define GMM_QMR_H + +#include "gmm_kernel.h" +#include "gmm_iter.h" + +namespace gmm { + + /** Quasi-Minimal Residual. + + This routine solves the unsymmetric linear system Ax = b using + the Quasi-Minimal Residual method. + + See: R. W. Freund and N. M. Nachtigal, A quasi-minimal residual + method for non-Hermitian linear systems, Numerical Math., + 60(1991), pp. 
315-339 + + Preconditioner - Incomplete LU, Incomplete LU with threshold, + SSOR or identity_preconditioner. + */ + template + void qmr(const Matrix &A, Vector &x, const VectorB &b, const Precond1 &M1, + iteration& iter) { + + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + T delta(0), ep(0), beta(0), theta_1(0), gamma_1(0); + T theta(0), gamma(1), eta(-1); + R rho_1(0), rho, xi; + + typedef typename temporary_vector::vector_type TmpVec; + size_type nn = vect_size(x); + TmpVec r(nn), v_tld(nn), y(nn), w_tld(nn), z(nn), v(nn), w(nn); + TmpVec y_tld(nn), z_tld(nn), p(nn), q(nn), p_tld(nn), d(nn), s(nn); + + iter.set_rhsnorm(double(gmm::vect_norm2(b))); + if (iter.get_rhsnorm() == 0.0) { clear(x); return; } + + gmm::mult(A, gmm::scaled(x, T(-1)), b, r); + gmm::copy(r, v_tld); + + gmm::left_mult(M1, v_tld, y); + rho = gmm::vect_norm2(y); + + gmm::copy(r, w_tld); + gmm::transposed_right_mult(M1, w_tld, z); + xi = gmm::vect_norm2(z); + + while (! 
iter.finished_vect(r)) { + + if (rho == R(0) || xi == R(0)) { + if (iter.get_maxiter() == size_type(-1)) + { GMM_ASSERT1(false, "QMR failed to converge"); } + else { GMM_WARNING1("QMR failed to converge"); return; } + } + gmm::copy(gmm::scaled(v_tld, T(R(1)/rho)), v); + gmm::scale(y, T(R(1)/rho)); + + gmm::copy(gmm::scaled(w_tld, T(R(1)/xi)), w); + gmm::scale(z, T(R(1)/xi)); + + delta = gmm::vect_sp(z, y); + if (delta == T(0)) { + if (iter.get_maxiter() == size_type(-1)) + { GMM_ASSERT1(false, "QMR failed to converge"); } + else { GMM_WARNING1("QMR failed to converge"); return; } + } + gmm::right_mult(M1, y, y_tld); + gmm::transposed_left_mult(M1, z, z_tld); + + if (iter.first()) { + gmm::copy(y_tld, p); + gmm::copy(z_tld, q); + } else { + gmm::add(y_tld, gmm::scaled(p, -(T(xi * delta) / ep)), p); + gmm::add(z_tld, gmm::scaled(q, -(T(rho * delta) / ep)), q); + } + + gmm::mult(A, p, p_tld); + + ep = gmm::vect_sp(q, p_tld); + if (ep == T(0)) { + if (iter.get_maxiter() == size_type(-1)) + { GMM_ASSERT1(false, "QMR failed to converge"); } + else { GMM_WARNING1("QMR failed to converge"); return; } + } + beta = ep / delta; + if (beta == T(0)) { + if (iter.get_maxiter() == size_type(-1)) + { GMM_ASSERT1(false, "QMR failed to converge"); } + else { GMM_WARNING1("QMR failed to converge"); return; } + } + gmm::add(p_tld, gmm::scaled(v, -beta), v_tld); + gmm::left_mult(M1, v_tld, y); + + rho_1 = rho; + rho = gmm::vect_norm2(y); + + gmm::mult(gmm::transposed(A), q, w_tld); + gmm::add(w_tld, gmm::scaled(w, -beta), w_tld); + gmm::transposed_right_mult(M1, w_tld, z); + + xi = gmm::vect_norm2(z); + + gamma_1 = gamma; + theta_1 = theta; + + theta = rho / (gamma_1 * beta); + gamma = T(1) / gmm::sqrt(T(1) + gmm::sqr(theta)); + + if (gamma == T(0)) { + if (iter.get_maxiter() == size_type(-1)) + { GMM_ASSERT1(false, "QMR failed to converge"); } + else { GMM_WARNING1("QMR failed to converge"); return; } + } + eta = -eta * T(rho_1) * gmm::sqr(gamma) / (beta * gmm::sqr(gamma_1)); + + if 
(iter.first()) { + gmm::copy(gmm::scaled(p, eta), d); + gmm::copy(gmm::scaled(p_tld, eta), s); + } else { + T tmp = gmm::sqr(theta_1 * gamma); + gmm::add(gmm::scaled(p, eta), gmm::scaled(d, tmp), d); + gmm::add(gmm::scaled(p_tld, eta), gmm::scaled(s, tmp), s); + } + gmm::add(d, x); + gmm::add(gmm::scaled(s, T(-1)), r); + + ++iter; + } + } + + +} + +#endif + diff --git a/gmm/gmm_std.h b/gmm/gmm_std.h new file mode 100644 index 000000000..8727e059b --- /dev/null +++ b/gmm/gmm_std.h @@ -0,0 +1,424 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_std.h +@author Yves Renard , +@author Julien Pommier +@date June 01, 1995. +@brief basic setup for gmm (includes, typedefs etc.) +*/ +#ifndef GMM_STD_H__ +#define GMM_STD_H__ + +//#include + +#ifndef __USE_STD_IOSTREAM +# define __USE_STD_IOSTREAM +#endif + +#ifndef __USE_BSD +# define __USE_BSD +#endif + +#ifndef __USE_ISOC99 +# define __USE_ISOC99 +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1400 // Secure versions for VC++ +# define GMM_SECURE_CRT +# define SECURE_NONCHAR_SSCANF sscanf_s +# define SECURE_NONCHAR_FSCANF fscanf_s +# define SECURE_STRNCPY(a, la, b, lb) strncpy_s(a, la, b, lb) +# define SECURE_FOPEN(F, filename, mode) (*(F) = 0, fopen_s(F, filename, mode)) +# define SECURE_SPRINTF1(S, l, st, p1) sprintf_s(S, l, st, p1) +# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf_s(S, l, st, p1, p2) +# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf_s(S, l, st, p1, p2, p3, p4) +# define SECURE_STRDUP(s) _strdup(s) +# ifndef _SCL_SECURE_NO_DEPRECATE +# error Add the option /D_SCL_SECURE_NO_DEPRECATE to the compilation command +# endif +#else +# define SECURE_NONCHAR_SSCANF sscanf +# define SECURE_NONCHAR_FSCANF fscanf +# define SECURE_STRNCPY(a, la, b, lb) strncpy(a, b, lb) +# define SECURE_FOPEN(F, filename, mode) ((*(F)) = fopen(filename, mode)) +# define SECURE_SPRINTF1(S, l, st, p1) sprintf(S, st, p1) +# define SECURE_SPRINTF2(S, l, st, p1, p2) sprintf(S, st, p1, p2) +# define SECURE_SPRINTF4(S, l, st, p1, p2, p3, p4) sprintf(S, st, p1, p2, p3, p4) +# define SECURE_STRDUP(s) strdup(s) +#endif + +inline void GMM_NOPERATION_(int) { } +#define GMM_NOPERATION(a) { GMM_NOPERATION_(abs(&(a) != &(a))); } + +/* ********************************************************************** */ +/* Compilers detection. 
*/ +/* ********************************************************************** */ + +/* for sun CC 5.0 ... +#if defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x500 +# include +# undef _RWSTD_NO_CLASS_PARTIAL_SPEC +# undef _RWSTD_NO_NAMESPACE +#endif +*/ +/* for VISUAL C++ ... +#if defined(_MSC_VER) // && !defined(__MWERKS__) +#define _GETFEM_MSVCPP_ _MSC_VER +#endif +*/ + +#if defined(__GNUC__) +# if (__GNUC__ < 4) +# error : PLEASE UPDATE g++ TO AT LEAST 4.8 VERSION +# endif +#endif + +/* ********************************************************************** */ +/* C++ Standard Headers. */ +/* ********************************************************************** */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace std { +#if defined(__GNUC__) && (__cplusplus <= 201103L) + template + struct _MakeUniq + { typedef unique_ptr<_Tp> __single_object; }; + template + struct _MakeUniq<_Tp[]> + { typedef unique_ptr<_Tp[]> __array; }; + template + struct _MakeUniq<_Tp[_Bound]> + { struct __invalid_type { }; }; + /// std::make_unique for single objects + template + inline typename _MakeUniq<_Tp>::__single_object + make_unique(_Args&&... __args) + { return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...)); } + /// std::make_unique for arrays of unknown bound + template + inline typename _MakeUniq<_Tp>::__array + make_unique(size_t __num) + { return unique_ptr<_Tp>(new typename remove_extent<_Tp>::type[__num]()); } + /// Disable std::make_unique for arrays of known bound + template + inline typename _MakeUniq<_Tp>::__invalid_type + make_unique(_Args&&...) 
= delete; +#endif + + + // Should simply be replaced by std::shared_ptr when it will be supported + // by the STL + template class shared_array_ptr : shared_ptr { + public: + shared_array_ptr() {} + shared_array_ptr(T *q) : std::shared_ptr(q, default_delete()) {} + template shared_array_ptr(const std::shared_ptr &p, T *q) + : std::shared_ptr(p, q) {} + T *get() const { return shared_ptr::get(); } + T& operator*() const { return shared_ptr::operator*(); } + T* operator->() const { return shared_ptr::operator->(); } + }; + + template shared_array_ptr make_shared_array(size_t num) + { return shared_array_ptr(new T[num]); } +} + + + + +#ifdef GETFEM_HAVE_OPENMP + +#include + /**number of OpenMP threads*/ + inline size_t num_threads(){return omp_get_max_threads();} + /**index of the current thread*/ + inline size_t this_thread() {return omp_get_thread_num();} + /**is the program running in the parallel section*/ + inline bool me_is_multithreaded_now(){return static_cast(omp_in_parallel());} +#else + inline size_t num_threads(){return size_t(1);} + inline size_t this_thread() {return size_t(0);} + inline bool me_is_multithreaded_now(){return false;} +#endif + +namespace gmm { + + using std::endl; using std::cout; using std::cerr; + using std::ends; using std::cin; using std::isnan; + +#ifdef _WIN32 + + class standard_locale { + std::string cloc; + std::locale cinloc; + public : + inline standard_locale(void) : cinloc(cin.getloc()) + { + if (!me_is_multithreaded_now()){ + cloc=setlocale(LC_NUMERIC, 0); + setlocale(LC_NUMERIC,"C"); + } + } + + inline ~standard_locale() { + if (!me_is_multithreaded_now()) + setlocale(LC_NUMERIC, cloc.c_str()); + + } + }; +#else + /**this is the above solutions for linux, but I still needs to be tested.*/ + //class standard_locale { + // locale_t oldloc; + // locale_t temploc; + + //public : + // inline standard_locale(void) : oldloc(uselocale((locale_t)0)) + // { + // temploc = newlocale(LC_NUMERIC, "C", NULL); + // uselocale(temploc); + // 
} + + // inline ~standard_locale() + // { + // uselocale(oldloc); + // freelocale(temploc); + // } + //}; + + + class standard_locale { + std::string cloc; + std::locale cinloc; + + public : + inline standard_locale(void) + : cloc(setlocale(LC_NUMERIC, 0)), cinloc(cin.getloc()) + { setlocale(LC_NUMERIC,"C"); cin.imbue(std::locale("C")); } + inline ~standard_locale() + { setlocale(LC_NUMERIC, cloc.c_str()); cin.imbue(cinloc); } + }; + + +#endif + + class stream_standard_locale { + std::locale cloc; + std::ios &io; + + public : + inline stream_standard_locale(std::ios &i) + : cloc(i.getloc()), io(i) { io.imbue(std::locale("C")); } + inline ~stream_standard_locale() { io.imbue(cloc); } + }; + + + + + /* ******************************************************************* */ + /* Clock functions. */ + /* ******************************************************************* */ + +# if defined(HAVE_SYS_TIMES) + inline double uclock_sec(void) { + static double ttclk = 0.; + if (ttclk == 0.) ttclk = sysconf(_SC_CLK_TCK); + tms t; times(&t); return double(t.tms_utime) / ttclk; + } +# else + inline double uclock_sec(void) + { return double(clock())/double(CLOCKS_PER_SEC); } +# endif + + /* ******************************************************************** */ + /* Fixed size integer types. 
*/ + /* ******************************************************************** */ + // Remark : the test program dynamic_array tests the length of + // resulting integers + + template struct fixed_size_integer_generator { + typedef void int_base_type; + typedef void uint_base_type; + }; + + template <> struct fixed_size_integer_generator { + typedef signed char int_base_type; + typedef unsigned char uint_base_type; + }; + + template <> struct fixed_size_integer_generator { + typedef signed short int int_base_type; + typedef unsigned short int uint_base_type; +}; + +template <> struct fixed_size_integer_generator { + typedef signed int int_base_type; + typedef unsigned int uint_base_type; + }; + +template <> struct fixed_size_integer_generator { + typedef signed long int_base_type; + typedef unsigned long uint_base_type; + }; + +template <> struct fixed_size_integer_generator { + typedef signed long long int_base_type; + typedef unsigned long long uint_base_type; + }; + +typedef fixed_size_integer_generator<1>::int_base_type int8_type; +typedef fixed_size_integer_generator<1>::uint_base_type uint8_type; +typedef fixed_size_integer_generator<2>::int_base_type int16_type; +typedef fixed_size_integer_generator<2>::uint_base_type uint16_type; +typedef fixed_size_integer_generator<4>::int_base_type int32_type; +typedef fixed_size_integer_generator<4>::uint_base_type uint32_type; +typedef fixed_size_integer_generator<8>::int_base_type int64_type; +typedef fixed_size_integer_generator<8>::uint_base_type uint64_type; + +// #if INT_MAX == 32767 +// typedef signed int int16_type; +// typedef unsigned int uint16_type; +// #elif SHRT_MAX == 32767 +// typedef signed short int int16_type; +// typedef unsigned short int uint16_type; +// #else +// # error "impossible to build a 16 bits integer" +// #endif + +// #if INT_MAX == 2147483647 +// typedef signed int int32_type; +// typedef unsigned int uint32_type; +// #elif SHRT_MAX == 2147483647 +// typedef signed short int int32_type; 
+// typedef unsigned short int uint32_type; +// #elif LONG_MAX == 2147483647 +// typedef signed long int int32_type; +// typedef unsigned long int uint32_type; +// #else +// # error "impossible to build a 32 bits integer" +// #endif + +// #if INT_MAX == 9223372036854775807L || INT_MAX == 9223372036854775807 +// typedef signed int int64_type; +// typedef unsigned int uint64_type; +// #elif LONG_MAX == 9223372036854775807L || LONG_MAX == 9223372036854775807 +// typedef signed long int int64_type; +// typedef unsigned long int uint64_type; +// #elif LLONG_MAX == 9223372036854775807LL || LLONG_MAX == 9223372036854775807L || LLONG_MAX == 9223372036854775807 +// typedef signed long long int int64_type; +// typedef unsigned long long int uint64_type; +// #else +// # error "impossible to build a 64 bits integer" +// #endif + +#if defined(__GNUC__) && !defined(__ICC) +/* + g++ can issue a warning at each usage of a function declared with this special attribute + (also works with typedefs and variable declarations) +*/ +# define IS_DEPRECATED __attribute__ ((__deprecated__)) +/* + the specified function is inlined at any optimization level +*/ +# define ALWAYS_INLINE __attribute__((always_inline)) +#else +# define IS_DEPRECATED +# define ALWAYS_INLINE +#endif + +} + + /* ******************************************************************** */ + /* Import/export classes and interfaces from a shared library */ + /* ******************************************************************** */ + +#if defined(EXPORTED_TO_SHARED_LIB) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define APIDECL __declspec(dllexport) +# elif defined(__GNUC__) +# define __attribute__((visibility("default"))) +# else +# define APIDECL +# endif +# if defined(IMPORTED_FROM_SHARED_LIB) +# error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE +# endif +#endif + +#if defined(IMPORTED_FROM_SHARED_LIB) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define APIDECL 
__declspec(dllimport) +# else +# define APIDECL +# endif +# if defined(EXPORTED_TO_SHARED_LIB) +# error INTENTIONAL COMPILCATION ERROR, DLL IMPORT AND EXPORT ARE INCOMPITABLE +# endif +#endif + +#ifndef EXPORTED_TO_SHARED_LIB +# ifndef IMPORTED_FROM_SHARED_LIB +# define APIDECL //empty, used during static linking +# endif +#endif + +#endif /* GMM_STD_H__ */ diff --git a/gmm/gmm_sub_index.h b/gmm/gmm_sub_index.h new file mode 100644 index 000000000..f1f0097ce --- /dev/null +++ b/gmm/gmm_sub_index.h @@ -0,0 +1,224 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. 
This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_sub_index.h + @author Yves Renard + @date October 13, 2002. + @brief sub-indices. +*/ + +#ifndef GMM_SUB_INDEX_H__ +#define GMM_SUB_INDEX_H__ + +#include "gmm_def.h" + +namespace gmm { + + /* ******************************************************************** */ + /* sub indices */ + /* ******************************************************************** */ + + struct basic_index : public std::vector { + + mutable size_type nb_ref; + // size_type key1; faire la somme des composantes + // const basic_index *rind; rindex s'il existe + + + size_t operator[](size_type i) const { + return (i < size()) ? std::vector::operator[](i) : size_type(-1); + } + + basic_index() : nb_ref(1) {} + basic_index(size_type j) : std::vector(j), nb_ref(1) {} + template basic_index(IT b, IT e) + : std::vector(e-b), nb_ref(1) { std::copy(b, e, begin()); } + basic_index(const basic_index *pbi) : nb_ref(1) { + const_iterator it = pbi->begin(), ite = pbi->end(); + size_type i = 0; + for ( ; it != ite; ++it) i = std::max(i, *it); + resize(i+1); std::fill(begin(), end(), size_type(-1)); + for (it = pbi->begin(), i = 0; it != ite; ++it, ++i) + std::vector::operator[](*it) = i; + } + void swap(size_type i, size_type j) { + std::swap(std::vector::operator[](i), + std::vector::operator[](j)); + } + + }; + + typedef basic_index *pbasic_index; + + struct index_generator { + + template static pbasic_index create_index(IT begin, IT end) + { return new basic_index(begin, end); } + static pbasic_index create_rindex(pbasic_index pbi) + { return new basic_index(pbi); } + static void attach(pbasic_index pbi) { if (pbi) pbi->nb_ref++; } + static void unattach(pbasic_index pbi) + { if (pbi && --(pbi->nb_ref) == 0) delete pbi; } + + }; + + struct sub_index { + + 
size_type first_, last_; + typedef basic_index base_type; + typedef base_type::const_iterator const_iterator; + + mutable pbasic_index ind; + mutable pbasic_index rind; + + void comp_extr(void) { + std::vector::const_iterator it = ind->begin(), ite = ind->end(); + if (it != ite) { first_=last_= *it; ++it; } else { first_=last_= 0; } + for (; it != ite; ++it) + { first_ = std::min(first_, *it); last_ = std::max(last_, *it); } + } + + inline void test_rind(void) const + { if (!rind) rind = index_generator::create_rindex(ind); } + size_type size(void) const { return ind->size(); } + size_type first(void) const { return first_; } + size_type last(void) const { return last_; } + size_type index(size_type i) const { return (*ind)[i]; } + size_type rindex(size_type i) const { + test_rind(); + if (i < rind->size()) return (*rind)[i]; else return size_type(-1); + } + + const_iterator begin(void) const { return ind->begin(); } + const_iterator end(void) const { return ind->end(); } + const_iterator rbegin(void) const { test_rind(); return rind->begin(); } + const_iterator rend(void) const { test_rind(); return rind->end(); } + + sub_index() : ind(0), rind(0) {} + template sub_index(IT it, IT ite) + : ind(index_generator::create_index(it, ite)), + rind(0) { comp_extr(); } + template sub_index(const CONT &c) + : ind(index_generator::create_index(c.begin(), c.end())), + rind(0) { comp_extr(); } + ~sub_index() { + index_generator::unattach(rind); + index_generator::unattach(ind); + } + sub_index(const sub_index &si) : first_(si.first_), last_(si.last_), + ind(si.ind), rind(si.rind) + { index_generator::attach(rind); index_generator::attach(ind); } + sub_index &operator =(const sub_index &si) { + index_generator::unattach(rind); + index_generator::unattach(ind); + ind = si.ind; rind = si.rind; + index_generator::attach(rind); + index_generator::attach(ind); + first_ = si.first_; last_ = si.last_; + return *this; + } + }; + + struct unsorted_sub_index : public sub_index { + 
typedef basic_index base_type; + typedef base_type::const_iterator const_iterator; + + template unsorted_sub_index(IT it, IT ite) + : sub_index(it, ite) {} + template unsorted_sub_index(const CONT &c) + : sub_index(c) {} + unsorted_sub_index() {} + unsorted_sub_index(const unsorted_sub_index &si) : sub_index((const sub_index &)(si)) { } + unsorted_sub_index &operator =(const unsorted_sub_index &si) + { sub_index::operator =(si); return *this; } + void swap(size_type i, size_type j) { + GMM_ASSERT2(ind->nb_ref <= 1, "Operation not allowed on this index"); + if (rind) rind->swap((*ind)[i], (*ind)[j]); + ind->swap(i, j); + } + }; + + inline std::ostream &operator << (std::ostream &o, const sub_index &si) { + o << "sub_index("; + if (si.size() != 0) o << si.index(0); + for (size_type i = 1; i < si.size(); ++i) o << ", " << si.index(i); + o << ")"; + return o; + } + + struct sub_interval { + size_type min, max; + + size_type size(void) const { return max - min; } + size_type first(void) const { return min; } + size_type last(void) const { return max; } + size_type index(size_type i) const { return min + i; } + size_type step(void) const { return 1; } + size_type rindex(size_type i) const + { if (i >= min && i < max) return i - min; return size_type(-1); } + sub_interval(size_type mi, size_type l) : min(mi), max(mi+l) {} + sub_interval() {} + }; + + inline std::ostream &operator << (std::ostream &o, const sub_interval &si) + { o << "sub_interval(" << si.min << ", " << si.size() << ")"; return o; } + + struct sub_slice { + size_type min, max, N; + + size_type size(void) const { return (max - min) / N; } + size_type first(void) const { return min; } + size_type last(void) const { return (min == max) ? 
max : max+1-N; } + size_type step(void) const { return N; } + size_type index(size_type i) const { return min + N * i; } + size_type rindex(size_type i) const { + if (i >= min && i < max) + { size_type j = (i - min); if (j % N == 0) return j / N; } + return size_type(-1); + } + sub_slice(size_type mi, size_type l, size_type n) + : min(mi), max(mi+l*n), N(n) {} + sub_slice(void) {} + }; + + inline std::ostream &operator << (std::ostream &o, const sub_slice &si) { + o << "sub_slice(" << si.min << ", " << si.size() << ", " << si.step() + << ")"; return o; + } + + template struct index_is_sorted + { typedef linalg_true bool_type; }; + template<> struct index_is_sorted + { typedef linalg_false bool_type; }; + +} + +#endif // GMM_SUB_INDEX_H__ diff --git a/gmm/gmm_sub_matrix.h b/gmm/gmm_sub_matrix.h new file mode 100644 index 000000000..e79883c31 --- /dev/null +++ b/gmm/gmm_sub_matrix.h @@ -0,0 +1,406 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
+ + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_sub_matrix.h + @author Yves Renard + @date October 13, 2002. + @brief Generic sub-matrices. +*/ + +#ifndef GMM_SUB_MATRIX_H__ +#define GMM_SUB_MATRIX_H__ + +#include "gmm_sub_vector.h" + +namespace gmm { + + /* ********************************************************************* */ + /* sub row matrices type */ + /* ********************************************************************* */ + + template + struct gen_sub_row_matrix { + typedef gen_sub_row_matrix this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_row_iterator, typename linalg_traits::row_iterator, + PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + SUBI1 si1; + SUBI2 si2; + iterator begin_; + porigin_type origin; + + reference operator()(size_type i, size_type j) const + { return linalg_traits::access(begin_ + si1.index(i), si2.index(j)); } + + size_type nrows(void) const { return si1.size(); } + size_type ncols(void) const { return si2.size(); } + + gen_sub_row_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2) + : si1(s1), si2(s2), begin_(mat_row_begin(m)), + origin(linalg_origin(m)) {} + gen_sub_row_matrix() {} + 
gen_sub_row_matrix(const gen_sub_row_matrix &cr) : + si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {} + }; + + template + struct gen_sub_row_matrix_iterator { + typedef gen_sub_row_matrix this_type; + typedef typename modifiable_pointer::pointer MPT; + typedef typename std::iterator_traits::value_type M; + typedef typename select_ref + ::const_row_iterator, typename linalg_traits::row_iterator, + PT>::ref_type ITER; + typedef ITER value_type; + typedef ITER *pointer; + typedef ITER &reference; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef std::random_access_iterator_tag iterator_category; + typedef gen_sub_row_matrix_iterator iterator; + + ITER it; + SUBI1 si1; + SUBI2 si2; + size_type ii; + + iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; } + iterator operator --(int) { iterator tmp = *this; ii--; return tmp; } + iterator &operator ++() { ii++; return *this; } + iterator &operator --() { ii--; return *this; } + iterator &operator +=(difference_type i) { ii += i; return *this; } + iterator &operator -=(difference_type i) { ii -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const { return ii - i.ii; } + + ITER operator *() const { return it + si1.index(ii); } + ITER operator [](int i) { return it + si1.index(ii+i); } + + bool operator ==(const iterator &i) const { return (ii == i.ii); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (ii < i.ii); } + + gen_sub_row_matrix_iterator(void) {} + gen_sub_row_matrix_iterator(const + gen_sub_row_matrix_iterator &itm) + : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {} + gen_sub_row_matrix_iterator(const ITER &iter, const SUBI1 &s1, + const SUBI2 &s2, size_type i) + : 
it(iter), si1(s1), si2(s2), ii(i) { } + + }; + + template + struct linalg_traits > { + typedef gen_sub_row_matrix this_type; + typedef typename std::iterator_traits::value_type M; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type const_col_iterator; + typedef typename sub_vector_type::const_sub_row_type>::t *, SUBI2>::vector_type + const_sub_row_type; + typedef typename select_ref::sub_row_type>::t *, + SUBI2>::vector_type, PT>::ref_type sub_row_type; + typedef gen_sub_row_matrix_iterator::pointer, + SUBI1, SUBI2> const_row_iterator; + typedef typename select_ref, PT>::ref_type + row_iterator; + typedef typename linalg_traits::storage_type + storage_type; + typedef row_major sub_orientation; + typedef linalg_true index_sorted; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_sub_row_type row(const const_row_iterator &it) + { return const_sub_row_type(linalg_traits::row(*it), it.si2); } + static sub_row_type row(const row_iterator &it) + { return sub_row_type(linalg_traits::row(*it), it.si2); } + static const_row_iterator row_begin(const this_type &m) + { return const_row_iterator(m.begin_, m.si1, m.si2, 0); } + static row_iterator row_begin(this_type &m) + { return row_iterator(m.begin_, m.si1, m.si2, 0); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m.begin_, m.si1, m.si2, m.nrows()); } + static row_iterator row_end(this_type &m) + { return row_iterator(m.begin_, m.si1, m.si2, 
m.nrows()); } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &m) { + row_iterator it = mat_row_begin(m), ite = mat_row_end(m); + for (; it != ite; ++it) clear(row(it)); + } + static value_type access(const const_row_iterator &itrow, size_type i) + { return linalg_traits::access(*itrow, itrow.si2.index(i)); } + static reference access(const row_iterator &itrow, size_type i) + { return linalg_traits::access(*itrow, itrow.si2.index(i)); } + }; + + template + std::ostream &operator <<(std::ostream &o, + const gen_sub_row_matrix& m) + { gmm::write(o,m); return o; } + + + /* ********************************************************************* */ + /* sub column matrices type */ + /* ********************************************************************* */ + + template + struct gen_sub_col_matrix { + typedef gen_sub_col_matrix this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_col_iterator, typename linalg_traits::col_iterator, + PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + SUBI1 si1; + SUBI2 si2; + iterator begin_; + porigin_type origin; + + reference operator()(size_type i, size_type j) const + { return linalg_traits::access(begin_ + si2.index(j), si1.index(i)); } + + size_type nrows(void) const { return si1.size(); } + size_type ncols(void) const { return si2.size(); } + + gen_sub_col_matrix(ref_M m, const SUBI1 &s1, const SUBI2 &s2) + : si1(s1), si2(s2), begin_(mat_col_begin(m)), + origin(linalg_origin(m)) {} + gen_sub_col_matrix() {} + gen_sub_col_matrix(const gen_sub_col_matrix &cr) : + si1(cr.si1), si2(cr.si2), begin_(cr.begin_),origin(cr.origin) {} + }; + + template + struct gen_sub_col_matrix_iterator { + typedef 
gen_sub_col_matrix this_type; + typedef typename modifiable_pointer::pointer MPT; + typedef typename std::iterator_traits::value_type M; + typedef typename select_ref::const_col_iterator, + typename linalg_traits::col_iterator, + PT>::ref_type ITER; + typedef ITER value_type; + typedef ITER *pointer; + typedef ITER &reference; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef std::random_access_iterator_tag iterator_category; + typedef gen_sub_col_matrix_iterator iterator; + + ITER it; + SUBI1 si1; + SUBI2 si2; + size_type ii; + + iterator operator ++(int) { iterator tmp = *this; ii++; return tmp; } + iterator operator --(int) { iterator tmp = *this; ii--; return tmp; } + iterator &operator ++() { ii++; return *this; } + iterator &operator --() { ii--; return *this; } + iterator &operator +=(difference_type i) { ii += i; return *this; } + iterator &operator -=(difference_type i) { ii -= i; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const { return ii - i.ii; } + + ITER operator *() const { return it + si2.index(ii); } + ITER operator [](int i) { return it + si2.index(ii+i); } + + bool operator ==(const iterator &i) const { return (ii == i.ii); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (ii < i.ii); } + + gen_sub_col_matrix_iterator(void) {} + gen_sub_col_matrix_iterator(const + gen_sub_col_matrix_iterator &itm) + : it(itm.it), si1(itm.si1), si2(itm.si2), ii(itm.ii) {} + gen_sub_col_matrix_iterator(const ITER &iter, const SUBI1 &s1, + const SUBI2 &s2, size_type i) + : it(iter), si1(s1), si2(s2), ii(i) { } + }; + + template + struct linalg_traits > { + typedef gen_sub_col_matrix this_type; + typedef typename std::iterator_traits::value_type M; + typedef 
typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type const_row_iterator; + typedef typename sub_vector_type::const_sub_col_type>::t *, SUBI1>::vector_type const_sub_col_type; + typedef typename select_ref::sub_col_type>::t *, SUBI1>::vector_type, PT>::ref_type sub_col_type; + typedef gen_sub_col_matrix_iterator::pointer, + SUBI1, SUBI2> const_col_iterator; + typedef typename select_ref, PT>::ref_type + col_iterator; + typedef col_major sub_orientation; + typedef linalg_true index_sorted; + typedef typename linalg_traits::storage_type + storage_type; + static size_type nrows(const this_type &m) { return m.nrows(); } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_sub_col_type col(const const_col_iterator &it) + { return const_sub_col_type(linalg_traits::col(*it), it.si1); } + static sub_col_type col(const col_iterator &it) + { return sub_col_type(linalg_traits::col(*it), it.si1); } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m.begin_, m.si1, m.si2, 0); } + static col_iterator col_begin(this_type &m) + { return col_iterator(m.begin_, m.si1, m.si2, 0); } + static const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m.begin_, m.si1, m.si2, m.ncols()); } + static col_iterator col_end(this_type &m) + { return col_iterator(m.begin_, m.si1, m.si2, m.ncols()); } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &m) { + 
col_iterator it = mat_col_begin(m), ite = mat_col_end(m); + for (; it != ite; ++it) clear(col(it)); + } + static value_type access(const const_col_iterator &itcol, size_type i) + { return linalg_traits::access(*itcol, itcol.si1.index(i)); } + static reference access(const col_iterator &itcol, size_type i) + { return linalg_traits::access(*itcol, itcol.si1.index(i)); } + }; + + template std::ostream &operator << + (std::ostream &o, const gen_sub_col_matrix& m) + { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* sub matrices */ + /* ******************************************************************** */ + + template + struct sub_matrix_type_ { + typedef abstract_null_type return_type; + }; + template + struct sub_matrix_type_ + { typedef gen_sub_col_matrix matrix_type; }; + template + struct sub_matrix_type_ + { typedef gen_sub_row_matrix matrix_type; }; + template + struct sub_matrix_type { + typedef typename std::iterator_traits::value_type M; + typedef typename sub_matrix_type_::sub_orientation>::potype>::matrix_type matrix_type; + }; + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + M *>::return_type + sub_matrix(M &m, const SUBI1 &si1, const SUBI2 &si2) { + GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m), + "sub matrix too large"); + return typename select_return::matrix_type, typename sub_matrix_type + ::matrix_type, M *>::return_type(linalg_cast(m), si1, si2); + } + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + const M *>::return_type + sub_matrix(const M &m, const SUBI1 &si1, const SUBI2 &si2) { + GMM_ASSERT2(si1.last() <= mat_nrows(m) && si2.last() <= mat_ncols(m), + "sub matrix too large"); + return typename select_return::matrix_type, typename sub_matrix_type + ::matrix_type, const M *>::return_type(linalg_cast(m), si1, si2); + } + + template inline + typename 
select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + M *>::return_type + sub_matrix(M &m, const SUBI1 &si1) { + GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m), + "sub matrix too large"); + return typename select_return::matrix_type, typename sub_matrix_type + ::matrix_type, M *>::return_type(linalg_cast(m), si1, si1); + } + + template inline + typename select_return + ::matrix_type, typename sub_matrix_type::matrix_type, + const M *>::return_type + sub_matrix(const M &m, const SUBI1 &si1) { + GMM_ASSERT2(si1.last() <= mat_nrows(m) && si1.last() <= mat_ncols(m), + "sub matrix too large"); + return typename select_return::matrix_type, typename sub_matrix_type + ::matrix_type, const M *>::return_type(linalg_cast(m), si1, si1); + } + +} + +#endif // GMM_SUB_MATRIX_H__ diff --git a/gmm/gmm_sub_vector.h b/gmm/gmm_sub_vector.h new file mode 100644 index 000000000..d35f908d5 --- /dev/null +++ b/gmm/gmm_sub_vector.h @@ -0,0 +1,560 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_sub_vector.h + @author Yves Renard + @date October 13, 2002. + @brief Generic sub-vectors. +*/ + +#ifndef GMM_SUB_VECTOR_H__ +#define GMM_SUB_VECTOR_H__ + +#include "gmm_interface.h" +#include "gmm_sub_index.h" + +namespace gmm { + + /* ********************************************************************* */ + /* sparse sub-vectors */ + /* ********************************************************************* */ + + template + struct sparse_sub_vector_iterator { + + IT itb, itbe; + SUBI si; + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::pointer pointer; + typedef typename traits_type::reference reference; + typedef typename traits_type::difference_type difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef size_t size_type; + typedef sparse_sub_vector_iterator iterator; + + size_type index(void) const { return si.rindex(itb.index()); } + void forward(void); + void backward(void); + iterator &operator ++() + { ++itb; forward(); return *this; } + iterator operator ++(int) { iterator tmp = 
*this; ++(*this); return tmp; } + iterator &operator --() + { --itb; backward(); return *this; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + reference operator *() const { return *itb; } + + bool operator ==(const iterator &i) const { return itb == i.itb; } + bool operator !=(const iterator &i) const { return !(i == *this); } + + sparse_sub_vector_iterator(void) {} + sparse_sub_vector_iterator(const IT &it, const IT &ite, const SUBI &s) + : itb(it), itbe(ite), si(s) { forward(); } + sparse_sub_vector_iterator(const sparse_sub_vector_iterator &it) : itb(it.itb), itbe(it.itbe), si(it.si) {} + }; + + template + void sparse_sub_vector_iterator::forward(void) + { while(itb!=itbe && index()==size_type(-1)) { ++itb; } } + + template + void sparse_sub_vector_iterator::backward(void) + { while(itb!=itbe && index()==size_type(-1)) --itb; } + + template struct sparse_sub_vector { + typedef sparse_sub_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * CPT; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + SUBI si; + + size_type size(void) const { return si.size(); } + + reference operator[](size_type i) const + { return linalg_traits::access(origin, begin_, end_, si.index(i)); } + + sparse_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)), + end_(vect_end(v)), origin(linalg_origin(v)), si(s) {} + sparse_sub_vector(const V &v, const SUBI &s) + : begin_(vect_begin(const_cast(v))), + end_(vect_end(const_cast(v))), + origin(linalg_origin(const_cast(v))), si(s) {} + sparse_sub_vector() {} + sparse_sub_vector(const sparse_sub_vector &cr) + : begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {} + }; + + template inline + void set_to_begin(sparse_sub_vector_iterator &it, + ORG 
o, sparse_sub_vector *, + linalg_modifiable) { + typedef sparse_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_begin(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itbe, o, typename linalg_traits::pV(), ref_t()); + it.forward(); + } + template inline + void set_to_begin(sparse_sub_vector_iterator &it, + ORG o, const sparse_sub_vector *, + linalg_modifiable) { + typedef sparse_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_begin(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itbe, o, typename linalg_traits::pV(), ref_t()); + it.forward(); + } + + template inline + void set_to_end(sparse_sub_vector_iterator &it, + ORG o, sparse_sub_vector *, linalg_modifiable) { + typedef sparse_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_end(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itbe, o, typename linalg_traits::pV(), ref_t()); + it.forward(); + } + template inline + void set_to_end(sparse_sub_vector_iterator &it, + ORG o, const sparse_sub_vector *, + linalg_modifiable) { + typedef sparse_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + set_to_end(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itbe, o, typename linalg_traits::pV(), ref_t()); + it.forward(); + } + + template + struct linalg_traits > { + typedef sparse_sub_vector this_type; + typedef this_type * pthis_type; + typedef PT pV; + typedef typename std::iterator_traits::value_type V; + typedef typename linalg_and::bool_type, + typename linalg_traits::index_sorted>::bool_type index_sorted; + typedef typename linalg_traits::is_reference V_reference; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename linalg_traits::value_type value_type; + 
typedef typename select_ref::reference, PT>::ref_type reference; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type pre_iterator; + typedef typename select_ref, + PT>::ref_type iterator; + typedef sparse_sub_vector_iterator + ::const_iterator, pre_iterator, SUBI> const_iterator; + typedef abstract_sparse storage_type; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { + iterator it; + it.itb = v.begin_; it.itbe = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_begin(it, v.origin, pthis_type(), is_reference()); + else it.forward(); + return it; + } + static const_iterator begin(const this_type &v) { + const_iterator it; it.itb = v.begin_; it.itbe = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + { set_to_begin(it, v.origin, pthis_type(), is_reference()); } + else it.forward(); + return it; + } + static iterator end(this_type &v) { + iterator it; + it.itb = v.end_; it.itbe = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + else it.forward(); + return it; + } + static const_iterator end(const this_type &v) { + const_iterator it; it.itb = v.end_; it.itbe = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + else it.forward(); + return it; + } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type* o, const iterator &begin_, + const iterator &end_) { + std::deque ind; + iterator it = begin_; + for (; it != end_; ++it) ind.push_front(it.index()); + for (; !(ind.empty()); ind.pop_back()) + access(o, begin_, end_, ind.back()) = value_type(0); + } + static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); } + static value_type access(const origin_type *o, 
const const_iterator &it, + const const_iterator &ite, size_type i) + { return linalg_traits::access(o, it.itb, ite.itb, it.si.index(i)); } + static reference access(origin_type *o, const iterator &it, + const iterator &ite, size_type i) + { return linalg_traits::access(o, it.itb, ite.itb, it.si.index(i)); } + }; + + template std::ostream &operator << + (std::ostream &o, const sparse_sub_vector& m) + { gmm::write(o,m); return o; } + + /* ********************************************************************* */ + /* skyline sub-vectors */ + /* ********************************************************************* */ + + template + struct skyline_sub_vector_iterator { + + IT itb; + SUBI si; + + typedef std::iterator_traits traits_type; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::pointer pointer; + typedef typename traits_type::reference reference; + typedef typename traits_type::difference_type difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef size_t size_type; + typedef skyline_sub_vector_iterator iterator; + + size_type index(void) const + { return (itb.index() - si.min + si.step() - 1) / si.step(); } + void backward(void); + iterator &operator ++() + { itb += si.step(); return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator &operator --() + { itb -= si.step(); return *this; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + iterator &operator +=(difference_type i) + { itb += si.step() * i; return *this; } + iterator &operator -=(difference_type i) + { itb -= si.step() * i; return *this; } + iterator operator +(difference_type i) const + { iterator ii = *this; return (ii += i); } + iterator operator -(difference_type i) const + { iterator ii = *this; return (ii -= i); } + difference_type operator -(const iterator &i) const + { return (itb - i.itb) / si.step(); } + + reference operator *() const { return 
*itb; } + reference operator [](int ii) { return *(itb + ii * si.step()); } + + bool operator ==(const iterator &i) const { return index() == i.index();} + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return index() < i.index();} + + skyline_sub_vector_iterator(void) {} + skyline_sub_vector_iterator(const IT &it, const SUBI &s) + : itb(it), si(s) {} + skyline_sub_vector_iterator(const skyline_sub_vector_iterator &it) : itb(it.itb), si(it.si) {} + }; + + template + void update_for_sub_skyline(IT &it, IT &ite, const SUBI &si) { + if (it.index() >= si.max || ite.index() <= si.min) { it = ite; return; } + ptrdiff_t dec1 = si.min - it.index(), dec2 = ite.index() - si.max; + it += (dec1 < 0) ? ((si.step()-((-dec1) % si.step())) % si.step()) : dec1; + ite -= (dec2 < 0) ? -((-dec2) % si.step()) : dec2; + } + + template struct skyline_sub_vector { + typedef skyline_sub_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * pV; + typedef typename select_ref::const_iterator, + typename linalg_traits::iterator, PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + SUBI si; + + size_type size(void) const { return si.size(); } + + reference operator[](size_type i) const + { return linalg_traits::access(origin, begin_, end_, si.index(i)); } + + skyline_sub_vector(V &v, const SUBI &s) : begin_(vect_begin(v)), + end_(vect_end(v)), origin(linalg_origin(v)), si(s) { + update_for_sub_skyline(begin_, end_, si); + } + skyline_sub_vector(const V &v, const SUBI &s) + : begin_(vect_begin(const_cast(v))), + end_(vect_end(const_cast(v))), + origin(linalg_origin(const_cast(v))), si(s) { + update_for_sub_skyline(begin_, end_, si); + } + skyline_sub_vector() {} + skyline_sub_vector(const skyline_sub_vector &cr) + : 
begin_(cr.begin_),end_(cr.end_),origin(cr.origin), si(cr.si) {} + }; + + template inline + void set_to_begin(skyline_sub_vector_iterator &it, + ORG o, skyline_sub_vector *, + linalg_modifiable) { + typedef skyline_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + IT itbe = it.itb; + set_to_begin(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(itbe, o, typename linalg_traits::pV(), ref_t()); + update_for_sub_skyline(it.itb, itbe, it.si); + } + template inline + void set_to_begin(skyline_sub_vector_iterator &it, + ORG o, const skyline_sub_vector *, + linalg_modifiable) { + typedef skyline_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + IT itbe = it.itb; + set_to_begin(it.itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(itbe, o, typename linalg_traits::pV(), ref_t()); + update_for_sub_skyline(it.itb, itbe, it.si); + } + + template inline + void set_to_end(skyline_sub_vector_iterator &it, + ORG o, skyline_sub_vector *, + linalg_modifiable) { + typedef skyline_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + IT itb = it.itb; + set_to_begin(itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itb, o, typename linalg_traits::pV(), ref_t()); + update_for_sub_skyline(itb, it.itb, it.si); + } + template inline + void set_to_end(skyline_sub_vector_iterator &it, + ORG o, const skyline_sub_vector *, + linalg_modifiable) { + typedef skyline_sub_vector VECT; + typedef typename linalg_traits::V_reference ref_t; + IT itb = it.itb; + set_to_begin(itb, o, typename linalg_traits::pV(), ref_t()); + set_to_end(it.itb, o, typename linalg_traits::pV(), ref_t()); + update_for_sub_skyline(itb, it.itb, it.si); + } + + + template + struct linalg_traits > { + typedef skyline_sub_vector this_type; + typedef this_type *pthis_type; + typedef typename std::iterator_traits::value_type V; + typedef typename linalg_traits::is_reference V_reference; + typedef typename linalg_traits::origin_type 
origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef V * pV; + typedef typename which_reference::is_reference is_reference; + typedef abstract_vector linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef typename linalg_traits::const_iterator const_V_iterator; + typedef typename linalg_traits::iterator V_iterator; + typedef typename select_ref::ref_type pre_iterator; + typedef typename select_ref, + PT>::ref_type iterator; + typedef skyline_sub_vector_iterator + const_iterator; + typedef abstract_skyline storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { + iterator it; + it.itb = v.begin_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_begin(it, v.origin, pthis_type(), is_reference()); + return it; + } + static const_iterator begin(const this_type &v) { + const_iterator it; it.itb = v.begin_; it.si = v.si; + if (!is_const_reference(is_reference())) + { set_to_begin(it, v.origin, pthis_type(), is_reference()); } + return it; + } + static iterator end(this_type &v) { + iterator it; + it.itb = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static const_iterator end(const this_type &v) { + const_iterator it; it.itb = v.end_; it.si = v.si; + if (!is_const_reference(is_reference())) + set_to_end(it, v.origin, pthis_type(), is_reference()); + return it; + } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void clear(origin_type*, const iterator &it, const iterator &ite) + { std::fill(it, ite, value_type(0)); } + static void do_clear(this_type &v) { clear(v.origin, begin(v), end(v)); } + static value_type access(const origin_type *o, const 
const_iterator &it, + const const_iterator &ite, size_type i) + { return linalg_traits::access(o, it.itb, ite.itb, it.si.index(i)); } + static reference access(origin_type *o, const iterator &it, + const iterator &ite, size_type i) + { return linalg_traits::access(o, it.itb, ite.itb, it.si.index(i)); } + }; + + template std::ostream &operator << + (std::ostream &o, const skyline_sub_vector& m) + { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* sub vector. */ + /* ******************************************************************** */ + /* sub_vector_type::vector_type is the sub vector type */ + /* returned by sub_vector(v, sub_index) */ + /************************************************************************/ + + template struct svrt_ir { + typedef abstract_null_type vector_type; + }; + + template + struct svrt_ir { + typedef typename std::iterator_traits::value_type V; + typedef typename vect_ref_type::iterator iterator; + typedef tab_ref_index_ref_with_origin vector_type; + }; + + template + struct svrt_ir { + typedef typename std::iterator_traits::value_type V; + typedef typename vect_ref_type::iterator iterator; + typedef tab_ref_index_ref_with_origin vector_type; + }; + + template + struct svrt_ir { + typedef typename std::iterator_traits::value_type V; + typedef typename vect_ref_type::iterator iterator; + typedef tab_ref_with_origin vector_type; + }; + + template + struct svrt_ir { + typedef typename std::iterator_traits::value_type V; + typedef typename vect_ref_type::iterator iterator; + typedef tab_ref_reg_spaced_with_origin vector_type; + }; + + template + struct svrt_ir { + typedef skyline_sub_vector vector_type; + }; + + template + struct svrt_ir { + typedef sparse_sub_vector vector_type; + }; + + template + struct svrt_ir { + typedef sparse_sub_vector vector_type; + }; + + + template + struct svrt_ir { + typedef sparse_sub_vector vector_type; + }; + + template + struct 
sub_vector_type { + typedef typename std::iterator_traits::value_type V; + typedef typename svrt_ir::storage_type>::vector_type vector_type; + }; + + template + typename select_return< + typename sub_vector_type::vector_type, + typename sub_vector_type::vector_type, const V *>::return_type + sub_vector(const V &v, const SUBI &si) { + GMM_ASSERT2(si.last() <= vect_size(v), + "sub vector too large, " << si.last() << " > " << vect_size(v)); + return typename select_return< + typename sub_vector_type::vector_type, + typename sub_vector_type::vector_type, const V *>::return_type + (linalg_cast(v), si); + } + + template + typename select_return< + typename sub_vector_type::vector_type, + typename sub_vector_type::vector_type, V *>::return_type + sub_vector(V &v, const SUBI &si) { + GMM_ASSERT2(si.last() <= vect_size(v), + "sub vector too large, " << si.last() << " > " << vect_size(v)); + return typename select_return< + typename sub_vector_type::vector_type, + typename sub_vector_type::vector_type, V *>::return_type + (linalg_cast(v), si); + } + +} + +#endif // GMM_SUB_VECTOR_H__ diff --git a/gmm/gmm_superlu_interface.h b/gmm/gmm_superlu_interface.h new file mode 100644 index 000000000..b732445e7 --- /dev/null +++ b/gmm/gmm_superlu_interface.h @@ -0,0 +1,410 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. 
+ This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_superlu_interface.h + @author Yves Renard + @date October 17, 2003. + @brief Interface with SuperLU (LU direct solver for sparse matrices). +*/ +#if defined(GMM_USES_SUPERLU) && !defined(GETFEM_VERSION) + +#ifndef GMM_SUPERLU_INTERFACE_H +#define GMM_SUPERLU_INTERFACE_H + +#include "gmm_kernel.h" + +typedef int int_t; + +/* because SRC/util.h defines TRUE and FALSE ... 
*/ +#ifdef TRUE +# undef TRUE +#endif +#ifdef FALSE +# undef FALSE +#endif + +#include "superlu/slu_Cnames.h" +#include "superlu/supermatrix.h" +#include "superlu/slu_util.h" + +namespace SuperLU_S { +#include "superlu/slu_sdefs.h" +} +namespace SuperLU_D { +#include "superlu/slu_ddefs.h" +} +namespace SuperLU_C { +#include "superlu/slu_cdefs.h" +} +namespace SuperLU_Z { +#include "superlu/slu_zdefs.h" +} + + + +namespace gmm { + + /* interface for Create_CompCol_Matrix */ + + inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, + float *a, int *ir, int *jc) { + SuperLU_S::sCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc, + SLU_NC, SLU_S, SLU_GE); + } + + inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, + double *a, int *ir, int *jc) { + SuperLU_D::dCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc, + SLU_NC, SLU_D, SLU_GE); + } + + inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, + std::complex *a, int *ir, int *jc) { + SuperLU_C::cCreate_CompCol_Matrix(A, m, n, nnz, (SuperLU_C::complex *)(a), + ir, jc, SLU_NC, SLU_C, SLU_GE); + } + + inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, + std::complex *a, int *ir, int *jc) { + SuperLU_Z::zCreate_CompCol_Matrix(A, m, n, nnz, + (SuperLU_Z::doublecomplex *)(a), ir, jc, + SLU_NC, SLU_Z, SLU_GE); + } + + /* interface for Create_Dense_Matrix */ + + inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, float *a, int k) + { SuperLU_S::sCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_S, SLU_GE); } + inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, double *a, int k) + { SuperLU_D::dCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_D, SLU_GE); } + inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, + std::complex *a, int k) { + SuperLU_C::cCreate_Dense_Matrix(A, m, n, (SuperLU_C::complex *)(a), + k, SLU_DN, SLU_C, SLU_GE); + } + inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, + std::complex *a, int k) { 
+ SuperLU_Z::zCreate_Dense_Matrix(A, m, n, (SuperLU_Z::doublecomplex *)(a), + k, SLU_DN, SLU_Z, SLU_GE); + } + + /* interface for gssv */ + +#define DECL_GSSV(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \ + inline void SuperLU_gssv(superlu_options_t *options, SuperMatrix *A, int *p, \ + int *q, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, \ + SuperLUStat_t *stats, int *info, KEYTYPE) { \ + NAMESPACE::FNAME(options, A, p, q, L, U, B, stats, info); \ + } + + DECL_GSSV(SuperLU_S,sgssv,float,float) + DECL_GSSV(SuperLU_C,cgssv,float,std::complex) + DECL_GSSV(SuperLU_D,dgssv,double,double) + DECL_GSSV(SuperLU_Z,zgssv,double,std::complex) + + /* interface for gssvx */ + +#define DECL_GSSVX(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \ + inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ + int *perm_c, int *perm_r, int *etree, char *equed, \ + FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L, \ + SuperMatrix *U, void *work, int lwork, \ + SuperMatrix *B, SuperMatrix *X, \ + FLOATTYPE *recip_pivot_growth, \ + FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ + SuperLUStat_t *stats, int *info, KEYTYPE) { \ + NAMESPACE::mem_usage_t mem_usage; \ + NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L, \ + U, work, lwork, B, X, recip_pivot_growth, rcond, \ + ferr, berr, &mem_usage, stats, info); \ + return mem_usage.for_lu; /* bytes used by the factor storage */ \ + } + + DECL_GSSVX(SuperLU_S,sgssvx,float,float) + DECL_GSSVX(SuperLU_C,cgssvx,float,std::complex) + DECL_GSSVX(SuperLU_D,dgssvx,double,double) + DECL_GSSVX(SuperLU_Z,zgssvx,double,std::complex) + + /* ********************************************************************* */ + /* SuperLU solve interface */ + /* ********************************************************************* */ + + template + int SuperLU_solve(const MAT &A, const VECTX &X_, const VECTB &B, + double& rcond_, int permc_spec = 3) { + VECTX &X = const_cast(X_); + /* + * Get column permutation vector perm_c[], according to permc_spec: 
+ * permc_spec = 0: use the natural ordering + * permc_spec = 1: use minimum degree ordering on structure of A'*A + * permc_spec = 2: use minimum degree ordering on structure of A'+A + * permc_spec = 3: use approximate minimum degree column ordering + */ + typedef typename linalg_traits::value_type T; + typedef typename number_traits::magnitude_type R; + + int m = mat_nrows(A), n = mat_ncols(A), nrhs = 1, info = 0; + + csc_matrix csc_A(m, n); gmm::copy(A, csc_A); + std::vector rhs(m), sol(m); + gmm::copy(B, rhs); + + int nz = nnz(csc_A); + if ((2 * nz / n) >= m) + GMM_WARNING2("CAUTION : it seems that SuperLU has a problem" + " for nearly dense sparse matrices"); + + superlu_options_t options; + set_default_options(&options); + options.ColPerm = NATURAL; + options.PrintStat = NO; + options.ConditionNumber = YES; + switch (permc_spec) { + case 1 : options.ColPerm = MMD_ATA; break; + case 2 : options.ColPerm = MMD_AT_PLUS_A; break; + case 3 : options.ColPerm = COLAMD; break; + } + SuperLUStat_t stat; + StatInit(&stat); + + SuperMatrix SA, SL, SU, SB, SX; // SuperLU format. 
+ Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])), + (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0]))); /* values are passed as T* so the Create_CompCol_Matrix overload (and SuperLU Dtype) matches the scalar type used for SB, SX and the gssvx dispatch */ + Create_Dense_Matrix(&SB, m, nrhs, &rhs[0], m); + Create_Dense_Matrix(&SX, m, nrhs, &sol[0], m); + memset(&SL,0,sizeof SL); + memset(&SU,0,sizeof SU); + + std::vector etree(n); + char equed[] = "B"; + std::vector Rscale(m),Cscale(n); // row scale factors + std::vector ferr(nrhs), berr(nrhs); + R recip_pivot_gross, rcond; + std::vector perm_r(m), perm_c(n); + + SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0], + &etree[0] /* output */, equed /* output */, + &Rscale[0] /* row scale factors (output) */, + &Cscale[0] /* col scale factors (output) */, + &SL /* fact L (output)*/, &SU /* fact U (output)*/, + NULL /* work */, + 0 /* lwork: superlu auto allocates (input) */, + &SB /* rhs */, &SX /* solution */, + &recip_pivot_gross /* reciprocal pivot growth */ + /* factor max_j( norm(A_j)/norm(U_j) ). */, + &rcond /*estimate of the reciprocal condition */ + /* number of the matrix A after equilibration */, + &ferr[0] /* estimated forward error */, + &berr[0] /* relative backward error */, + &stat, &info, T()); + rcond_ = rcond; + Destroy_SuperMatrix_Store(&SB); + Destroy_SuperMatrix_Store(&SX); + Destroy_SuperMatrix_Store(&SA); + Destroy_SuperNode_Matrix(&SL); + Destroy_CompCol_Matrix(&SU); + StatFree(&stat); + GMM_ASSERT1(info >= 0, "SuperLU solve failed: info =" << info); + if (info > 0) GMM_WARNING1("SuperLU solve failed: info =" << info); + gmm::copy(sol, X); + return info; + } + + template class SuperLU_factor { + typedef typename number_traits::magnitude_type R; + + csc_matrix csc_A; + mutable SuperMatrix SA, SL, SB, SU, SX; + mutable SuperLUStat_t stat; + mutable superlu_options_t options; + float memory_used; + mutable std::vector etree, perm_r, perm_c; + mutable std::vector Rscale, Cscale; + mutable std::vector ferr, berr; + mutable std::vector rhs; + mutable std::vector sol; + mutable bool is_init; + mutable char equed; + + public : + enum { 
LU_NOTRANSP, LU_TRANSP, LU_CONJUGATED }; + void free_supermatrix(void); + template void build_with(const MAT &A, int permc_spec = 3); + template + /* transp = LU_NOTRANSP -> solves Ax = B + transp = LU_TRANSP -> solves A'x = B + transp = LU_CONJUGATED -> solves conj(A)X = B */ + void solve(const VECTX &X_, const VECTB &B, int transp=LU_NOTRANSP) const; + SuperLU_factor(void) { is_init = false; } + SuperLU_factor(const SuperLU_factor& other) { + GMM_ASSERT2(!(other.is_init), + "copy of initialized SuperLU_factor is forbidden"); + is_init = false; + } + SuperLU_factor& operator=(const SuperLU_factor& other) { + GMM_ASSERT2(!(other.is_init) && !is_init, + "assignment of initialized SuperLU_factor is forbidden"); + return *this; + } + ~SuperLU_factor() { free_supermatrix(); } + float memsize() { return memory_used; } + }; + + + /* Releases the SuperLU stores owned by this factorization (no-op when nothing was built). */ template void SuperLU_factor::free_supermatrix(void) { + if (is_init) { + if (SB.Store) Destroy_SuperMatrix_Store(&SB); + if (SX.Store) Destroy_SuperMatrix_Store(&SX); + if (SA.Store) Destroy_SuperMatrix_Store(&SA); + if (SL.Store) Destroy_SuperNode_Matrix(&SL); + if (SU.Store) Destroy_CompCol_Matrix(&SU); is_init = false; /* stores are gone: a rebuild that throws must not lead to a second release in the destructor */ + } + } + + + template template + void SuperLU_factor::build_with(const MAT &A, int permc_spec) { + /* + * Get column permutation vector perm_c[], according to permc_spec: + * permc_spec = 0: use the natural ordering + * permc_spec = 1: use minimum degree ordering on structure of A'*A + * permc_spec = 2: use minimum degree ordering on structure of A'+A + * permc_spec = 3: use approximate minimum degree column ordering + */ + free_supermatrix(); + int n = mat_nrows(A), m = mat_ncols(A), info = 0; + csc_A.init_with(A); + + rhs.resize(m); sol.resize(m); + gmm::clear(rhs); + int nz = nnz(csc_A); + + set_default_options(&options); + options.ColPerm = NATURAL; + options.PrintStat = NO; + options.ConditionNumber = NO; + switch (permc_spec) { + case 1 : options.ColPerm = MMD_ATA; break; + case 2 : options.ColPerm = MMD_AT_PLUS_A; break; + case 3 : 
options.ColPerm = COLAMD; break; + } + StatInit(&stat); + + Create_CompCol_Matrix(&SA, m, n, nz, (T *)(&(csc_A.pr[0])), + (int *)(&(csc_A.ir[0])), (int *)(&(csc_A.jc[0]))); /* values are passed as T* so the Create_CompCol_Matrix overload (and SuperLU Dtype) matches the scalar type used for SB, SX and the gssvx dispatch */ + + Create_Dense_Matrix(&SB, m, 0, &rhs[0], m); + Create_Dense_Matrix(&SX, m, 0, &sol[0], m); + memset(&SL,0,sizeof SL); + memset(&SU,0,sizeof SU); + equed = 'B'; + Rscale.resize(m); Cscale.resize(n); etree.resize(n); + ferr.resize(1); berr.resize(1); + R recip_pivot_gross, rcond; + perm_r.resize(m); perm_c.resize(n); + memory_used = SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0], + &etree[0] /* output */, &equed /* output */, + &Rscale[0] /* row scale factors (output) */, + &Cscale[0] /* col scale factors (output) */, + &SL /* fact L (output)*/, &SU /* fact U (output)*/, + NULL /* work */, + 0 /* lwork: superlu auto allocates (input) */, + &SB /* rhs */, &SX /* solution */, + &recip_pivot_gross /* reciprocal pivot growth */ + /* factor max_j( norm(A_j)/norm(U_j) ). */, + &rcond /*estimate of the reciprocal condition */ + /* number of the matrix A after equilibration */, + &ferr[0] /* estimated forward error */, + &berr[0] /* relative backward error */, + &stat, &info, T()); + + Destroy_SuperMatrix_Store(&SB); + Destroy_SuperMatrix_Store(&SX); + Create_Dense_Matrix(&SB, m, 1, &rhs[0], m); + Create_Dense_Matrix(&SX, m, 1, &sol[0], m); + StatFree(&stat); + + GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info); + is_init = true; + } + + template template + void SuperLU_factor::solve(const VECTX &X_, const VECTB &B, + int transp) const { + VECTX &X = const_cast(X_); + gmm::copy(B, rhs); + options.Fact = FACTORED; + options.IterRefine = NOREFINE; + switch (transp) { + case LU_NOTRANSP: options.Trans = NOTRANS; break; + case LU_TRANSP: options.Trans = TRANS; break; + case LU_CONJUGATED: options.Trans = CONJ; break; + default: GMM_ASSERT1(false, "invalid value for transposition option"); + } + StatInit(&stat); + int info = 0; + R recip_pivot_gross, rcond; + SuperLU_gssvx(&options, 
&SA, &perm_c[0], &perm_r[0], + &etree[0] /* output */, &equed /* output */, + &Rscale[0] /* row scale factors (output) */, + &Cscale[0] /* col scale factors (output) */, + &SL /* fact L (output)*/, &SU /* fact U (output)*/, + NULL /* work */, + 0 /* lwork: superlu auto allocates (input) */, + &SB /* rhs */, &SX /* solution */, + &recip_pivot_gross /* reciprocal pivot growth */ + /* factor max_j( norm(A_j)/norm(U_j) ). */, + &rcond /*estimate of the reciprocal condition */ + /* number of the matrix A after equilibration */, + &ferr[0] /* estimated forward error */, + &berr[0] /* relative backward error */, + &stat, &info, T()); + StatFree(&stat); + GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info); + gmm::copy(sol, X); + } + + template inline + void mult(const SuperLU_factor& P, const V1 &v1, const V2 &v2) { + P.solve(v2,v1); + } + + template inline + void transposed_mult(const SuperLU_factor& P,const V1 &v1,const V2 &v2) { + P.solve(v2, v1, SuperLU_factor::LU_TRANSP); + } + +} + + +#endif // GMM_SUPERLU_INTERFACE_H + +#endif // GMM_USES_SUPERLU diff --git a/gmm/gmm_transposed.h b/gmm/gmm_transposed.h new file mode 100644 index 000000000..d9b6a8182 --- /dev/null +++ b/gmm/gmm_transposed.h @@ -0,0 +1,244 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_transposed.h + @author Yves Renard + @date November 10, 2002. 
+ @brief Generic transposed matrices +*/ +#ifndef GMM_TRANSPOSED_H__ +#define GMM_TRANSPOSED_H__ + +#include "gmm_def.h" + +namespace gmm { + + /* ********************************************************************* */ + /* transposed reference */ + /* ********************************************************************* */ + + template struct transposed_row_ref { + + typedef transposed_row_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_col_iterator, typename linalg_traits + ::col_iterator, PT>::ref_type iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type nr, nc; + + transposed_row_ref(ref_M m) + : begin_(mat_row_begin(m)), end_(mat_row_end(m)), + origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} + + transposed_row_ref(const transposed_row_ref &cr) : + begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {} + + reference operator()(size_type i, size_type j) const + { return linalg_traits::access(begin_+j, i); } + }; + + template struct linalg_traits > { + typedef transposed_row_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_row_iterator; + typedef typename 
linalg_traits::const_sub_row_type const_sub_col_type; + typedef typename select_ref::sub_row_type, PT>::ref_type sub_col_type; + typedef typename linalg_traits::const_row_iterator const_col_iterator; + typedef typename select_ref::row_iterator, PT>::ref_type col_iterator; + typedef col_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type ncols(const this_type &v) { return v.nc; } + static size_type nrows(const this_type &v) { return v.nr; } + static const_sub_col_type col(const const_col_iterator &it) + { return linalg_traits::row(it); } + static sub_col_type col(const col_iterator &it) + { return linalg_traits::row(it); } + static col_iterator col_begin(this_type &m) { return m.begin_; } + static col_iterator col_end(this_type &m) { return m.end_; } + static const_col_iterator col_begin(const this_type &m) + { return m.begin_; } + static const_col_iterator col_end(const this_type &m) { return m.end_; } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &v); + static value_type access(const const_col_iterator &itcol, size_type i) + { return linalg_traits::access(itcol, i); } + static reference access(const col_iterator &itcol, size_type i) + { return linalg_traits::access(itcol, i); } + }; + + template + void linalg_traits >::do_clear(this_type &v) { + col_iterator it = mat_col_begin(v), ite = mat_col_end(v); + for (; it != ite; ++it) clear(col(it)); + } + + template std::ostream &operator << + (std::ostream &o, const transposed_row_ref& m) + { gmm::write(o,m); return o; } + + template struct transposed_col_ref { + + typedef transposed_col_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef M * CPT; + typedef typename std::iterator_traits::reference ref_M; + typedef typename select_ref + ::const_row_iterator, typename linalg_traits + ::row_iterator, PT>::ref_type 
iterator; + typedef typename linalg_traits::reference reference; + typedef typename linalg_traits::porigin_type porigin_type; + + iterator begin_, end_; + porigin_type origin; + size_type nr, nc; + + transposed_col_ref(ref_M m) + : begin_(mat_col_begin(m)), end_(mat_col_end(m)), + origin(linalg_origin(m)), nr(mat_ncols(m)), nc(mat_nrows(m)) {} + + transposed_col_ref(const transposed_col_ref &cr) : + begin_(cr.begin_),end_(cr.end_), origin(cr.origin),nr(cr.nr),nc(cr.nc) {} + + reference operator()(size_type i, size_type j) const + { return linalg_traits::access(begin_+i, j); } + }; + + template struct linalg_traits > { + typedef transposed_col_ref this_type; + typedef typename std::iterator_traits::value_type M; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef typename linalg_traits::storage_type storage_type; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_col_iterator; + typedef typename linalg_traits::const_sub_col_type const_sub_row_type; + typedef typename select_ref::sub_col_type, PT>::ref_type sub_row_type; + typedef typename linalg_traits::const_col_iterator const_row_iterator; + typedef typename select_ref::col_iterator, PT>::ref_type row_iterator; + typedef row_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type nrows(const this_type &v) + { return v.nr; } + static size_type ncols(const this_type &v) + { return v.nc; } + static const_sub_row_type row(const const_row_iterator &it) + { return linalg_traits::col(it); } + static sub_row_type row(const row_iterator &it) + { return 
linalg_traits::col(it); } + static row_iterator row_begin(this_type &m) { return m.begin_; } + static row_iterator row_end(this_type &m) { return m.end_; } + static const_row_iterator row_begin(const this_type &m) + { return m.begin_; } + static const_row_iterator row_end(const this_type &m) { return m.end_; } + static origin_type* origin(this_type &v) { return v.origin; } + static const origin_type* origin(const this_type &v) { return v.origin; } + static void do_clear(this_type &m); + static value_type access(const const_row_iterator &itrow, size_type i) + { return linalg_traits::access(itrow, i); } + static reference access(const row_iterator &itrow, size_type i) + { return linalg_traits::access(itrow, i); } + }; + + template + void linalg_traits >::do_clear(this_type &v) { + row_iterator it = mat_row_begin(v), ite = mat_row_end(v); + for (; it != ite; ++it) clear(row(it)); + } + + template std::ostream &operator << + (std::ostream &o, const transposed_col_ref& m) + { gmm::write(o,m); return o; } + + template struct transposed_return_ { + typedef abstract_null_type return_type; + }; + template struct transposed_return_ { + typedef typename std::iterator_traits::value_type L; + typedef typename select_return, + transposed_row_ref< L *>, PT>::return_type return_type; + }; + template struct transposed_return_ { + typedef typename std::iterator_traits::value_type L; + typedef typename select_return, + transposed_col_ref< L *>, PT>::return_type return_type; + }; + template struct transposed_return { + typedef typename std::iterator_traits::value_type L; + typedef typename transposed_return_::sub_orientation>::potype, + PT>::return_type return_type; + }; + + template inline + typename transposed_return::return_type transposed(const L &l) { + return typename transposed_return::return_type + (linalg_cast(const_cast(l))); + } + + template inline + typename transposed_return::return_type transposed(L &l) + { return typename transposed_return::return_type(linalg_cast(l)); 
} + +} + +#endif // GMM_TRANSPOSED_H__ diff --git a/gmm/gmm_tri_solve.h b/gmm/gmm_tri_solve.h new file mode 100644 index 000000000..d05520eb3 --- /dev/null +++ b/gmm/gmm_tri_solve.h @@ -0,0 +1,222 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_tri_solve.h + @author Yves Renard + @date October 13, 2002. 
+ @brief Solve triangular linear system for dense matrices. +*/ + +#ifndef GMM_TRI_SOLVE_H__ +#define GMM_TRI_SOLVE_H__ + +#include "gmm_interface.h" + +namespace gmm { + + template + void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + col_major, abstract_sparse, bool is_unit) { + typename linalg_traits::value_type x_j; + for (int j = int(k) - 1; j >= 0; --j) { + typedef typename linalg_traits::const_sub_col_type COL; + COL c = mat_const_col(T, j); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = vect_const_end(c); + if (!is_unit) x[j] /= c[j]; + for (x_j = x[j]; it != ite ; ++it) + if (int(it.index()) < j) x[it.index()] -= x_j * (*it); + } + } + + template + void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + col_major, abstract_dense, bool is_unit) { + typename linalg_traits::value_type x_j; + for (int j = int(k) - 1; j >= 0; --j) { + typedef typename linalg_traits::const_sub_col_type COL; + COL c = mat_const_col(T, j); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = it + j; + typename linalg_traits::iterator itx = vect_begin(x); + if (!is_unit) x[j] /= c[j]; + for (x_j = x[j]; it != ite ; ++it, ++itx) *itx -= x_j * (*it); + } + } + + template + void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + col_major, abstract_sparse, bool is_unit) { + typename linalg_traits::value_type x_j; + // cout << "(lower col)The Tri Matrix = " << T << endl; + // cout << "k = " << endl; + for (int j = 0; j < int(k); ++j) { + typedef typename linalg_traits::const_sub_col_type COL; + COL c = mat_const_col(T, j); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = vect_const_end(c); + if (!is_unit) x[j] /= c[j]; + for (x_j = x[j]; it != ite ; ++it) + if (int(it.index()) > j && it.index() < k) x[it.index()] -= x_j*(*it); + } + } + + template + void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + col_major, abstract_dense, bool is_unit) { + typename 
linalg_traits::value_type x_j; + for (int j = 0; j < int(k); ++j) { + typedef typename linalg_traits::const_sub_col_type COL; + COL c = mat_const_col(T, j); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c) + (j+1), ite = vect_const_begin(c) + k; + typename linalg_traits::iterator itx = vect_begin(x) + (j+1); + if (!is_unit) x[j] /= c[j]; + for (x_j = x[j]; it != ite ; ++it, ++itx) *itx -= x_j * (*it); + } + } + + + template + void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + row_major, abstract_sparse, bool is_unit) { + typedef typename linalg_traits::const_sub_row_type ROW; + typename linalg_traits::value_type t; + typename linalg_traits::const_row_iterator + itr = mat_row_const_end(T); + for (int i = int(k) - 1; i >= 0; --i) { + --itr; + ROW c = linalg_traits::row(itr); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = vect_const_end(c); + for (t = x[i]; it != ite; ++it) + if (int(it.index()) > i && it.index() < k) t -= (*it) * x[it.index()]; + if (!is_unit) x[i] = t / c[i]; else x[i] = t; + } + } + + template + void upper_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + row_major, abstract_dense, bool is_unit) { + typename linalg_traits::value_type t; + + for (int i = int(k) - 1; i >= 0; --i) { + typedef typename linalg_traits::const_sub_row_type ROW; + ROW c = mat_const_row(T, i); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c) + (i + 1), ite = vect_const_begin(c) + k; + typename linalg_traits::iterator itx = vect_begin(x) + (i+1); + + for (t = x[i]; it != ite; ++it, ++itx) t -= (*it) * (*itx); + if (!is_unit) x[i] = t / c[i]; else x[i] = t; + } + } + + template + void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + row_major, abstract_sparse, bool is_unit) { + typename linalg_traits::value_type t; + + for (int i = 0; i < int(k); ++i) { + typedef typename linalg_traits::const_sub_row_type ROW; + ROW c = mat_const_row(T, i); + typename 
linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = vect_const_end(c); + + for (t = x[i]; it != ite; ++it) + if (int(it.index()) < i) t -= (*it) * x[it.index()]; + if (!is_unit) x[i] = t / c[i]; else x[i] = t; + } + } + + template + void lower_tri_solve__(const TriMatrix& T, VecX& x, size_t k, + row_major, abstract_dense, bool is_unit) { + typename linalg_traits::value_type t; + + for (int i = 0; i < int(k); ++i) { + typedef typename linalg_traits::const_sub_row_type ROW; + ROW c = mat_const_row(T, i); + typename linalg_traits::t>::const_iterator + it = vect_const_begin(c), ite = it + i; + typename linalg_traits::iterator itx = vect_begin(x); + + for (t = x[i]; it != ite; ++it, ++itx) t -= (*it) * (*itx); + if (!is_unit) x[i] = t / c[i]; else x[i] = t; + } + } + + +// Triangular Solve: x <-- T^{-1} * x + + template inline + void upper_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false) + { upper_tri_solve(T, x_, mat_nrows(T), is_unit); } + + template inline + void lower_tri_solve(const TriMatrix& T, VecX &x_, bool is_unit = false) + { lower_tri_solve(T, x_, mat_nrows(T), is_unit); } + + template inline + void upper_tri_solve(const TriMatrix& T, VecX &x_, size_t k, + bool is_unit) { + VecX& x = const_cast(x_); + GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x) >= k + && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch"); + upper_tri_solve__(T, x, k, + typename principal_orientation_type::sub_orientation>::potype(), + typename linalg_traits::storage_type(), + is_unit); + } + + template inline + void lower_tri_solve(const TriMatrix& T, VecX &x_, size_t k, + bool is_unit) { + VecX& x = const_cast(x_); + GMM_ASSERT2(mat_nrows(T) >= k && vect_size(x) >= k + && mat_ncols(T) >= k && !is_sparse(x_), "dimensions mismatch"); + lower_tri_solve__(T, x, k, + typename principal_orientation_type::sub_orientation>::potype(), + typename linalg_traits::storage_type(), + is_unit); + } + + + + + + +} + + +#endif // GMM_TRI_SOLVE_H__ diff --git 
a/gmm/gmm_vector.h b/gmm/gmm_vector.h new file mode 100644 index 000000000..e69931dbe --- /dev/null +++ b/gmm/gmm_vector.h @@ -0,0 +1,1571 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2002-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ +/**@file gmm_vector.h + @author Yves Renard + @date October 13, 2002. 
+ @brief Declaration of the vector types (gmm::rsvector, gmm::wsvector, + gmm::slvector ,..) +*/ +#ifndef GMM_VECTOR_H__ +#define GMM_VECTOR_H__ + +#include +#include "gmm_interface.h" + +namespace gmm { + + /*************************************************************************/ + /* */ + /* Class ref_elt_vector: reference on a vector component. */ + /* */ + /*************************************************************************/ + + + template class ref_elt_vector { + + V *pm; + size_type l; + + public : + + operator T() const { return pm->r(l); } + ref_elt_vector(V *p, size_type ll) : pm(p), l(ll) {} + inline bool operator ==(T v) const { return ((*pm).r(l) == v); } + inline bool operator !=(T v) const { return ((*pm).r(l) != v); } + inline bool operator ==(std::complex v) const + { return ((*pm).r(l) == v); } + inline bool operator !=(std::complex v) const + { return ((*pm).r(l) != v); } + inline ref_elt_vector &operator +=(T v) + { (*pm).wa(l, v); return *this; } + inline ref_elt_vector &operator -=(T v) + { (*pm).wa(l, -v); return *this; } + inline ref_elt_vector &operator /=(T v) + { (*pm).w(l,(*pm).r(l) / v); return *this; } + inline ref_elt_vector &operator *=(T v) + { (*pm).w(l,(*pm).r(l) * v); return *this; } + inline ref_elt_vector &operator =(const ref_elt_vector &re) + { *this = T(re); return *this; } + inline ref_elt_vector &operator =(T v) + { (*pm).w(l,v); return *this; } + T operator +() { return T(*this); } + T operator -() { return -T(*this); } + T operator +(T v) { return T(*this)+ v; } + T operator -(T v) { return T(*this)- v; } + T operator *(T v) { return T(*this)* v; } + T operator /(T v) { return T(*this)/ v; } + std::complex operator +(std::complex v) { return T(*this)+ v; } + std::complex operator -(std::complex v) { return T(*this)- v; } + std::complex operator *(std::complex v) { return T(*this)* v; } + std::complex operator /(std::complex v) { return T(*this)/ v; } + }; + + template class ref_elt_vector,V> { + + V *pm; + 
size_type l; + + public : + + operator std::complex() const { return pm->r(l); } + ref_elt_vector(V *p, size_type ll) : pm(p), l(ll) {} + inline bool operator ==(std::complex v) const + { return ((*pm).r(l) == v); } + inline bool operator !=(std::complex v) const + { return ((*pm).r(l) != v); } + inline bool operator ==(T v) const { return ((*pm).r(l) == v); } + inline bool operator !=(T v) const { return ((*pm).r(l) != v); } + inline ref_elt_vector &operator +=(std::complex v) + { (*pm).w(l,(*pm).r(l) + v); return *this; } + inline ref_elt_vector &operator -=(std::complex v) + { (*pm).w(l,(*pm).r(l) - v); return *this; } + inline ref_elt_vector &operator /=(std::complex v) + { (*pm).w(l,(*pm).r(l) / v); return *this; } + inline ref_elt_vector &operator *=(std::complex v) + { (*pm).w(l,(*pm).r(l) * v); return *this; } + inline ref_elt_vector &operator =(const ref_elt_vector &re) + { *this = T(re); return *this; } + inline ref_elt_vector &operator =(std::complex v) + { (*pm).w(l,v); return *this; } + inline ref_elt_vector &operator =(T v) + { (*pm).w(l,std::complex(v)); return *this; } + inline ref_elt_vector &operator +=(T v) + { (*pm).w(l,(*pm).r(l) + v); return *this; } + inline ref_elt_vector &operator -=(T v) + { (*pm).w(l,(*pm).r(l) - v); return *this; } + inline ref_elt_vector &operator /=(T v) + { (*pm).w(l,(*pm).r(l) / v); return *this; } + inline ref_elt_vector &operator *=(T v) + { (*pm).w(l,(*pm).r(l) * v); return *this; } + std::complex operator +() { return std::complex(*this); } + std::complex operator -() { return -std::complex(*this); } + std::complex operator +(T v) { return std::complex(*this)+ v; } + std::complex operator -(T v) { return std::complex(*this)- v; } + std::complex operator *(T v) { return std::complex(*this)* v; } + std::complex operator /(T v) { return std::complex(*this)/ v; } + std::complex operator +(std::complex v) + { return std::complex(*this)+ v; } + std::complex operator -(std::complex v) + { return std::complex(*this)- v; 
} + std::complex operator *(std::complex v) + { return std::complex(*this)* v; } + std::complex operator /(std::complex v) + { return std::complex(*this)/ v; } + }; + + + template inline + bool operator ==(T v, const ref_elt_vector &re) { return (v==T(re)); } + template inline + bool operator !=(T v, const ref_elt_vector &re) { return (v!=T(re)); } + template inline + T &operator +=(T &v, const ref_elt_vector &re) + { v += T(re); return v; } + template inline + T &operator -=(T &v, const ref_elt_vector &re) + { v -= T(re); return v; } + template inline + T &operator *=(T &v, const ref_elt_vector &re) + { v *= T(re); return v; } + template inline + T &operator /=(T &v, const ref_elt_vector &re) + { v /= T(re); return v; } + template inline + T operator +(T v, const ref_elt_vector &re) { return v+ T(re); } + template inline + T operator -(T v, const ref_elt_vector &re) { return v- T(re); } + template inline + T operator *(T v, const ref_elt_vector &re) { return v* T(re); } + template inline + T operator /(T v, const ref_elt_vector &re) { return v/ T(re); } + template inline + std::complex operator +(std::complex v, const ref_elt_vector &re) + { return v+ T(re); } + template inline + std::complex operator -(std::complex v, const ref_elt_vector &re) + { return v- T(re); } + template inline + std::complex operator *(std::complex v, const ref_elt_vector &re) + { return v* T(re); } + template inline + std::complex operator /(std::complex v, const ref_elt_vector &re) + { return v/ T(re); } + template inline + std::complex operator +(T v, const ref_elt_vector, V> &re) + { return v+ std::complex(re); } + template inline + std::complex operator -(T v, const ref_elt_vector, V> &re) + { return v- std::complex(re); } + template inline + std::complex operator *(T v, const ref_elt_vector, V> &re) + { return v* std::complex(re); } + template inline + std::complex operator /(T v, const ref_elt_vector, V> &re) + { return v/ std::complex(re); } + template inline + typename 
number_traits::magnitude_type + abs(const ref_elt_vector &re) { return gmm::abs(T(re)); } + template inline + T sqr(const ref_elt_vector &re) { return gmm::sqr(T(re)); } + template inline + typename number_traits::magnitude_type + abs_sqr(const ref_elt_vector &re) { return gmm::abs_sqr(T(re)); } + template inline + T conj(const ref_elt_vector &re) { return gmm::conj(T(re)); } + template std::ostream &operator << + (std::ostream &o, const ref_elt_vector &re) { o << T(re); return o; } + template inline + typename number_traits::magnitude_type + real(const ref_elt_vector &re) { return gmm::real(T(re)); } + template inline + typename number_traits::magnitude_type + imag(const ref_elt_vector &re) { return gmm::imag(T(re)); } + + /*************************************************************************/ + /* */ + /* Class dsvector: sparse vector optimized for random write operations */ + /* with constant complexity for read and write operations. */ + /* Based on distribution sort principle. */ + /* Cheap for densely populated vectors. */ + /* */ + /*************************************************************************/ + + template class dsvector; + + template struct dsvector_iterator { + size_type i; // Current index. + T* p; // Pointer to the current position. + dsvector *v; // Pointer to the vector. 
+ + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + // typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef dsvector_iterator iterator; + + reference operator *() const { return *p; } + pointer operator->() const { return &(operator*()); } + + iterator &operator ++() { + for (size_type k = (i & 15); k < 15; ++k) + { ++p; ++i; if (*p != T(0)) return *this; } + v->next_pos(*(const_cast(&(p))), i); + return *this; + } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator &operator --() { + for (size_type k = (i & 15); k > 0; --k) + { --p; --i; if (*p != T(0)) return *this; } + v->previous_pos(p, i); + return *this; + } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + bool operator ==(const iterator &it) const + { return (i == it.i && p == it.p && v == it.v); } + bool operator !=(const iterator &it) const + { return !(it == *this); } + + size_type index(void) const { return i; } + + dsvector_iterator(void) : i(size_type(-1)), p(0), v(0) {} + dsvector_iterator(dsvector &w) : i(size_type(-1)), p(0), v(&w) {}; + }; + + + template struct dsvector_const_iterator { + size_type i; // Current index. + const T* p; // Pointer to the current position. + const dsvector *v; // Pointer to the vector. 
+ + typedef T value_type; + typedef const value_type* pointer; + typedef const value_type& reference; + // typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef dsvector_const_iterator iterator; + + reference operator *() const { return *p; } + pointer operator->() const { return &(operator*()); } + iterator &operator ++() { + for (size_type k = (i & 15); k < 15; ++k) + { ++p; ++i; if (*p != T(0)) return *this; } + v->next_pos(p, i); + return *this; + } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator &operator --() { + for (size_type k = (i & 15); k > 0; --k) + { --p; --i; if (*p != T(0)) return *this; } + v->previous_pos(p, i); + return *this; + } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + bool operator ==(const iterator &it) const + { return (i == it.i && p == it.p && v == it.v); } + bool operator !=(const iterator &it) const + { return !(it == *this); } + + size_type index(void) const { return i; } + + dsvector_const_iterator(void) : i(size_type(-1)), p(0) {} + dsvector_const_iterator(const dsvector_iterator &it) + : i(it.i), p(it.p), v(it.v) {} + dsvector_const_iterator(const dsvector &w) + : i(size_type(-1)), p(0), v(&w) {}; + }; + + + /** + Sparse vector built on distribution sort principle. + Read and write access have a constant complexity depending only on the + vector size. 
+ */ + template class dsvector { + + typedef dsvector_iterator iterator; + typedef dsvector_const_iterator const_iterator; + typedef dsvector this_type; + typedef T * pointer; + typedef const T * const_pointer; + typedef void * void_pointer; + typedef const void * const_void_pointer; + + protected: + size_type n; // Potential vector size + size_type depth; // Number of row of pointer arrays + size_type mask; // Mask for the first pointer array + size_type shift; // Shift for the first pointer array + void_pointer root_ptr; // Root pointer + + const T *read_access(size_type i) const { + GMM_ASSERT1(i < n, "index out of range"); + size_type my_mask = mask, my_shift = shift; + void_pointer p = root_ptr; + if (!p) return 0; + for (size_type k = 0; k < depth; ++k) { + p = ((void **)(p))[(i & my_mask) >> my_shift]; + if (!p) return 0; + my_mask = (my_mask >> 4); + my_shift -= 4; + } + GMM_ASSERT1(my_shift == 0, "internal error"); + GMM_ASSERT1(my_mask == 15, "internal error"); + return &(((const T *)(p))[i & 15]); + } + + T *write_access(size_type i) { + GMM_ASSERT1(i < n, "index " << i << " out of range (size " << n << ")"); + size_type my_mask = mask, my_shift = shift; + if (!root_ptr) { + if (depth) { + root_ptr = new void_pointer[16]; + std::memset(root_ptr, 0, 16*sizeof(void_pointer)); + } else { + root_ptr = new T[16]; + for (size_type l = 0; l < 16; ++l) ((T *)(root_ptr))[l] = T(0); + } + } + + void_pointer p = root_ptr; + for (size_type k = 0; k < depth; ++k) { + size_type j = (i & my_mask) >> my_shift; + void_pointer q = ((void_pointer *)(p))[j]; + if (!q) { + if (k+1 != depth) { + q = new void_pointer[16]; + std::memset(q, 0, 16*sizeof(void_pointer)); + } else { + q = new T[16]; + for (size_type l = 0; l < 16; ++l) ((T *)(q))[l] = T(0); + } + ((void_pointer *)(p))[j] = q; + } + p = q; + my_mask = (my_mask >> 4); + my_shift -= 4; + } + GMM_ASSERT1(my_shift == 0, "internal error"); + GMM_ASSERT1(my_mask == 15, "internal error " << my_mask); + return &(((T 
*)(p))[i & 15]); + } + + void init(size_type n_) { + n = n_; depth = 0; shift = 0; mask = 1; if (n_) --n_; + while (n_) { n_ /= 16; ++depth; shift += 4; mask *= 16; } + mask--; if (shift) shift -= 4; if (depth) --depth; + root_ptr = 0; + } + + void rec_del(void_pointer p, size_type my_depth) { + if (my_depth) { + for (size_type k = 0; k < 16; ++k) + if (((void_pointer *)(p))[k]) + rec_del(((void_pointer *)(p))[k], my_depth-1); + delete[] ((void_pointer *)(p)); + } else { + delete[] ((T *)(p)); + } + } + + void rec_clean(void_pointer p, size_type my_depth, double eps) { + if (my_depth) { + for (size_type k = 0; k < 16; ++k) + if (((void_pointer *)(p))[k]) + rec_clean(((void_pointer *)(p))[k], my_depth-1, eps); + } else { + for (size_type k = 0; k < 16; ++k) + if (gmm::abs(((T *)(p))[k]) <= eps) ((T *)(p))[k] = T(0); + } + } + + void rec_clean_i(void_pointer p, size_type my_depth, size_type my_mask, + size_type i, size_type base) { + if (my_depth) { + my_mask = (my_mask >> 4); + for (size_type k = 0; k < 16; ++k) + if (((void_pointer *)(p))[k] && (base + (k+1)*(mask+1)) >= i) + rec_clean_i(((void_pointer *)(p))[k], my_depth-1, my_mask, + i, base + k*(my_mask+1)); + } else { + for (size_type k = 0; k < 16; ++k) + if (base+k > i) ((T *)(p))[k] = T(0); + } + } + + + size_type rec_nnz(void_pointer p, size_type my_depth) const { + size_type nn = 0; + if (my_depth) { + for (size_type k = 0; k < 16; ++k) + if (((void_pointer *)(p))[k]) + nn += rec_nnz(((void_pointer *)(p))[k], my_depth-1); + } else { + for (size_type k = 0; k < 16; ++k) + if (((const T *)(p))[k] != T(0)) nn++; + } + return nn; + } + + void copy_rec(void_pointer &p, const_void_pointer q, size_type my_depth) { + if (my_depth) { + p = new void_pointer[16]; + std::memset(p, 0, 16*sizeof(void_pointer)); + for (size_type l = 0; l < 16; ++l) + if (((const const_void_pointer *)(q))[l]) + copy_rec(((void_pointer *)(p))[l], + ((const const_void_pointer *)(q))[l], my_depth-1); + } else { + p = new T[16]; + for 
(size_type l = 0; l < 16; ++l) ((T *)(p))[l] = ((const T *)(q))[l]; + } + } + + void copy(const dsvector &v) { + if (root_ptr) rec_del(root_ptr, depth); + root_ptr = 0; + mask = v.mask; depth = v.depth; n = v.n; shift = v.shift; + if (v.root_ptr) copy_rec(root_ptr, v.root_ptr, depth); + } + + void next_pos_rec(void_pointer p, size_type my_depth, size_type my_mask, + const_pointer &pp, size_type &i, size_type base) const { + size_type ii = i; + if (my_depth) { + my_mask = (my_mask >> 4); + for (size_type k = 0; k < 16; ++k) + if (((void_pointer *)(p))[k] && (base + (k+1)*(my_mask+1)) >= i) { + next_pos_rec(((void_pointer *)(p))[k], my_depth-1, my_mask, + pp, i, base + k*(my_mask+1)); + if (i != size_type(-1)) return; else i = ii; + } + i = size_type(-1); pp = 0; + } else { + for (size_type k = 0; k < 16; ++k) + if (base+k > i && ((const_pointer)(p))[k] != T(0)) + { i = base+k; pp = &(((const_pointer)(p))[k]); return; } + i = size_type(-1); pp = 0; + } + } + + void previous_pos_rec(void_pointer p, size_type my_depth, size_type my_mask, + const_pointer &pp, size_type &i, + size_type base) const { + size_type ii = i; + if (my_depth) { + my_mask = (my_mask >> 4); + for (size_type k = 15; k != size_type(-1); --k) + if (((void_pointer *)(p))[k] && ((base + k*(my_mask+1)) < i)) { + previous_pos_rec(((void_pointer *)(p))[k], my_depth-1, + my_mask, pp, i, base + k*(my_mask+1)); + if (i != size_type(-1)) return; else i = ii; + } + i = size_type(-1); pp = 0; + } else { + for (size_type k = 15; k != size_type(-1); --k) + if (base+k < i && ((const_pointer)(p))[k] != T(0)) + { i = base+k; pp = &(((const_pointer)(p))[k]); return; } + i = size_type(-1); pp = 0; + } + } + + + public: + void clean(double eps) { if (root_ptr) rec_clean(root_ptr, depth); } + void resize(size_type n_) { + if (n_ != n) { + n = n_; + if (n_ < n) { // Depth unchanged (a choice) + if (root_ptr) rec_clean_i(root_ptr, depth, mask, n_, 0); + } else { + // may change the depth (add some levels) + size_type 
my_depth = 0, my_shift = 0, my_mask = 1; if (n_) --n_; + while (n_) { n_ /= 16; ++my_depth; my_shift += 4; my_mask *= 16; } + my_mask--; if (my_shift) my_shift -= 4; if (my_depth) --my_depth; + if (my_depth > depth || depth == 0) { + if (root_ptr) { + for (size_type k = depth; k < my_depth; ++k) { + void_pointer *q = new void_pointer [16]; + std::memset(q, 0, 16*sizeof(void_pointer)); + q[0] = root_ptr; root_ptr = q; + } + } + mask = my_mask; depth = my_depth; shift = my_shift; + } + } + } + } + + void clear(void) { if (root_ptr) rec_del(root_ptr, depth); root_ptr = 0; } + + void next_pos(const_pointer &pp, size_type &i) const { + if (!root_ptr || i >= n) { pp = 0, i = size_type(-1); return; } + next_pos_rec(root_ptr, depth, mask, pp, i, 0); + } + + void previous_pos(const_pointer &pp, size_type &i) const { + if (!root_ptr) { pp = 0, i = size_type(-1); return; } + if (i == size_type(-1)) { i = n; } + previous_pos_rec(root_ptr, depth, mask, pp, i, 0); + } + + iterator begin(void) { + iterator it(*this); + if (n && root_ptr) { + it.i = 0; it.p = const_cast(read_access(0)); + if (!(it.p) || *(it.p) == T(0)) + next_pos(*(const_cast(&(it.p))), it.i); + } + return it; + } + + iterator end(void) { return iterator(*this); } + + const_iterator begin(void) const { + const_iterator it(*this); + if (n && root_ptr) { + it.i = 0; it.p = read_access(0); + if (!(it.p) || *(it.p) == T(0)) next_pos(it.p, it.i); + } + return it; + } + + const_iterator end(void) const { return const_iterator(*this); } + + inline ref_elt_vector > operator [](size_type c) + { return ref_elt_vector >(this, c); } + + inline void w(size_type c, const T &e) { + if (e == T(0)) { if (read_access(c)) *(write_access(c)) = e; } + else *(write_access(c)) = e; + } + + inline void wa(size_type c, const T &e) + { if (e != T(0)) { *(write_access(c)) += e; } } + + inline T r(size_type c) const + { const T *p = read_access(c); if (p) return *p; else return T(0); } + + inline T operator [](size_type c) const { return 
r(c); } + + size_type nnz(void) const + { if (root_ptr) return rec_nnz(root_ptr, depth); else return 0; } + size_type size(void) const { return n; } + + void swap(dsvector &v) { + std::swap(n, v.n); std::swap(root_ptr, v.root_ptr); + std::swap(depth, v.depth); std::swap(shift, v.shift); + std::swap(mask, v.mask); + } + + /* Constructors */ + dsvector(const dsvector &v) { init(0); copy(v); } + dsvector &operator =(const dsvector &v) { copy(v); return *this; } + explicit dsvector(size_type l){ init(l); } + dsvector(void) { init(0); } + ~dsvector() { if (root_ptr) rec_del(root_ptr, depth); root_ptr = 0; } + }; + + template struct linalg_traits> { + typedef dsvector this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef ref_elt_vector > reference; + typedef dsvector_iterator iterator; + typedef dsvector_const_iterator const_iterator; + typedef abstract_sparse storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type* o, const iterator &, const iterator &) + { o->clear(); } + static void do_clear(this_type &v) { v.clear(); } + static value_type access(const origin_type *o, const const_iterator &, + const const_iterator &, size_type i) + { return (*o)[i]; } + static reference access(origin_type *o, const iterator &, const iterator &, + size_type i) + { return (*o)[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + + template std::ostream &operator << + (std::ostream &o, const 
dsvector& v) { gmm::write(o,v); return o; } + + /******* Optimized operations for dsvector ****************************/ + + template inline void copy(const dsvector &v1, + dsvector &v2) { + GMM_ASSERT2(v1.size() == v2.size(), "dimensions mismatch"); + v2 = v1; + } + template inline void copy(const dsvector &v1, + const dsvector &v2) { + GMM_ASSERT2(v1.size() == v2.size(), "dimensions mismatch"); + v2 = const_cast &>(v1); + } + template inline + void copy(const dsvector &v1, const simple_vector_ref *> &v2){ + simple_vector_ref *> + *svr = const_cast *> *>(&v2); + dsvector + *pv = const_cast *>((v2.origin)); + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + *pv = v1; svr->begin_ = vect_begin(*pv); svr->end_ = vect_end(*pv); + } + template inline + void copy(const simple_vector_ref *> &v1, + dsvector &v2) + { copy(*(v1.origin), v2); } + template inline + void copy(const simple_vector_ref *> &v1, dsvector &v2) + { copy(*(v1.origin), v2); } + template inline + void copy(const simple_vector_ref *> &v1, + const simple_vector_ref *> &v2) + { copy(*(v1.origin), v2); } + template inline + void copy(const simple_vector_ref *> &v1, + const simple_vector_ref *> &v2) + { copy(*(v1.origin), v2); } + + template + inline size_type nnz(const dsvector& l) { return l.nnz(); } + + /*************************************************************************/ + /* */ + /* Class wsvector: sparse vector optimized for random write operations, */ + /* with log(n) complexity for read and write operations. 
*/ + /* Based on std::map */ + /* */ + /*************************************************************************/ + + template struct wsvector_iterator + : public std::map::iterator { + typedef typename std::map::iterator base_it_type; + typedef T value_type; + typedef value_type* pointer; + typedef value_type& reference; + // typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator *() const { return (base_it_type::operator*()).second; } + pointer operator->() const { return &(operator*()); } + size_type index(void) const { return (base_it_type::operator*()).first; } + + wsvector_iterator(void) {} + wsvector_iterator(const base_it_type &it) : base_it_type(it) {} + }; + + template struct wsvector_const_iterator + : public std::map::const_iterator { + typedef typename std::map::const_iterator base_it_type; + typedef T value_type; + typedef const value_type* pointer; + typedef const value_type& reference; + // typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + + reference operator *() const { return (base_it_type::operator*()).second; } + pointer operator->() const { return &(operator*()); } + size_type index(void) const { return (base_it_type::operator*()).first; } + + wsvector_const_iterator(void) {} + wsvector_const_iterator(const wsvector_iterator &it) + : base_it_type(it) {} + wsvector_const_iterator(const base_it_type &it) : base_it_type(it) {} + }; + + + /** + sparse vector built upon std::map. 
+ Read and write access are quite fast (log n) + */ + template class wsvector : public std::map { + public: + + typedef typename std::map::size_type size_type; + typedef std::map base_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + + protected: + size_type nbl; + + public: + void clean(double eps); + void resize(size_type); + + inline ref_elt_vector > operator [](size_type c) + { return ref_elt_vector >(this, c); } + + inline void w(size_type c, const T &e) { + GMM_ASSERT2(c < nbl, "out of range"); + if (e == T(0)) { this->erase(c); } + else base_type::operator [](c) = e; + } + + inline void wa(size_type c, const T &e) { + GMM_ASSERT2(c < nbl, "out of range"); + if (e != T(0)) { + iterator it = this->lower_bound(c); + if (it != this->end() && it->first == c) it->second += e; + else base_type::operator [](c) = e; + } + } + + inline T r(size_type c) const { + GMM_ASSERT2(c < nbl, "out of range"); + const_iterator it = this->lower_bound(c); + if (it != this->end() && c == it->first) return it->second; + else return T(0); + } + + inline T operator [](size_type c) const { return r(c); } + + size_type nb_stored(void) const { return base_type::size(); } + size_type size(void) const { return nbl; } + + void swap(wsvector &v) + { std::swap(nbl, v.nbl); std::map::swap(v); } + + + /* Constructors */ + void init(size_type l) { nbl = l; this->clear(); } + explicit wsvector(size_type l){ init(l); } + wsvector(void) { init(0); } + }; + + template void wsvector::clean(double eps) { + iterator it = this->begin(), itf = it, ite = this->end(); + while (it != ite) { + ++itf; if (gmm::abs(it->second) <= eps) this->erase(it); it = itf; + } + } + + template void wsvector::resize(size_type n) { + if (n < nbl) { + iterator it = this->begin(), itf = it, ite = this->end(); + while (it != ite) { ++itf; if (it->first >= n) this->erase(it); it=itf; } + } + nbl = n; + } + + template struct linalg_traits > { + typedef wsvector 
this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef ref_elt_vector > reference; + typedef wsvector_iterator iterator; + typedef wsvector_const_iterator const_iterator; + typedef abstract_sparse storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return v.begin(); } + static const_iterator begin(const this_type &v) { return v.begin(); } + static iterator end(this_type &v) { return v.end(); } + static const_iterator end(const this_type &v) { return v.end(); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type* o, const iterator &, const iterator &) + { o->clear(); } + static void do_clear(this_type &v) { v.clear(); } + static value_type access(const origin_type *o, const const_iterator &, + const const_iterator &, size_type i) + { return (*o)[i]; } + static reference access(origin_type *o, const iterator &, const iterator &, + size_type i) + { return (*o)[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + + template std::ostream &operator << + (std::ostream &o, const wsvector& v) { gmm::write(o,v); return o; } + + /******* Optimized BLAS for wsvector **********************************/ + + template inline void copy(const wsvector &v1, + wsvector &v2) { + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + v2 = v1; + } + template inline + void copy(const wsvector &v1, const simple_vector_ref *> &v2){ + simple_vector_ref *> + *svr = const_cast *> *>(&v2); + wsvector + *pv = const_cast *>(v2.origin); + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + *pv = v1; svr->begin_ = vect_begin(*pv); svr->end_ = vect_end(*pv); + } + template inline + void copy(const simple_vector_ref *> &v1, + wsvector &v2) 
+ { copy(*(v1.origin), v2); } + template inline + void copy(const simple_vector_ref *> &v1, wsvector &v2) + { copy(*(v1.origin), v2); } + + template inline void clean(wsvector &v, double eps) { + typedef typename number_traits::magnitude_type R; + typename wsvector::iterator it = v.begin(), ite = v.end(), itc; + while (it != ite) + if (gmm::abs((*it).second) <= R(eps)) + { itc=it; ++it; v.erase(itc); } else ++it; + } + + template + inline void clean(const simple_vector_ref *> &l, double eps) { + simple_vector_ref *> + *svr = const_cast *> *>(&l); + wsvector + *pv = const_cast *>((l.origin)); + clean(*pv, eps); + svr->begin_ = vect_begin(*pv); svr->end_ = vect_end(*pv); + } + + template + inline size_type nnz(const wsvector& l) { return l.nb_stored(); } + + /*************************************************************************/ + /* */ + /* rsvector: sparse vector optimized for linear algebra operations. */ + /* */ + /*************************************************************************/ + + template struct elt_rsvector_ { + size_type c; T e; + /* e is initialized by default to avoid some false warnings of valgrind. + (from http://valgrind.org/docs/manual/mc-manual.html: + + When memory is read into the CPU's floating point registers, the + relevant V bits are read from memory and they are immediately + checked. If any are invalid, an uninitialised value error is + emitted. This precludes using the floating-point registers to copy + possibly-uninitialised memory, but simplifies Valgrind in that it + does not have to track the validity status of the floating-point + registers. 
+ */ + elt_rsvector_(void) : e(0) {} + elt_rsvector_(size_type cc) : c(cc), e(0) {} + elt_rsvector_(size_type cc, const T &ee) : c(cc), e(ee) {} + bool operator < (const elt_rsvector_ &a) const { return c < a.c; } + bool operator == (const elt_rsvector_ &a) const { return c == a.c; } + bool operator != (const elt_rsvector_ &a) const { return c != a.c; } + }; + + template struct rsvector_iterator { + typedef typename std::vector >::iterator IT; + typedef T value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + typedef rsvector_iterator iterator; + + IT it; + + reference operator *() const { return it->e; } + pointer operator->() const { return &(operator*()); } + + iterator &operator ++() { ++it; return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator &operator --() { --it; return *this; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + bool operator ==(const iterator &i) const { return it == i.it; } + bool operator !=(const iterator &i) const { return !(i == *this); } + + size_type index(void) const { return it->c; } + rsvector_iterator(void) {} + rsvector_iterator(const IT &i) : it(i) {} + }; + + template struct rsvector_const_iterator { + typedef typename std::vector >::const_iterator IT; + typedef T value_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::forward_iterator_tag iterator_category; + typedef rsvector_const_iterator iterator; + + IT it; + + reference operator *() const { return it->e; } + pointer operator->() const { return &(operator*()); } + size_type index(void) const { return it->c; } + + iterator &operator ++() { ++it; return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } 
+ iterator &operator --() { --it; return *this; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + + bool operator ==(const iterator &i) const { return it == i.it; } + bool operator !=(const iterator &i) const { return !(i == *this); } + + rsvector_const_iterator(void) {} + rsvector_const_iterator(const rsvector_iterator &i) : it(i.it) {} + rsvector_const_iterator(const IT &i) : it(i) {} + }; + + /** + sparse vector built upon std::vector. Read access is fast, + but insertion is O(n) + */ + template class rsvector : public std::vector > { + public: + + typedef std::vector > base_type_; + typedef typename base_type_::iterator iterator; + typedef typename base_type_::const_iterator const_iterator; + typedef typename base_type_::size_type size_type; + typedef T value_type; + + protected: + size_type nbl; /* size of the vector. */ + + public: + + void sup(size_type j); + void base_resize(size_type n) { base_type_::resize(n); } + void resize(size_type); + + ref_elt_vector > operator [](size_type c) + { return ref_elt_vector >(this, c); } + + void w(size_type c, const T &e); + void wa(size_type c, const T &e); + T r(size_type c) const; + void swap_indices(size_type i, size_type j); + + inline T operator [](size_type c) const { return r(c); } + + size_type nb_stored(void) const { return base_type_::size(); } + size_type size(void) const { return nbl; } + void clear(void) { base_type_::resize(0); } + void swap(rsvector &v) + { std::swap(nbl, v.nbl); std::vector >::swap(v); } + + /* Constructeurs */ + explicit rsvector(size_type l) : nbl(l) { } + rsvector(void) : nbl(0) { } + }; + + template + void rsvector::swap_indices(size_type i, size_type j) { + if (i > j) std::swap(i, j); + if (i != j) { + int situation = 0; + elt_rsvector_ ei(i), ej(j), a; + iterator it, ite, iti, itj; + iti = std::lower_bound(this->begin(), this->end(), ei); + if (iti != this->end() && iti->c == i) situation += 1; + itj = std::lower_bound(this->begin(), this->end(), 
ej); + if (itj != this->end() && itj->c == j) situation += 2; + + switch (situation) { + case 1 : a = *iti; a.c = j; it = iti; ++it; ite = this->end(); + for (; it != ite && it->c <= j; ++it, ++iti) *iti = *it; + *iti = a; + break; + case 2 : a = *itj; a.c = i; it = itj; ite = this->begin(); + if (it != ite) { + --it; + while (it->c >= i) { *itj = *it; --itj; if (it==ite) break; --it; } + } + *itj = a; + break; + case 3 : std::swap(iti->e, itj->e); + break; + } + } + } + + template void rsvector::sup(size_type j) { + if (nb_stored() != 0) { + elt_rsvector_ ev(j); + iterator it = std::lower_bound(this->begin(), this->end(), ev); + if (it != this->end() && it->c == j) { + for (iterator ite = this->end() - 1; it != ite; ++it) *it = *(it+1); + base_resize(nb_stored()-1); + } + } + } + + template void rsvector::resize(size_type n) { + if (n < nbl) { + for (size_type i = 0; i < nb_stored(); ++i) + if (base_type_::operator[](i).c >= n) { base_resize(i); break; } + } + nbl = n; + } + + template void rsvector::w(size_type c, const T &e) { + GMM_ASSERT2(c < nbl, "out of range"); + if (e == T(0)) sup(c); + else { + elt_rsvector_ ev(c, e); + if (nb_stored() == 0) { + base_type_::push_back(ev); + } + else { + iterator it = std::lower_bound(this->begin(), this->end(), ev); + if (it != this->end() && it->c == c) it->e = e; + else { + size_type ind = it - this->begin(), nb = this->nb_stored(); + if (nb - ind > 1100) + GMM_WARNING2("Inefficient addition of element in rsvector with " + << this->nb_stored() - ind << " non-zero entries"); + base_type_::push_back(ev); + if (ind != nb) { + it = this->begin() + ind; + iterator ite = this->end(); --ite; iterator itee = ite; + for (; ite != it; --ite) { --itee; *ite = *itee; } + *it = ev; + } + } + } + } + } + + template void rsvector::wa(size_type c, const T &e) { + GMM_ASSERT2(c < nbl, "out of range"); + if (e != T(0)) { + elt_rsvector_ ev(c, e); + if (nb_stored() == 0) { + base_type_::push_back(ev); + } + else { + iterator it = 
std::lower_bound(this->begin(), this->end(), ev); + if (it != this->end() && it->c == c) it->e += e; + else { + size_type ind = it - this->begin(), nb = this->nb_stored(); + if (nb - ind > 1100) + GMM_WARNING2("Inefficient addition of element in rsvector with " + << this->nb_stored() - ind << " non-zero entries"); + base_type_::push_back(ev); + if (ind != nb) { + it = this->begin() + ind; + iterator ite = this->end(); --ite; iterator itee = ite; + for (; ite != it; --ite) { --itee; *ite = *itee; } + *it = ev; + } + } + } + } + } + + template T rsvector::r(size_type c) const { + GMM_ASSERT2(c < nbl, "out of range. Index " << c + << " for a length of " << nbl); + if (nb_stored() != 0) { + elt_rsvector_ ev(c); + const_iterator it = std::lower_bound(this->begin(), this->end(), ev); + if (it != this->end() && it->c == c) return it->e; + } + return T(0); + } + + template struct linalg_traits > { + typedef rsvector this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef ref_elt_vector > reference; + typedef rsvector_iterator iterator; + typedef rsvector_const_iterator const_iterator; + typedef abstract_sparse storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) { return iterator(v.begin()); } + static const_iterator begin(const this_type &v) + { return const_iterator(v.begin()); } + static iterator end(this_type &v) { return iterator(v.end()); } + static const_iterator end(const this_type &v) + { return const_iterator(v.end()); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + static void clear(origin_type* o, const iterator &, const iterator &) + { o->clear(); } + static void do_clear(this_type &v) { v.clear(); } + static value_type access(const origin_type *o, const const_iterator &, + const 
const_iterator &, size_type i) + { return (*o)[i]; } + static reference access(origin_type *o, const iterator &, const iterator &, + size_type i) + { return (*o)[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + + template std::ostream &operator << + (std::ostream &o, const rsvector& v) { gmm::write(o,v); return o; } + + /******* Optimized operations for rsvector ****************************/ + + template inline void copy(const rsvector &v1, + rsvector &v2) { + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + v2 = v1; + } + template inline + void copy(const rsvector &v1, const simple_vector_ref *> &v2){ + simple_vector_ref *> + *svr = const_cast *> *>(&v2); + rsvector + *pv = const_cast *>((v2.origin)); + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + *pv = v1; svr->begin_ = vect_begin(*pv); svr->end_ = vect_end(*pv); + } + template inline + void copy(const simple_vector_ref *> &v1, + rsvector &v2) + { copy(*(v1.origin), v2); } + template inline + void copy(const simple_vector_ref *> &v1, rsvector &v2) + { copy(*(v1.origin), v2); } + + template inline void add(const V &v1, + rsvector &v2) { + if ((const void *)(&v1) != (const void *)(&v2)) { + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + add_rsvector(v1, v2, typename linalg_traits::storage_type()); + } + } + + template + inline void add_rsvector(const V &v1, rsvector &v2, abstract_dense) + { add(v1, v2, abstract_dense(), abstract_sparse()); } + + template + inline void add_rsvector(const V &v1, rsvector &v2, abstract_skyline) + { add(v1, v2, abstract_skyline(), abstract_sparse()); } + + template + void add_rsvector(const V &v1, rsvector &v2, abstract_sparse) { + add_rsvector(v1, v2, typename linalg_traits::index_sorted()); + } + + template + void add_rsvector(const V &v1, rsvector &v2, linalg_false) { + add(v1, v2, abstract_sparse(), abstract_sparse()); + } + + template + void add_rsvector(const V &v1, rsvector &v2, 
linalg_true) { + typename linalg_traits::const_iterator it1 = vect_const_begin(v1), + ite1 = vect_const_end(v1); + typename rsvector::iterator it2 = v2.begin(), ite2 = v2.end(), it3; + size_type nbc = 0, old_nbc = v2.nb_stored(); + for (; it1 != ite1 && it2 != ite2 ; ++nbc) + if (it1.index() == it2->c) { ++it1; ++it2; } + else if (it1.index() < it2->c) ++it1; else ++it2; + for (; it1 != ite1; ++it1) ++nbc; + for (; it2 != ite2; ++it2) ++nbc; + + v2.base_resize(nbc); + it3 = v2.begin() + old_nbc; + it2 = v2.end(); ite2 = v2.begin(); + it1 = vect_end(v1); ite1 = vect_const_begin(v1); + while (it1 != ite1 && it3 != ite2) { + --it3; --it1; --it2; + if (it3->c > it1.index()) { *it2 = *it3; ++it1; } + else if (it3->c == it1.index()) { *it2=*it3; it2->e+=*it1; } + else { it2->c = it1.index(); it2->e = *it1; ++it3; } + } + while (it1 != ite1) { --it1; --it2; it2->c = it1.index(); it2->e = *it1; } + } + + template void copy(const V &v1, rsvector &v2) { + if ((const void *)(&v1) != (const void *)(&v2)) { + GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch"); + if (same_origin(v1, v2)) + GMM_WARNING2("a conflict is possible in vector copy\n"); + copy_rsvector(v1, v2, typename linalg_traits::storage_type()); + } + } + + template + void copy_rsvector(const V &v1, rsvector &v2, abstract_dense) + { copy_vect(v1, v2, abstract_dense(), abstract_sparse()); } + + template + void copy_rsvector(const V &v1, rsvector &v2, abstract_skyline) + { copy_vect(v1, v2, abstract_skyline(), abstract_sparse()); } + + template + void copy_rsvector(const V &v1, rsvector &v2, abstract_sparse) { + copy_rsvector(v1, v2, typename linalg_traits::index_sorted()); + } + + template + void copy_rsvector(const V &v1, rsvector &v2, linalg_true) { + typedef typename linalg_traits::value_type T1; + typename linalg_traits::const_iterator it = vect_const_begin(v1), + ite = vect_const_end(v1); + v2.base_resize(nnz(v1)); + typename rsvector::iterator it2 = v2.begin(); + size_type nn = 0; + for (; it 
!= ite; ++it) + if ((*it) != T1(0)) { it2->c = it.index(); it2->e = *it; ++it2; ++nn; } + v2.base_resize(nn); + } + + template + void copy_rsvector(const V &v1, rsvector &v2, linalg_false) { + typedef typename linalg_traits::value_type T1; + typename linalg_traits::const_iterator it = vect_const_begin(v1), + ite = vect_const_end(v1); + v2.base_resize(nnz(v1)); + typename rsvector::iterator it2 = v2.begin(); + size_type nn = 0; + for (; it != ite; ++it) + if ((*it) != T1(0)) { it2->c = it.index(); it2->e = *it; ++it2; ++nn; } + v2.base_resize(nn); + std::sort(v2.begin(), v2.end()); + } + + template inline void clean(rsvector &v, double eps) { + typedef typename number_traits::magnitude_type R; + typename rsvector::iterator it = v.begin(), ite = v.end(); + for (; it != ite; ++it) if (gmm::abs((*it).e) <= eps) break; + if (it != ite) { + typename rsvector::iterator itc = it; + size_type erased = 1; + for (++it; it != ite; ++it) + { *itc = *it; if (gmm::abs((*it).e) <= R(eps)) ++erased; else ++itc; } + v.base_resize(v.nb_stored() - erased); + } + } + + template + inline void clean(const simple_vector_ref *> &l, double eps) { + simple_vector_ref *> + *svr = const_cast *> *>(&l); + rsvector + *pv = const_cast *>((l.origin)); + clean(*pv, eps); + svr->begin_ = vect_begin(*pv); svr->end_ = vect_end(*pv); + } + + template + inline size_type nnz(const rsvector& l) { return l.nb_stored(); } + + /*************************************************************************/ + /* */ + /* Class slvector: 'sky-line' vector. 
*/ + /* */ + /*************************************************************************/ + + template struct slvector_iterator { + typedef T value_type; + typedef T *pointer; + typedef T &reference; + typedef ptrdiff_t difference_type; + typedef std::random_access_iterator_tag iterator_category; + typedef size_t size_type; + typedef slvector_iterator iterator; + typedef typename std::vector::iterator base_iterator; + + base_iterator it; + size_type shift; + + + iterator &operator ++() + { ++it; ++shift; return *this; } + iterator &operator --() + { --it; --shift; return *this; } + iterator operator ++(int) + { iterator tmp = *this; ++(*(this)); return tmp; } + iterator operator --(int) + { iterator tmp = *this; --(*(this)); return tmp; } + iterator &operator +=(difference_type i) + { it += i; shift += i; return *this; } + iterator &operator -=(difference_type i) + { it -= i; shift -= i; return *this; } + iterator operator +(difference_type i) const + { iterator tmp = *this; return (tmp += i); } + iterator operator -(difference_type i) const + { iterator tmp = *this; return (tmp -= i); } + difference_type operator -(const iterator &i) const + { return it - i.it; } + + reference operator *() const + { return *it; } + reference operator [](int ii) + { return *(it + ii); } + + bool operator ==(const iterator &i) const + { return it == i.it; } + bool operator !=(const iterator &i) const + { return !(i == *this); } + bool operator < (const iterator &i) const + { return it < i.it; } + size_type index(void) const { return shift; } + + slvector_iterator(void) {} + slvector_iterator(const base_iterator &iter, size_type s) + : it(iter), shift(s) {} + }; + + template struct slvector_const_iterator { + typedef T value_type; + typedef const T *pointer; + typedef value_type reference; + typedef ptrdiff_t difference_type; + typedef std::random_access_iterator_tag iterator_category; + typedef size_t size_type; + typedef slvector_const_iterator iterator; + typedef typename 
std::vector::const_iterator base_iterator; + + base_iterator it; + size_type shift; + + + iterator &operator ++() + { ++it; ++shift; return *this; } + iterator &operator --() + { --it; --shift; return *this; } + iterator operator ++(int) + { iterator tmp = *this; ++(*(this)); return tmp; } + iterator operator --(int) + { iterator tmp = *this; --(*(this)); return tmp; } + iterator &operator +=(difference_type i) + { it += i; shift += i; return *this; } + iterator &operator -=(difference_type i) + { it -= i; shift -= i; return *this; } + iterator operator +(difference_type i) const + { iterator tmp = *this; return (tmp += i); } + iterator operator -(difference_type i) const + { iterator tmp = *this; return (tmp -= i); } + difference_type operator -(const iterator &i) const + { return it - i.it; } + + value_type operator *() const + { return *it; } + value_type operator [](int ii) + { return *(it + ii); } + + bool operator ==(const iterator &i) const + { return it == i.it; } + bool operator !=(const iterator &i) const + { return !(i == *this); } + bool operator < (const iterator &i) const + { return it < i.it; } + size_type index(void) const { return shift; } + + slvector_const_iterator(void) {} + slvector_const_iterator(const slvector_iterator& iter) + : it(iter.it), shift(iter.shift) {} + slvector_const_iterator(const base_iterator &iter, size_type s) + : it(iter), shift(s) {} + }; + + + /** skyline vector. 
+ */ + template class slvector { + + public : + typedef slvector_iterator iterators; + typedef slvector_const_iterator const_iterators; + typedef typename std::vector::size_type size_type; + typedef T value_type; + + protected : + std::vector data; + size_type shift; + size_type size_; + + + public : + + size_type size(void) const { return size_; } + size_type first(void) const { return shift; } + size_type last(void) const { return shift + data.size(); } + ref_elt_vector > operator [](size_type c) + { return ref_elt_vector >(this, c); } + + typename std::vector::iterator data_begin(void) { return data.begin(); } + typename std::vector::iterator data_end(void) { return data.end(); } + typename std::vector::const_iterator data_begin(void) const + { return data.begin(); } + typename std::vector::const_iterator data_end(void) const + { return data.end(); } + + void w(size_type c, const T &e); + void wa(size_type c, const T &e); + T r(size_type c) const { + GMM_ASSERT2(c < size_, "out of range"); + if (c < shift || c >= shift + data.size()) return T(0); + return data[c - shift]; + } + + inline T operator [](size_type c) const { return r(c); } + void resize(size_type); + void clear(void) { data.resize(0); shift = 0; } + void swap(slvector &v) { + std::swap(data, v.data); + std::swap(shift, v.shift); + std::swap(size_, v.size_); + } + + + slvector(void) : data(0), shift(0), size_(0) {} + explicit slvector(size_type l) : data(0), shift(0), size_(l) {} + slvector(size_type l, size_type d, size_type s) + : data(d), shift(s), size_(l) {} + + }; + + template void slvector::resize(size_type n) { + if (n < last()) { + if (shift >= n) clear(); else { data.resize(n-shift); } + } + size_ = n; + } + + template void slvector::w(size_type c, const T &e) { + GMM_ASSERT2(c < size_, "out of range"); + size_type s = data.size(); + if (!s) { data.resize(1); shift = c; } + else if (c < shift) { + data.resize(s + shift - c); + typename std::vector::iterator it = 
data.begin(),it2=data.end()-1; + typename std::vector::iterator it3 = it2 - shift + c; + for (; it3 >= it; --it3, --it2) *it2 = *it3; + std::fill(it, it + shift - c, T(0)); + shift = c; + } + else if (c >= shift + s) { + data.resize(c - shift + 1, T(0)); + // std::fill(data.begin() + s, data.end(), T(0)); + } + data[c - shift] = e; + } + + template void slvector::wa(size_type c, const T &e) { + GMM_ASSERT2(c < size_, "out of range"); + size_type s = data.size(); + if (!s) { data.resize(1, e); shift = c; return; } + else if (c < shift) { + data.resize(s + shift - c); + typename std::vector::iterator it = data.begin(),it2=data.end()-1; + typename std::vector::iterator it3 = it2 - shift + c; + for (; it3 >= it; --it3, --it2) *it2 = *it3; + std::fill(it, it + shift - c, T(0)); + shift = c; + data[c - shift] = e; + return; + } + else if (c >= shift + s) { + data.resize(c - shift + 1, T(0)); + data[c - shift] = e; + return; + // std::fill(data.begin() + s, data.end(), T(0)); + } + data[c - shift] += e; + } + + + template struct linalg_traits > { + typedef slvector this_type; + typedef this_type origin_type; + typedef linalg_false is_reference; + typedef abstract_vector linalg_type; + typedef T value_type; + typedef ref_elt_vector > reference; + typedef slvector_iterator iterator; + typedef slvector_const_iterator const_iterator; + typedef abstract_skyline storage_type; + typedef linalg_true index_sorted; + static size_type size(const this_type &v) { return v.size(); } + static iterator begin(this_type &v) + { return iterator(v.data_begin(), v.first()); } + static const_iterator begin(const this_type &v) + { return const_iterator(v.data_begin(), v.first()); } + static iterator end(this_type &v) + { return iterator(v.data_end(), v.last()); } + static const_iterator end(const this_type &v) + { return const_iterator(v.data_end(), v.last()); } + static origin_type* origin(this_type &v) { return &v; } + static const origin_type* origin(const this_type &v) { return &v; } + 
static void clear(origin_type* o, const iterator &, const iterator &) + { o->clear(); } + static void do_clear(this_type &v) { v.clear(); } + static value_type access(const origin_type *o, const const_iterator &, + const const_iterator &, size_type i) + { return (*o)[i]; } + static reference access(origin_type *o, const iterator &, const iterator &, + size_type i) + { return (*o)[i]; } + static void resize(this_type &v, size_type n) { v.resize(n); } + }; + + template std::ostream &operator << + (std::ostream &o, const slvector& v) { gmm::write(o,v); return o; } + + template + inline size_type nnz(const slvector& l) { return l.last() - l.first(); } + +} + +namespace std { + template void swap(gmm::wsvector &v, gmm::wsvector &w) + { v.swap(w);} + template void swap(gmm::rsvector &v, gmm::rsvector &w) + { v.swap(w);} + template void swap(gmm::slvector &v, gmm::slvector &w) + { v.swap(w);} +} + + + +#endif /* GMM_VECTOR_H__ */ diff --git a/gmm/gmm_vector_to_matrix.h b/gmm/gmm_vector_to_matrix.h new file mode 100644 index 000000000..83fc0c54f --- /dev/null +++ b/gmm/gmm_vector_to_matrix.h @@ -0,0 +1,340 @@ +/* -*- c++ -*- (enables emacs c++ mode) */ +/*=========================================================================== + + Copyright (C) 2003-2017 Yves Renard + + This file is a part of GetFEM++ + + GetFEM++ is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version along with the GCC Runtime Library + Exception either version 3.1 or (at your option) any later version. + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License and GCC Runtime Library Exception for more details. 
+ You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + + As a special exception, you may use this file as it is a part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this file, + or you compile this file and link it with other files to produce an + executable, this file does not by itself cause the resulting executable + to be covered by the GNU Lesser General Public License. This exception + does not however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + +===========================================================================*/ + +/**@file gmm_vector_to_matrix.h + @author Yves Renard + @date December 6, 2003. + @brief View vectors as row or column matrices. */ +#ifndef GMM_VECTOR_TO_MATRIX_H__ +#define GMM_VECTOR_TO_MATRIX_H__ + +#include "gmm_interface.h" + +namespace gmm { + + /* ********************************************************************* */ + /* row vector -> transform a vector in a (1, n) matrix. 
*/ + /* ********************************************************************* */ + + template struct gen_row_vector { + typedef gen_row_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * CPT; + typedef typename std::iterator_traits::reference ref_V; + typedef typename linalg_traits::reference reference; + + simple_vector_ref vec; + + reference operator()(size_type, size_type j) const { return vec[j]; } + + size_type nrows(void) const { return 1; } + size_type ncols(void) const { return vect_size(vec); } + + gen_row_vector(ref_V v) : vec(v) {} + gen_row_vector() {} + gen_row_vector(const gen_row_vector &cr) : vec(cr.vec) {} + }; + + template + struct gen_row_vector_iterator { + typedef gen_row_vector this_type; + typedef typename modifiable_pointer::pointer MPT; + typedef typename std::iterator_traits::value_type V; + typedef simple_vector_ref value_type; + typedef const simple_vector_ref *pointer; + typedef const simple_vector_ref &reference; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef std::random_access_iterator_tag iterator_category; + typedef gen_row_vector_iterator iterator; + + simple_vector_ref vec; + bool isend; + + iterator &operator ++() { isend = true; return *this; } + iterator &operator --() { isend = false; return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + iterator &operator +=(difference_type i) + { if (i) isend = false; return *this; } + iterator &operator -=(difference_type i) + { if (i) isend = true; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const { + return (isend == true) ? ((i.isend == true) ? 0 : 1) + : ((i.isend == true) ? 
-1 : 0); + } + + const simple_vector_ref& operator *() const { return vec; } + const simple_vector_ref& operator [](int i) { return vec; } + + bool operator ==(const iterator &i) const { return (isend == i.isend); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (*this - i < 0); } + + gen_row_vector_iterator(void) {} + gen_row_vector_iterator(const gen_row_vector_iterator &itm) + : vec(itm.vec), isend(itm.isend) {} + gen_row_vector_iterator(const gen_row_vector &m, bool iis_end) + : vec(m.vec), isend(iis_end) { } + + }; + + template + struct linalg_traits > { + typedef gen_row_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type reference; + typedef abstract_null_type sub_col_type; + typedef abstract_null_type col_iterator; + typedef abstract_null_type const_sub_col_type; + typedef abstract_null_type const_col_iterator; + typedef simple_vector_ref const_sub_row_type; + typedef typename select_ref, PT>::ref_type sub_row_type; + typedef gen_row_vector_iterator::pointer> + const_row_iterator; + typedef typename select_ref, PT>::ref_type row_iterator; + typedef typename linalg_traits::storage_type storage_type; + typedef row_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type nrows(const this_type &) { return 1; } + static size_type ncols(const this_type &m) { return m.ncols(); } + static const_sub_row_type row(const const_row_iterator &it) { return *it; } + static sub_row_type row(const row_iterator &it) { return *it; } + static const_row_iterator row_begin(const this_type &m) + { return 
const_row_iterator(m, false); } + static row_iterator row_begin(this_type &m) + { return row_iterator(m, false); } + static const_row_iterator row_end(const this_type &m) + { return const_row_iterator(m, true); } + static row_iterator row_end(this_type &m) + { return row_iterator(m, true); } + static origin_type* origin(this_type &m) { return m.vec.origin; } + static const origin_type* origin(const this_type &m) + { return m.vec.origin; } + static void do_clear(this_type &m) + { clear(row(mat_row_begin(m))); } + static value_type access(const const_row_iterator &itrow, size_type i) + { return itrow.vec[i]; } + static reference access(const row_iterator &itrow, size_type i) + { return itrow.vec[i]; } + }; + + template + std::ostream &operator <<(std::ostream &o, const gen_row_vector& m) + { gmm::write(o,m); return o; } + + /* ********************************************************************* */ + /* col vector -> transform a vector in a (n, 1) matrix. */ + /* ********************************************************************* */ + + template struct gen_col_vector { + typedef gen_col_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef V * CPT; + typedef typename std::iterator_traits::reference ref_V; + typedef typename linalg_traits::reference reference; + + simple_vector_ref vec; + + reference operator()(size_type i, size_type) const { return vec[i]; } + + size_type ncols(void) const { return 1; } + size_type nrows(void) const { return vect_size(vec); } + + gen_col_vector(ref_V v) : vec(v) {} + gen_col_vector() {} + gen_col_vector(const gen_col_vector &cr) : vec(cr.vec) {} + }; + + template + struct gen_col_vector_iterator { + typedef gen_col_vector this_type; + typedef typename modifiable_pointer::pointer MPT; + typedef typename std::iterator_traits::value_type V; + typedef simple_vector_ref value_type; + typedef const simple_vector_ref *pointer; + typedef const simple_vector_ref &reference; + typedef ptrdiff_t difference_type; + 
typedef size_t size_type; + typedef std::random_access_iterator_tag iterator_category; + typedef gen_col_vector_iterator iterator; + + simple_vector_ref vec; + bool isend; + + iterator &operator ++() { isend = true; return *this; } + iterator &operator --() { isend = false; return *this; } + iterator operator ++(int) { iterator tmp = *this; ++(*this); return tmp; } + iterator operator --(int) { iterator tmp = *this; --(*this); return tmp; } + iterator &operator +=(difference_type i) + { if (i) isend = false; return *this; } + iterator &operator -=(difference_type i) + { if (i) isend = true; return *this; } + iterator operator +(difference_type i) const + { iterator itt = *this; return (itt += i); } + iterator operator -(difference_type i) const + { iterator itt = *this; return (itt -= i); } + difference_type operator -(const iterator &i) const { + return (isend == true) ? ((i.isend == true) ? 0 : 1) + : ((i.isend == true) ? -1 : 0); + } + + const simple_vector_ref& operator *() const { return vec; } + const simple_vector_ref& operator [](int i) { return vec; } + + bool operator ==(const iterator &i) const { return (isend == i.isend); } + bool operator !=(const iterator &i) const { return !(i == *this); } + bool operator < (const iterator &i) const { return (*this - i < 0); } + + gen_col_vector_iterator(void) {} + gen_col_vector_iterator(const gen_col_vector_iterator &itm) + : vec(itm.vec), isend(itm.isend) {} + gen_col_vector_iterator(const gen_col_vector &m, bool iis_end) + : vec(m.vec), isend(iis_end) { } + + }; + + template + struct linalg_traits > { + typedef gen_col_vector this_type; + typedef typename std::iterator_traits::value_type V; + typedef typename which_reference::is_reference is_reference; + typedef abstract_matrix linalg_type; + typedef typename linalg_traits::origin_type origin_type; + typedef typename select_ref::ref_type porigin_type; + typedef typename linalg_traits::value_type value_type; + typedef typename select_ref::reference, PT>::ref_type 
reference; + typedef abstract_null_type sub_row_type; + typedef abstract_null_type row_iterator; + typedef abstract_null_type const_sub_row_type; + typedef abstract_null_type const_row_iterator; + typedef simple_vector_ref const_sub_col_type; + typedef typename select_ref, PT>::ref_type sub_col_type; + typedef gen_col_vector_iterator::pointer> + const_col_iterator; + typedef typename select_ref, PT>::ref_type col_iterator; + typedef typename linalg_traits::storage_type storage_type; + typedef col_major sub_orientation; + typedef typename linalg_traits::index_sorted index_sorted; + static size_type ncols(const this_type &) { return 1; } + static size_type nrows(const this_type &m) { return m.nrows(); } + static const_sub_col_type col(const const_col_iterator &it) { return *it; } + static sub_col_type col(const col_iterator &it) { return *it; } + static const_col_iterator col_begin(const this_type &m) + { return const_col_iterator(m, false); } + static col_iterator col_begin(this_type &m) + { return col_iterator(m, false); } + static const_col_iterator col_end(const this_type &m) + { return const_col_iterator(m, true); } + static col_iterator col_end(this_type &m) + { return col_iterator(m, true); } + static origin_type* origin(this_type &m) { return m.vec.origin; } + static const origin_type* origin(const this_type &m) + { return m.vec.origin; } + static void do_clear(this_type &m) + { clear(col(mat_col_begin(m))); } + static value_type access(const const_col_iterator &itcol, size_type i) + { return itcol.vec[i]; } + static reference access(const col_iterator &itcol, size_type i) + { return itcol.vec[i]; } + }; + + template + std::ostream &operator <<(std::ostream &o, const gen_col_vector& m) + { gmm::write(o,m); return o; } + + /* ******************************************************************** */ + /* col and row vectors */ + /* ******************************************************************** */ + + + template inline + typename select_return< 
gen_row_vector, gen_row_vector, + const V *>::return_type + row_vector(const V& v) { + return typename select_return< gen_row_vector, + gen_row_vector, const V *>::return_type(linalg_cast(v)); + } + + template inline + typename select_return< gen_row_vector, gen_row_vector, + V *>::return_type + row_vector(V& v) { + return typename select_return< gen_row_vector, + gen_row_vector, V *>::return_type(linalg_cast(v)); + } + + template inline gen_row_vector + const_row_vector(V& v) + { return gen_row_vector(v); } + + + template inline + typename select_return< gen_col_vector, gen_col_vector, + const V *>::return_type + col_vector(const V& v) { + return typename select_return< gen_col_vector, + gen_col_vector, const V *>::return_type(linalg_cast(v)); + } + + template inline + typename select_return< gen_col_vector, gen_col_vector, + V *>::return_type + col_vector(V& v) { + return typename select_return< gen_col_vector, + gen_col_vector, V *>::return_type(linalg_cast(v)); + } + + template inline gen_col_vector + const_col_vector(V& v) + { return gen_col_vector(v); } + + +} + +#endif // GMM_VECTOR_TO_MATRIX_H__ diff --git a/hecl b/hecl index 34e28fe18..f949aabf5 160000 --- a/hecl +++ b/hecl @@ -1 +1 @@ -Subproject commit 34e28fe18c77efe661e04742f9b3350eba880267 +Subproject commit f949aabf5c4632df97746c273cab27a1ea1bffe4