From 03f155fcf5ba4c79fa7537084a51a04da5bdbfdf Mon Sep 17 00:00:00 2001
From: Jack Andersen
Date: Sat, 4 Mar 2017 21:54:58 -1000
Subject: [PATCH] Move xxhash to boo; cached shader components

---
 CMakeLists.txt                                |   6 +-
 include/boo/graphicsdev/GL.hpp                |  28 +-
 .../boo/graphicsdev/IGraphicsCommandQueue.hpp |   2 +-
 .../boo/graphicsdev/IGraphicsDataFactory.hpp  |  12 +-
 include/boo/graphicsdev/Metal.hpp             |  31 +-
 lib/graphicsdev/Common.hpp                    |  38 +
 lib/graphicsdev/GL.cpp                        | 264 +++--
 lib/graphicsdev/Metal.mm                      | 200 +++-
 lib/mac/WindowCocoa.mm                        |   7 +-
 lib/win/WindowWin32.cpp                       |   3 +-
 lib/x11/WindowXlib.cpp                        |   3 +-
 test/CMakeLists.txt                           |   2 +-
 xxhash/CMakeLists.txt                         |   1 +
 xxhash/LICENSE                                |  24 +
 xxhash/xxhash.c                               | 962 ++++++++++++++++++
 xxhash/xxhash.h                               | 192 ++++
 16 files changed, 1558 insertions(+), 217 deletions(-)
 create mode 100644 xxhash/CMakeLists.txt
 create mode 100644 xxhash/LICENSE
 create mode 100644 xxhash/xxhash.c
 create mode 100644 xxhash/xxhash.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13b8d33..3dddc78 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,8 @@ if (NOT TARGET logvisor)
   add_subdirectory(logvisor)
 endif()
 
+add_subdirectory(xxhash)
+
 set(WITH_LSR_BINDINGS OFF)
 set(BUILD_TESTS OFF)
 set(BUILD_SHARED_LIBS OFF)
@@ -18,7 +20,7 @@ add_subdirectory(soxr)
 
 set(BOO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "boo include path" FORCE)
 
-include_directories(include ${LOGVISOR_INCLUDE_DIR})
+include_directories(include xxhash ${LOGVISOR_INCLUDE_DIR})
 
 if(NOT GEKKO AND NOT CAFE)
 list(APPEND PLAT_SRCS
@@ -195,7 +197,7 @@ target_include_directories(glslang-default-resource-limits PUBLIC
     ${CMAKE_CURRENT_SOURCE_DIR}/glslang
 )
 
-list(APPEND _BOO_SYS_LIBS glslang HLSL soxr OSDependent OGLCompiler SPIRV glslang-default-resource-limits)
+list(APPEND _BOO_SYS_LIBS glslang HLSL soxr xxhash OSDependent OGLCompiler SPIRV glslang-default-resource-limits)
 
 set(BOO_SYS_LIBS ${_BOO_SYS_LIBS} CACHE PATH "boo system libraries" FORCE)
 set(BOO_SYS_DEFINES ${_BOO_SYS_DEFINES} CACHE PATH "boo system defines" FORCE)
diff --git a/include/boo/graphicsdev/GL.hpp b/include/boo/graphicsdev/GL.hpp
index de9c733..5902978 100644
--- a/include/boo/graphicsdev/GL.hpp
+++ b/include/boo/graphicsdev/GL.hpp
@@ -5,39 +5,16 @@
 #include "IGraphicsCommandQueue.hpp"
 #include "boo/IGraphicsContext.hpp"
 #include "GLSLMacros.hpp"
-#include
-#include
-#include
-#include
 
 namespace boo
 {
 
 class GLDataFactory : public IGraphicsDataFactory
 {
-    friend struct GLCommandQueue;
-    IGraphicsContext* m_parent;
-    uint32_t m_drawSamples;
-    static ThreadLocalPtr m_deferredData;
-    std::unordered_set m_committedData;
-    std::unordered_set m_committedPools;
-    std::mutex m_committedMutex;
-    void destroyData(IGraphicsData*);
-    void destroyAllData();
-    void destroyPool(IGraphicsBufferPool*);
-    IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
-                                    size_t stride, size_t count);
-    void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
 public:
-    GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples);
-    ~GLDataFactory() {destroyAllData();}
-
-    Platform platform() const {return Platform::OpenGL;}
-    const SystemChar* platformName() const {return _S("OpenGL");}
-
     class Context : public IGraphicsDataFactory::Context
     {
-        friend class GLDataFactory;
+        friend class GLDataFactoryImpl;
         GLDataFactory& m_parent;
         Context(GLDataFactory& parent) : m_parent(parent) {}
     public:
@@ -73,9 +50,6 @@ public:
                                                const size_t* ubufOffs, const size_t* ubufSizes,
                                                size_t texCount, ITexture** texs,
                                                size_t baseVert = 0, size_t baseInst = 0);
     };
-
-    GraphicsDataToken commitTransaction(const FactoryCommitFunc&);
-    GraphicsBufferPoolToken newBufferPool();
 };
 }
diff --git a/include/boo/graphicsdev/IGraphicsCommandQueue.hpp b/include/boo/graphicsdev/IGraphicsCommandQueue.hpp
index 986195c..63f6433 100644
--- a/include/boo/graphicsdev/IGraphicsCommandQueue.hpp
+++ b/include/boo/graphicsdev/IGraphicsCommandQueue.hpp
@@ -10,7 +10,7 @@ namespace boo
 
 struct IGraphicsCommandQueue
 {
-    virtual ~IGraphicsCommandQueue() {}
+    virtual ~IGraphicsCommandQueue() = default;
 
     using Platform = IGraphicsDataFactory::Platform;
     virtual Platform platform() const=0;
diff --git a/include/boo/graphicsdev/IGraphicsDataFactory.hpp b/include/boo/graphicsdev/IGraphicsDataFactory.hpp
index 3355beb..0cd731b 100644
--- a/include/boo/graphicsdev/IGraphicsDataFactory.hpp
+++ b/include/boo/graphicsdev/IGraphicsDataFactory.hpp
@@ -17,7 +17,7 @@ struct IGraphicsBuffer
 protected:
     bool m_dynamic;
     IGraphicsBuffer(bool dynamic) : m_dynamic(dynamic) {}
-    virtual ~IGraphicsBuffer() {}
+    virtual ~IGraphicsBuffer() = default;
 };
 
 /** Static resource buffer for verts, indices, uniform constants */
@@ -190,7 +190,7 @@ enum class BlendFactor
 /** Factory object for creating batches of resources as an IGraphicsData token */
 struct IGraphicsDataFactory
 {
-    virtual ~IGraphicsDataFactory() {}
+    virtual ~IGraphicsDataFactory() = default;
 
     enum class Platform
     {
@@ -276,10 +276,10 @@ using FactoryCommitFunc = std::function
-#include
-#include
-#include
 
 namespace boo
 {
-struct MetalContext;
 
 class MetalDataFactory : public IGraphicsDataFactory
 {
-    friend struct MetalCommandQueue;
-    IGraphicsContext* m_parent;
-    static ThreadLocalPtr m_deferredData;
-    std::unordered_set m_committedData;
-    std::unordered_set m_committedPools;
-    std::mutex m_committedMutex;
-    struct MetalContext* m_ctx;
-    uint32_t m_sampleCount;
-
-    void destroyData(IGraphicsData*);
-    void destroyAllData();
-    void destroyPool(IGraphicsBufferPool*);
-    IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
-                                    size_t stride, size_t count);
-    void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
 public:
-    MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount);
-    ~MetalDataFactory() {}
-
-    Platform platform() const {return Platform::Metal;}
-    const char* platformName() const {return "Metal";}
-
     class Context : public IGraphicsDataFactory::Context
     {
-        friend class MetalDataFactory;
+        friend class MetalDataFactoryImpl;
         MetalDataFactory& m_parent;
         Context(MetalDataFactory& parent) : m_parent(parent) {}
     public:
@@ -76,9 +50,6 @@ public:
                                                const size_t* ubufOffs, const size_t* ubufSizes,
                                                size_t texCount, ITexture** texs,
                                                size_t baseVert = 0, size_t baseInst = 0);
     };
-
-    GraphicsDataToken commitTransaction(const std::function&);
-    GraphicsBufferPoolToken newBufferPool();
 };
 }
diff --git a/lib/graphicsdev/Common.hpp b/lib/graphicsdev/Common.hpp
index 52907b8..73ea4b7 100644
--- a/lib/graphicsdev/Common.hpp
+++ b/lib/graphicsdev/Common.hpp
@@ -50,6 +50,44 @@ public:
     Token lock() const { return Token(this); }
 };
 
+template
+class IShareableShader
+{
+    std::atomic_int m_refCount = {0};
+    FactoryImpl& m_factory;
+    uint64_t m_key;
+public:
+    IShareableShader(FactoryImpl& factory, uint64_t key)
+    : m_factory(factory), m_key(key) {}
+    void increment() { m_refCount++; }
+    void decrement()
+    {
+        if (m_refCount.fetch_sub(1) == 1)
+            m_factory._unregisterShareableShader(m_key);
+    }
+
+    class Token
+    {
+        IShareableShader* m_parent = nullptr;
+    public:
+        Token() = default;
+        Token(IShareableShader* p)
+        : m_parent(p)
+        { m_parent->increment(); }
+        Token& operator=(const Token&) = delete;
+        Token(const Token&) = delete;
+        Token& operator=(Token&& other)
+        { m_parent = other.m_parent; other.m_parent = nullptr; return *this; }
+        Token(Token&& other)
+        { m_parent = other.m_parent; other.m_parent = nullptr; }
+        ~Token() { if (m_parent) m_parent->decrement(); }
+        operator bool() const { return m_parent != nullptr; }
+        ShaderImpl& get() const { return static_cast(*m_parent); }
+    };
+
+    Token lock() { return Token(this); }
+};
+
 }
 
 #endif // BOO_GRAPHICSDEV_COMMON_HPP
diff --git a/lib/graphicsdev/GL.cpp b/lib/graphicsdev/GL.cpp
index 06dd010..b60b193 100644
--- a/lib/graphicsdev/GL.cpp
+++ b/lib/graphicsdev/GL.cpp
@@ -8,7 +8,10 @@
 #include
 #include
 #include
+#include
 #include
+#include
+#include "xxhash.h"
 
 #include "logvisor/logvisor.hpp"
 
@@ -18,8 +21,47 @@ namespace boo
 {
 static logvisor::Module Log("boo::GL");
+class GLDataFactoryImpl;
 
-ThreadLocalPtr GLDataFactory::m_deferredData;
+struct GLShareableShader : IShareableShader
+{
+    GLuint m_shader = 0;
+    GLShareableShader(GLDataFactoryImpl& fac, uint64_t key, GLuint s)
+    : IShareableShader(fac, key), m_shader(s) {}
+    ~GLShareableShader() { glDeleteShader(m_shader); }
+};
+
+class GLDataFactoryImpl : public GLDataFactory
+{
+    friend struct GLCommandQueue;
+    friend class GLDataFactory::Context;
+    IGraphicsContext* m_parent;
+    uint32_t m_drawSamples;
+    static ThreadLocalPtr m_deferredData;
+    std::unordered_set m_committedData;
+    std::unordered_set m_committedPools;
+    std::mutex m_committedMutex;
+    std::unordered_map> m_sharedShaders;
+    void destroyData(IGraphicsData*);
+    void destroyAllData();
+    void destroyPool(IGraphicsBufferPool*);
+    IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
+                                    size_t stride, size_t count);
+    void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
+public:
+    GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples);
+    ~GLDataFactoryImpl() {destroyAllData();}
+
+    Platform platform() const {return Platform::OpenGL;}
+    const SystemChar* platformName() const {return _S("OpenGL");}
+
+    GraphicsDataToken commitTransaction(const FactoryCommitFunc&);
+    GraphicsBufferPoolToken newBufferPool();
+
+    void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); }
+};
+
+ThreadLocalPtr GLDataFactoryImpl::m_deferredData;
 
 struct GLData : IGraphicsDataPriv
 {
     std::vector> m_SPs;
@@ -75,6 +117,7 @@ public:
 class GLGraphicsBufferD : public IGraphicsBufferD
 {
     friend class GLDataFactory;
+    friend class GLDataFactoryImpl;
     friend struct GLCommandQueue;
     GLuint m_bufs[3];
     GLenum m_target;
@@ -109,7 +152,7 @@ IGraphicsBufferS*
 GLDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count)
 {
     GLGraphicsBufferS* retval = new GLGraphicsBufferS(use, data, stride * count);
-    m_deferredData->m_SBufs.emplace_back(retval);
+    GLDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval);
     return retval;
 }
 
@@ -344,7 +387,7 @@ GLDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mip
                                          const void* data, size_t sz)
 {
     GLTextureS* retval = new GLTextureS(width, height, mips, fmt, data, sz);
-    m_deferredData->m_STexs.emplace_back(retval);
+    GLDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval);
     return retval;
 }
 
@@ -353,7 +396,7 @@ GLDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_
                                               TextureFormat fmt, const void *data, size_t sz)
 {
     GLTextureSA* retval = new GLTextureSA(width,
height, layers, mips, fmt, data, sz); - m_deferredData->m_SATexs.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval); return retval; } @@ -362,8 +405,8 @@ class GLShaderPipeline : public IShaderPipeline friend class GLDataFactory; friend struct GLCommandQueue; friend struct GLShaderDataBinding; - GLuint m_vert = 0; - GLuint m_frag = 0; + GLShareableShader::Token m_vert; + GLShareableShader::Token m_frag; GLuint m_prog = 0; GLenum m_sfactor = GL_ONE; GLenum m_dfactor = GL_ZERO; @@ -372,48 +415,17 @@ class GLShaderPipeline : public IShaderPipeline bool m_depthWrite = true; bool m_backfaceCulling = true; std::vector m_uniLocs; - bool initObjects() - { - m_vert = glCreateShader(GL_VERTEX_SHADER); - m_frag = glCreateShader(GL_FRAGMENT_SHADER); - m_prog = glCreateProgram(); - if (!m_vert || !m_frag || !m_prog) - { - glDeleteShader(m_vert); - m_vert = 0; - glDeleteShader(m_frag); - m_frag = 0; - glDeleteProgram(m_prog); - m_prog = 0; - return false; - } - glAttachShader(m_prog, m_vert); - glAttachShader(m_prog, m_frag); - return true; - } - void clearObjects() - { - if (m_vert) - glDeleteShader(m_vert); - if (m_frag) - glDeleteShader(m_frag); - if (m_prog) - glDeleteProgram(m_prog); - } GLShaderPipeline() = default; public: operator bool() const {return m_prog != 0;} - ~GLShaderPipeline() {clearObjects();} + ~GLShaderPipeline() { glDeleteProgram(m_prog); } GLShaderPipeline& operator=(const GLShaderPipeline&) = delete; GLShaderPipeline(const GLShaderPipeline&) = delete; GLShaderPipeline& operator=(GLShaderPipeline&& other) { - m_vert = other.m_vert; - other.m_vert = 0; - m_frag = other.m_frag; - other.m_frag = 0; - m_prog = other.m_prog; - other.m_prog = 0; + m_vert = std::move(other.m_vert); + m_frag = std::move(other.m_frag); + m_prog = std::move(other.m_prog); m_sfactor = other.m_sfactor; m_dfactor = other.m_dfactor; m_depthTest = other.m_depthTest; @@ -482,47 +494,95 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline BlendFactor srcFac, BlendFactor dstFac, Primitive prim, bool depthTest, bool depthWrite, bool backfaceCulling) { + GLDataFactoryImpl& factory = static_cast(m_parent); GLShaderPipeline shader; - if (!shader.initObjects()) - { - Log.report(logvisor::Error, "unable to create shader objects\n"); - return nullptr; - } - shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)]; - shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)]; - shader.m_depthTest = depthTest; - shader.m_depthWrite = depthWrite; - shader.m_backfaceCulling = backfaceCulling; - shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)]; - glShaderSource(shader.m_vert, 1, &vertSource, nullptr); - glCompileShader(shader.m_vert); + XXH64_state_t hashState; + uint64_t hashes[2]; + XXH64_reset(&hashState, 0); + XXH64_update(&hashState, vertSource, strlen(vertSource)); + hashes[0] = XXH64_digest(&hashState); + XXH64_reset(&hashState, 0); + XXH64_update(&hashState, fragSource, strlen(fragSource)); + hashes[1] = XXH64_digest(&hashState); + GLint status; - glGetShaderiv(shader.m_vert, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) + auto vertFind = factory.m_sharedShaders.find(hashes[0]); + if (vertFind != factory.m_sharedShaders.end()) { - GLint logLen; - glGetShaderiv(shader.m_vert, GL_INFO_LOG_LENGTH, &logLen); - char* log = (char*)malloc(logLen); - glGetShaderInfoLog(shader.m_vert, logLen, nullptr, log); - Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource); - free(log); + shader.m_vert = vertFind->second->lock(); + } + else + { + GLuint sobj = 
glCreateShader(GL_VERTEX_SHADER); + if (!sobj) + { + Log.report(logvisor::Error, "unable to create vert shader"); + return nullptr; + } + + glShaderSource(sobj, 1, &vertSource, nullptr); + glCompileShader(sobj); + glGetShaderiv(sobj, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) + { + GLint logLen; + glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen); + char* log = (char*)malloc(logLen); + glGetShaderInfoLog(sobj, logLen, nullptr, log); + Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource); + free(log); + return nullptr; + } + + auto it = + factory.m_sharedShaders.emplace(std::make_pair(hashes[0], + std::make_unique(factory, hashes[0], sobj))).first; + shader.m_vert = it->second->lock(); + } + auto fragFind = factory.m_sharedShaders.find(hashes[1]); + if (fragFind != factory.m_sharedShaders.end()) + { + shader.m_frag = fragFind->second->lock(); + } + else + { + GLuint sobj = glCreateShader(GL_FRAGMENT_SHADER); + if (!sobj) + { + Log.report(logvisor::Error, "unable to create frag shader"); + return nullptr; + } + + glShaderSource(sobj, 1, &fragSource, nullptr); + glCompileShader(sobj); + glGetShaderiv(sobj, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) + { + GLint logLen; + glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen); + char* log = (char*)malloc(logLen); + glGetShaderInfoLog(sobj, logLen, nullptr, log); + Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource); + free(log); + return nullptr; + } + + auto it = + factory.m_sharedShaders.emplace(std::make_pair(hashes[1], + std::make_unique(factory, hashes[1], sobj))).first; + shader.m_frag = it->second->lock(); + } + + shader.m_prog = glCreateProgram(); + if (!shader.m_prog) + { + Log.report(logvisor::Error, "unable to create shader program"); return nullptr; } - glShaderSource(shader.m_frag, 1, &fragSource, nullptr); - glCompileShader(shader.m_frag); - glGetShaderiv(shader.m_frag, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) - { - GLint logLen; - glGetShaderiv(shader.m_frag, GL_INFO_LOG_LENGTH, &logLen); - char* log = (char*)malloc(logLen); - glGetShaderInfoLog(shader.m_frag, logLen, nullptr, log); - Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource); - free(log); - return nullptr; - } + glAttachShader(shader.m_prog, shader.m_vert.get().m_shader); + glAttachShader(shader.m_prog, shader.m_frag.get().m_shader); glLinkProgram(shader.m_prog); glGetProgramiv(shader.m_prog, GL_LINK_STATUS, &status); @@ -563,8 +623,15 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline } } + shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)]; + shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)]; + shader.m_depthTest = depthTest; + shader.m_depthWrite = depthWrite; + shader.m_backfaceCulling = backfaceCulling; + shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)]; + GLShaderPipeline* retval = new GLShaderPipeline(std::move(shader)); - m_deferredData->m_SPs.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval); return retval; } @@ -699,17 +766,17 @@ GLDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline, size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) { GLShaderDataBinding* retval = - new GLShaderDataBinding(m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs, + new GLShaderDataBinding(GLDataFactoryImpl::m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs, ubufOffs, ubufSizes, texCount, texs); - m_deferredData->m_SBinds.emplace_back(retval); + 
GLDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval); return retval; } -GLDataFactory::GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples) +GLDataFactoryImpl::GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples) : m_parent(parent), m_drawSamples(drawSamples) {} -GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& trans) +GraphicsDataToken GLDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans) { if (m_deferredData.get()) Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); @@ -736,7 +803,7 @@ GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& tran return GraphicsDataToken(this, retval); } -GraphicsBufferPoolToken GLDataFactory::newBufferPool() +GraphicsBufferPoolToken GLDataFactoryImpl::newBufferPool() { std::unique_lock lk(m_committedMutex); GLPool* retval = new GLPool; @@ -744,7 +811,7 @@ GraphicsBufferPoolToken GLDataFactory::newBufferPool() return GraphicsBufferPoolToken(this, retval); } -void GLDataFactory::destroyData(IGraphicsData* d) +void GLDataFactoryImpl::destroyData(IGraphicsData* d) { std::unique_lock lk(m_committedMutex); GLData* data = static_cast(d); @@ -752,7 +819,7 @@ void GLDataFactory::destroyData(IGraphicsData* d) data->decrement(); } -void GLDataFactory::destroyAllData() +void GLDataFactoryImpl::destroyAllData() { std::unique_lock lk(m_committedMutex); for (GLData* data : m_committedData) @@ -763,7 +830,7 @@ void GLDataFactory::destroyAllData() m_committedPools.clear(); } -void GLDataFactory::destroyPool(IGraphicsBufferPool* p) +void GLDataFactoryImpl::destroyPool(IGraphicsBufferPool* p) { std::unique_lock lk(m_committedMutex); GLPool* pool = static_cast(p); @@ -771,8 +838,8 @@ void GLDataFactory::destroyPool(IGraphicsBufferPool* p) delete pool; } -IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, - size_t stride, size_t count) +IGraphicsBufferD* GLDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, + size_t stride, size_t count) { GLPool* pool = static_cast(p); GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); @@ -780,7 +847,7 @@ IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse return retval; } -void GLDataFactory::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf) +void GLDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf) { GLPool* pool = static_cast(p); pool->m_DBufs.erase(static_cast(buf)); @@ -1336,7 +1403,7 @@ struct GLCommandQueue : IGraphicsCommandQueue } /* Update dynamic data here */ - GLDataFactory* gfxF = static_cast(m_parent->getDataFactory()); + GLDataFactoryImpl* gfxF = static_cast(m_parent->getDataFactory()); std::unique_lock datalk(gfxF->m_committedMutex); for (GLData* d : gfxF->m_committedData) { @@ -1403,7 +1470,7 @@ IGraphicsBufferD* GLDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) { GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); - m_deferredData->m_DBufs.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval); return retval; } @@ -1478,7 +1545,7 @@ ITextureD* GLDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) { GLTextureD* retval = new GLTextureD(width, height, fmt); - m_deferredData->m_DTexs.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval); return retval; } @@ -1546,11 +1613,12 @@ ITextureR* 
GLDataFactory::Context::newRenderTexture(size_t width, size_t height, bool enableShaderColorBinding, bool enableShaderDepthBinding) { - GLCommandQueue* q = static_cast(m_parent.m_parent->getCommandQueue()); - GLTextureR* retval = new GLTextureR(q, width, height, m_parent.m_drawSamples, + GLDataFactoryImpl& factory = static_cast(m_parent); + GLCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); + GLTextureR* retval = new GLTextureR(q, width, height, factory.m_drawSamples, enableShaderColorBinding, enableShaderDepthBinding); q->resizeRenderTexture(retval, width, height); - m_deferredData->m_RTexs.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval); return retval; } @@ -1572,9 +1640,10 @@ IVertexFormat* GLDataFactory::Context::newVertexFormat (size_t elementCount, const VertexElementDescriptor* elements, size_t baseVert, size_t baseInst) { - GLCommandQueue* q = static_cast(m_parent.m_parent->getCommandQueue()); + GLDataFactoryImpl& factory = static_cast(m_parent); + GLCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); GLVertexFormat* retval = new struct GLVertexFormat(q, elementCount, elements, baseVert, baseInst); - m_deferredData->m_VFmts.emplace_back(retval); + GLDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval); return retval; } @@ -1583,4 +1652,9 @@ IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent) return new struct GLCommandQueue(parent); } +IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples) +{ + return new class GLDataFactoryImpl(parent, drawSamples); +} + } diff --git a/lib/graphicsdev/Metal.mm b/lib/graphicsdev/Metal.mm index e6feb21..b241805 100644 --- a/lib/graphicsdev/Metal.mm +++ b/lib/graphicsdev/Metal.mm @@ -5,6 +5,9 @@ #include "boo/IGraphicsContext.hpp" #include "Common.hpp" #include +#include +#include +#include "xxhash.h" #if !__has_feature(objc_arc) #error ARC Required @@ -17,8 +20,48 @@ namespace boo { static logvisor::Module Log("boo::Metal"); struct MetalCommandQueue; +class MetalDataFactoryImpl; -ThreadLocalPtr MetalDataFactory::m_deferredData; +struct MetalShareableShader : IShareableShader +{ + id m_shader; + MetalShareableShader(MetalDataFactoryImpl& fac, uint64_t key, id s) + : IShareableShader(fac, key), m_shader(s) {} +}; + +class MetalDataFactoryImpl : public MetalDataFactory +{ + friend struct MetalCommandQueue; + friend class MetalDataFactory::Context; + IGraphicsContext* m_parent; + static ThreadLocalPtr m_deferredData; + std::unordered_set m_committedData; + std::unordered_set m_committedPools; + std::mutex m_committedMutex; + std::unordered_map> m_sharedShaders; + struct MetalContext* m_ctx; + uint32_t m_sampleCount; + + void destroyData(IGraphicsData*); + void destroyAllData(); + void destroyPool(IGraphicsBufferPool*); + IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use, + size_t stride, size_t count); + void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf); +public: + MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount); + ~MetalDataFactoryImpl() {} + + Platform platform() const {return Platform::Metal;} + const char* platformName() const {return "Metal";} + + GraphicsDataToken commitTransaction(const std::function&); + GraphicsBufferPoolToken newBufferPool(); + + void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); } +}; + +ThreadLocalPtr MetalDataFactoryImpl::m_deferredData; struct MetalData : IGraphicsDataPriv { std::vector> 
m_SPs; @@ -60,6 +103,7 @@ public: class MetalGraphicsBufferD : public IGraphicsBufferD { friend class MetalDataFactory; + friend class MetalDataFactoryImpl; friend struct MetalCommandQueue; MetalCommandQueue* m_q; std::unique_ptr m_cpuBuf; @@ -502,19 +546,24 @@ class MetalShaderPipeline : public IShaderPipeline MTLCullMode m_cullMode = MTLCullModeNone; MTLPrimitiveType m_drawPrim; const MetalVertexFormat* m_vtxFmt; + MetalShareableShader::Token m_vert; + MetalShareableShader::Token m_frag; - MetalShaderPipeline(MetalContext* ctx, id vert, id frag, + MetalShaderPipeline(MetalContext* ctx, + MetalShareableShader::Token&& vert, + MetalShareableShader::Token&& frag, const MetalVertexFormat* vtxFmt, NSUInteger targetSamples, BlendFactor srcFac, BlendFactor dstFac, Primitive prim, bool depthTest, bool depthWrite, bool backfaceCulling) - : m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt) + : m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt), + m_vert(std::move(vert)), m_frag(std::move(frag)) { if (backfaceCulling) m_cullMode = MTLCullModeBack; MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new]; - desc.vertexFunction = vert; - desc.fragmentFunction = frag; + desc.vertexFunction = m_vert.get().m_shader; + desc.fragmentFunction = m_frag.get().m_shader; desc.vertexDescriptor = vtxFmt->m_vdesc; desc.sampleCount = targetSamples; desc.colorAttachments[0].pixelFormat = MTLPixelFormatBGRA8Unorm; @@ -897,7 +946,7 @@ struct MetalCommandQueue : IGraphicsCommandQueue return; /* Update dynamic data here */ - MetalDataFactory* gfxF = static_cast(m_parent->getDataFactory()); + MetalDataFactoryImpl* gfxF = static_cast(m_parent->getDataFactory()); std::unique_lock datalk(gfxF->m_committedMutex); for (MetalData* d : gfxF->m_committedData) { @@ -1042,49 +1091,55 @@ void MetalTextureD::unmap() m_validSlots = 0; } -MetalDataFactory::MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) +MetalDataFactoryImpl::MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) : m_parent(parent), m_ctx(ctx), m_sampleCount(sampleCount) {} IGraphicsBufferS* MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) { - MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, m_parent.m_ctx, data, stride, count); - m_deferredData->m_SBufs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, factory.m_ctx, data, stride, count); + MetalDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval); return retval; } IGraphicsBufferD* MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) { - MetalCommandQueue* q = static_cast(m_parent.m_parent->getCommandQueue()); - MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, m_parent.m_ctx, stride, count); - m_deferredData->m_DBufs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); + MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, factory.m_ctx, stride, count); + MetalDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval); return retval; } ITextureS* MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt, const void* data, size_t sz) { - MetalTextureS* retval = new MetalTextureS(m_parent.m_ctx, width, height, mips, fmt, data, sz); - 
m_deferredData->m_STexs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalTextureS* retval = new MetalTextureS(factory.m_ctx, width, height, mips, fmt, data, sz); + MetalDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval); return retval; } ITextureSA* MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips, TextureFormat fmt, const void* data, size_t sz) { - MetalTextureSA* retval = new MetalTextureSA(m_parent.m_ctx, width, height, layers, mips, fmt, data, sz); - m_deferredData->m_SATexs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalTextureSA* retval = new MetalTextureSA(factory.m_ctx, width, height, layers, mips, fmt, data, sz); + MetalDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval); return retval; } ITextureD* MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) { - MetalCommandQueue* q = static_cast(m_parent.m_parent->getCommandQueue()); - MetalTextureD* retval = new MetalTextureD(q, m_parent.m_ctx, width, height, fmt); - m_deferredData->m_DTexs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalCommandQueue* q = static_cast(factory.m_parent->getCommandQueue()); + MetalTextureD* retval = new MetalTextureD(q, factory.m_ctx, width, height, fmt); + MetalDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval); return retval; } ITextureR* MetalDataFactory::Context::newRenderTexture(size_t width, size_t height, bool enableShaderColorBinding, bool enableShaderDepthBinding) { - MetalTextureR* retval = new MetalTextureR(m_parent.m_ctx, width, height, m_parent.m_sampleCount, enableShaderColorBinding); - m_deferredData->m_RTexs.emplace_back(retval); + MetalDataFactoryImpl& factory = static_cast(m_parent); + MetalTextureR* retval = new MetalTextureR(factory.m_ctx, width, height, factory.m_sampleCount, enableShaderColorBinding); + MetalDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval); return retval; } @@ -1092,7 +1147,7 @@ IVertexFormat* MetalDataFactory::Context::newVertexFormat(size_t elementCount, c size_t baseVert, size_t baseInst) { MetalVertexFormat* retval = new struct MetalVertexFormat(elementCount, elements); - m_deferredData->m_VFmts.emplace_back(retval); + MetalDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval); return retval; } @@ -1101,34 +1156,71 @@ IShaderPipeline* MetalDataFactory::Context::newShaderPipeline(const char* vertSo BlendFactor srcFac, BlendFactor dstFac, Primitive prim, bool depthTest, bool depthWrite, bool backfaceCulling) { + MetalDataFactoryImpl& factory = static_cast(m_parent); MTLCompileOptions* compOpts = [MTLCompileOptions new]; compOpts.languageVersion = MTLLanguageVersion1_1; NSError* err = nullptr; - id vertShaderLib = [m_parent.m_ctx->m_dev newLibraryWithSource:@(vertSource) - options:compOpts - error:&err]; - if (!vertShaderLib) - { - printf("%s\n", vertSource); - Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); - } - id vertFunc = [vertShaderLib newFunctionWithName:@"vmain"]; + XXH64_state_t hashState; + uint64_t hashes[2]; + XXH64_reset(&hashState, 0); + XXH64_update(&hashState, vertSource, strlen(vertSource)); + hashes[0] = XXH64_digest(&hashState); + XXH64_reset(&hashState, 0); + XXH64_update(&hashState, fragSource, strlen(fragSource)); + hashes[1] = XXH64_digest(&hashState); - id fragShaderLib = [m_parent.m_ctx->m_dev 
newLibraryWithSource:@(fragSource) - options:compOpts - error:&err]; - if (!fragShaderLib) + MetalShareableShader::Token vertShader; + MetalShareableShader::Token fragShader; + auto vertFind = factory.m_sharedShaders.find(hashes[0]); + if (vertFind != factory.m_sharedShaders.end()) { - printf("%s\n", fragSource); - Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]); + vertShader = vertFind->second->lock(); } - id fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; + else + { + id vertShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(vertSource) + options:compOpts + error:&err]; + if (!vertShaderLib) + { + printf("%s\n", vertSource); + Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); + } + id vertFunc = [vertShaderLib newFunctionWithName:@"vmain"]; - MetalShaderPipeline* retval = new MetalShaderPipeline(m_parent.m_ctx, vertFunc, fragFunc, + auto it = + factory.m_sharedShaders.emplace(std::make_pair(hashes[0], + std::make_unique(factory, hashes[0], vertFunc))).first; + vertShader = it->second->lock(); + } + auto fragFind = factory.m_sharedShaders.find(hashes[1]); + if (fragFind != factory.m_sharedShaders.end()) + { + fragShader = fragFind->second->lock(); + } + else + { + id fragShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(fragSource) + options:compOpts + error:&err]; + if (!fragShaderLib) + { + printf("%s\n", fragSource); + Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]); + } + id fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; + + auto it = + factory.m_sharedShaders.emplace(std::make_pair(hashes[1], + std::make_unique(factory, hashes[1], fragFunc))).first; + fragShader = it->second->lock(); + } + + MetalShaderPipeline* retval = new MetalShaderPipeline(factory.m_ctx, std::move(vertShader), std::move(fragShader), static_cast(vtxFmt), targetSamples, srcFac, dstFac, prim, depthTest, depthWrite, backfaceCulling); - m_deferredData->m_SPs.emplace_back(retval); + MetalDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval); return retval; } @@ -1140,16 +1232,17 @@ MetalDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline, const size_t* ubufOffs, const size_t* ubufSizes, size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) { + MetalDataFactoryImpl& factory = static_cast(m_parent); MetalShaderDataBinding* retval = - new MetalShaderDataBinding(m_deferredData.get(), - m_parent.m_ctx, pipeline, vbuf, instVbo, ibuf, + new MetalShaderDataBinding(MetalDataFactoryImpl::m_deferredData.get(), + factory.m_ctx, pipeline, vbuf, instVbo, ibuf, ubufCount, ubufs, ubufStages, ubufOffs, ubufSizes, texCount, texs, baseVert, baseInst); - m_deferredData->m_SBinds.emplace_back(retval); + MetalDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval); return retval; } -GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& trans) +GraphicsDataToken MetalDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans) { if (m_deferredData.get()) Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); @@ -1170,7 +1263,7 @@ GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& t return GraphicsDataToken(this, retval); } -GraphicsBufferPoolToken MetalDataFactory::newBufferPool() +GraphicsBufferPoolToken MetalDataFactoryImpl::newBufferPool() { std::unique_lock lk(m_committedMutex); MetalPool* retval = new MetalPool; @@ -1178,7 
+1271,7 @@ GraphicsBufferPoolToken MetalDataFactory::newBufferPool() return GraphicsBufferPoolToken(this, retval); } -void MetalDataFactory::destroyData(IGraphicsData* d) +void MetalDataFactoryImpl::destroyData(IGraphicsData* d) { std::unique_lock lk(m_committedMutex); MetalData* data = static_cast(d); @@ -1186,7 +1279,7 @@ void MetalDataFactory::destroyData(IGraphicsData* d) data->decrement(); } -void MetalDataFactory::destroyAllData() +void MetalDataFactoryImpl::destroyAllData() { std::unique_lock lk(m_committedMutex); for (MetalData* data : m_committedData) @@ -1197,7 +1290,7 @@ void MetalDataFactory::destroyAllData() m_committedPools.clear(); } -void MetalDataFactory::destroyPool(IGraphicsBufferPool* p) +void MetalDataFactoryImpl::destroyPool(IGraphicsBufferPool* p) { std::unique_lock lk(m_committedMutex); MetalPool* pool = static_cast(p); @@ -1205,8 +1298,8 @@ void MetalDataFactory::destroyPool(IGraphicsBufferPool* p) delete pool; } -IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, - size_t stride, size_t count) +IGraphicsBufferD* MetalDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, + size_t stride, size_t count) { MetalPool* pool = static_cast(p); MetalCommandQueue* q = static_cast(m_parent->getCommandQueue()); @@ -1215,7 +1308,7 @@ IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, Buffer return retval; } -void MetalDataFactory::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf) +void MetalDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf) { MetalPool* pool = static_cast(p); pool->m_DBufs.erase(static_cast(buf)); @@ -1227,6 +1320,11 @@ IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentW return new struct MetalCommandQueue(ctx, parentWindow, parent); } +IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) +{ + return new class MetalDataFactoryImpl(parent, ctx, sampleCount); +} + } #endif diff --git a/lib/mac/WindowCocoa.mm b/lib/mac/WindowCocoa.mm index 3bec683..9211b72 100644 --- a/lib/mac/WindowCocoa.mm +++ b/lib/mac/WindowCocoa.mm @@ -185,8 +185,11 @@ namespace boo { static logvisor::Module Log("boo::WindowCocoa"); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); +IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, IGraphicsContext* parent); +IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, + MetalContext* ctx, uint32_t sampleCount); void _CocoaUpdateLastGLCtx(NSOpenGLContext* lastGLCtx); class GraphicsContextCocoaGL : public GraphicsContextCocoa @@ -206,7 +209,7 @@ public: : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), m_lastCtx(lastGLCtx) { - m_dataFactory = new GLDataFactory(this, sampleCount); + m_dataFactory = _NewGLDataFactory(this, sampleCount); } ~GraphicsContextCocoaGL() @@ -362,7 +365,7 @@ public: : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), m_parentWindow(parentWindow), m_metalCtx(metalCtx) { - m_dataFactory = new MetalDataFactory(this, metalCtx, sampleCount); + m_dataFactory = _NewMetalDataFactory(this, metalCtx, sampleCount); } ~GraphicsContextCocoaMetal() diff --git a/lib/win/WindowWin32.cpp b/lib/win/WindowWin32.cpp index 23933f4..db4932a 100644 --- a/lib/win/WindowWin32.cpp +++ b/lib/win/WindowWin32.cpp @@ -36,6 +36,7 @@ IGraphicsDataFactory* 
_NewD3D12DataFactory(D3D12Context* ctx, IGraphicsContext* IGraphicsCommandQueue* _NewD3D11CommandQueue(D3D11Context* ctx, D3D11Context::Window* windowCtx, IGraphicsContext* parent); IGraphicsDataFactory* _NewD3D11DataFactory(D3D11Context* ctx, IGraphicsContext* parent, uint32_t sampleCount); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); +IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); #if BOO_HAS_VULKAN IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, VulkanContext::Window* windowCtx, @@ -280,7 +281,7 @@ public: Log.report(logvisor::Fatal, "unable to share contexts"); m_3dCtx.m_ctxOgl.m_lastContext = w.m_mainContext; - m_dataFactory = new GLDataFactory(this, sampleCount); + m_dataFactory = _NewGLDataFactory(this, sampleCount); m_commandQueue = _NewGLCommandQueue(this); } diff --git a/lib/x11/WindowXlib.cpp b/lib/x11/WindowXlib.cpp index ea24041..2b93680 100644 --- a/lib/x11/WindowXlib.cpp +++ b/lib/x11/WindowXlib.cpp @@ -114,6 +114,7 @@ namespace boo { static logvisor::Module Log("boo::WindowXlib"); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); +IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); #if BOO_HAS_VULKAN IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, VulkanContext::Window* windowCtx, @@ -327,7 +328,7 @@ public: : GraphicsContextXlib(api, EPixelFormat::RGBA8, parentWindow, display, drawSamples), m_lastCtx(lastCtx) { - m_dataFactory = new class GLDataFactory(this, drawSamples); + m_dataFactory = _NewGLDataFactory(this, drawSamples); /* Query framebuffer configurations */ GLXFBConfig* fbConfigs = nullptr; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a6bee3f..7a7cbe8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(booTest WIN32 main.cpp) -target_link_libraries(booTest boo logvisor ${BOO_SYS_LIBS}) +target_link_libraries(booTest boo logvisor xxhash ${BOO_SYS_LIBS}) diff --git a/xxhash/CMakeLists.txt b/xxhash/CMakeLists.txt new file mode 100644 index 0000000..f705464 --- /dev/null +++ b/xxhash/CMakeLists.txt @@ -0,0 +1 @@ +add_library(xxhash xxhash.c xxhash.h) diff --git a/xxhash/LICENSE b/xxhash/LICENSE new file mode 100644 index 0000000..7de801e --- /dev/null +++ b/xxhash/LICENSE @@ -0,0 +1,24 @@ +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/xxhash/xxhash.c b/xxhash/xxhash.c new file mode 100644 index 0000000..511d994 --- /dev/null +++ b/xxhash/xxhash.c @@ -0,0 +1,962 @@ +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2015, Yann Collet + +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/************************************** +* Tuning parameters +**************************************/ +/* XXH_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which generate assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/* XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#define XXH_FORCE_NATIVE_FORMAT 0 + +/* XXH_USELESS_ALIGN_BRANCH : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : don't make a test between aligned/unaligned, because performance will be the same. + * It saves one initial branch per hash. 
+ */ +#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USELESS_ALIGN_BRANCH 1 +#endif + + +/************************************** +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define FORCE_INLINE static __forceinline +#else +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/************************************** +* Includes & Memory related functions +***************************************/ +#include "xxhash.h" +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + + +/************************************** +* Basic Types +***************************************/ +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS + + +/****************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/*************************************** +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&one)) +#endif + + +/***************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? 
*(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + + +/*************************************** +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } /* use only *after* variable declarations */ + + +/*************************************** +* Constants +***************************************/ +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL + + +/***************************** +* Simple Hash Functions +*****************************/ +FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_get32bits(p) * PRIME32_2; + v1 = XXH_rotl32(v1, 13); + v1 *= PRIME32_1; + p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; + v2 = XXH_rotl32(v2, 13); + v2 *= PRIME32_1; + p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; + v3 = XXH_rotl32(v3, 13); + v3 *= PRIME32_1; + p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; + v4 = XXH_rotl32(v4, 13); + v4 *= PRIME32_1; + p+=4; + } + while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) + { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USELESS_ALIGN_BRANCH) + if ((((size_t)input) & 3) == 0) /* Input is 4-bytes aligned, leverage the speed benefit */ + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define 
XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) + { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do + { + v1 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; + p+=8; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + } + while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; + v1 = XXH_rotl64(v1, 31); + v1 *= PRIME64_1; + h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; + v2 = XXH_rotl64(v2, 31); + v2 *= PRIME64_1; + h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; + v3 = XXH_rotl64(v3, 31); + v3 *= PRIME64_1; + h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; + v4 = XXH_rotl64(v4, 31); + v4 *= PRIME64_1; + h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; + k1 = XXH_rotl64(k1,31); + k1 *= PRIME64_1; + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USELESS_ALIGN_BRANCH) + if ((((size_t)input) & 7)==0) /* Input is aligned, let's leverage the speed advantage */ + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/**************************************************** +* Advanced Hash Functions +****************************************************/ + +/*** Allocation ***/ +typedef struct +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + U32 mem32[4]; /* defined as U32 for alignment */ + U32 memsize; +} XXH_istate32_t; + +typedef struct +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + U64 mem64[4]; /* defined as U64 for alignment */ + U32 memsize; +} XXH_istate64_t; + + +XXH32_state_t* XXH32_createState(void) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); /* A compilation error here means XXH32_state_t is not large enough */ + return 
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate32_t* state = (const XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32 = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32 = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    const XXH_istate64_t * state = (const XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64 = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
diff --git a/xxhash/xxhash.h b/xxhash/xxhash.h
new file mode 100644
index 0000000..c60aa61
--- /dev/null
+++ b/xxhash/xxhash.h
@@ -0,0 +1,192 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2015, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MumurHash 3a    2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bits version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bits applications only.
+Name            Speed on 64 bits    Speed on 32 bits
+XXH64           13.8 GB/s            1.9 GB/s
+XXH32            6.8 GB/s            6.0 GB/s
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+* Definitions
+*****************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/*****************************
+* Namespace Emulation
+*****************************/
+/* Motivations :
+
+If you need to include xxHash into your library,
+but wish to avoid xxHash symbols to be present on your library interface
+in an effort to avoid potential name collision if another library also includes xxHash,
+
+you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash
+with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values).
+
+Note that no change is required within the calling program :
+it can still call xxHash functions using their regular name.
+They will be automatically translated by this header.
+*/
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#endif
+
+
+/*****************************
+* Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Faster on 64-bits systems. Slower on 32-bits systems.
+*/
+
+
+
+/*****************************
+* Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
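
Usage note (illustrative only, not part of the patch): the header added above exposes a one-shot interface (XXH32/XXH64) and a streaming reset/update/digest interface. The minimal C sketch below assumes only the xxhash.h API declared in this patch; the sample buffer and the split point are made up, and no XXH_NAMESPACE prefix is defined. Hashing the same bytes either way yields the same digest.

    #include <stdio.h>
    #include <string.h>
    #include "xxhash.h"

    int main(void)
    {
        const char data[] = "cached shader source";   /* arbitrary example input */
        unsigned long long seed = 0;

        /* One-shot: hash the whole buffer in a single call. */
        unsigned long long oneShot = XXH64(data, strlen(data), seed);

        /* Streaming: feed the same bytes in two packets; the digest matches the one-shot value. */
        XXH64_state_t* state = XXH64_createState();
        XXH64_reset(state, seed);
        XXH64_update(state, data, 6);
        XXH64_update(state, data + 6, strlen(data) - 6);
        unsigned long long streamed = XXH64_digest(state);
        XXH64_freeState(state);

        printf("one-shot %016llx streamed %016llx\n", oneShot, streamed);
        return 0;
    }

The streaming form suits callers that receive source text in pieces; the one-shot form is enough when a single contiguous blob (for example, a complete shader source string) is already in memory.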