mirror of
				https://github.com/AxioDL/boo.git
				synced 2025-10-25 11:10:25 +00:00 
			
		
		
		
	Move xxhash to boo; cached shader components
This commit is contained in:
		
							parent
							
								
									8bcac27c10
								
							
						
					
					
						commit
						03f155fcf5
					
				| @ -9,6 +9,8 @@ if (NOT TARGET logvisor) | ||||
|     add_subdirectory(logvisor) | ||||
| endif() | ||||
| 
 | ||||
| add_subdirectory(xxhash) | ||||
| 
 | ||||
| set(WITH_LSR_BINDINGS OFF) | ||||
| set(BUILD_TESTS OFF) | ||||
| set(BUILD_SHARED_LIBS OFF) | ||||
| @ -18,7 +20,7 @@ add_subdirectory(soxr) | ||||
| 
 | ||||
| set(BOO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "boo include path" FORCE) | ||||
| 
 | ||||
| include_directories(include ${LOGVISOR_INCLUDE_DIR}) | ||||
| include_directories(include xxhash ${LOGVISOR_INCLUDE_DIR}) | ||||
| 
 | ||||
| if(NOT GEKKO AND NOT CAFE) | ||||
| list(APPEND PLAT_SRCS | ||||
| @ -195,7 +197,7 @@ target_include_directories(glslang-default-resource-limits | ||||
|     PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/glslang | ||||
| ) | ||||
| 
 | ||||
| list(APPEND _BOO_SYS_LIBS glslang HLSL soxr OSDependent OGLCompiler SPIRV glslang-default-resource-limits) | ||||
| list(APPEND _BOO_SYS_LIBS glslang HLSL soxr xxhash OSDependent OGLCompiler SPIRV glslang-default-resource-limits) | ||||
| 
 | ||||
| set(BOO_SYS_LIBS ${_BOO_SYS_LIBS} CACHE PATH "boo system libraries" FORCE) | ||||
| set(BOO_SYS_DEFINES ${_BOO_SYS_DEFINES} CACHE PATH "boo system defines" FORCE) | ||||
|  | ||||
| @ -5,39 +5,16 @@ | ||||
| #include "IGraphicsCommandQueue.hpp" | ||||
| #include "boo/IGraphicsContext.hpp" | ||||
| #include "GLSLMacros.hpp" | ||||
| #include <vector> | ||||
| #include <unordered_set> | ||||
| #include <unordered_map> | ||||
| #include <mutex> | ||||
| 
 | ||||
| namespace boo | ||||
| { | ||||
| 
 | ||||
| class GLDataFactory : public IGraphicsDataFactory | ||||
| { | ||||
|     friend struct GLCommandQueue; | ||||
|     IGraphicsContext* m_parent; | ||||
|     uint32_t m_drawSamples; | ||||
|     static ThreadLocalPtr<struct GLData> m_deferredData; | ||||
|     std::unordered_set<struct GLData*> m_committedData; | ||||
|     std::unordered_set<struct GLPool*> m_committedPools; | ||||
|     std::mutex m_committedMutex; | ||||
|     void destroyData(IGraphicsData*); | ||||
|     void destroyAllData(); | ||||
|     void destroyPool(IGraphicsBufferPool*); | ||||
|     IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use, | ||||
|                                     size_t stride, size_t count); | ||||
|     void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf); | ||||
| public: | ||||
|     GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); | ||||
|     ~GLDataFactory() {destroyAllData();} | ||||
| 
 | ||||
|     Platform platform() const {return Platform::OpenGL;} | ||||
|     const SystemChar* platformName() const {return _S("OpenGL");} | ||||
| 
 | ||||
|     class Context : public IGraphicsDataFactory::Context | ||||
|     { | ||||
|         friend class GLDataFactory; | ||||
|         friend class GLDataFactoryImpl; | ||||
|         GLDataFactory& m_parent; | ||||
|         Context(GLDataFactory& parent) : m_parent(parent) {} | ||||
|     public: | ||||
| @ -73,9 +50,6 @@ public: | ||||
|                              const size_t* ubufOffs, const size_t* ubufSizes, | ||||
|                              size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0); | ||||
|     }; | ||||
| 
 | ||||
|     GraphicsDataToken commitTransaction(const FactoryCommitFunc&); | ||||
|     GraphicsBufferPoolToken newBufferPool(); | ||||
| }; | ||||
| 
 | ||||
| } | ||||
|  | ||||
| @ -10,7 +10,7 @@ namespace boo | ||||
| 
 | ||||
| struct IGraphicsCommandQueue | ||||
| { | ||||
|     virtual ~IGraphicsCommandQueue() {} | ||||
|     virtual ~IGraphicsCommandQueue() = default; | ||||
| 
 | ||||
|     using Platform = IGraphicsDataFactory::Platform; | ||||
|     virtual Platform platform() const=0; | ||||
|  | ||||
| @ -17,7 +17,7 @@ struct IGraphicsBuffer | ||||
| protected: | ||||
|     bool m_dynamic; | ||||
|     IGraphicsBuffer(bool dynamic) : m_dynamic(dynamic) {} | ||||
|     virtual ~IGraphicsBuffer() {} | ||||
|     virtual ~IGraphicsBuffer() = default; | ||||
| }; | ||||
| 
 | ||||
| /** Static resource buffer for verts, indices, uniform constants */ | ||||
| @ -190,7 +190,7 @@ enum class BlendFactor | ||||
| /** Factory object for creating batches of resources as an IGraphicsData token */ | ||||
| struct IGraphicsDataFactory | ||||
| { | ||||
|     virtual ~IGraphicsDataFactory() {} | ||||
|     virtual ~IGraphicsDataFactory() = default; | ||||
| 
 | ||||
|     enum class Platform | ||||
|     { | ||||
| @ -276,10 +276,10 @@ using FactoryCommitFunc = std::function<bool(IGraphicsDataFactory::Context& ctx) | ||||
|  *  IGraphicsData (please don't delete and draw contained resources in the same frame). */ | ||||
| class GraphicsDataToken | ||||
| { | ||||
|     friend class GLDataFactory; | ||||
|     friend class GLDataFactoryImpl; | ||||
|     friend class D3D12DataFactory; | ||||
|     friend class D3D11DataFactory; | ||||
|     friend class MetalDataFactory; | ||||
|     friend class MetalDataFactoryImpl; | ||||
|     friend class VulkanDataFactory; | ||||
|     IGraphicsDataFactory* m_factory = nullptr; | ||||
|     IGraphicsData* m_data = nullptr; | ||||
| @ -323,10 +323,10 @@ public: | ||||
|  *  (please don't delete and draw contained resources in the same frame). */ | ||||
| class GraphicsBufferPoolToken | ||||
| { | ||||
|     friend class GLDataFactory; | ||||
|     friend class GLDataFactoryImpl; | ||||
|     friend class D3D12DataFactory; | ||||
|     friend class D3D11DataFactory; | ||||
|     friend class MetalDataFactory; | ||||
|     friend class MetalDataFactoryImpl; | ||||
|     friend class VulkanDataFactory; | ||||
|     IGraphicsDataFactory* m_factory = nullptr; | ||||
|     IGraphicsBufferPool* m_pool = nullptr; | ||||
|  | ||||
| @ -6,42 +6,16 @@ | ||||
| #include "IGraphicsDataFactory.hpp" | ||||
| #include "IGraphicsCommandQueue.hpp" | ||||
| #include "boo/IGraphicsContext.hpp" | ||||
| #include <vector> | ||||
| #include <mutex> | ||||
| #include <unordered_set> | ||||
| #include <unordered_map> | ||||
| 
 | ||||
| namespace boo | ||||
| { | ||||
| struct MetalContext; | ||||
| 
 | ||||
| class MetalDataFactory : public IGraphicsDataFactory | ||||
| { | ||||
|     friend struct MetalCommandQueue; | ||||
|     IGraphicsContext* m_parent; | ||||
|     static ThreadLocalPtr<struct MetalData> m_deferredData; | ||||
|     std::unordered_set<struct MetalData*> m_committedData; | ||||
|     std::unordered_set<struct MetalPool*> m_committedPools; | ||||
|     std::mutex m_committedMutex; | ||||
|     struct MetalContext* m_ctx; | ||||
|     uint32_t m_sampleCount; | ||||
| 
 | ||||
|     void destroyData(IGraphicsData*); | ||||
|     void destroyAllData(); | ||||
|     void destroyPool(IGraphicsBufferPool*); | ||||
|     IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use, | ||||
|                                     size_t stride, size_t count); | ||||
|     void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf); | ||||
| public: | ||||
|     MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount); | ||||
|     ~MetalDataFactory() {} | ||||
| 
 | ||||
|     Platform platform() const {return Platform::Metal;} | ||||
|     const char* platformName() const {return "Metal";} | ||||
| 
 | ||||
|     class Context : public IGraphicsDataFactory::Context | ||||
|     { | ||||
|         friend class MetalDataFactory; | ||||
|         friend class MetalDataFactoryImpl; | ||||
|         MetalDataFactory& m_parent; | ||||
|         Context(MetalDataFactory& parent) : m_parent(parent) {} | ||||
|     public: | ||||
| @ -76,9 +50,6 @@ public: | ||||
|                              const size_t* ubufOffs, const size_t* ubufSizes, | ||||
|                              size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0); | ||||
|     }; | ||||
| 
 | ||||
|     GraphicsDataToken commitTransaction(const std::function<bool(IGraphicsDataFactory::Context& ctx)>&); | ||||
|     GraphicsBufferPoolToken newBufferPool(); | ||||
| }; | ||||
| 
 | ||||
| } | ||||
|  | ||||
| @ -50,6 +50,44 @@ public: | ||||
|     Token lock() const { return Token(this); } | ||||
| }; | ||||
| 
 | ||||
/** Reference-counted base for shader objects shared between several pipelines.
 *
 *  FactoryImpl must expose `_unregisterShareableShader(uint64_t)`; it is invoked
 *  with the key given at construction once the last outstanding Token releases
 *  its reference. ShaderImpl is the concrete derived type that Token::get()
 *  casts back to.
 */
template <class FactoryImpl, class ShaderImpl>
class IShareableShader
{
    std::atomic_int m_refCount = {0};
    FactoryImpl& m_factory;
    uint64_t m_key;
public:
    IShareableShader(FactoryImpl& factory, uint64_t key)
    : m_factory(factory), m_key(key) {}

    /** Add one reference. */
    void increment() { m_refCount++; }

    /** Drop one reference; the factory is notified when the count reaches zero. */
    void decrement()
    {
        /* fetch_sub yields the value held *before* the subtraction, so 1 means
         * this call released the final reference. The factory may destroy this
         * object while unregistering it, hence nothing touches `this` afterwards. */
        if (m_refCount.fetch_sub(1) == 1)
            m_factory._unregisterShareableShader(m_key);
    }

    /** Move-only handle that holds one reference on a shareable shader. */
    class Token
    {
        IShareableShader<FactoryImpl, ShaderImpl>* m_parent = nullptr;
    public:
        /** Empty token; holds no reference. */
        Token() = default;

        /** Acquire a reference on p (a null p yields an empty token). */
        Token(IShareableShader* p)
        : m_parent(p)
        {
            if (m_parent)
                m_parent->increment();
        }

        Token& operator=(const Token&) = delete;
        Token(const Token&) = delete;

        /** Take over other's reference, releasing whatever this token held before. */
        Token& operator=(Token&& other)
        {
            if (this != &other)
            {
                IShareableShader* released = m_parent;
                m_parent = other.m_parent;
                other.m_parent = nullptr;
                /* Release last: decrement() may destroy the previously held
                 * shader, and the token must already be in its final state. */
                if (released)
                    released->decrement();
            }
            return *this;
        }

        /** Take over other's reference, leaving other empty. */
        Token(Token&& other) noexcept
        : m_parent(other.m_parent)
        { other.m_parent = nullptr; }

        ~Token() { if (m_parent) m_parent->decrement(); }

        /** True when the token currently holds a reference. */
        operator bool() const { return m_parent != nullptr; }

        /** Access the concrete shader; only valid on a non-empty token. */
        ShaderImpl& get() const { return static_cast<ShaderImpl&>(*m_parent); }
    };

    /** Create a new Token holding one additional reference on this shader. */
    Token lock() { return Token(this); }
};
| 
 | ||||
| } | ||||
| 
 | ||||
| #endif // BOO_GRAPHICSDEV_COMMON_HPP
 | ||||
|  | ||||
| @ -8,7 +8,10 @@ | ||||
| #include <condition_variable> | ||||
| #include <array> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <atomic> | ||||
| #include <functional> | ||||
| #include "xxhash.h" | ||||
| 
 | ||||
| #include "logvisor/logvisor.hpp" | ||||
| 
 | ||||
| @ -18,8 +21,47 @@ | ||||
| namespace boo | ||||
| { | ||||
| static logvisor::Module Log("boo::GL"); | ||||
| class GLDataFactoryImpl; | ||||
| 
 | ||||
| ThreadLocalPtr<struct GLData> GLDataFactory::m_deferredData; | ||||
| struct GLShareableShader : IShareableShader<GLDataFactoryImpl, GLShareableShader> | ||||
| { | ||||
|     GLuint m_shader = 0; | ||||
|     GLShareableShader(GLDataFactoryImpl& fac, uint64_t key, GLuint s) | ||||
|     : IShareableShader(fac, key), m_shader(s) {} | ||||
|     ~GLShareableShader() { glDeleteShader(m_shader); } | ||||
| }; | ||||
| 
 | ||||
| class GLDataFactoryImpl : public GLDataFactory | ||||
| { | ||||
|     friend struct GLCommandQueue; | ||||
|     friend class GLDataFactory::Context; | ||||
|     IGraphicsContext* m_parent; | ||||
|     uint32_t m_drawSamples; | ||||
|     static ThreadLocalPtr<struct GLData> m_deferredData; | ||||
|     std::unordered_set<struct GLData*> m_committedData; | ||||
|     std::unordered_set<struct GLPool*> m_committedPools; | ||||
|     std::mutex m_committedMutex; | ||||
|     std::unordered_map<uint64_t, std::unique_ptr<GLShareableShader>> m_sharedShaders; | ||||
|     void destroyData(IGraphicsData*); | ||||
|     void destroyAllData(); | ||||
|     void destroyPool(IGraphicsBufferPool*); | ||||
|     IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use, | ||||
|                                     size_t stride, size_t count); | ||||
|     void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf); | ||||
| public: | ||||
|     GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples); | ||||
|     ~GLDataFactoryImpl() {destroyAllData();} | ||||
| 
 | ||||
|     Platform platform() const {return Platform::OpenGL;} | ||||
|     const SystemChar* platformName() const {return _S("OpenGL");} | ||||
| 
 | ||||
|     GraphicsDataToken commitTransaction(const FactoryCommitFunc&); | ||||
|     GraphicsBufferPoolToken newBufferPool(); | ||||
| 
 | ||||
|     void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); } | ||||
| }; | ||||
| 
 | ||||
| ThreadLocalPtr<struct GLData> GLDataFactoryImpl::m_deferredData; | ||||
| struct GLData : IGraphicsDataPriv<GLData> | ||||
| { | ||||
|     std::vector<std::unique_ptr<class GLShaderPipeline>> m_SPs; | ||||
| @ -75,6 +117,7 @@ public: | ||||
| class GLGraphicsBufferD : public IGraphicsBufferD | ||||
| { | ||||
|     friend class GLDataFactory; | ||||
|     friend class GLDataFactoryImpl; | ||||
|     friend struct GLCommandQueue; | ||||
|     GLuint m_bufs[3]; | ||||
|     GLenum m_target; | ||||
| @ -109,7 +152,7 @@ IGraphicsBufferS* | ||||
| GLDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) | ||||
| { | ||||
|     GLGraphicsBufferS* retval = new GLGraphicsBufferS(use, data, stride * count); | ||||
|     m_deferredData->m_SBufs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -344,7 +387,7 @@ GLDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mip | ||||
|                                          const void* data, size_t sz) | ||||
| { | ||||
|     GLTextureS* retval = new GLTextureS(width, height, mips, fmt, data, sz); | ||||
|     m_deferredData->m_STexs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -353,7 +396,7 @@ GLDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_ | ||||
|                                               TextureFormat fmt, const void *data, size_t sz) | ||||
| { | ||||
|     GLTextureSA* retval = new GLTextureSA(width, height, layers, mips, fmt, data, sz); | ||||
|     m_deferredData->m_SATexs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -362,8 +405,8 @@ class GLShaderPipeline : public IShaderPipeline | ||||
|     friend class GLDataFactory; | ||||
|     friend struct GLCommandQueue; | ||||
|     friend struct GLShaderDataBinding; | ||||
|     GLuint m_vert = 0; | ||||
|     GLuint m_frag = 0; | ||||
|     GLShareableShader::Token m_vert; | ||||
|     GLShareableShader::Token m_frag; | ||||
|     GLuint m_prog = 0; | ||||
|     GLenum m_sfactor = GL_ONE; | ||||
|     GLenum m_dfactor = GL_ZERO; | ||||
| @ -372,48 +415,17 @@ class GLShaderPipeline : public IShaderPipeline | ||||
|     bool m_depthWrite = true; | ||||
|     bool m_backfaceCulling = true; | ||||
|     std::vector<GLint> m_uniLocs; | ||||
|     bool initObjects() | ||||
|     { | ||||
|         m_vert = glCreateShader(GL_VERTEX_SHADER); | ||||
|         m_frag = glCreateShader(GL_FRAGMENT_SHADER); | ||||
|         m_prog = glCreateProgram(); | ||||
|         if (!m_vert || !m_frag || !m_prog) | ||||
|         { | ||||
|             glDeleteShader(m_vert); | ||||
|             m_vert = 0; | ||||
|             glDeleteShader(m_frag); | ||||
|             m_frag = 0; | ||||
|             glDeleteProgram(m_prog); | ||||
|             m_prog = 0; | ||||
|             return false; | ||||
|         } | ||||
|         glAttachShader(m_prog, m_vert); | ||||
|         glAttachShader(m_prog, m_frag); | ||||
|         return true; | ||||
|     } | ||||
|     void clearObjects() | ||||
|     { | ||||
|         if (m_vert) | ||||
|             glDeleteShader(m_vert); | ||||
|         if (m_frag) | ||||
|             glDeleteShader(m_frag); | ||||
|         if (m_prog) | ||||
|             glDeleteProgram(m_prog); | ||||
|     } | ||||
|     GLShaderPipeline() = default; | ||||
| public: | ||||
|     operator bool() const {return m_prog != 0;} | ||||
|     ~GLShaderPipeline() {clearObjects();} | ||||
|     ~GLShaderPipeline() { glDeleteProgram(m_prog); } | ||||
|     GLShaderPipeline& operator=(const GLShaderPipeline&) = delete; | ||||
|     GLShaderPipeline(const GLShaderPipeline&) = delete; | ||||
|     GLShaderPipeline& operator=(GLShaderPipeline&& other) | ||||
|     { | ||||
|         m_vert = other.m_vert; | ||||
|         other.m_vert = 0; | ||||
|         m_frag = other.m_frag; | ||||
|         other.m_frag = 0; | ||||
|         m_prog = other.m_prog; | ||||
|         other.m_prog = 0; | ||||
|         m_vert = std::move(other.m_vert); | ||||
|         m_frag = std::move(other.m_frag); | ||||
|         m_prog = std::move(other.m_prog); | ||||
|         m_sfactor = other.m_sfactor; | ||||
|         m_dfactor = other.m_dfactor; | ||||
|         m_depthTest = other.m_depthTest; | ||||
| @ -482,47 +494,95 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline | ||||
|  BlendFactor srcFac, BlendFactor dstFac, Primitive prim, | ||||
|  bool depthTest, bool depthWrite, bool backfaceCulling) | ||||
| { | ||||
|     GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent); | ||||
|     GLShaderPipeline shader; | ||||
|     if (!shader.initObjects()) | ||||
|     { | ||||
|         Log.report(logvisor::Error, "unable to create shader objects\n"); | ||||
|         return nullptr; | ||||
|     } | ||||
|     shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)]; | ||||
|     shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)]; | ||||
|     shader.m_depthTest = depthTest; | ||||
|     shader.m_depthWrite = depthWrite; | ||||
|     shader.m_backfaceCulling = backfaceCulling; | ||||
|     shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)]; | ||||
| 
 | ||||
|     glShaderSource(shader.m_vert, 1, &vertSource, nullptr); | ||||
|     glCompileShader(shader.m_vert); | ||||
|     XXH64_state_t hashState; | ||||
|     uint64_t hashes[2]; | ||||
|     XXH64_reset(&hashState, 0); | ||||
|     XXH64_update(&hashState, vertSource, strlen(vertSource)); | ||||
|     hashes[0] = XXH64_digest(&hashState); | ||||
|     XXH64_reset(&hashState, 0); | ||||
|     XXH64_update(&hashState, fragSource, strlen(fragSource)); | ||||
|     hashes[1] = XXH64_digest(&hashState); | ||||
| 
 | ||||
|     GLint status; | ||||
|     glGetShaderiv(shader.m_vert, GL_COMPILE_STATUS, &status); | ||||
|     if (status != GL_TRUE) | ||||
|     auto vertFind = factory.m_sharedShaders.find(hashes[0]); | ||||
|     if (vertFind != factory.m_sharedShaders.end()) | ||||
|     { | ||||
|         GLint logLen; | ||||
|         glGetShaderiv(shader.m_vert, GL_INFO_LOG_LENGTH, &logLen); | ||||
|         char* log = (char*)malloc(logLen); | ||||
|         glGetShaderInfoLog(shader.m_vert, logLen, nullptr, log); | ||||
|         Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource); | ||||
|         free(log); | ||||
|         shader.m_vert = vertFind->second->lock(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         GLuint sobj = glCreateShader(GL_VERTEX_SHADER); | ||||
|         if (!sobj) | ||||
|         { | ||||
|             Log.report(logvisor::Error, "unable to create vert shader"); | ||||
|             return nullptr; | ||||
|         } | ||||
| 
 | ||||
|         glShaderSource(sobj, 1, &vertSource, nullptr); | ||||
|         glCompileShader(sobj); | ||||
|         glGetShaderiv(sobj, GL_COMPILE_STATUS, &status); | ||||
|         if (status != GL_TRUE) | ||||
|         { | ||||
|             GLint logLen; | ||||
|             glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen); | ||||
|             char* log = (char*)malloc(logLen); | ||||
|             glGetShaderInfoLog(sobj, logLen, nullptr, log); | ||||
|             Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource); | ||||
|             free(log); | ||||
|             return nullptr; | ||||
|         } | ||||
| 
 | ||||
|         auto it = | ||||
|         factory.m_sharedShaders.emplace(std::make_pair(hashes[0], | ||||
|             std::make_unique<GLShareableShader>(factory, hashes[0], sobj))).first; | ||||
|         shader.m_vert = it->second->lock(); | ||||
|     } | ||||
|     auto fragFind = factory.m_sharedShaders.find(hashes[1]); | ||||
|     if (fragFind != factory.m_sharedShaders.end()) | ||||
|     { | ||||
|         shader.m_frag = fragFind->second->lock(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         GLuint sobj = glCreateShader(GL_FRAGMENT_SHADER); | ||||
|         if (!sobj) | ||||
|         { | ||||
|             Log.report(logvisor::Error, "unable to create frag shader"); | ||||
|             return nullptr; | ||||
|         } | ||||
| 
 | ||||
|         glShaderSource(sobj, 1, &fragSource, nullptr); | ||||
|         glCompileShader(sobj); | ||||
|         glGetShaderiv(sobj, GL_COMPILE_STATUS, &status); | ||||
|         if (status != GL_TRUE) | ||||
|         { | ||||
|             GLint logLen; | ||||
|             glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen); | ||||
|             char* log = (char*)malloc(logLen); | ||||
|             glGetShaderInfoLog(sobj, logLen, nullptr, log); | ||||
|             Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource); | ||||
|             free(log); | ||||
|             return nullptr; | ||||
|         } | ||||
| 
 | ||||
|         auto it = | ||||
|         factory.m_sharedShaders.emplace(std::make_pair(hashes[1], | ||||
|             std::make_unique<GLShareableShader>(factory, hashes[1], sobj))).first; | ||||
|         shader.m_frag = it->second->lock(); | ||||
|     } | ||||
| 
 | ||||
|     shader.m_prog = glCreateProgram(); | ||||
|     if (!shader.m_prog) | ||||
|     { | ||||
|         Log.report(logvisor::Error, "unable to create shader program"); | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     glShaderSource(shader.m_frag, 1, &fragSource, nullptr); | ||||
|     glCompileShader(shader.m_frag); | ||||
|     glGetShaderiv(shader.m_frag, GL_COMPILE_STATUS, &status); | ||||
|     if (status != GL_TRUE) | ||||
|     { | ||||
|         GLint logLen; | ||||
|         glGetShaderiv(shader.m_frag, GL_INFO_LOG_LENGTH, &logLen); | ||||
|         char* log = (char*)malloc(logLen); | ||||
|         glGetShaderInfoLog(shader.m_frag, logLen, nullptr, log); | ||||
|         Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource); | ||||
|         free(log); | ||||
|         return nullptr; | ||||
|     } | ||||
|     glAttachShader(shader.m_prog, shader.m_vert.get().m_shader); | ||||
|     glAttachShader(shader.m_prog, shader.m_frag.get().m_shader); | ||||
| 
 | ||||
|     glLinkProgram(shader.m_prog); | ||||
|     glGetProgramiv(shader.m_prog, GL_LINK_STATUS, &status); | ||||
| @ -563,8 +623,15 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)]; | ||||
|     shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)]; | ||||
|     shader.m_depthTest = depthTest; | ||||
|     shader.m_depthWrite = depthWrite; | ||||
|     shader.m_backfaceCulling = backfaceCulling; | ||||
|     shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)]; | ||||
| 
 | ||||
|     GLShaderPipeline* retval = new GLShaderPipeline(std::move(shader)); | ||||
|     m_deferredData->m_SPs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -699,17 +766,17 @@ GLDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline, | ||||
|                                              size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) | ||||
| { | ||||
|     GLShaderDataBinding* retval = | ||||
|     new GLShaderDataBinding(m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs, | ||||
|     new GLShaderDataBinding(GLDataFactoryImpl::m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs, | ||||
|                             ubufOffs, ubufSizes, texCount, texs); | ||||
|     m_deferredData->m_SBinds.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| GLDataFactory::GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples) | ||||
| GLDataFactoryImpl::GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples) | ||||
| : m_parent(parent), m_drawSamples(drawSamples) {} | ||||
| 
 | ||||
| 
 | ||||
| GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& trans) | ||||
| GraphicsDataToken GLDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans) | ||||
| { | ||||
|     if (m_deferredData.get()) | ||||
|         Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); | ||||
| @ -736,7 +803,7 @@ GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& tran | ||||
|     return GraphicsDataToken(this, retval); | ||||
| } | ||||
| 
 | ||||
| GraphicsBufferPoolToken GLDataFactory::newBufferPool() | ||||
| GraphicsBufferPoolToken GLDataFactoryImpl::newBufferPool() | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     GLPool* retval = new GLPool; | ||||
| @ -744,7 +811,7 @@ GraphicsBufferPoolToken GLDataFactory::newBufferPool() | ||||
|     return GraphicsBufferPoolToken(this, retval); | ||||
| } | ||||
| 
 | ||||
| void GLDataFactory::destroyData(IGraphicsData* d) | ||||
| void GLDataFactoryImpl::destroyData(IGraphicsData* d) | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     GLData* data = static_cast<GLData*>(d); | ||||
| @ -752,7 +819,7 @@ void GLDataFactory::destroyData(IGraphicsData* d) | ||||
|     data->decrement(); | ||||
| } | ||||
| 
 | ||||
| void GLDataFactory::destroyAllData() | ||||
| void GLDataFactoryImpl::destroyAllData() | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     for (GLData* data : m_committedData) | ||||
| @ -763,7 +830,7 @@ void GLDataFactory::destroyAllData() | ||||
|     m_committedPools.clear(); | ||||
| } | ||||
| 
 | ||||
| void GLDataFactory::destroyPool(IGraphicsBufferPool* p) | ||||
| void GLDataFactoryImpl::destroyPool(IGraphicsBufferPool* p) | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     GLPool* pool = static_cast<GLPool*>(p); | ||||
| @ -771,8 +838,8 @@ void GLDataFactory::destroyPool(IGraphicsBufferPool* p) | ||||
|     delete pool; | ||||
| } | ||||
| 
 | ||||
| IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, | ||||
|                                                size_t stride, size_t count) | ||||
| IGraphicsBufferD* GLDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, | ||||
|                                                    size_t stride, size_t count) | ||||
| { | ||||
|     GLPool* pool = static_cast<GLPool*>(p); | ||||
|     GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); | ||||
| @ -780,7 +847,7 @@ IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| void GLDataFactory::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf) | ||||
| void GLDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf) | ||||
| { | ||||
|     GLPool* pool = static_cast<GLPool*>(p); | ||||
|     pool->m_DBufs.erase(static_cast<GLGraphicsBufferD*>(buf)); | ||||
| @ -1336,7 +1403,7 @@ struct GLCommandQueue : IGraphicsCommandQueue | ||||
|         } | ||||
| 
 | ||||
|         /* Update dynamic data here */ | ||||
|         GLDataFactory* gfxF = static_cast<GLDataFactory*>(m_parent->getDataFactory()); | ||||
|         GLDataFactoryImpl* gfxF = static_cast<GLDataFactoryImpl*>(m_parent->getDataFactory()); | ||||
|         std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex); | ||||
|         for (GLData* d : gfxF->m_committedData) | ||||
|         { | ||||
| @ -1403,7 +1470,7 @@ IGraphicsBufferD* | ||||
| GLDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) | ||||
| { | ||||
|     GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); | ||||
|     m_deferredData->m_DBufs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1478,7 +1545,7 @@ ITextureD* | ||||
| GLDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) | ||||
| { | ||||
|     GLTextureD* retval = new GLTextureD(width, height, fmt); | ||||
|     m_deferredData->m_DTexs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1546,11 +1613,12 @@ ITextureR* | ||||
| GLDataFactory::Context::newRenderTexture(size_t width, size_t height, | ||||
|                                          bool enableShaderColorBinding, bool enableShaderDepthBinding) | ||||
| { | ||||
|     GLCommandQueue* q = static_cast<GLCommandQueue*>(m_parent.m_parent->getCommandQueue()); | ||||
|     GLTextureR* retval = new GLTextureR(q, width, height, m_parent.m_drawSamples, | ||||
|     GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent); | ||||
|     GLCommandQueue* q = static_cast<GLCommandQueue*>(factory.m_parent->getCommandQueue()); | ||||
|     GLTextureR* retval = new GLTextureR(q, width, height, factory.m_drawSamples, | ||||
|                                         enableShaderColorBinding, enableShaderDepthBinding); | ||||
|     q->resizeRenderTexture(retval, width, height); | ||||
|     m_deferredData->m_RTexs.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1572,9 +1640,10 @@ IVertexFormat* GLDataFactory::Context::newVertexFormat | ||||
| (size_t elementCount, const VertexElementDescriptor* elements, | ||||
|  size_t baseVert, size_t baseInst) | ||||
| { | ||||
|     GLCommandQueue* q = static_cast<GLCommandQueue*>(m_parent.m_parent->getCommandQueue()); | ||||
|     GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent); | ||||
|     GLCommandQueue* q = static_cast<GLCommandQueue*>(factory.m_parent->getCommandQueue()); | ||||
|     GLVertexFormat* retval = new struct GLVertexFormat(q, elementCount, elements, baseVert, baseInst); | ||||
|     m_deferredData->m_VFmts.emplace_back(retval); | ||||
|     GLDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1583,4 +1652,9 @@ IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent) | ||||
|     return new struct GLCommandQueue(parent); | ||||
| } | ||||
| 
 | ||||
| IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples) | ||||
| { | ||||
|     return new class GLDataFactoryImpl(parent, drawSamples); | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  | ||||
| @ -5,6 +5,9 @@ | ||||
| #include "boo/IGraphicsContext.hpp" | ||||
| #include "Common.hpp" | ||||
| #include <vector> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include "xxhash.h" | ||||
| 
 | ||||
| #if !__has_feature(objc_arc) | ||||
| #error ARC Required | ||||
| @ -17,8 +20,48 @@ namespace boo | ||||
| { | ||||
| static logvisor::Module Log("boo::Metal"); | ||||
| struct MetalCommandQueue; | ||||
| class MetalDataFactoryImpl; | ||||
| 
 | ||||
| ThreadLocalPtr<struct MetalData> MetalDataFactory::m_deferredData; | ||||
/* Cache entry for one compiled Metal shader function.
 * Derives from IShareableShader (CRTP-style: parameterised on the factory type
 * and on this struct) and adds the id<MTLFunction> handle it wraps.
 * newShaderPipeline in this file stores these in the factory's m_sharedShaders
 * map, keyed by the XXH64 hash of the shader source text. */
| struct MetalShareableShader : IShareableShader<MetalDataFactoryImpl, MetalShareableShader> | ||||
| { | ||||
    /* Metal function object this entry wraps. */
|     id<MTLFunction> m_shader; | ||||
    /* Forwards the owning factory and 64-bit key to the IShareableShader base
     * and stores the function handle `s`. */
|     MetalShareableShader(MetalDataFactoryImpl& fac, uint64_t key, id<MTLFunction> s) | ||||
|     : IShareableShader(fac, key), m_shader(s) {} | ||||
| }; | ||||
| 
 | ||||
/* Concrete Metal implementation of MetalDataFactory.
 * Holds the graphics-context and Metal-context pointers, the sets of committed
 * data objects and buffer pools, the mutex used around those sets, and the
 * cache of shareable shader functions keyed by a 64-bit value. Instances are
 * created through _NewMetalDataFactory. */
| class MetalDataFactoryImpl : public MetalDataFactory | ||||
| { | ||||
    /* MetalCommandQueue reads m_committedMutex / m_committedData directly;
     * MetalDataFactory::Context reaches m_ctx, m_parent, m_sampleCount and
     * m_sharedShaders after downcasting its parent reference. */
|     friend struct MetalCommandQueue; | ||||
|     friend class MetalDataFactory::Context; | ||||
|     IGraphicsContext* m_parent; | ||||
    /* Thread-local pointer to the MetalData that the Context::new* methods
     * append to; commitTransaction reports a fatal error if it is already set
     * on entry (nested use). */
|     static ThreadLocalPtr<struct MetalData> m_deferredData; | ||||
|     std::unordered_set<struct MetalData*> m_committedData; | ||||
|     std::unordered_set<struct MetalPool*> m_committedPools; | ||||
    /* Locked by newBufferPool, destroyData, destroyAllData, destroyPool and by
     * MetalCommandQueue while it walks m_committedData. */
|     std::mutex m_committedMutex; | ||||
    /* Shader cache: key -> owned MetalShareableShader. Entries are inserted by
     * Context::newShaderPipeline and erased by _unregisterShareableShader.
     * NOTE(review): neither of those accesses takes m_committedMutex in the
     * visible code — confirm callers are confined to one thread. */
|     std::unordered_map<uint64_t, std::unique_ptr<MetalShareableShader>> m_sharedShaders; | ||||
|     struct MetalContext* m_ctx; | ||||
|     uint32_t m_sampleCount; | ||||
| 
 | ||||
|     void destroyData(IGraphicsData*); | ||||
|     void destroyAllData(); | ||||
|     void destroyPool(IGraphicsBufferPool*); | ||||
|     IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use, | ||||
|                                     size_t stride, size_t count); | ||||
|     void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf); | ||||
| public: | ||||
    /* Stores the three arguments in m_parent, m_ctx and m_sampleCount. */
|     MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount); | ||||
|     ~MetalDataFactoryImpl() {} | ||||
| 
 | ||||
|     Platform platform() const {return Platform::Metal;} | ||||
|     const char* platformName() const {return "Metal";} | ||||
| 
 | ||||
|     GraphicsDataToken commitTransaction(const std::function<bool(IGraphicsDataFactory::Context& ctx)>&); | ||||
|     GraphicsBufferPoolToken newBufferPool(); | ||||
| 
 | ||||
    /* Removes the cache entry for `key`; erasing the map entry destroys the
     * owned MetalShareableShader. Erasing an absent key is a no-op. */
|     void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); } | ||||
| }; | ||||
| 
 | ||||
| ThreadLocalPtr<struct MetalData> MetalDataFactoryImpl::m_deferredData; | ||||
| struct MetalData : IGraphicsDataPriv<MetalData> | ||||
| { | ||||
|     std::vector<std::unique_ptr<class MetalShaderPipeline>> m_SPs; | ||||
| @ -60,6 +103,7 @@ public: | ||||
| class MetalGraphicsBufferD : public IGraphicsBufferD | ||||
| { | ||||
|     friend class MetalDataFactory; | ||||
|     friend class MetalDataFactoryImpl; | ||||
|     friend struct MetalCommandQueue; | ||||
|     MetalCommandQueue* m_q; | ||||
|     std::unique_ptr<uint8_t[]> m_cpuBuf; | ||||
| @ -502,19 +546,24 @@ class MetalShaderPipeline : public IShaderPipeline | ||||
|     MTLCullMode m_cullMode = MTLCullModeNone; | ||||
|     MTLPrimitiveType m_drawPrim; | ||||
|     const MetalVertexFormat* m_vtxFmt; | ||||
|     MetalShareableShader::Token m_vert; | ||||
|     MetalShareableShader::Token m_frag; | ||||
| 
 | ||||
|     MetalShaderPipeline(MetalContext* ctx, id<MTLFunction> vert, id<MTLFunction> frag, | ||||
|     MetalShaderPipeline(MetalContext* ctx, | ||||
|                         MetalShareableShader::Token&& vert, | ||||
|                         MetalShareableShader::Token&& frag, | ||||
|                         const MetalVertexFormat* vtxFmt, NSUInteger targetSamples, | ||||
|                         BlendFactor srcFac, BlendFactor dstFac, Primitive prim, | ||||
|                         bool depthTest, bool depthWrite, bool backfaceCulling) | ||||
|     : m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt) | ||||
|     : m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt), | ||||
|       m_vert(std::move(vert)), m_frag(std::move(frag)) | ||||
|     { | ||||
|         if (backfaceCulling) | ||||
|             m_cullMode = MTLCullModeBack; | ||||
| 
 | ||||
|         MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new]; | ||||
|         desc.vertexFunction = vert; | ||||
|         desc.fragmentFunction = frag; | ||||
|         desc.vertexFunction = m_vert.get().m_shader; | ||||
|         desc.fragmentFunction = m_frag.get().m_shader; | ||||
|         desc.vertexDescriptor = vtxFmt->m_vdesc; | ||||
|         desc.sampleCount = targetSamples; | ||||
|         desc.colorAttachments[0].pixelFormat = MTLPixelFormatBGRA8Unorm; | ||||
| @ -897,7 +946,7 @@ struct MetalCommandQueue : IGraphicsCommandQueue | ||||
|             return; | ||||
| 
 | ||||
|         /* Update dynamic data here */ | ||||
|         MetalDataFactory* gfxF = static_cast<MetalDataFactory*>(m_parent->getDataFactory()); | ||||
|         MetalDataFactoryImpl* gfxF = static_cast<MetalDataFactoryImpl*>(m_parent->getDataFactory()); | ||||
|         std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex); | ||||
|         for (MetalData* d : gfxF->m_committedData) | ||||
|         { | ||||
| @ -1042,49 +1091,55 @@ void MetalTextureD::unmap() | ||||
|     m_validSlots = 0; | ||||
| } | ||||
| 
 | ||||
| MetalDataFactory::MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) | ||||
| MetalDataFactoryImpl::MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) | ||||
| : m_parent(parent), m_ctx(ctx), m_sampleCount(sampleCount) {} | ||||
| 
 | ||||
| IGraphicsBufferS* MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) | ||||
| { | ||||
|     MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, m_parent.m_ctx, data, stride, count); | ||||
|     m_deferredData->m_SBufs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, factory.m_ctx, data, stride, count); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| IGraphicsBufferD* MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) | ||||
| { | ||||
|     MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent.m_parent->getCommandQueue()); | ||||
|     MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, m_parent.m_ctx, stride, count); | ||||
|     m_deferredData->m_DBufs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalCommandQueue* q = static_cast<MetalCommandQueue*>(factory.m_parent->getCommandQueue()); | ||||
|     MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, factory.m_ctx, stride, count); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| ITextureS* MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt, | ||||
|                                                        const void* data, size_t sz) | ||||
| { | ||||
|     MetalTextureS* retval = new MetalTextureS(m_parent.m_ctx, width, height, mips, fmt, data, sz); | ||||
|     m_deferredData->m_STexs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalTextureS* retval = new MetalTextureS(factory.m_ctx, width, height, mips, fmt, data, sz); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| ITextureSA* MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips, | ||||
|                                                              TextureFormat fmt, const void* data, size_t sz) | ||||
| { | ||||
|     MetalTextureSA* retval = new MetalTextureSA(m_parent.m_ctx, width, height, layers, mips, fmt, data, sz); | ||||
|     m_deferredData->m_SATexs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalTextureSA* retval = new MetalTextureSA(factory.m_ctx, width, height, layers, mips, fmt, data, sz); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| ITextureD* MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) | ||||
| { | ||||
|     MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent.m_parent->getCommandQueue()); | ||||
|     MetalTextureD* retval = new MetalTextureD(q, m_parent.m_ctx, width, height, fmt); | ||||
|     m_deferredData->m_DTexs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalCommandQueue* q = static_cast<MetalCommandQueue*>(factory.m_parent->getCommandQueue()); | ||||
|     MetalTextureD* retval = new MetalTextureD(q, factory.m_ctx, width, height, fmt); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| ITextureR* MetalDataFactory::Context::newRenderTexture(size_t width, size_t height, | ||||
|                                                        bool enableShaderColorBinding, bool enableShaderDepthBinding) | ||||
| { | ||||
|     MetalTextureR* retval = new MetalTextureR(m_parent.m_ctx, width, height, m_parent.m_sampleCount, enableShaderColorBinding); | ||||
|     m_deferredData->m_RTexs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalTextureR* retval = new MetalTextureR(factory.m_ctx, width, height, factory.m_sampleCount, enableShaderColorBinding); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1092,7 +1147,7 @@ IVertexFormat* MetalDataFactory::Context::newVertexFormat(size_t elementCount, c | ||||
|                                                           size_t baseVert, size_t baseInst) | ||||
| { | ||||
|     MetalVertexFormat* retval = new struct MetalVertexFormat(elementCount, elements); | ||||
|     m_deferredData->m_VFmts.emplace_back(retval); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1101,34 +1156,71 @@ IShaderPipeline* MetalDataFactory::Context::newShaderPipeline(const char* vertSo | ||||
|                                                               BlendFactor srcFac, BlendFactor dstFac, Primitive prim, | ||||
|                                                               bool depthTest, bool depthWrite, bool backfaceCulling) | ||||
| { | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MTLCompileOptions* compOpts = [MTLCompileOptions new]; | ||||
|     compOpts.languageVersion = MTLLanguageVersion1_1; | ||||
|     NSError* err = nullptr; | ||||
| 
 | ||||
|     id<MTLLibrary> vertShaderLib = [m_parent.m_ctx->m_dev newLibraryWithSource:@(vertSource) | ||||
|                                                                        options:compOpts | ||||
|                                                                          error:&err]; | ||||
|     if (!vertShaderLib) | ||||
|     { | ||||
|         printf("%s\n", vertSource); | ||||
|         Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); | ||||
|     } | ||||
|     id<MTLFunction> vertFunc = [vertShaderLib newFunctionWithName:@"vmain"]; | ||||
|     XXH64_state_t hashState; | ||||
|     uint64_t hashes[2]; | ||||
|     XXH64_reset(&hashState, 0); | ||||
|     XXH64_update(&hashState, vertSource, strlen(vertSource)); | ||||
|     hashes[0] = XXH64_digest(&hashState); | ||||
|     XXH64_reset(&hashState, 0); | ||||
|     XXH64_update(&hashState, fragSource, strlen(fragSource)); | ||||
|     hashes[1] = XXH64_digest(&hashState); | ||||
| 
 | ||||
|     id<MTLLibrary> fragShaderLib = [m_parent.m_ctx->m_dev newLibraryWithSource:@(fragSource) | ||||
|                                                                        options:compOpts | ||||
|                                                                          error:&err]; | ||||
|     if (!fragShaderLib) | ||||
|     MetalShareableShader::Token vertShader; | ||||
|     MetalShareableShader::Token fragShader; | ||||
|     auto vertFind = factory.m_sharedShaders.find(hashes[0]); | ||||
|     if (vertFind != factory.m_sharedShaders.end()) | ||||
|     { | ||||
|         printf("%s\n", fragSource); | ||||
|         Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]); | ||||
|         vertShader = vertFind->second->lock(); | ||||
|     } | ||||
|     id<MTLFunction> fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; | ||||
|     else | ||||
|     { | ||||
|         id<MTLLibrary> vertShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(vertSource) | ||||
|                                                                           options:compOpts | ||||
|                                                                             error:&err]; | ||||
|         if (!vertShaderLib) | ||||
|         { | ||||
|             printf("%s\n", vertSource); | ||||
|             Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); | ||||
|         } | ||||
|         id<MTLFunction> vertFunc = [vertShaderLib newFunctionWithName:@"vmain"]; | ||||
| 
 | ||||
|     MetalShaderPipeline* retval = new MetalShaderPipeline(m_parent.m_ctx, vertFunc, fragFunc, | ||||
|         auto it = | ||||
|         factory.m_sharedShaders.emplace(std::make_pair(hashes[0], | ||||
|             std::make_unique<MetalShareableShader>(factory, hashes[0], vertFunc))).first; | ||||
|         vertShader = it->second->lock(); | ||||
|     } | ||||
|     auto fragFind = factory.m_sharedShaders.find(hashes[1]); | ||||
|     if (fragFind != factory.m_sharedShaders.end()) | ||||
|     { | ||||
|         fragShader = fragFind->second->lock(); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         id<MTLLibrary> fragShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(fragSource) | ||||
|                                                                           options:compOpts | ||||
|                                                                             error:&err]; | ||||
|         if (!fragShaderLib) | ||||
|         { | ||||
|             printf("%s\n", fragSource); | ||||
|             Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]); | ||||
|         } | ||||
|         id<MTLFunction> fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; | ||||
| 
 | ||||
|         auto it = | ||||
|         factory.m_sharedShaders.emplace(std::make_pair(hashes[1], | ||||
|             std::make_unique<MetalShareableShader>(factory, hashes[1], fragFunc))).first; | ||||
|         fragShader = it->second->lock(); | ||||
|     } | ||||
| 
 | ||||
|     MetalShaderPipeline* retval = new MetalShaderPipeline(factory.m_ctx, std::move(vertShader), std::move(fragShader), | ||||
|                                                           static_cast<const MetalVertexFormat*>(vtxFmt), targetSamples, | ||||
|                                                           srcFac, dstFac, prim, depthTest, depthWrite, backfaceCulling); | ||||
|     m_deferredData->m_SPs.emplace_back(retval); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| @ -1140,16 +1232,17 @@ MetalDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline, | ||||
|                                                 const size_t* ubufOffs, const size_t* ubufSizes, | ||||
|                                                 size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) | ||||
| { | ||||
|     MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent); | ||||
|     MetalShaderDataBinding* retval = | ||||
|     new MetalShaderDataBinding(m_deferredData.get(), | ||||
|                                m_parent.m_ctx, pipeline, vbuf, instVbo, ibuf, | ||||
|     new MetalShaderDataBinding(MetalDataFactoryImpl::m_deferredData.get(), | ||||
|                                factory.m_ctx, pipeline, vbuf, instVbo, ibuf, | ||||
|                                ubufCount, ubufs, ubufStages, ubufOffs, | ||||
|                                ubufSizes, texCount, texs, baseVert, baseInst); | ||||
|     m_deferredData->m_SBinds.emplace_back(retval); | ||||
|     MetalDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval); | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& trans) | ||||
| GraphicsDataToken MetalDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans) | ||||
| { | ||||
|     if (m_deferredData.get()) | ||||
|         Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); | ||||
| @ -1170,7 +1263,7 @@ GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& t | ||||
|     return GraphicsDataToken(this, retval); | ||||
| } | ||||
| 
 | ||||
| GraphicsBufferPoolToken MetalDataFactory::newBufferPool() | ||||
| GraphicsBufferPoolToken MetalDataFactoryImpl::newBufferPool() | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     MetalPool* retval = new MetalPool; | ||||
| @ -1178,7 +1271,7 @@ GraphicsBufferPoolToken MetalDataFactory::newBufferPool() | ||||
|     return GraphicsBufferPoolToken(this, retval); | ||||
| } | ||||
| 
 | ||||
| void MetalDataFactory::destroyData(IGraphicsData* d) | ||||
| void MetalDataFactoryImpl::destroyData(IGraphicsData* d) | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     MetalData* data = static_cast<MetalData*>(d); | ||||
| @ -1186,7 +1279,7 @@ void MetalDataFactory::destroyData(IGraphicsData* d) | ||||
|     data->decrement(); | ||||
| } | ||||
| 
 | ||||
| void MetalDataFactory::destroyAllData() | ||||
| void MetalDataFactoryImpl::destroyAllData() | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     for (MetalData* data : m_committedData) | ||||
| @ -1197,7 +1290,7 @@ void MetalDataFactory::destroyAllData() | ||||
|     m_committedPools.clear(); | ||||
| } | ||||
| 
 | ||||
| void MetalDataFactory::destroyPool(IGraphicsBufferPool* p) | ||||
| void MetalDataFactoryImpl::destroyPool(IGraphicsBufferPool* p) | ||||
| { | ||||
|     std::unique_lock<std::mutex> lk(m_committedMutex); | ||||
|     MetalPool* pool = static_cast<MetalPool*>(p); | ||||
| @ -1205,8 +1298,8 @@ void MetalDataFactory::destroyPool(IGraphicsBufferPool* p) | ||||
|     delete pool; | ||||
| } | ||||
| 
 | ||||
| IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, | ||||
|                                                   size_t stride, size_t count) | ||||
| IGraphicsBufferD* MetalDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, | ||||
|                                                       size_t stride, size_t count) | ||||
| { | ||||
|     MetalPool* pool = static_cast<MetalPool*>(p); | ||||
|     MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent->getCommandQueue()); | ||||
| @ -1215,7 +1308,7 @@ IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, Buffer | ||||
|     return retval; | ||||
| } | ||||
| 
 | ||||
| void MetalDataFactory::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf) | ||||
| void MetalDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf) | ||||
| { | ||||
|     MetalPool* pool = static_cast<MetalPool*>(p); | ||||
|     pool->m_DBufs.erase(static_cast<MetalGraphicsBufferD*>(buf)); | ||||
| @ -1227,6 +1320,11 @@ IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentW | ||||
|     return new struct MetalCommandQueue(ctx, parentWindow, parent); | ||||
| } | ||||
| 
 | ||||
/* Free-function factory for the Metal backend: allocates a MetalDataFactoryImpl
 * with `new`, forwarding `parent`, `ctx` and `sampleCount` to its constructor,
 * and returns it through the IGraphicsDataFactory* interface type. */
| IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) | ||||
| { | ||||
|     return new class MetalDataFactoryImpl(parent, ctx, sampleCount); | ||||
| } | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
| @ -185,8 +185,11 @@ namespace boo | ||||
| { | ||||
| static logvisor::Module Log("boo::WindowCocoa"); | ||||
| IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); | ||||
| IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); | ||||
| IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, | ||||
|                                              IGraphicsContext* parent); | ||||
| IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, | ||||
|                                            MetalContext* ctx, uint32_t sampleCount); | ||||
| void _CocoaUpdateLastGLCtx(NSOpenGLContext* lastGLCtx); | ||||
| 
 | ||||
| class GraphicsContextCocoaGL : public GraphicsContextCocoa | ||||
| @ -206,7 +209,7 @@ public: | ||||
|     : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), | ||||
|       m_lastCtx(lastGLCtx) | ||||
|     { | ||||
|         m_dataFactory = new GLDataFactory(this, sampleCount); | ||||
|         m_dataFactory = _NewGLDataFactory(this, sampleCount); | ||||
|     } | ||||
| 
 | ||||
|     ~GraphicsContextCocoaGL() | ||||
| @ -362,7 +365,7 @@ public: | ||||
|     : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), | ||||
|       m_parentWindow(parentWindow), m_metalCtx(metalCtx) | ||||
|     { | ||||
|         m_dataFactory = new MetalDataFactory(this, metalCtx, sampleCount); | ||||
|         m_dataFactory = _NewMetalDataFactory(this, metalCtx, sampleCount); | ||||
|     } | ||||
| 
 | ||||
|     ~GraphicsContextCocoaMetal() | ||||
|  | ||||
| @ -36,6 +36,7 @@ IGraphicsDataFactory* _NewD3D12DataFactory(D3D12Context* ctx, IGraphicsContext* | ||||
| IGraphicsCommandQueue* _NewD3D11CommandQueue(D3D11Context* ctx, D3D11Context::Window* windowCtx, IGraphicsContext* parent); | ||||
| IGraphicsDataFactory* _NewD3D11DataFactory(D3D11Context* ctx, IGraphicsContext* parent, uint32_t sampleCount); | ||||
| IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); | ||||
| IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); | ||||
| #if BOO_HAS_VULKAN | ||||
| IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, | ||||
|                                               VulkanContext::Window* windowCtx, | ||||
| @ -280,7 +281,7 @@ public: | ||||
|                 Log.report(logvisor::Fatal, "unable to share contexts"); | ||||
|         m_3dCtx.m_ctxOgl.m_lastContext = w.m_mainContext; | ||||
| 
 | ||||
|         m_dataFactory = new GLDataFactory(this, sampleCount); | ||||
|         m_dataFactory = _NewGLDataFactory(this, sampleCount); | ||||
|         m_commandQueue = _NewGLCommandQueue(this); | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -114,6 +114,7 @@ namespace boo | ||||
| { | ||||
| static logvisor::Module Log("boo::WindowXlib"); | ||||
| IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); | ||||
| IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples); | ||||
| #if BOO_HAS_VULKAN | ||||
| IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, | ||||
|                                               VulkanContext::Window* windowCtx, | ||||
| @ -327,7 +328,7 @@ public: | ||||
|     : GraphicsContextXlib(api, EPixelFormat::RGBA8, parentWindow, display, drawSamples), | ||||
|       m_lastCtx(lastCtx) | ||||
|     { | ||||
|         m_dataFactory = new class GLDataFactory(this, drawSamples); | ||||
|         m_dataFactory = _NewGLDataFactory(this, drawSamples); | ||||
| 
 | ||||
|         /* Query framebuffer configurations */ | ||||
|         GLXFBConfig* fbConfigs = nullptr; | ||||
|  | ||||
| @ -1,2 +1,2 @@ | ||||
| add_executable(booTest WIN32 main.cpp) | ||||
| target_link_libraries(booTest boo logvisor ${BOO_SYS_LIBS}) | ||||
| target_link_libraries(booTest boo logvisor xxhash ${BOO_SYS_LIBS}) | ||||
|  | ||||
							
								
								
									
										1
									
								
								xxhash/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								xxhash/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| add_library(xxhash xxhash.c xxhash.h) | ||||
							
								
								
									
										24
									
								
								xxhash/LICENSE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								xxhash/LICENSE
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,24 @@ | ||||
| xxHash Library | ||||
| Copyright (c) 2012-2014, Yann Collet | ||||
| All rights reserved. | ||||
| 
 | ||||
| Redistribution and use in source and binary forms, with or without modification, | ||||
| are permitted provided that the following conditions are met: | ||||
| 
 | ||||
| * Redistributions of source code must retain the above copyright notice, this | ||||
|   list of conditions and the following disclaimer. | ||||
| 
 | ||||
| * Redistributions in binary form must reproduce the above copyright notice, this | ||||
|   list of conditions and the following disclaimer in the documentation and/or | ||||
|   other materials provided with the distribution. | ||||
| 
 | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | ||||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||||
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
							
								
								
									
										962
									
								
								xxhash/xxhash.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										962
									
								
								xxhash/xxhash.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,962 @@ | ||||
| /*
 | ||||
| xxHash - Fast Hash algorithm | ||||
| Copyright (C) 2012-2015, Yann Collet | ||||
| 
 | ||||
| BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 | ||||
| 
 | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are | ||||
| met: | ||||
| 
 | ||||
| * Redistributions of source code must retain the above copyright | ||||
| notice, this list of conditions and the following disclaimer. | ||||
| * Redistributions in binary form must reproduce the above | ||||
| copyright notice, this list of conditions and the following disclaimer | ||||
| in the documentation and/or other materials provided with the | ||||
| distribution. | ||||
| 
 | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| 
 | ||||
| You can contact the author at : | ||||
| - xxHash source repository : https://github.com/Cyan4973/xxHash
 | ||||
| */ | ||||
| 
 | ||||
| 
 | ||||
| /**************************************
 | ||||
| *  Tuning parameters | ||||
| **************************************/ | ||||
| /* XXH_FORCE_MEMORY_ACCESS
 | ||||
|  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. | ||||
|  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. | ||||
|  * The switch below allows selecting a different access method for improved performance. | ||||
|  * Method 0 (default) : use `memcpy()`. Safe and portable. | ||||
|  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). | ||||
|  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. | ||||
|  * Method 2 : direct access. This method is portable but violates the C standard. | ||||
|  *            It can generate buggy code on targets which generate assembly depending on alignment. | ||||
|  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) | ||||
|  * See http://stackoverflow.com/a/32095106/646947 for details.
 | ||||
|  * Prefer these methods in priority order (0 > 1 > 2) | ||||
|  */ | ||||
| #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */ | ||||
| #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) | ||||
| #    define XXH_FORCE_MEMORY_ACCESS 2 | ||||
| #  elif defined(__INTEL_COMPILER) || \ | ||||
|   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) | ||||
| #    define XXH_FORCE_MEMORY_ACCESS 1 | ||||
| #  endif | ||||
| #endif | ||||
| 
 | ||||
| /* XXH_ACCEPT_NULL_INPUT_POINTER :
 | ||||
|  * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. | ||||
|  * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. | ||||
|  * By default, this option is disabled. To enable it, uncomment below define : | ||||
|  */ | ||||
| /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ | ||||
| 
 | ||||
| /* XXH_FORCE_NATIVE_FORMAT :
 | ||||
|  * By default, the xxHash library provides endian-independent hash values, based on the little-endian convention. | ||||
|  * Results are therefore identical for little-endian and big-endian CPU. | ||||
|  * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. | ||||
|  * Should endian-independence be of no importance for your application, you may set the #define below to 1, | ||||
|  * to improve speed for Big-endian CPU. | ||||
|  * This option has no impact on little-endian CPUs. | ||||
|  */ | ||||
| #define XXH_FORCE_NATIVE_FORMAT 0 | ||||
| 
 | ||||
| /* XXH_USELESS_ALIGN_BRANCH :
 | ||||
|  * This is a minor performance trick, only useful with lots of very small keys. | ||||
|  * It means : don't make a test between aligned/unaligned, because performance will be the same. | ||||
|  * It saves one initial branch per hash. | ||||
|  */ | ||||
| #if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) | ||||
| #  define XXH_USELESS_ALIGN_BRANCH 1 | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /**************************************
 | ||||
| *  Compiler Specific Options | ||||
| ***************************************/ | ||||
| #ifdef _MSC_VER    /* Visual Studio */ | ||||
| #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */ | ||||
| #  define FORCE_INLINE static __forceinline | ||||
| #else | ||||
| #  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */ | ||||
| #    ifdef __GNUC__ | ||||
| #      define FORCE_INLINE static inline __attribute__((always_inline)) | ||||
| #    else | ||||
| #      define FORCE_INLINE static inline | ||||
| #    endif | ||||
| #  else | ||||
| #    define FORCE_INLINE static | ||||
| #  endif /* __STDC_VERSION__ */ | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /**************************************
 | ||||
| *  Includes & Memory related functions | ||||
| ***************************************/ | ||||
| #include "xxhash.h" | ||||
| /* Modify the local functions below should you wish to use some other memory routines */ | ||||
| /* for malloc(), free() */ | ||||
| #include <stdlib.h> | ||||
/* Allocation hook: forwards the request to malloc() and returns its result
 * unchanged. Edit this wrapper to plug in a different allocator. */
static void* XXH_malloc(size_t s)
{
    void* const block = malloc(s);
    return block;
}
/* Release hook: forwards the pointer to free(). */
static void XXH_free(void* p)
{
    free(p);
}
| /* for memcpy() */ | ||||
| #include <string.h> | ||||
/* Copy hook: copies `size` bytes from `src` to `dest` through memcpy() and
 * returns memcpy()'s result. */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    void* const result = memcpy(dest, src, size);
    return result;
}
| 
 | ||||
| 
 | ||||
| /**************************************
 | ||||
| *  Basic Types | ||||
| ***************************************/ | ||||
/* Fixed-width integer aliases used throughout the hash code.
 * Pre-C99 compilers get them from the built-in integer types; C99 and later
 * take them from <stdint.h>. */
#if !defined (__STDC_VERSION__) || __STDC_VERSION__ < 199901L   /* not C99 */
  typedef unsigned char      BYTE;
  typedef unsigned short     U16;
  typedef unsigned int       U32;
  typedef   signed int       S32;
  typedef unsigned long long U64;
#else   /* C99 */
# include <stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#endif
| 
 | ||||
| 
 | ||||
| #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) | ||||
| 
 | ||||
| /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ | ||||
| static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } | ||||
| static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } | ||||
| 
 | ||||
| #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) | ||||
| 
 | ||||
| /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ | ||||
| /* currently only defined for gcc and icc */ | ||||
| typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; | ||||
| 
 | ||||
| static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } | ||||
| static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } | ||||
| 
 | ||||
| #else | ||||
| 
 | ||||
| /* portable and safe solution. Generally efficient.
 | ||||
|  * see : http://stackoverflow.com/a/32095106/646947
 | ||||
|  */ | ||||
| 
 | ||||
| static U32 XXH_read32(const void* memPtr) | ||||
| { | ||||
|     U32 val; | ||||
|     memcpy(&val, memPtr, sizeof(val)); | ||||
|     return val; | ||||
| } | ||||
| 
 | ||||
| static U64 XXH_read64(const void* memPtr) | ||||
| { | ||||
|     U64 val; | ||||
|     memcpy(&val, memPtr, sizeof(val)); | ||||
|     return val; | ||||
| } | ||||
| 
 | ||||
| #endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
 | ||||
| 
 | ||||
| 
 | ||||
| /******************************************
 | ||||
| *  Compiler-specific Functions and Macros | ||||
| ******************************************/ | ||||
| #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) | ||||
| 
 | ||||
| /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ | ||||
/* XXH_rotl32(x,r) / XXH_rotl64(x,r) : rotate x left by r bits.
 * x must be a 32-bit (resp. 64-bit) unsigned value and 0 < r < width.
 * In the generic form both parameters are parenthesized so that any expression
 * (e.g. `a | b`) can be passed safely; note that x is still evaluated twice. */
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
| 
 | ||||
| #if defined(_MSC_VER)     /* Visual Studio */ | ||||
| #  define XXH_swap32 _byteswap_ulong | ||||
| #  define XXH_swap64 _byteswap_uint64 | ||||
| #elif GCC_VERSION >= 403 | ||||
| #  define XXH_swap32 __builtin_bswap32 | ||||
| #  define XXH_swap64 __builtin_bswap64 | ||||
| #else | ||||
| static U32 XXH_swap32 (U32 x) | ||||
| { | ||||
|     return  ((x << 24) & 0xff000000 ) | | ||||
|             ((x <<  8) & 0x00ff0000 ) | | ||||
|             ((x >>  8) & 0x0000ff00 ) | | ||||
|             ((x >> 24) & 0x000000ff ); | ||||
| } | ||||
| static U64 XXH_swap64 (U64 x) | ||||
| { | ||||
|     return  ((x << 56) & 0xff00000000000000ULL) | | ||||
|             ((x << 40) & 0x00ff000000000000ULL) | | ||||
|             ((x << 24) & 0x0000ff0000000000ULL) | | ||||
|             ((x << 8)  & 0x000000ff00000000ULL) | | ||||
|             ((x >> 8)  & 0x00000000ff000000ULL) | | ||||
|             ((x >> 24) & 0x0000000000ff0000ULL) | | ||||
|             ((x >> 40) & 0x000000000000ff00ULL) | | ||||
|             ((x >> 56) & 0x00000000000000ffULL); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /***************************************
 | ||||
| *  Architecture Macros | ||||
| ***************************************/ | ||||
/* Byte-order selector passed to the read helpers and hash cores. */
typedef enum
{
    XXH_bigEndian    = 0,
    XXH_littleEndian = 1
} XXH_endianess;

/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line.
 * Otherwise it is evaluated by reading the first byte of an int holding 1:
 * that byte is 1 on a little-endian CPU and 0 on a big-endian one,
 * matching the XXH_endianess values above. */
#ifndef XXH_CPU_LITTLE_ENDIAN
    static const int one = 1;
#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&one))
#endif
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Memory reads | ||||
| *****************************/ | ||||
/* Tells the read helpers whether the source pointer may be dereferenced
 * directly (XXH_aligned) or must go through XXH_read32/XXH_read64 (XXH_unaligned). */
typedef enum
{
    XXH_aligned,
    XXH_unaligned
} XXH_alignment;
| 
 | ||||
| FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) | ||||
| { | ||||
|     if (align==XXH_unaligned) | ||||
|         return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); | ||||
|     else | ||||
|         return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); | ||||
| } | ||||
| 
 | ||||
| FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) | ||||
| { | ||||
|     return XXH_readLE32_align(ptr, endian, XXH_unaligned); | ||||
| } | ||||
| 
 | ||||
| FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) | ||||
| { | ||||
|     if (align==XXH_unaligned) | ||||
|         return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); | ||||
|     else | ||||
|         return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); | ||||
| } | ||||
| 
 | ||||
| FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) | ||||
| { | ||||
|     return XXH_readLE64_align(ptr, endian, XXH_unaligned); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /***************************************
 | ||||
| *  Macros | ||||
| ***************************************/ | ||||
| #define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */ | ||||
| 
 | ||||
| 
 | ||||
| /***************************************
 | ||||
| *  Constants | ||||
| ***************************************/ | ||||
/* Multiplicative constants of the 32-bit hash (decimal value in the comment). */
#define PRIME32_1   0x9E3779B1U   /* 2654435761 */
#define PRIME32_2   0x85EBCA77U   /* 2246822519 */
#define PRIME32_3   0xC2B2AE3DU   /* 3266489917 */
#define PRIME32_4   0x27D4EB2FU   /*  668265263 */
#define PRIME32_5   0x165667B1U   /*  374761393 */

/* Multiplicative constants of the 64-bit hash (decimal value in the comment). */
#define PRIME64_1 0x9E3779B185EBCA87ULL   /* 11400714785074694791 */
#define PRIME64_2 0xC2B2AE3D27D4EB4FULL   /* 14029467366897019727 */
#define PRIME64_3 0x165667B19E3779F9ULL   /*  1609587929392839161 */
#define PRIME64_4 0x85EBCA77C2B2AE63ULL   /*  9650029242287828579 */
#define PRIME64_5 0x27D4EB2F165667C5ULL   /*  2870177450012600261 */
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Simple Hash Functions | ||||
| *****************************/ | ||||
| FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) | ||||
| { | ||||
|     const BYTE* p = (const BYTE*)input; | ||||
|     const BYTE* bEnd = p + len; | ||||
|     U32 h32; | ||||
| #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) | ||||
| 
 | ||||
| #ifdef XXH_ACCEPT_NULL_INPUT_POINTER | ||||
|     if (p==NULL) | ||||
|     { | ||||
|         len=0; | ||||
|         bEnd=p=(const BYTE*)(size_t)16; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     if (len>=16) | ||||
|     { | ||||
|         const BYTE* const limit = bEnd - 16; | ||||
|         U32 v1 = seed + PRIME32_1 + PRIME32_2; | ||||
|         U32 v2 = seed + PRIME32_2; | ||||
|         U32 v3 = seed + 0; | ||||
|         U32 v4 = seed - PRIME32_1; | ||||
| 
 | ||||
|         do | ||||
|         { | ||||
|             v1 += XXH_get32bits(p) * PRIME32_2; | ||||
|             v1 = XXH_rotl32(v1, 13); | ||||
|             v1 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v2 += XXH_get32bits(p) * PRIME32_2; | ||||
|             v2 = XXH_rotl32(v2, 13); | ||||
|             v2 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v3 += XXH_get32bits(p) * PRIME32_2; | ||||
|             v3 = XXH_rotl32(v3, 13); | ||||
|             v3 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v4 += XXH_get32bits(p) * PRIME32_2; | ||||
|             v4 = XXH_rotl32(v4, 13); | ||||
|             v4 *= PRIME32_1; | ||||
|             p+=4; | ||||
|         } | ||||
|         while (p<=limit); | ||||
| 
 | ||||
|         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         h32  = seed + PRIME32_5; | ||||
|     } | ||||
| 
 | ||||
|     h32 += (U32) len; | ||||
| 
 | ||||
|     while (p+4<=bEnd) | ||||
|     { | ||||
|         h32 += XXH_get32bits(p) * PRIME32_3; | ||||
|         h32  = XXH_rotl32(h32, 17) * PRIME32_4 ; | ||||
|         p+=4; | ||||
|     } | ||||
| 
 | ||||
|     while (p<bEnd) | ||||
|     { | ||||
|         h32 += (*p) * PRIME32_5; | ||||
|         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; | ||||
|         p++; | ||||
|     } | ||||
| 
 | ||||
|     h32 ^= h32 >> 15; | ||||
|     h32 *= PRIME32_2; | ||||
|     h32 ^= h32 >> 13; | ||||
|     h32 *= PRIME32_3; | ||||
|     h32 ^= h32 >> 16; | ||||
| 
 | ||||
|     return h32; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| unsigned int XXH32 (const void* input, size_t len, unsigned int seed) | ||||
| { | ||||
| #if 0 | ||||
|     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ | ||||
|     XXH32_state_t state; | ||||
|     XXH32_reset(&state, seed); | ||||
|     XXH32_update(&state, input, len); | ||||
|     return XXH32_digest(&state); | ||||
| #else | ||||
|     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
| 
 | ||||
| #  if !defined(XXH_USELESS_ALIGN_BRANCH) | ||||
|     if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */ | ||||
|     { | ||||
|         if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|             return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); | ||||
|         else | ||||
|             return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); | ||||
|     } | ||||
| #  endif | ||||
| 
 | ||||
|     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|         return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); | ||||
|     else | ||||
|         return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) | ||||
| { | ||||
|     const BYTE* p = (const BYTE*)input; | ||||
|     const BYTE* bEnd = p + len; | ||||
|     U64 h64; | ||||
| #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) | ||||
| 
 | ||||
| #ifdef XXH_ACCEPT_NULL_INPUT_POINTER | ||||
|     if (p==NULL) | ||||
|     { | ||||
|         len=0; | ||||
|         bEnd=p=(const BYTE*)(size_t)32; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     if (len>=32) | ||||
|     { | ||||
|         const BYTE* const limit = bEnd - 32; | ||||
|         U64 v1 = seed + PRIME64_1 + PRIME64_2; | ||||
|         U64 v2 = seed + PRIME64_2; | ||||
|         U64 v3 = seed + 0; | ||||
|         U64 v4 = seed - PRIME64_1; | ||||
| 
 | ||||
|         do | ||||
|         { | ||||
|             v1 += XXH_get64bits(p) * PRIME64_2; | ||||
|             p+=8; | ||||
|             v1 = XXH_rotl64(v1, 31); | ||||
|             v1 *= PRIME64_1; | ||||
|             v2 += XXH_get64bits(p) * PRIME64_2; | ||||
|             p+=8; | ||||
|             v2 = XXH_rotl64(v2, 31); | ||||
|             v2 *= PRIME64_1; | ||||
|             v3 += XXH_get64bits(p) * PRIME64_2; | ||||
|             p+=8; | ||||
|             v3 = XXH_rotl64(v3, 31); | ||||
|             v3 *= PRIME64_1; | ||||
|             v4 += XXH_get64bits(p) * PRIME64_2; | ||||
|             p+=8; | ||||
|             v4 = XXH_rotl64(v4, 31); | ||||
|             v4 *= PRIME64_1; | ||||
|         } | ||||
|         while (p<=limit); | ||||
| 
 | ||||
|         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); | ||||
| 
 | ||||
|         v1 *= PRIME64_2; | ||||
|         v1 = XXH_rotl64(v1, 31); | ||||
|         v1 *= PRIME64_1; | ||||
|         h64 ^= v1; | ||||
|         h64 = h64 * PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v2 *= PRIME64_2; | ||||
|         v2 = XXH_rotl64(v2, 31); | ||||
|         v2 *= PRIME64_1; | ||||
|         h64 ^= v2; | ||||
|         h64 = h64 * PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v3 *= PRIME64_2; | ||||
|         v3 = XXH_rotl64(v3, 31); | ||||
|         v3 *= PRIME64_1; | ||||
|         h64 ^= v3; | ||||
|         h64 = h64 * PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v4 *= PRIME64_2; | ||||
|         v4 = XXH_rotl64(v4, 31); | ||||
|         v4 *= PRIME64_1; | ||||
|         h64 ^= v4; | ||||
|         h64 = h64 * PRIME64_1 + PRIME64_4; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         h64  = seed + PRIME64_5; | ||||
|     } | ||||
| 
 | ||||
|     h64 += (U64) len; | ||||
| 
 | ||||
|     while (p+8<=bEnd) | ||||
|     { | ||||
|         U64 k1 = XXH_get64bits(p); | ||||
|         k1 *= PRIME64_2; | ||||
|         k1 = XXH_rotl64(k1,31); | ||||
|         k1 *= PRIME64_1; | ||||
|         h64 ^= k1; | ||||
|         h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; | ||||
|         p+=8; | ||||
|     } | ||||
| 
 | ||||
|     if (p+4<=bEnd) | ||||
|     { | ||||
|         h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; | ||||
|         h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; | ||||
|         p+=4; | ||||
|     } | ||||
| 
 | ||||
|     while (p<bEnd) | ||||
|     { | ||||
|         h64 ^= (*p) * PRIME64_5; | ||||
|         h64 = XXH_rotl64(h64, 11) * PRIME64_1; | ||||
|         p++; | ||||
|     } | ||||
| 
 | ||||
|     h64 ^= h64 >> 33; | ||||
|     h64 *= PRIME64_2; | ||||
|     h64 ^= h64 >> 29; | ||||
|     h64 *= PRIME64_3; | ||||
|     h64 ^= h64 >> 32; | ||||
| 
 | ||||
|     return h64; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) | ||||
| { | ||||
| #if 0 | ||||
|     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ | ||||
|     XXH64_state_t state; | ||||
|     XXH64_reset(&state, seed); | ||||
|     XXH64_update(&state, input, len); | ||||
|     return XXH64_digest(&state); | ||||
| #else | ||||
|     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
| 
 | ||||
| #  if !defined(XXH_USELESS_ALIGN_BRANCH) | ||||
|     if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */ | ||||
|     { | ||||
|         if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|             return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); | ||||
|         else | ||||
|             return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); | ||||
|     } | ||||
| #  endif | ||||
| 
 | ||||
|     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|         return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); | ||||
|     else | ||||
|         return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| /****************************************************
 | ||||
| *  Advanced Hash Functions | ||||
| ****************************************************/ | ||||
| 
 | ||||
| /*** Allocation ***/ | ||||
| typedef struct | ||||
| { | ||||
|     U64 total_len; | ||||
|     U32 seed; | ||||
|     U32 v1; | ||||
|     U32 v2; | ||||
|     U32 v3; | ||||
|     U32 v4; | ||||
|     U32 mem32[4];   /* defined as U32 for alignment */ | ||||
|     U32 memsize; | ||||
| } XXH_istate32_t; | ||||
| 
 | ||||
| typedef struct | ||||
| { | ||||
|     U64 total_len; | ||||
|     U64 seed; | ||||
|     U64 v1; | ||||
|     U64 v2; | ||||
|     U64 v3; | ||||
|     U64 v4; | ||||
|     U64 mem64[4];   /* defined as U64 for alignment */ | ||||
|     U32 memsize; | ||||
| } XXH_istate64_t; | ||||
| 
 | ||||
| 
 | ||||
| XXH32_state_t* XXH32_createState(void) | ||||
| { | ||||
|     XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */ | ||||
|     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); | ||||
| } | ||||
| XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) | ||||
| { | ||||
|     XXH_free(statePtr); | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| XXH64_state_t* XXH64_createState(void) | ||||
| { | ||||
|     XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */ | ||||
|     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); | ||||
| } | ||||
| XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) | ||||
| { | ||||
|     XXH_free(statePtr); | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*** Hash feed ***/ | ||||
| 
 | ||||
| XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed) | ||||
| { | ||||
|     XXH_istate32_t* state = (XXH_istate32_t*) state_in; | ||||
|     state->seed = seed; | ||||
|     state->v1 = seed + PRIME32_1 + PRIME32_2; | ||||
|     state->v2 = seed + PRIME32_2; | ||||
|     state->v3 = seed + 0; | ||||
|     state->v4 = seed - PRIME32_1; | ||||
|     state->total_len = 0; | ||||
|     state->memsize = 0; | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) | ||||
| { | ||||
|     XXH_istate64_t* state = (XXH_istate64_t*) state_in; | ||||
|     state->seed = seed; | ||||
|     state->v1 = seed + PRIME64_1 + PRIME64_2; | ||||
|     state->v2 = seed + PRIME64_2; | ||||
|     state->v3 = seed + 0; | ||||
|     state->v4 = seed - PRIME64_1; | ||||
|     state->total_len = 0; | ||||
|     state->memsize = 0; | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) | ||||
| { | ||||
|     XXH_istate32_t* state = (XXH_istate32_t *) state_in; | ||||
|     const BYTE* p = (const BYTE*)input; | ||||
|     const BYTE* const bEnd = p + len; | ||||
| 
 | ||||
| #ifdef XXH_ACCEPT_NULL_INPUT_POINTER | ||||
|     if (input==NULL) return XXH_ERROR; | ||||
| #endif | ||||
| 
 | ||||
|     state->total_len += len; | ||||
| 
 | ||||
|     if (state->memsize + len < 16)   /* fill in tmp buffer */ | ||||
|     { | ||||
|         XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); | ||||
|         state->memsize += (U32)len; | ||||
|         return XXH_OK; | ||||
|     } | ||||
| 
 | ||||
|     if (state->memsize)   /* some data left from previous update */ | ||||
|     { | ||||
|         XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); | ||||
|         { | ||||
|             const U32* p32 = state->mem32; | ||||
|             state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; | ||||
|             state->v1 = XXH_rotl32(state->v1, 13); | ||||
|             state->v1 *= PRIME32_1; | ||||
|             p32++; | ||||
|             state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; | ||||
|             state->v2 = XXH_rotl32(state->v2, 13); | ||||
|             state->v2 *= PRIME32_1; | ||||
|             p32++; | ||||
|             state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; | ||||
|             state->v3 = XXH_rotl32(state->v3, 13); | ||||
|             state->v3 *= PRIME32_1; | ||||
|             p32++; | ||||
|             state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; | ||||
|             state->v4 = XXH_rotl32(state->v4, 13); | ||||
|             state->v4 *= PRIME32_1; | ||||
|             p32++; | ||||
|         } | ||||
|         p += 16-state->memsize; | ||||
|         state->memsize = 0; | ||||
|     } | ||||
| 
 | ||||
|     if (p <= bEnd-16) | ||||
|     { | ||||
|         const BYTE* const limit = bEnd - 16; | ||||
|         U32 v1 = state->v1; | ||||
|         U32 v2 = state->v2; | ||||
|         U32 v3 = state->v3; | ||||
|         U32 v4 = state->v4; | ||||
| 
 | ||||
|         do | ||||
|         { | ||||
|             v1 += XXH_readLE32(p, endian) * PRIME32_2; | ||||
|             v1 = XXH_rotl32(v1, 13); | ||||
|             v1 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v2 += XXH_readLE32(p, endian) * PRIME32_2; | ||||
|             v2 = XXH_rotl32(v2, 13); | ||||
|             v2 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v3 += XXH_readLE32(p, endian) * PRIME32_2; | ||||
|             v3 = XXH_rotl32(v3, 13); | ||||
|             v3 *= PRIME32_1; | ||||
|             p+=4; | ||||
|             v4 += XXH_readLE32(p, endian) * PRIME32_2; | ||||
|             v4 = XXH_rotl32(v4, 13); | ||||
|             v4 *= PRIME32_1; | ||||
|             p+=4; | ||||
|         } | ||||
|         while (p<=limit); | ||||
| 
 | ||||
|         state->v1 = v1; | ||||
|         state->v2 = v2; | ||||
|         state->v3 = v3; | ||||
|         state->v4 = v4; | ||||
|     } | ||||
| 
 | ||||
|     if (p < bEnd) | ||||
|     { | ||||
|         XXH_memcpy(state->mem32, p, bEnd-p); | ||||
|         state->memsize = (int)(bEnd-p); | ||||
|     } | ||||
| 
 | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) | ||||
| { | ||||
|     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
| 
 | ||||
|     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|         return XXH32_update_endian(state_in, input, len, XXH_littleEndian); | ||||
|     else | ||||
|         return XXH32_update_endian(state_in, input, len, XXH_bigEndian); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) | ||||
| { | ||||
|     const XXH_istate32_t* state = (const XXH_istate32_t*) state_in; | ||||
|     const BYTE * p = (const BYTE*)state->mem32; | ||||
|     const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize; | ||||
|     U32 h32; | ||||
| 
 | ||||
|     if (state->total_len >= 16) | ||||
|     { | ||||
|         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         h32  = state->seed + PRIME32_5; | ||||
|     } | ||||
| 
 | ||||
|     h32 += (U32) state->total_len; | ||||
| 
 | ||||
|     while (p+4<=bEnd) | ||||
|     { | ||||
|         h32 += XXH_readLE32(p, endian) * PRIME32_3; | ||||
|         h32  = XXH_rotl32(h32, 17) * PRIME32_4; | ||||
|         p+=4; | ||||
|     } | ||||
| 
 | ||||
|     while (p<bEnd) | ||||
|     { | ||||
|         h32 += (*p) * PRIME32_5; | ||||
|         h32 = XXH_rotl32(h32, 11) * PRIME32_1; | ||||
|         p++; | ||||
|     } | ||||
| 
 | ||||
|     h32 ^= h32 >> 15; | ||||
|     h32 *= PRIME32_2; | ||||
|     h32 ^= h32 >> 13; | ||||
|     h32 *= PRIME32_3; | ||||
|     h32 ^= h32 >> 16; | ||||
| 
 | ||||
|     return h32; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| unsigned int XXH32_digest (const XXH32_state_t* state_in) | ||||
| { | ||||
|     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
| 
 | ||||
|     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|         return XXH32_digest_endian(state_in, XXH_littleEndian); | ||||
|     else | ||||
|         return XXH32_digest_endian(state_in, XXH_bigEndian); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian) | ||||
| { | ||||
|     XXH_istate64_t * state = (XXH_istate64_t *) state_in; | ||||
|     const BYTE* p = (const BYTE*)input; | ||||
|     const BYTE* const bEnd = p + len; | ||||
| 
 | ||||
| #ifdef XXH_ACCEPT_NULL_INPUT_POINTER | ||||
|     if (input==NULL) return XXH_ERROR; | ||||
| #endif | ||||
| 
 | ||||
|     state->total_len += len; | ||||
| 
 | ||||
|     if (state->memsize + len < 32)   /* fill in tmp buffer */ | ||||
|     { | ||||
|         XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); | ||||
|         state->memsize += (U32)len; | ||||
|         return XXH_OK; | ||||
|     } | ||||
| 
 | ||||
|     if (state->memsize)   /* some data left from previous update */ | ||||
|     { | ||||
|         XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); | ||||
|         { | ||||
|             const U64* p64 = state->mem64; | ||||
|             state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; | ||||
|             state->v1 = XXH_rotl64(state->v1, 31); | ||||
|             state->v1 *= PRIME64_1; | ||||
|             p64++; | ||||
|             state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; | ||||
|             state->v2 = XXH_rotl64(state->v2, 31); | ||||
|             state->v2 *= PRIME64_1; | ||||
|             p64++; | ||||
|             state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; | ||||
|             state->v3 = XXH_rotl64(state->v3, 31); | ||||
|             state->v3 *= PRIME64_1; | ||||
|             p64++; | ||||
|             state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; | ||||
|             state->v4 = XXH_rotl64(state->v4, 31); | ||||
|             state->v4 *= PRIME64_1; | ||||
|             p64++; | ||||
|         } | ||||
|         p += 32-state->memsize; | ||||
|         state->memsize = 0; | ||||
|     } | ||||
| 
 | ||||
|     if (p+32 <= bEnd) | ||||
|     { | ||||
|         const BYTE* const limit = bEnd - 32; | ||||
|         U64 v1 = state->v1; | ||||
|         U64 v2 = state->v2; | ||||
|         U64 v3 = state->v3; | ||||
|         U64 v4 = state->v4; | ||||
| 
 | ||||
|         do | ||||
|         { | ||||
|             v1 += XXH_readLE64(p, endian) * PRIME64_2; | ||||
|             v1 = XXH_rotl64(v1, 31); | ||||
|             v1 *= PRIME64_1; | ||||
|             p+=8; | ||||
|             v2 += XXH_readLE64(p, endian) * PRIME64_2; | ||||
|             v2 = XXH_rotl64(v2, 31); | ||||
|             v2 *= PRIME64_1; | ||||
|             p+=8; | ||||
|             v3 += XXH_readLE64(p, endian) * PRIME64_2; | ||||
|             v3 = XXH_rotl64(v3, 31); | ||||
|             v3 *= PRIME64_1; | ||||
|             p+=8; | ||||
|             v4 += XXH_readLE64(p, endian) * PRIME64_2; | ||||
|             v4 = XXH_rotl64(v4, 31); | ||||
|             v4 *= PRIME64_1; | ||||
|             p+=8; | ||||
|         } | ||||
|         while (p<=limit); | ||||
| 
 | ||||
|         state->v1 = v1; | ||||
|         state->v2 = v2; | ||||
|         state->v3 = v3; | ||||
|         state->v4 = v4; | ||||
|     } | ||||
| 
 | ||||
|     if (p < bEnd) | ||||
|     { | ||||
|         XXH_memcpy(state->mem64, p, bEnd-p); | ||||
|         state->memsize = (int)(bEnd-p); | ||||
|     } | ||||
| 
 | ||||
|     return XXH_OK; | ||||
| } | ||||
| 
 | ||||
| XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) | ||||
| { | ||||
|     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
| 
 | ||||
|     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) | ||||
|         return XXH64_update_endian(state_in, input, len, XXH_littleEndian); | ||||
|     else | ||||
|         return XXH64_update_endian(state_in, input, len, XXH_bigEndian); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) | ||||
| { | ||||
|     const XXH_istate64_t * state = (const XXH_istate64_t *) state_in; | ||||
|     const BYTE * p = (const BYTE*)state->mem64; | ||||
|     const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize; | ||||
|     U64 h64; | ||||
| 
 | ||||
|     if (state->total_len >= 32) | ||||
|     { | ||||
|         U64 v1 = state->v1; | ||||
|         U64 v2 = state->v2; | ||||
|         U64 v3 = state->v3; | ||||
|         U64 v4 = state->v4; | ||||
| 
 | ||||
|         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); | ||||
| 
 | ||||
|         v1 *= PRIME64_2; | ||||
|         v1 = XXH_rotl64(v1, 31); | ||||
|         v1 *= PRIME64_1; | ||||
|         h64 ^= v1; | ||||
|         h64 = h64*PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v2 *= PRIME64_2; | ||||
|         v2 = XXH_rotl64(v2, 31); | ||||
|         v2 *= PRIME64_1; | ||||
|         h64 ^= v2; | ||||
|         h64 = h64*PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v3 *= PRIME64_2; | ||||
|         v3 = XXH_rotl64(v3, 31); | ||||
|         v3 *= PRIME64_1; | ||||
|         h64 ^= v3; | ||||
|         h64 = h64*PRIME64_1 + PRIME64_4; | ||||
| 
 | ||||
|         v4 *= PRIME64_2; | ||||
|         v4 = XXH_rotl64(v4, 31); | ||||
|         v4 *= PRIME64_1; | ||||
|         h64 ^= v4; | ||||
|         h64 = h64*PRIME64_1 + PRIME64_4; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         h64  = state->seed + PRIME64_5; | ||||
|     } | ||||
| 
 | ||||
|     h64 += (U64) state->total_len; | ||||
| 
 | ||||
|     while (p+8<=bEnd) | ||||
|     { | ||||
|         U64 k1 = XXH_readLE64(p, endian); | ||||
|         k1 *= PRIME64_2; | ||||
|         k1 = XXH_rotl64(k1,31); | ||||
|         k1 *= PRIME64_1; | ||||
|         h64 ^= k1; | ||||
|         h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; | ||||
|         p+=8; | ||||
|     } | ||||
| 
 | ||||
|     if (p+4<=bEnd) | ||||
|     { | ||||
|         h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; | ||||
|         h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; | ||||
|         p+=4; | ||||
|     } | ||||
| 
 | ||||
|     while (p<bEnd) | ||||
|     { | ||||
|         h64 ^= (*p) * PRIME64_5; | ||||
|         h64 = XXH_rotl64(h64, 11) * PRIME64_1; | ||||
|         p++; | ||||
|     } | ||||
| 
 | ||||
|     h64 ^= h64 >> 33; | ||||
|     h64 *= PRIME64_2; | ||||
|     h64 ^= h64 >> 29; | ||||
|     h64 *= PRIME64_3; | ||||
|     h64 ^= h64 >> 32; | ||||
| 
 | ||||
|     return h64; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| unsigned long long XXH64_digest (const XXH64_state_t* state_in)   /* public streaming entry point: 64-bit hash of the data fed into state_in so far */ | ||||
| { | ||||
|     const XXH_endianess cpuOrder = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; | ||||
|     const int readAsBig = (cpuOrder!=XXH_littleEndian) && !(XXH_FORCE_NATIVE_FORMAT); | ||||
| 
 | ||||
|     /* Each branch forwards the untouched state together with a literal read-order constant. */ | ||||
|     if (readAsBig) | ||||
|         return XXH64_digest_endian(state_in, XXH_bigEndian); | ||||
|     return XXH64_digest_endian(state_in, XXH_littleEndian); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
							
								
								
									
										192
									
								
								xxhash/xxhash.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								xxhash/xxhash.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,192 @@ | ||||
| /*
 | ||||
|    xxHash - Extremely Fast Hash algorithm | ||||
|    Header File | ||||
|    Copyright (C) 2012-2015, Yann Collet. | ||||
| 
 | ||||
|    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 | ||||
| 
 | ||||
|    Redistribution and use in source and binary forms, with or without | ||||
|    modification, are permitted provided that the following conditions are | ||||
|    met: | ||||
| 
 | ||||
|        * Redistributions of source code must retain the above copyright | ||||
|    notice, this list of conditions and the following disclaimer. | ||||
|        * Redistributions in binary form must reproduce the above | ||||
|    copyright notice, this list of conditions and the following disclaimer | ||||
|    in the documentation and/or other materials provided with the | ||||
|    distribution. | ||||
| 
 | ||||
|    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||||
|    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||||
|    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||||
|    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||||
|    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||||
|    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||||
|    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||||
|    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||||
|    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
|    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||||
|    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| 
 | ||||
|    You can contact the author at : | ||||
|    - xxHash source repository : https://github.com/Cyan4973/xxHash
 | ||||
| */ | ||||
| 
 | ||||
| /* Notice extracted from xxHash homepage :
 | ||||
| 
 | ||||
| xxHash is an extremely fast Hash algorithm, running at RAM speed limits. | ||||
| It also successfully passes all tests from the SMHasher suite. | ||||
| 
 | ||||
| Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) | ||||
| 
 | ||||
| Name            Speed       Q.Score   Author | ||||
| xxHash          5.4 GB/s     10 | ||||
| CrapWow         3.2 GB/s      2       Andrew | ||||
| MurmurHash 3a   2.7 GB/s     10       Austin Appleby | ||||
| SpookyHash      2.0 GB/s     10       Bob Jenkins | ||||
| SBox            1.4 GB/s      9       Bret Mulvey | ||||
| Lookup3         1.2 GB/s      9       Bob Jenkins | ||||
| SuperFastHash   1.2 GB/s      1       Paul Hsieh | ||||
| CityHash64      1.05 GB/s    10       Pike & Alakuijala | ||||
| FNV             0.55 GB/s     5       Fowler, Noll, Vo | ||||
| CRC32           0.43 GB/s     9 | ||||
| MD5-32          0.33 GB/s    10       Ronald L. Rivest | ||||
| SHA1-32         0.28 GB/s    10 | ||||
| 
 | ||||
| Q.Score is a measure of quality of the hash function. | ||||
| It depends on successfully passing SMHasher test set. | ||||
| 10 is a perfect score. | ||||
| 
 | ||||
| A 64-bits version, named XXH64, is available since r35. | ||||
| It offers much better speed, but for 64-bits applications only. | ||||
| Name     Speed on 64 bits    Speed on 32 bits | ||||
| XXH64       13.8 GB/s            1.9 GB/s | ||||
| XXH32        6.8 GB/s            6.0 GB/s | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #if defined (__cplusplus) | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Definitions | ||||
| *****************************/ | ||||
| #include <stddef.h>   /* size_t */ | ||||
| typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;   /* status returned by the XXHnn_freeState/_reset/_update prototypes below; XXH_OK is 0 */ | ||||
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Namespace Emulation | ||||
| *****************************/ | ||||
| /* Motivations :
 | ||||
| 
 | ||||
| If you need to include xxHash into your library, | ||||
| but wish to keep xxHash symbols out of your library interface | ||||
| in an effort to avoid potential name collision if another library also includes xxHash, | ||||
| 
 | ||||
| you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash | ||||
| with the value of XXH_NAMESPACE (so do not leave it empty, and do not use a numeric value). | ||||
| 
 | ||||
| Note that no change is required within the calling program : | ||||
| it can still call xxHash functions using their regular name. | ||||
| They will be automatically translated by this header. | ||||
| */ | ||||
| #ifdef XXH_NAMESPACE   /* when defined, each function declared in this header is renamed to <XXH_NAMESPACE><name> */ | ||||
| #  define XXH_CAT(A,B) A##B   /* raw token paste of A and B */ | ||||
| #  define XXH_NAME2(A,B) XXH_CAT(A,B)   /* extra macro level so that XXH_NAMESPACE is expanded before it is pasted */ | ||||
| #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) | ||||
| #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) | ||||
| #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) | ||||
| #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) | ||||
| #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) | ||||
| #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) | ||||
| #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) | ||||
| #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) | ||||
| #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) | ||||
| #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) | ||||
| #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) | ||||
| #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Simple Hash Functions | ||||
| *****************************/ | ||||
| 
 | ||||
| unsigned int       XXH32 (const void* input, size_t length, unsigned seed);   /* one-shot: 32-bit hash of the "length" bytes at "input" */ | ||||
| unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);   /* one-shot: 64-bit hash of the same kind of input; the seed parameter is unsigned long long here */ | ||||
| 
 | ||||
| /*
 | ||||
| XXH32() : | ||||
|     Calculate the 32-bits hash of the sequence of "length" bytes stored at memory address "input". | ||||
|     The memory between input & input+length must be valid (allocated and read-accessible). | ||||
|     "seed" can be used to alter the result predictably. | ||||
|     This function successfully passes all SMHasher tests. | ||||
|     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s | ||||
| XXH64() : | ||||
|     Calculate the 64-bits hash of the sequence of "length" bytes stored at memory address "input". | ||||
|     Faster on 64-bits systems. Slower on 32-bits systems. | ||||
| */ | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| /*****************************
 | ||||
| *  Advanced Hash Functions | ||||
| *****************************/ | ||||
| typedef struct { long long ll[ 6]; } XXH32_state_t;   /* placeholder of 6 long long slots that lets callers statically allocate an XXH32 streaming state */ | ||||
| typedef struct { long long ll[11]; } XXH64_state_t;   /* placeholder of 11 long long slots for an XXH64 state; xxhash.c reads it through a cast to its internal XXH_istate64_t */ | ||||
| 
 | ||||
| /*
 | ||||
| These structures allow static allocation of XXH states. | ||||
| States must then be initialized using XXHnn_reset() before first use. | ||||
| 
 | ||||
| If you prefer dynamic allocation, please refer to functions below. | ||||
| */ | ||||
| 
 | ||||
| XXH32_state_t* XXH32_createState(void);   /* allocates memory for a 32-bit state; it still needs XXH32_reset() before first use */ | ||||
| XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);   /* releases the memory of a 32-bit state */ | ||||
| 
 | ||||
| XXH64_state_t* XXH64_createState(void);   /* allocates memory for a 64-bit state; it still needs XXH64_reset() before first use */ | ||||
| XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);   /* releases the memory of a 64-bit state */ | ||||
| 
 | ||||
| /*
 | ||||
| These functions create and release memory for XXH state. | ||||
| States must then be initialized using XXHnn_reset() before first use. | ||||
| */ | ||||
| 
 | ||||
| 
 | ||||
| XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);   /* starts a new 32-bit hash by initializing the state with "seed" */ | ||||
| XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);   /* feeds "length" bytes from "input"; may be called as many times as necessary; 0 (XXH_OK) means success */ | ||||
| unsigned int  XXH32_digest (const XXH32_state_t* statePtr);   /* returns the 32-bit hash at this point; the state is taken as const and can keep being updated afterwards */ | ||||
| 
 | ||||
| XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);   /* starts a new 64-bit hash by initializing the state with "seed" */ | ||||
| XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);   /* feeds "length" bytes from "input"; may be called as many times as necessary; 0 (XXH_OK) means success */ | ||||
| unsigned long long XXH64_digest (const XXH64_state_t* statePtr);   /* returns the 64-bit hash at this point; the state is taken as const and can keep being updated afterwards */ | ||||
| 
 | ||||
| /*
 | ||||
| These functions calculate the xxHash of an input provided in multiple smaller packets, | ||||
| as opposed to an input provided as a single block. | ||||
| 
 | ||||
| XXH state space must first be allocated, using either static or dynamic method provided above. | ||||
| 
 | ||||
| Start a new hash by initializing state with a seed, using XXHnn_reset(). | ||||
| 
 | ||||
| Then, feed the hash state by calling XXHnn_update() as many times as necessary. | ||||
| Obviously, input must be valid, meaning allocated and read accessible. | ||||
| The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. | ||||
| 
 | ||||
| Finally, you can produce a hash anytime, by using XXHnn_digest(). | ||||
| This function returns the final nn-bits hash. | ||||
| You can nonetheless continue feeding the hash state with more input, | ||||
| and therefore get new hashes, by calling XXHnn_digest() again. | ||||
| 
 | ||||
| When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). | ||||
| */ | ||||
| 
 | ||||
| 
 | ||||
| #if defined (__cplusplus) | ||||
| } | ||||
| #endif | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user