Move xxhash to boo; cached shader components

This commit is contained in:
Jack Andersen 2017-03-04 21:54:58 -10:00
parent 8bcac27c10
commit 03f155fcf5
16 changed files with 1558 additions and 217 deletions

View File

@ -9,6 +9,8 @@ if (NOT TARGET logvisor)
add_subdirectory(logvisor) add_subdirectory(logvisor)
endif() endif()
add_subdirectory(xxhash)
set(WITH_LSR_BINDINGS OFF) set(WITH_LSR_BINDINGS OFF)
set(BUILD_TESTS OFF) set(BUILD_TESTS OFF)
set(BUILD_SHARED_LIBS OFF) set(BUILD_SHARED_LIBS OFF)
@ -18,7 +20,7 @@ add_subdirectory(soxr)
set(BOO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "boo include path" FORCE) set(BOO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "boo include path" FORCE)
include_directories(include ${LOGVISOR_INCLUDE_DIR}) include_directories(include xxhash ${LOGVISOR_INCLUDE_DIR})
if(NOT GEKKO AND NOT CAFE) if(NOT GEKKO AND NOT CAFE)
list(APPEND PLAT_SRCS list(APPEND PLAT_SRCS
@ -195,7 +197,7 @@ target_include_directories(glslang-default-resource-limits
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/glslang PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/glslang
) )
list(APPEND _BOO_SYS_LIBS glslang HLSL soxr OSDependent OGLCompiler SPIRV glslang-default-resource-limits) list(APPEND _BOO_SYS_LIBS glslang HLSL soxr xxhash OSDependent OGLCompiler SPIRV glslang-default-resource-limits)
set(BOO_SYS_LIBS ${_BOO_SYS_LIBS} CACHE PATH "boo system libraries" FORCE) set(BOO_SYS_LIBS ${_BOO_SYS_LIBS} CACHE PATH "boo system libraries" FORCE)
set(BOO_SYS_DEFINES ${_BOO_SYS_DEFINES} CACHE PATH "boo system defines" FORCE) set(BOO_SYS_DEFINES ${_BOO_SYS_DEFINES} CACHE PATH "boo system defines" FORCE)

View File

@ -5,39 +5,16 @@
#include "IGraphicsCommandQueue.hpp" #include "IGraphicsCommandQueue.hpp"
#include "boo/IGraphicsContext.hpp" #include "boo/IGraphicsContext.hpp"
#include "GLSLMacros.hpp" #include "GLSLMacros.hpp"
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <mutex>
namespace boo namespace boo
{ {
class GLDataFactory : public IGraphicsDataFactory class GLDataFactory : public IGraphicsDataFactory
{ {
friend struct GLCommandQueue;
IGraphicsContext* m_parent;
uint32_t m_drawSamples;
static ThreadLocalPtr<struct GLData> m_deferredData;
std::unordered_set<struct GLData*> m_committedData;
std::unordered_set<struct GLPool*> m_committedPools;
std::mutex m_committedMutex;
void destroyData(IGraphicsData*);
void destroyAllData();
void destroyPool(IGraphicsBufferPool*);
IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
size_t stride, size_t count);
void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
public: public:
GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples);
~GLDataFactory() {destroyAllData();}
Platform platform() const {return Platform::OpenGL;}
const SystemChar* platformName() const {return _S("OpenGL");}
class Context : public IGraphicsDataFactory::Context class Context : public IGraphicsDataFactory::Context
{ {
friend class GLDataFactory; friend class GLDataFactoryImpl;
GLDataFactory& m_parent; GLDataFactory& m_parent;
Context(GLDataFactory& parent) : m_parent(parent) {} Context(GLDataFactory& parent) : m_parent(parent) {}
public: public:
@ -73,9 +50,6 @@ public:
const size_t* ubufOffs, const size_t* ubufSizes, const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0); size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0);
}; };
GraphicsDataToken commitTransaction(const FactoryCommitFunc&);
GraphicsBufferPoolToken newBufferPool();
}; };
} }

View File

@ -10,7 +10,7 @@ namespace boo
struct IGraphicsCommandQueue struct IGraphicsCommandQueue
{ {
virtual ~IGraphicsCommandQueue() {} virtual ~IGraphicsCommandQueue() = default;
using Platform = IGraphicsDataFactory::Platform; using Platform = IGraphicsDataFactory::Platform;
virtual Platform platform() const=0; virtual Platform platform() const=0;

View File

@ -17,7 +17,7 @@ struct IGraphicsBuffer
protected: protected:
bool m_dynamic; bool m_dynamic;
IGraphicsBuffer(bool dynamic) : m_dynamic(dynamic) {} IGraphicsBuffer(bool dynamic) : m_dynamic(dynamic) {}
virtual ~IGraphicsBuffer() {} virtual ~IGraphicsBuffer() = default;
}; };
/** Static resource buffer for verts, indices, uniform constants */ /** Static resource buffer for verts, indices, uniform constants */
@ -190,7 +190,7 @@ enum class BlendFactor
/** Factory object for creating batches of resources as an IGraphicsData token */ /** Factory object for creating batches of resources as an IGraphicsData token */
struct IGraphicsDataFactory struct IGraphicsDataFactory
{ {
virtual ~IGraphicsDataFactory() {} virtual ~IGraphicsDataFactory() = default;
enum class Platform enum class Platform
{ {
@ -276,10 +276,10 @@ using FactoryCommitFunc = std::function<bool(IGraphicsDataFactory::Context& ctx)
* IGraphicsData (please don't delete and draw contained resources in the same frame). */ * IGraphicsData (please don't delete and draw contained resources in the same frame). */
class GraphicsDataToken class GraphicsDataToken
{ {
friend class GLDataFactory; friend class GLDataFactoryImpl;
friend class D3D12DataFactory; friend class D3D12DataFactory;
friend class D3D11DataFactory; friend class D3D11DataFactory;
friend class MetalDataFactory; friend class MetalDataFactoryImpl;
friend class VulkanDataFactory; friend class VulkanDataFactory;
IGraphicsDataFactory* m_factory = nullptr; IGraphicsDataFactory* m_factory = nullptr;
IGraphicsData* m_data = nullptr; IGraphicsData* m_data = nullptr;
@ -323,10 +323,10 @@ public:
* (please don't delete and draw contained resources in the same frame). */ * (please don't delete and draw contained resources in the same frame). */
class GraphicsBufferPoolToken class GraphicsBufferPoolToken
{ {
friend class GLDataFactory; friend class GLDataFactoryImpl;
friend class D3D12DataFactory; friend class D3D12DataFactory;
friend class D3D11DataFactory; friend class D3D11DataFactory;
friend class MetalDataFactory; friend class MetalDataFactoryImpl;
friend class VulkanDataFactory; friend class VulkanDataFactory;
IGraphicsDataFactory* m_factory = nullptr; IGraphicsDataFactory* m_factory = nullptr;
IGraphicsBufferPool* m_pool = nullptr; IGraphicsBufferPool* m_pool = nullptr;

View File

@ -6,42 +6,16 @@
#include "IGraphicsDataFactory.hpp" #include "IGraphicsDataFactory.hpp"
#include "IGraphicsCommandQueue.hpp" #include "IGraphicsCommandQueue.hpp"
#include "boo/IGraphicsContext.hpp" #include "boo/IGraphicsContext.hpp"
#include <vector>
#include <mutex>
#include <unordered_set>
#include <unordered_map>
namespace boo namespace boo
{ {
struct MetalContext;
class MetalDataFactory : public IGraphicsDataFactory class MetalDataFactory : public IGraphicsDataFactory
{ {
friend struct MetalCommandQueue;
IGraphicsContext* m_parent;
static ThreadLocalPtr<struct MetalData> m_deferredData;
std::unordered_set<struct MetalData*> m_committedData;
std::unordered_set<struct MetalPool*> m_committedPools;
std::mutex m_committedMutex;
struct MetalContext* m_ctx;
uint32_t m_sampleCount;
void destroyData(IGraphicsData*);
void destroyAllData();
void destroyPool(IGraphicsBufferPool*);
IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
size_t stride, size_t count);
void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
public: public:
MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount);
~MetalDataFactory() {}
Platform platform() const {return Platform::Metal;}
const char* platformName() const {return "Metal";}
class Context : public IGraphicsDataFactory::Context class Context : public IGraphicsDataFactory::Context
{ {
friend class MetalDataFactory; friend class MetalDataFactoryImpl;
MetalDataFactory& m_parent; MetalDataFactory& m_parent;
Context(MetalDataFactory& parent) : m_parent(parent) {} Context(MetalDataFactory& parent) : m_parent(parent) {}
public: public:
@ -76,9 +50,6 @@ public:
const size_t* ubufOffs, const size_t* ubufSizes, const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0); size_t texCount, ITexture** texs, size_t baseVert = 0, size_t baseInst = 0);
}; };
GraphicsDataToken commitTransaction(const std::function<bool(IGraphicsDataFactory::Context& ctx)>&);
GraphicsBufferPoolToken newBufferPool();
}; };
} }

View File

@ -50,6 +50,44 @@ public:
Token lock() const { return Token(this); } Token lock() const { return Token(this); }
}; };
/** Reference-counted base for a shader object that a factory caches under a
 *  64-bit key so several pipelines can share one compiled stage.
 *
 *  FactoryImpl must expose `_unregisterShareableShader(uint64_t)`; it is
 *  invoked with this shader's key when the last Token is released.
 *  ShaderImpl is the concrete derived type; Token::get() casts back to it.
 *
 *  The count itself is atomic, but nothing here synchronizes the factory
 *  call made when it reaches zero. */
template <class FactoryImpl, class ShaderImpl>
class IShareableShader
{
    std::atomic_int m_refCount = {0};
    FactoryImpl& m_factory;
    uint64_t m_key;
public:
    /** @param factory Cache owner notified when the last reference goes away.
     *  @param key     Key under which the factory registered this shader. */
    IShareableShader(FactoryImpl& factory, uint64_t key)
    : m_factory(factory), m_key(key) {}

    /** Adds one reference. */
    void increment() { m_refCount++; }

    /** Drops one reference; the drop that takes the count from 1 to 0 asks
     *  the factory to unregister this key. The factory may destroy this
     *  object while doing so, so nothing touches `this` after the call. */
    void decrement()
    {
        if (m_refCount.fetch_sub(1) == 1)
            m_factory._unregisterShareableShader(m_key);
    }

    /** Move-only handle owning exactly one reference (or none when empty). */
    class Token
    {
        IShareableShader<FactoryImpl, ShaderImpl>* m_parent = nullptr;

        /* Gives up the held reference, if any, leaving the token empty.
         * m_parent is cleared first because decrement() may destroy the
         * shader. */
        void release()
        {
            IShareableShader<FactoryImpl, ShaderImpl>* held = m_parent;
            m_parent = nullptr;
            if (held)
                held->decrement();
        }
    public:
        /** Empty token; converts to false. */
        Token() = default;

        /** Takes a new reference on `p`. A null `p` yields an empty token
         *  instead of dereferencing it. */
        Token(IShareableShader* p)
        : m_parent(p)
        { if (m_parent) m_parent->increment(); }

        Token& operator=(const Token&) = delete;
        Token(const Token&) = delete;

        /** Steals `other`'s reference. Any reference already held by this
         *  token is released first-hand (previously it was overwritten and
         *  therefore never decremented). Self-move is a no-op. */
        Token& operator=(Token&& other)
        {
            if (this != &other)
            {
                /* Adopt the incoming reference before releasing the old one
                 * so both tokens are consistent if the release re-enters
                 * the factory. */
                IShareableShader<FactoryImpl, ShaderImpl>* previous = m_parent;
                m_parent = other.m_parent;
                other.m_parent = nullptr;
                if (previous)
                    previous->decrement();
            }
            return *this;
        }

        /** Steals `other`'s reference, leaving it empty. */
        Token(Token&& other) noexcept
        { m_parent = other.m_parent; other.m_parent = nullptr; }

        ~Token() { release(); }

        /** True when the token holds a reference. */
        operator bool() const { return m_parent != nullptr; }

        /** Returns the shared shader as its concrete type.
         *  Precondition: the token is non-empty. */
        ShaderImpl& get() const { return static_cast<ShaderImpl&>(*m_parent); }
    };

    /** Creates a Token holding a fresh reference to this shader. */
    Token lock() { return Token(this); }
};
} }
#endif // BOO_GRAPHICSDEV_COMMON_HPP #endif // BOO_GRAPHICSDEV_COMMON_HPP

View File

@ -8,7 +8,10 @@
#include <condition_variable> #include <condition_variable>
#include <array> #include <array>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <atomic> #include <atomic>
#include <functional>
#include "xxhash.h"
#include "logvisor/logvisor.hpp" #include "logvisor/logvisor.hpp"
@ -18,8 +21,47 @@
namespace boo namespace boo
{ {
static logvisor::Module Log("boo::GL"); static logvisor::Module Log("boo::GL");
class GLDataFactoryImpl;
ThreadLocalPtr<struct GLData> GLDataFactory::m_deferredData; struct GLShareableShader : IShareableShader<GLDataFactoryImpl, GLShareableShader>
{
GLuint m_shader = 0;
GLShareableShader(GLDataFactoryImpl& fac, uint64_t key, GLuint s)
: IShareableShader(fac, key), m_shader(s) {}
~GLShareableShader() { glDeleteShader(m_shader); }
};
/** Concrete OpenGL data factory. Holds the state and lifecycle methods that
 *  sit behind the public GLDataFactory interface: the committed data/pool
 *  registries, the per-thread in-flight transaction, and the cache of
 *  shareable compiled shader objects. Instances are created through
 *  _NewGLDataFactory(). */
class GLDataFactoryImpl : public GLDataFactory
{
friend struct GLCommandQueue;
friend class GLDataFactory::Context;
/* Graphics context this factory was created for; used to reach the command queue. */
IGraphicsContext* m_parent;
/* Sample count handed to render textures created through this factory. */
uint32_t m_drawSamples;
/* Data object being filled by the current thread's commitTransaction();
 * a non-null value on entry is reported as nested usage. */
static ThreadLocalPtr<struct GLData> m_deferredData;
/* Live committed data sets and buffer pools; guarded by m_committedMutex. */
std::unordered_set<struct GLData*> m_committedData;
std::unordered_set<struct GLPool*> m_committedPools;
std::mutex m_committedMutex;
/* Compiled shader objects keyed by the XXH64 hash of their source text.
 * NOTE(review): the visible accessors (newShaderPipeline and
 * _unregisterShareableShader) do not take m_committedMutex around this map —
 * confirm all access happens on one thread. */
std::unordered_map<uint64_t, std::unique_ptr<GLShareableShader>> m_sharedShaders;
void destroyData(IGraphicsData*);
void destroyAllData();
void destroyPool(IGraphicsBufferPool*);
IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
size_t stride, size_t count);
void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
public:
GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples);
/* Tears down every committed data set and pool before the members are destroyed. */
~GLDataFactoryImpl() {destroyAllData();}
Platform platform() const {return Platform::OpenGL;}
const SystemChar* platformName() const {return _S("OpenGL");}
GraphicsDataToken commitTransaction(const FactoryCommitFunc&);
GraphicsBufferPoolToken newBufferPool();
/* Called by IShareableShader when the last reference to a cached shader is
 * released; erasing the entry destroys the GLShareableShader it owns. */
void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); }
};
ThreadLocalPtr<struct GLData> GLDataFactoryImpl::m_deferredData;
struct GLData : IGraphicsDataPriv<GLData> struct GLData : IGraphicsDataPriv<GLData>
{ {
std::vector<std::unique_ptr<class GLShaderPipeline>> m_SPs; std::vector<std::unique_ptr<class GLShaderPipeline>> m_SPs;
@ -75,6 +117,7 @@ public:
class GLGraphicsBufferD : public IGraphicsBufferD class GLGraphicsBufferD : public IGraphicsBufferD
{ {
friend class GLDataFactory; friend class GLDataFactory;
friend class GLDataFactoryImpl;
friend struct GLCommandQueue; friend struct GLCommandQueue;
GLuint m_bufs[3]; GLuint m_bufs[3];
GLenum m_target; GLenum m_target;
@ -109,7 +152,7 @@ IGraphicsBufferS*
GLDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) GLDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count)
{ {
GLGraphicsBufferS* retval = new GLGraphicsBufferS(use, data, stride * count); GLGraphicsBufferS* retval = new GLGraphicsBufferS(use, data, stride * count);
m_deferredData->m_SBufs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval);
return retval; return retval;
} }
@ -344,7 +387,7 @@ GLDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mip
const void* data, size_t sz) const void* data, size_t sz)
{ {
GLTextureS* retval = new GLTextureS(width, height, mips, fmt, data, sz); GLTextureS* retval = new GLTextureS(width, height, mips, fmt, data, sz);
m_deferredData->m_STexs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval);
return retval; return retval;
} }
@ -353,7 +396,7 @@ GLDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_
TextureFormat fmt, const void *data, size_t sz) TextureFormat fmt, const void *data, size_t sz)
{ {
GLTextureSA* retval = new GLTextureSA(width, height, layers, mips, fmt, data, sz); GLTextureSA* retval = new GLTextureSA(width, height, layers, mips, fmt, data, sz);
m_deferredData->m_SATexs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval);
return retval; return retval;
} }
@ -362,8 +405,8 @@ class GLShaderPipeline : public IShaderPipeline
friend class GLDataFactory; friend class GLDataFactory;
friend struct GLCommandQueue; friend struct GLCommandQueue;
friend struct GLShaderDataBinding; friend struct GLShaderDataBinding;
GLuint m_vert = 0; GLShareableShader::Token m_vert;
GLuint m_frag = 0; GLShareableShader::Token m_frag;
GLuint m_prog = 0; GLuint m_prog = 0;
GLenum m_sfactor = GL_ONE; GLenum m_sfactor = GL_ONE;
GLenum m_dfactor = GL_ZERO; GLenum m_dfactor = GL_ZERO;
@ -372,48 +415,17 @@ class GLShaderPipeline : public IShaderPipeline
bool m_depthWrite = true; bool m_depthWrite = true;
bool m_backfaceCulling = true; bool m_backfaceCulling = true;
std::vector<GLint> m_uniLocs; std::vector<GLint> m_uniLocs;
/* Creates the vertex shader, fragment shader and program objects and
 * attaches both shaders to the program. If any of the three handles comes
 * back as 0, all three are deleted and reset to 0.
 * Returns true only when every object was created and attached. */
bool initObjects()
{
    m_vert = glCreateShader(GL_VERTEX_SHADER);
    m_frag = glCreateShader(GL_FRAGMENT_SHADER);
    m_prog = glCreateProgram();

    const bool allCreated = m_vert && m_frag && m_prog;
    if (allCreated)
    {
        glAttachShader(m_prog, m_vert);
        glAttachShader(m_prog, m_frag);
        return true;
    }

    /* Partial failure: release whatever was created and zero every handle. */
    glDeleteShader(m_vert);
    glDeleteShader(m_frag);
    glDeleteProgram(m_prog);
    m_vert = 0;
    m_frag = 0;
    m_prog = 0;
    return false;
}
/* Deletes each GL object this pipeline still holds; handles equal to 0 are
 * skipped. The handles themselves are left unchanged. */
void clearObjects()
{
    if (m_vert != 0) { glDeleteShader(m_vert); }
    if (m_frag != 0) { glDeleteShader(m_frag); }
    if (m_prog != 0) { glDeleteProgram(m_prog); }
}
GLShaderPipeline() = default; GLShaderPipeline() = default;
public: public:
operator bool() const {return m_prog != 0;} operator bool() const {return m_prog != 0;}
~GLShaderPipeline() {clearObjects();} ~GLShaderPipeline() { glDeleteProgram(m_prog); }
GLShaderPipeline& operator=(const GLShaderPipeline&) = delete; GLShaderPipeline& operator=(const GLShaderPipeline&) = delete;
GLShaderPipeline(const GLShaderPipeline&) = delete; GLShaderPipeline(const GLShaderPipeline&) = delete;
GLShaderPipeline& operator=(GLShaderPipeline&& other) GLShaderPipeline& operator=(GLShaderPipeline&& other)
{ {
m_vert = other.m_vert; m_vert = std::move(other.m_vert);
other.m_vert = 0; m_frag = std::move(other.m_frag);
m_frag = other.m_frag; m_prog = std::move(other.m_prog);
other.m_frag = 0;
m_prog = other.m_prog;
other.m_prog = 0;
m_sfactor = other.m_sfactor; m_sfactor = other.m_sfactor;
m_dfactor = other.m_dfactor; m_dfactor = other.m_dfactor;
m_depthTest = other.m_depthTest; m_depthTest = other.m_depthTest;
@ -482,47 +494,95 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline
BlendFactor srcFac, BlendFactor dstFac, Primitive prim, BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
bool depthTest, bool depthWrite, bool backfaceCulling) bool depthTest, bool depthWrite, bool backfaceCulling)
{ {
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
GLShaderPipeline shader; GLShaderPipeline shader;
if (!shader.initObjects())
{
Log.report(logvisor::Error, "unable to create shader objects\n");
return nullptr;
}
shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)];
shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)];
shader.m_depthTest = depthTest;
shader.m_depthWrite = depthWrite;
shader.m_backfaceCulling = backfaceCulling;
shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)];
glShaderSource(shader.m_vert, 1, &vertSource, nullptr); XXH64_state_t hashState;
glCompileShader(shader.m_vert); uint64_t hashes[2];
XXH64_reset(&hashState, 0);
XXH64_update(&hashState, vertSource, strlen(vertSource));
hashes[0] = XXH64_digest(&hashState);
XXH64_reset(&hashState, 0);
XXH64_update(&hashState, fragSource, strlen(fragSource));
hashes[1] = XXH64_digest(&hashState);
GLint status; GLint status;
glGetShaderiv(shader.m_vert, GL_COMPILE_STATUS, &status); auto vertFind = factory.m_sharedShaders.find(hashes[0]);
if (status != GL_TRUE) if (vertFind != factory.m_sharedShaders.end())
{ {
GLint logLen; shader.m_vert = vertFind->second->lock();
glGetShaderiv(shader.m_vert, GL_INFO_LOG_LENGTH, &logLen); }
char* log = (char*)malloc(logLen); else
glGetShaderInfoLog(shader.m_vert, logLen, nullptr, log); {
Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource); GLuint sobj = glCreateShader(GL_VERTEX_SHADER);
free(log); if (!sobj)
{
Log.report(logvisor::Error, "unable to create vert shader");
return nullptr;
}
glShaderSource(sobj, 1, &vertSource, nullptr);
glCompileShader(sobj);
glGetShaderiv(sobj, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE)
{
GLint logLen;
glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen);
char* log = (char*)malloc(logLen);
glGetShaderInfoLog(sobj, logLen, nullptr, log);
Log.report(logvisor::Error, "unable to compile vert source\n%s\n%s\n", log, vertSource);
free(log);
return nullptr;
}
auto it =
factory.m_sharedShaders.emplace(std::make_pair(hashes[0],
std::make_unique<GLShareableShader>(factory, hashes[0], sobj))).first;
shader.m_vert = it->second->lock();
}
auto fragFind = factory.m_sharedShaders.find(hashes[1]);
if (fragFind != factory.m_sharedShaders.end())
{
shader.m_frag = fragFind->second->lock();
}
else
{
GLuint sobj = glCreateShader(GL_FRAGMENT_SHADER);
if (!sobj)
{
Log.report(logvisor::Error, "unable to create frag shader");
return nullptr;
}
glShaderSource(sobj, 1, &fragSource, nullptr);
glCompileShader(sobj);
glGetShaderiv(sobj, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE)
{
GLint logLen;
glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen);
char* log = (char*)malloc(logLen);
glGetShaderInfoLog(sobj, logLen, nullptr, log);
Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource);
free(log);
return nullptr;
}
auto it =
factory.m_sharedShaders.emplace(std::make_pair(hashes[1],
std::make_unique<GLShareableShader>(factory, hashes[1], sobj))).first;
shader.m_frag = it->second->lock();
}
shader.m_prog = glCreateProgram();
if (!shader.m_prog)
{
Log.report(logvisor::Error, "unable to create shader program");
return nullptr; return nullptr;
} }
glShaderSource(shader.m_frag, 1, &fragSource, nullptr); glAttachShader(shader.m_prog, shader.m_vert.get().m_shader);
glCompileShader(shader.m_frag); glAttachShader(shader.m_prog, shader.m_frag.get().m_shader);
glGetShaderiv(shader.m_frag, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE)
{
GLint logLen;
glGetShaderiv(shader.m_frag, GL_INFO_LOG_LENGTH, &logLen);
char* log = (char*)malloc(logLen);
glGetShaderInfoLog(shader.m_frag, logLen, nullptr, log);
Log.report(logvisor::Error, "unable to compile frag source\n%s\n%s\n", log, fragSource);
free(log);
return nullptr;
}
glLinkProgram(shader.m_prog); glLinkProgram(shader.m_prog);
glGetProgramiv(shader.m_prog, GL_LINK_STATUS, &status); glGetProgramiv(shader.m_prog, GL_LINK_STATUS, &status);
@ -563,8 +623,15 @@ IShaderPipeline* GLDataFactory::Context::newShaderPipeline
} }
} }
shader.m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)];
shader.m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)];
shader.m_depthTest = depthTest;
shader.m_depthWrite = depthWrite;
shader.m_backfaceCulling = backfaceCulling;
shader.m_drawPrim = PRIMITIVE_TABLE[int(prim)];
GLShaderPipeline* retval = new GLShaderPipeline(std::move(shader)); GLShaderPipeline* retval = new GLShaderPipeline(std::move(shader));
m_deferredData->m_SPs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval);
return retval; return retval;
} }
@ -699,17 +766,17 @@ GLDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline,
size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst)
{ {
GLShaderDataBinding* retval = GLShaderDataBinding* retval =
new GLShaderDataBinding(m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs, new GLShaderDataBinding(GLDataFactoryImpl::m_deferredData.get(), pipeline, vtxFormat, ubufCount, ubufs,
ubufOffs, ubufSizes, texCount, texs); ubufOffs, ubufSizes, texCount, texs);
m_deferredData->m_SBinds.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval);
return retval; return retval;
} }
GLDataFactory::GLDataFactory(IGraphicsContext* parent, uint32_t drawSamples) GLDataFactoryImpl::GLDataFactoryImpl(IGraphicsContext* parent, uint32_t drawSamples)
: m_parent(parent), m_drawSamples(drawSamples) {} : m_parent(parent), m_drawSamples(drawSamples) {}
GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& trans) GraphicsDataToken GLDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans)
{ {
if (m_deferredData.get()) if (m_deferredData.get())
Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); Log.report(logvisor::Fatal, "nested commitTransaction usage detected");
@ -736,7 +803,7 @@ GraphicsDataToken GLDataFactory::commitTransaction(const FactoryCommitFunc& tran
return GraphicsDataToken(this, retval); return GraphicsDataToken(this, retval);
} }
GraphicsBufferPoolToken GLDataFactory::newBufferPool() GraphicsBufferPoolToken GLDataFactoryImpl::newBufferPool()
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
GLPool* retval = new GLPool; GLPool* retval = new GLPool;
@ -744,7 +811,7 @@ GraphicsBufferPoolToken GLDataFactory::newBufferPool()
return GraphicsBufferPoolToken(this, retval); return GraphicsBufferPoolToken(this, retval);
} }
void GLDataFactory::destroyData(IGraphicsData* d) void GLDataFactoryImpl::destroyData(IGraphicsData* d)
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
GLData* data = static_cast<GLData*>(d); GLData* data = static_cast<GLData*>(d);
@ -752,7 +819,7 @@ void GLDataFactory::destroyData(IGraphicsData* d)
data->decrement(); data->decrement();
} }
void GLDataFactory::destroyAllData() void GLDataFactoryImpl::destroyAllData()
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
for (GLData* data : m_committedData) for (GLData* data : m_committedData)
@ -763,7 +830,7 @@ void GLDataFactory::destroyAllData()
m_committedPools.clear(); m_committedPools.clear();
} }
void GLDataFactory::destroyPool(IGraphicsBufferPool* p) void GLDataFactoryImpl::destroyPool(IGraphicsBufferPool* p)
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
GLPool* pool = static_cast<GLPool*>(p); GLPool* pool = static_cast<GLPool*>(p);
@ -771,8 +838,8 @@ void GLDataFactory::destroyPool(IGraphicsBufferPool* p)
delete pool; delete pool;
} }
IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, IGraphicsBufferD* GLDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use,
size_t stride, size_t count) size_t stride, size_t count)
{ {
GLPool* pool = static_cast<GLPool*>(p); GLPool* pool = static_cast<GLPool*>(p);
GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count);
@ -780,7 +847,7 @@ IGraphicsBufferD* GLDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse
return retval; return retval;
} }
void GLDataFactory::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf) void GLDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool *p, IGraphicsBufferD *buf)
{ {
GLPool* pool = static_cast<GLPool*>(p); GLPool* pool = static_cast<GLPool*>(p);
pool->m_DBufs.erase(static_cast<GLGraphicsBufferD*>(buf)); pool->m_DBufs.erase(static_cast<GLGraphicsBufferD*>(buf));
@ -1336,7 +1403,7 @@ struct GLCommandQueue : IGraphicsCommandQueue
} }
/* Update dynamic data here */ /* Update dynamic data here */
GLDataFactory* gfxF = static_cast<GLDataFactory*>(m_parent->getDataFactory()); GLDataFactoryImpl* gfxF = static_cast<GLDataFactoryImpl*>(m_parent->getDataFactory());
std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex); std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex);
for (GLData* d : gfxF->m_committedData) for (GLData* d : gfxF->m_committedData)
{ {
@ -1403,7 +1470,7 @@ IGraphicsBufferD*
GLDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) GLDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count)
{ {
GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count); GLGraphicsBufferD* retval = new GLGraphicsBufferD(use, stride * count);
m_deferredData->m_DBufs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval);
return retval; return retval;
} }
@ -1478,7 +1545,7 @@ ITextureD*
GLDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) GLDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt)
{ {
GLTextureD* retval = new GLTextureD(width, height, fmt); GLTextureD* retval = new GLTextureD(width, height, fmt);
m_deferredData->m_DTexs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval);
return retval; return retval;
} }
@ -1546,11 +1613,12 @@ ITextureR*
GLDataFactory::Context::newRenderTexture(size_t width, size_t height, GLDataFactory::Context::newRenderTexture(size_t width, size_t height,
bool enableShaderColorBinding, bool enableShaderDepthBinding) bool enableShaderColorBinding, bool enableShaderDepthBinding)
{ {
GLCommandQueue* q = static_cast<GLCommandQueue*>(m_parent.m_parent->getCommandQueue()); GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
GLTextureR* retval = new GLTextureR(q, width, height, m_parent.m_drawSamples, GLCommandQueue* q = static_cast<GLCommandQueue*>(factory.m_parent->getCommandQueue());
GLTextureR* retval = new GLTextureR(q, width, height, factory.m_drawSamples,
enableShaderColorBinding, enableShaderDepthBinding); enableShaderColorBinding, enableShaderDepthBinding);
q->resizeRenderTexture(retval, width, height); q->resizeRenderTexture(retval, width, height);
m_deferredData->m_RTexs.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval);
return retval; return retval;
} }
@ -1572,9 +1640,10 @@ IVertexFormat* GLDataFactory::Context::newVertexFormat
(size_t elementCount, const VertexElementDescriptor* elements, (size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert, size_t baseInst) size_t baseVert, size_t baseInst)
{ {
GLCommandQueue* q = static_cast<GLCommandQueue*>(m_parent.m_parent->getCommandQueue()); GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
GLCommandQueue* q = static_cast<GLCommandQueue*>(factory.m_parent->getCommandQueue());
GLVertexFormat* retval = new struct GLVertexFormat(q, elementCount, elements, baseVert, baseInst); GLVertexFormat* retval = new struct GLVertexFormat(q, elementCount, elements, baseVert, baseInst);
m_deferredData->m_VFmts.emplace_back(retval); GLDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval);
return retval; return retval;
} }
@ -1583,4 +1652,9 @@ IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent)
return new struct GLCommandQueue(parent); return new struct GLCommandQueue(parent);
} }
/* Builds the OpenGL data-factory implementation for `parent` with the given
 * draw sample count and hands it back through the generic factory interface.
 * The object is allocated with `new`; nothing in this function releases it. */
IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples)
{
    IGraphicsDataFactory* const factory = new class GLDataFactoryImpl(parent, drawSamples);
    return factory;
}
} }

View File

@ -5,6 +5,9 @@
#include "boo/IGraphicsContext.hpp" #include "boo/IGraphicsContext.hpp"
#include "Common.hpp" #include "Common.hpp"
#include <vector> #include <vector>
#include <unordered_map>
#include <unordered_set>
#include "xxhash.h"
#if !__has_feature(objc_arc) #if !__has_feature(objc_arc)
#error ARC Required #error ARC Required
@ -17,8 +20,48 @@ namespace boo
{ {
static logvisor::Module Log("boo::Metal"); static logvisor::Module Log("boo::Metal");
struct MetalCommandQueue; struct MetalCommandQueue;
class MetalDataFactoryImpl;
ThreadLocalPtr<struct MetalData> MetalDataFactory::m_deferredData; struct MetalShareableShader : IShareableShader<MetalDataFactoryImpl, MetalShareableShader>
{
id<MTLFunction> m_shader;
MetalShareableShader(MetalDataFactoryImpl& fac, uint64_t key, id<MTLFunction> s)
: IShareableShader(fac, key), m_shader(s) {}
};
/** Concrete Metal data factory. Holds the state and lifecycle methods that
 *  sit behind the public MetalDataFactory interface: the committed
 *  data/pool registries, the per-thread in-flight transaction, and a cache
 *  of shareable shader functions keyed by a 64-bit value. */
class MetalDataFactoryImpl : public MetalDataFactory
{
friend struct MetalCommandQueue;
friend class MetalDataFactory::Context;
IGraphicsContext* m_parent;
/* Per-thread data object for the transaction currently being committed. */
static ThreadLocalPtr<struct MetalData> m_deferredData;
/* Live committed data sets and buffer pools; MetalCommandQueue locks
 * m_committedMutex before walking m_committedData. */
std::unordered_set<struct MetalData*> m_committedData;
std::unordered_set<struct MetalPool*> m_committedPools;
std::mutex m_committedMutex;
/* Cached shader functions by 64-bit key — presumably an XXH64 hash of the
 * shader source, as in the OpenGL backend; TODO confirm against
 * newShaderPipeline. */
std::unordered_map<uint64_t, std::unique_ptr<MetalShareableShader>> m_sharedShaders;
struct MetalContext* m_ctx;
uint32_t m_sampleCount;
void destroyData(IGraphicsData*);
void destroyAllData();
void destroyPool(IGraphicsBufferPool*);
IGraphicsBufferD* newPoolBuffer(IGraphicsBufferPool* pool, BufferUse use,
size_t stride, size_t count);
void deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf);
public:
MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount);
/* NOTE(review): unlike the OpenGL counterpart, this destructor does not call
 * destroyAllData() — confirm committed data and pools are released elsewhere. */
~MetalDataFactoryImpl() {}
Platform platform() const {return Platform::Metal;}
const char* platformName() const {return "Metal";}
GraphicsDataToken commitTransaction(const std::function<bool(IGraphicsDataFactory::Context& ctx)>&);
GraphicsBufferPoolToken newBufferPool();
/* Called by IShareableShader when the last reference to a cached shader is
 * released; erasing the entry destroys the MetalShareableShader it owns. */
void _unregisterShareableShader(uint64_t key) { m_sharedShaders.erase(key); }
};
ThreadLocalPtr<struct MetalData> MetalDataFactoryImpl::m_deferredData;
struct MetalData : IGraphicsDataPriv<MetalData> struct MetalData : IGraphicsDataPriv<MetalData>
{ {
std::vector<std::unique_ptr<class MetalShaderPipeline>> m_SPs; std::vector<std::unique_ptr<class MetalShaderPipeline>> m_SPs;
@ -60,6 +103,7 @@ public:
class MetalGraphicsBufferD : public IGraphicsBufferD class MetalGraphicsBufferD : public IGraphicsBufferD
{ {
friend class MetalDataFactory; friend class MetalDataFactory;
friend class MetalDataFactoryImpl;
friend struct MetalCommandQueue; friend struct MetalCommandQueue;
MetalCommandQueue* m_q; MetalCommandQueue* m_q;
std::unique_ptr<uint8_t[]> m_cpuBuf; std::unique_ptr<uint8_t[]> m_cpuBuf;
@ -502,19 +546,24 @@ class MetalShaderPipeline : public IShaderPipeline
MTLCullMode m_cullMode = MTLCullModeNone; MTLCullMode m_cullMode = MTLCullModeNone;
MTLPrimitiveType m_drawPrim; MTLPrimitiveType m_drawPrim;
const MetalVertexFormat* m_vtxFmt; const MetalVertexFormat* m_vtxFmt;
MetalShareableShader::Token m_vert;
MetalShareableShader::Token m_frag;
MetalShaderPipeline(MetalContext* ctx, id<MTLFunction> vert, id<MTLFunction> frag, MetalShaderPipeline(MetalContext* ctx,
MetalShareableShader::Token&& vert,
MetalShareableShader::Token&& frag,
const MetalVertexFormat* vtxFmt, NSUInteger targetSamples, const MetalVertexFormat* vtxFmt, NSUInteger targetSamples,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim, BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
bool depthTest, bool depthWrite, bool backfaceCulling) bool depthTest, bool depthWrite, bool backfaceCulling)
: m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt) : m_drawPrim(PRIMITIVE_TABLE[int(prim)]), m_vtxFmt(vtxFmt),
m_vert(std::move(vert)), m_frag(std::move(frag))
{ {
if (backfaceCulling) if (backfaceCulling)
m_cullMode = MTLCullModeBack; m_cullMode = MTLCullModeBack;
MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new]; MTLRenderPipelineDescriptor* desc = [MTLRenderPipelineDescriptor new];
desc.vertexFunction = vert; desc.vertexFunction = m_vert.get().m_shader;
desc.fragmentFunction = frag; desc.fragmentFunction = m_frag.get().m_shader;
desc.vertexDescriptor = vtxFmt->m_vdesc; desc.vertexDescriptor = vtxFmt->m_vdesc;
desc.sampleCount = targetSamples; desc.sampleCount = targetSamples;
desc.colorAttachments[0].pixelFormat = MTLPixelFormatBGRA8Unorm; desc.colorAttachments[0].pixelFormat = MTLPixelFormatBGRA8Unorm;
@ -897,7 +946,7 @@ struct MetalCommandQueue : IGraphicsCommandQueue
return; return;
/* Update dynamic data here */ /* Update dynamic data here */
MetalDataFactory* gfxF = static_cast<MetalDataFactory*>(m_parent->getDataFactory()); MetalDataFactoryImpl* gfxF = static_cast<MetalDataFactoryImpl*>(m_parent->getDataFactory());
std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex); std::unique_lock<std::mutex> datalk(gfxF->m_committedMutex);
for (MetalData* d : gfxF->m_committedData) for (MetalData* d : gfxF->m_committedData)
{ {
@ -1042,49 +1091,55 @@ void MetalTextureD::unmap()
m_validSlots = 0; m_validSlots = 0;
} }
MetalDataFactory::MetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount) MetalDataFactoryImpl::MetalDataFactoryImpl(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount)
: m_parent(parent), m_ctx(ctx), m_sampleCount(sampleCount) {} : m_parent(parent), m_ctx(ctx), m_sampleCount(sampleCount) {}
IGraphicsBufferS* MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count) IGraphicsBufferS* MetalDataFactory::Context::newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count)
{ {
MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, m_parent.m_ctx, data, stride, count); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
m_deferredData->m_SBufs.emplace_back(retval); MetalGraphicsBufferS* retval = new MetalGraphicsBufferS(use, factory.m_ctx, data, stride, count);
MetalDataFactoryImpl::m_deferredData->m_SBufs.emplace_back(retval);
return retval; return retval;
} }
IGraphicsBufferD* MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count) IGraphicsBufferD* MetalDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count)
{ {
MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent.m_parent->getCommandQueue()); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, m_parent.m_ctx, stride, count); MetalCommandQueue* q = static_cast<MetalCommandQueue*>(factory.m_parent->getCommandQueue());
m_deferredData->m_DBufs.emplace_back(retval); MetalGraphicsBufferD* retval = new MetalGraphicsBufferD(q, use, factory.m_ctx, stride, count);
MetalDataFactoryImpl::m_deferredData->m_DBufs.emplace_back(retval);
return retval; return retval;
} }
ITextureS* MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt, ITextureS* MetalDataFactory::Context::newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt,
const void* data, size_t sz) const void* data, size_t sz)
{ {
MetalTextureS* retval = new MetalTextureS(m_parent.m_ctx, width, height, mips, fmt, data, sz); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
m_deferredData->m_STexs.emplace_back(retval); MetalTextureS* retval = new MetalTextureS(factory.m_ctx, width, height, mips, fmt, data, sz);
MetalDataFactoryImpl::m_deferredData->m_STexs.emplace_back(retval);
return retval; return retval;
} }
ITextureSA* MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips, ITextureSA* MetalDataFactory::Context::newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips,
TextureFormat fmt, const void* data, size_t sz) TextureFormat fmt, const void* data, size_t sz)
{ {
MetalTextureSA* retval = new MetalTextureSA(m_parent.m_ctx, width, height, layers, mips, fmt, data, sz); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
m_deferredData->m_SATexs.emplace_back(retval); MetalTextureSA* retval = new MetalTextureSA(factory.m_ctx, width, height, layers, mips, fmt, data, sz);
MetalDataFactoryImpl::m_deferredData->m_SATexs.emplace_back(retval);
return retval; return retval;
} }
ITextureD* MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt) ITextureD* MetalDataFactory::Context::newDynamicTexture(size_t width, size_t height, TextureFormat fmt)
{ {
MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent.m_parent->getCommandQueue()); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
MetalTextureD* retval = new MetalTextureD(q, m_parent.m_ctx, width, height, fmt); MetalCommandQueue* q = static_cast<MetalCommandQueue*>(factory.m_parent->getCommandQueue());
m_deferredData->m_DTexs.emplace_back(retval); MetalTextureD* retval = new MetalTextureD(q, factory.m_ctx, width, height, fmt);
MetalDataFactoryImpl::m_deferredData->m_DTexs.emplace_back(retval);
return retval; return retval;
} }
ITextureR* MetalDataFactory::Context::newRenderTexture(size_t width, size_t height, ITextureR* MetalDataFactory::Context::newRenderTexture(size_t width, size_t height,
bool enableShaderColorBinding, bool enableShaderDepthBinding) bool enableShaderColorBinding, bool enableShaderDepthBinding)
{ {
MetalTextureR* retval = new MetalTextureR(m_parent.m_ctx, width, height, m_parent.m_sampleCount, enableShaderColorBinding); MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
m_deferredData->m_RTexs.emplace_back(retval); MetalTextureR* retval = new MetalTextureR(factory.m_ctx, width, height, factory.m_sampleCount, enableShaderColorBinding);
MetalDataFactoryImpl::m_deferredData->m_RTexs.emplace_back(retval);
return retval; return retval;
} }
@ -1092,7 +1147,7 @@ IVertexFormat* MetalDataFactory::Context::newVertexFormat(size_t elementCount, c
size_t baseVert, size_t baseInst) size_t baseVert, size_t baseInst)
{ {
MetalVertexFormat* retval = new struct MetalVertexFormat(elementCount, elements); MetalVertexFormat* retval = new struct MetalVertexFormat(elementCount, elements);
m_deferredData->m_VFmts.emplace_back(retval); MetalDataFactoryImpl::m_deferredData->m_VFmts.emplace_back(retval);
return retval; return retval;
} }
@ -1101,34 +1156,71 @@ IShaderPipeline* MetalDataFactory::Context::newShaderPipeline(const char* vertSo
BlendFactor srcFac, BlendFactor dstFac, Primitive prim, BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
bool depthTest, bool depthWrite, bool backfaceCulling) bool depthTest, bool depthWrite, bool backfaceCulling)
{ {
MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
MTLCompileOptions* compOpts = [MTLCompileOptions new]; MTLCompileOptions* compOpts = [MTLCompileOptions new];
compOpts.languageVersion = MTLLanguageVersion1_1; compOpts.languageVersion = MTLLanguageVersion1_1;
NSError* err = nullptr; NSError* err = nullptr;
id<MTLLibrary> vertShaderLib = [m_parent.m_ctx->m_dev newLibraryWithSource:@(vertSource) XXH64_state_t hashState;
options:compOpts uint64_t hashes[2];
error:&err]; XXH64_reset(&hashState, 0);
if (!vertShaderLib) XXH64_update(&hashState, vertSource, strlen(vertSource));
{ hashes[0] = XXH64_digest(&hashState);
printf("%s\n", vertSource); XXH64_reset(&hashState, 0);
Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]); XXH64_update(&hashState, fragSource, strlen(fragSource));
} hashes[1] = XXH64_digest(&hashState);
id<MTLFunction> vertFunc = [vertShaderLib newFunctionWithName:@"vmain"];
id<MTLLibrary> fragShaderLib = [m_parent.m_ctx->m_dev newLibraryWithSource:@(fragSource) MetalShareableShader::Token vertShader;
options:compOpts MetalShareableShader::Token fragShader;
error:&err]; auto vertFind = factory.m_sharedShaders.find(hashes[0]);
if (!fragShaderLib) if (vertFind != factory.m_sharedShaders.end())
{ {
printf("%s\n", fragSource); vertShader = vertFind->second->lock();
Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]);
} }
id<MTLFunction> fragFunc = [fragShaderLib newFunctionWithName:@"fmain"]; else
{
id<MTLLibrary> vertShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(vertSource)
options:compOpts
error:&err];
if (!vertShaderLib)
{
printf("%s\n", vertSource);
Log.report(logvisor::Fatal, "error compiling vert shader: %s", [[err localizedDescription] UTF8String]);
}
id<MTLFunction> vertFunc = [vertShaderLib newFunctionWithName:@"vmain"];
MetalShaderPipeline* retval = new MetalShaderPipeline(m_parent.m_ctx, vertFunc, fragFunc, auto it =
factory.m_sharedShaders.emplace(std::make_pair(hashes[0],
std::make_unique<MetalShareableShader>(factory, hashes[0], vertFunc))).first;
vertShader = it->second->lock();
}
auto fragFind = factory.m_sharedShaders.find(hashes[1]);
if (fragFind != factory.m_sharedShaders.end())
{
fragShader = fragFind->second->lock();
}
else
{
id<MTLLibrary> fragShaderLib = [factory.m_ctx->m_dev newLibraryWithSource:@(fragSource)
options:compOpts
error:&err];
if (!fragShaderLib)
{
printf("%s\n", fragSource);
Log.report(logvisor::Fatal, "error compiling frag shader: %s", [[err localizedDescription] UTF8String]);
}
id<MTLFunction> fragFunc = [fragShaderLib newFunctionWithName:@"fmain"];
auto it =
factory.m_sharedShaders.emplace(std::make_pair(hashes[1],
std::make_unique<MetalShareableShader>(factory, hashes[1], fragFunc))).first;
fragShader = it->second->lock();
}
MetalShaderPipeline* retval = new MetalShaderPipeline(factory.m_ctx, std::move(vertShader), std::move(fragShader),
static_cast<const MetalVertexFormat*>(vtxFmt), targetSamples, static_cast<const MetalVertexFormat*>(vtxFmt), targetSamples,
srcFac, dstFac, prim, depthTest, depthWrite, backfaceCulling); srcFac, dstFac, prim, depthTest, depthWrite, backfaceCulling);
m_deferredData->m_SPs.emplace_back(retval); MetalDataFactoryImpl::m_deferredData->m_SPs.emplace_back(retval);
return retval; return retval;
} }
@ -1140,16 +1232,17 @@ MetalDataFactory::Context::newShaderDataBinding(IShaderPipeline* pipeline,
const size_t* ubufOffs, const size_t* ubufSizes, const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst) size_t texCount, ITexture** texs, size_t baseVert, size_t baseInst)
{ {
MetalDataFactoryImpl& factory = static_cast<MetalDataFactoryImpl&>(m_parent);
MetalShaderDataBinding* retval = MetalShaderDataBinding* retval =
new MetalShaderDataBinding(m_deferredData.get(), new MetalShaderDataBinding(MetalDataFactoryImpl::m_deferredData.get(),
m_parent.m_ctx, pipeline, vbuf, instVbo, ibuf, factory.m_ctx, pipeline, vbuf, instVbo, ibuf,
ubufCount, ubufs, ubufStages, ubufOffs, ubufCount, ubufs, ubufStages, ubufOffs,
ubufSizes, texCount, texs, baseVert, baseInst); ubufSizes, texCount, texs, baseVert, baseInst);
m_deferredData->m_SBinds.emplace_back(retval); MetalDataFactoryImpl::m_deferredData->m_SBinds.emplace_back(retval);
return retval; return retval;
} }
GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& trans) GraphicsDataToken MetalDataFactoryImpl::commitTransaction(const FactoryCommitFunc& trans)
{ {
if (m_deferredData.get()) if (m_deferredData.get())
Log.report(logvisor::Fatal, "nested commitTransaction usage detected"); Log.report(logvisor::Fatal, "nested commitTransaction usage detected");
@ -1170,7 +1263,7 @@ GraphicsDataToken MetalDataFactory::commitTransaction(const FactoryCommitFunc& t
return GraphicsDataToken(this, retval); return GraphicsDataToken(this, retval);
} }
GraphicsBufferPoolToken MetalDataFactory::newBufferPool() GraphicsBufferPoolToken MetalDataFactoryImpl::newBufferPool()
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
MetalPool* retval = new MetalPool; MetalPool* retval = new MetalPool;
@ -1178,7 +1271,7 @@ GraphicsBufferPoolToken MetalDataFactory::newBufferPool()
return GraphicsBufferPoolToken(this, retval); return GraphicsBufferPoolToken(this, retval);
} }
void MetalDataFactory::destroyData(IGraphicsData* d) void MetalDataFactoryImpl::destroyData(IGraphicsData* d)
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
MetalData* data = static_cast<MetalData*>(d); MetalData* data = static_cast<MetalData*>(d);
@ -1186,7 +1279,7 @@ void MetalDataFactory::destroyData(IGraphicsData* d)
data->decrement(); data->decrement();
} }
void MetalDataFactory::destroyAllData() void MetalDataFactoryImpl::destroyAllData()
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
for (MetalData* data : m_committedData) for (MetalData* data : m_committedData)
@ -1197,7 +1290,7 @@ void MetalDataFactory::destroyAllData()
m_committedPools.clear(); m_committedPools.clear();
} }
void MetalDataFactory::destroyPool(IGraphicsBufferPool* p) void MetalDataFactoryImpl::destroyPool(IGraphicsBufferPool* p)
{ {
std::unique_lock<std::mutex> lk(m_committedMutex); std::unique_lock<std::mutex> lk(m_committedMutex);
MetalPool* pool = static_cast<MetalPool*>(p); MetalPool* pool = static_cast<MetalPool*>(p);
@ -1205,8 +1298,8 @@ void MetalDataFactory::destroyPool(IGraphicsBufferPool* p)
delete pool; delete pool;
} }
IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use, IGraphicsBufferD* MetalDataFactoryImpl::newPoolBuffer(IGraphicsBufferPool* p, BufferUse use,
size_t stride, size_t count) size_t stride, size_t count)
{ {
MetalPool* pool = static_cast<MetalPool*>(p); MetalPool* pool = static_cast<MetalPool*>(p);
MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent->getCommandQueue()); MetalCommandQueue* q = static_cast<MetalCommandQueue*>(m_parent->getCommandQueue());
@ -1215,7 +1308,7 @@ IGraphicsBufferD* MetalDataFactory::newPoolBuffer(IGraphicsBufferPool* p, Buffer
return retval; return retval;
} }
void MetalDataFactory::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf) void MetalDataFactoryImpl::deletePoolBuffer(IGraphicsBufferPool* p, IGraphicsBufferD* buf)
{ {
MetalPool* pool = static_cast<MetalPool*>(p); MetalPool* pool = static_cast<MetalPool*>(p);
pool->m_DBufs.erase(static_cast<MetalGraphicsBufferD*>(buf)); pool->m_DBufs.erase(static_cast<MetalGraphicsBufferD*>(buf));
@ -1227,6 +1320,11 @@ IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentW
return new struct MetalCommandQueue(ctx, parentWindow, parent); return new struct MetalCommandQueue(ctx, parentWindow, parent);
} }
/* Factory entry point: heap-allocates a MetalDataFactoryImpl from the given
 * arguments and returns it through the IGraphicsDataFactory interface.
 * The result is a raw pointer produced by `new`; nothing in this function
 * releases it. */
IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent, MetalContext* ctx, uint32_t sampleCount)
{
    MetalDataFactoryImpl* const factory = new MetalDataFactoryImpl(parent, ctx, sampleCount);
    return factory;
}
} }
#endif #endif

View File

@ -185,8 +185,11 @@ namespace boo
{ {
static logvisor::Module Log("boo::WindowCocoa"); static logvisor::Module Log("boo::WindowCocoa");
IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent);
IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples);
IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow, IGraphicsCommandQueue* _NewMetalCommandQueue(MetalContext* ctx, IWindow* parentWindow,
IGraphicsContext* parent); IGraphicsContext* parent);
IGraphicsDataFactory* _NewMetalDataFactory(IGraphicsContext* parent,
MetalContext* ctx, uint32_t sampleCount);
void _CocoaUpdateLastGLCtx(NSOpenGLContext* lastGLCtx); void _CocoaUpdateLastGLCtx(NSOpenGLContext* lastGLCtx);
class GraphicsContextCocoaGL : public GraphicsContextCocoa class GraphicsContextCocoaGL : public GraphicsContextCocoa
@ -206,7 +209,7 @@ public:
: GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow),
m_lastCtx(lastGLCtx) m_lastCtx(lastGLCtx)
{ {
m_dataFactory = new GLDataFactory(this, sampleCount); m_dataFactory = _NewGLDataFactory(this, sampleCount);
} }
~GraphicsContextCocoaGL() ~GraphicsContextCocoaGL()
@ -362,7 +365,7 @@ public:
: GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow), : GraphicsContextCocoa(api, EPixelFormat::RGBA8, parentWindow),
m_parentWindow(parentWindow), m_metalCtx(metalCtx) m_parentWindow(parentWindow), m_metalCtx(metalCtx)
{ {
m_dataFactory = new MetalDataFactory(this, metalCtx, sampleCount); m_dataFactory = _NewMetalDataFactory(this, metalCtx, sampleCount);
} }
~GraphicsContextCocoaMetal() ~GraphicsContextCocoaMetal()

View File

@ -36,6 +36,7 @@ IGraphicsDataFactory* _NewD3D12DataFactory(D3D12Context* ctx, IGraphicsContext*
IGraphicsCommandQueue* _NewD3D11CommandQueue(D3D11Context* ctx, D3D11Context::Window* windowCtx, IGraphicsContext* parent); IGraphicsCommandQueue* _NewD3D11CommandQueue(D3D11Context* ctx, D3D11Context::Window* windowCtx, IGraphicsContext* parent);
IGraphicsDataFactory* _NewD3D11DataFactory(D3D11Context* ctx, IGraphicsContext* parent, uint32_t sampleCount); IGraphicsDataFactory* _NewD3D11DataFactory(D3D11Context* ctx, IGraphicsContext* parent, uint32_t sampleCount);
IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent);
IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples);
#if BOO_HAS_VULKAN #if BOO_HAS_VULKAN
IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx,
VulkanContext::Window* windowCtx, VulkanContext::Window* windowCtx,
@ -280,7 +281,7 @@ public:
Log.report(logvisor::Fatal, "unable to share contexts"); Log.report(logvisor::Fatal, "unable to share contexts");
m_3dCtx.m_ctxOgl.m_lastContext = w.m_mainContext; m_3dCtx.m_ctxOgl.m_lastContext = w.m_mainContext;
m_dataFactory = new GLDataFactory(this, sampleCount); m_dataFactory = _NewGLDataFactory(this, sampleCount);
m_commandQueue = _NewGLCommandQueue(this); m_commandQueue = _NewGLCommandQueue(this);
} }

View File

@ -114,6 +114,7 @@ namespace boo
{ {
static logvisor::Module Log("boo::WindowXlib"); static logvisor::Module Log("boo::WindowXlib");
IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent); IGraphicsCommandQueue* _NewGLCommandQueue(IGraphicsContext* parent);
IGraphicsDataFactory* _NewGLDataFactory(IGraphicsContext* parent, uint32_t drawSamples);
#if BOO_HAS_VULKAN #if BOO_HAS_VULKAN
IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx, IGraphicsCommandQueue* _NewVulkanCommandQueue(VulkanContext* ctx,
VulkanContext::Window* windowCtx, VulkanContext::Window* windowCtx,
@ -327,7 +328,7 @@ public:
: GraphicsContextXlib(api, EPixelFormat::RGBA8, parentWindow, display, drawSamples), : GraphicsContextXlib(api, EPixelFormat::RGBA8, parentWindow, display, drawSamples),
m_lastCtx(lastCtx) m_lastCtx(lastCtx)
{ {
m_dataFactory = new class GLDataFactory(this, drawSamples); m_dataFactory = _NewGLDataFactory(this, drawSamples);
/* Query framebuffer configurations */ /* Query framebuffer configurations */
GLXFBConfig* fbConfigs = nullptr; GLXFBConfig* fbConfigs = nullptr;

View File

@ -1,2 +1,2 @@
add_executable(booTest WIN32 main.cpp) add_executable(booTest WIN32 main.cpp)
target_link_libraries(booTest boo logvisor ${BOO_SYS_LIBS}) target_link_libraries(booTest boo logvisor xxhash ${BOO_SYS_LIBS})

1
xxhash/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
add_library(xxhash xxhash.c xxhash.h)

24
xxhash/LICENSE Normal file
View File

@ -0,0 +1,24 @@
xxHash Library
Copyright (c) 2012-2014, Yann Collet
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

962
xxhash/xxhash.c Normal file
View File

@ -0,0 +1,962 @@
/*
xxHash - Fast Hash algorithm
Copyright (C) 2012-2015, Yann Collet
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/**************************************
* Tuning parameters
**************************************/
/* XXH_FORCE_MEMORY_ACCESS
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The switch below allows selecting a different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
* Method 2 : direct access. This method is portable but violates the C standard.
* It can generate buggy code on targets which generate assembly depending on alignment.
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
* See http://stackoverflow.com/a/32095106/646947 for details.
* Prefer these methods in priority order (0 > 1 > 2)
*/
/* Choose a default unaligned-access method unless the build already picked one
 * (XXH_FORCE_MEMORY_ACCESS can be defined externally, on the command line for example). */
#if !defined(XXH_FORCE_MEMORY_ACCESS)
   /* GCC targeting an ARMv6 variant: method 2 (direct access). */
#  if defined(__GNUC__) && \
      ( defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
        defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
        defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define XXH_FORCE_MEMORY_ACCESS 2
   /* Intel compiler, or GCC targeting an ARMv7 variant: method 1 (packed access). */
#  elif defined(__INTEL_COMPILER) || \
      ( defined(__GNUC__) && \
        ( defined(__ARM_ARCH_7__)  || defined(__ARM_ARCH_7A__) || \
          defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
          defined(__ARM_ARCH_7S__) ) )
#    define XXH_FORCE_MEMORY_ACCESS 1
#  endif
   /* Any other target: the macro is left undefined. */
#endif
/* XXH_ACCEPT_NULL_INPUT_POINTER :
* If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
* When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
* By default, this option is disabled. To enable it, uncomment below define :
*/
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
/* XXH_FORCE_NATIVE_FORMAT :
* By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
* Results are therefore identical for little-endian and big-endian CPU.
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
* to improve speed for Big-endian CPU.
* This option has no impact on Little_Endian CPU.
*/
/* Default (0): keep the endian-independent output described above.
 * The definition is guarded so that a build may supply its own value externally
 * (e.g. -DXXH_FORCE_NATIVE_FORMAT=1) without triggering a macro-redefinition
 * diagnostic; this is the same pattern this file already uses for
 * XXH_FORCE_MEMORY_ACCESS and XXH_CPU_LITTLE_ENDIAN. */
#ifndef XXH_FORCE_NATIVE_FORMAT
#  define XXH_FORCE_NATIVE_FORMAT 0
#endif
/* XXH_USELESS_ALIGN_BRANCH :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : don't make a test between aligned/unaligned, because performance will be the same.
* It saves one initial branch per hash.
*/
/* Enabled on 32- and 64-bit x86 targets (both spellings of each predefined macro are tested). */
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
#  define XXH_USELESS_ALIGN_BRANCH 1
#endif
/**************************************
* Compiler Specific Options
***************************************/
/* FORCE_INLINE: storage-class / inlining prefix for the small helpers in this file.
 *   Visual Studio      -> static __forceinline (C4127 is also silenced here)
 *   pre-C99 compilers  -> plain static (no `inline` keyword available)
 *   C99 + GCC-style    -> static inline with the always_inline attribute
 *   C99, anything else -> static inline
 */
#if defined(_MSC_VER)    /* Visual Studio */
#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
#  define FORCE_INLINE static __forceinline
#elif !defined (__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)   /* not C99 */
#  define FORCE_INLINE static
#elif defined(__GNUC__)
#  define FORCE_INLINE static inline __attribute__((always_inline))
#else
#  define FORCE_INLINE static inline
#endif
/**************************************
* Includes & Memory related functions
***************************************/
#include "xxhash.h"
/* Modify the local functions below should you wish to use some other memory routines */
/* for malloc(), free() */
#include <stdlib.h>
/* Allocation hook: forwards to malloc(). Edit this wrapper to plug in another allocator. */
static void* XXH_malloc(size_t s)
{
    void* const block = malloc(s);
    return block;
}
/* Release hook: forwards to free(). Counterpart of XXH_malloc(). */
static void XXH_free (void* p)
{
    free(p);
}
/* for memcpy() */
#include <string.h>
/* Copy hook: copies `size` bytes from `src` to `dest` with memcpy() and returns `dest`. */
static void* XXH_memcpy(void* dest, const void* src, size_t size)
{
    memcpy(dest, src, size);
    return dest;
}
/**************************************
* Basic Types
***************************************/
/* Short aliases for the integer types used in this file.
 * C99 and later take the exact-width types from <stdint.h>; otherwise the
 * built-in integer types are used directly. */
#if !defined (__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)   /* not C99 */
typedef unsigned char      BYTE;
typedef unsigned short     U16;
typedef unsigned int       U32;
typedef   signed int       S32;
typedef unsigned long long U64;
#else   /* C99 */
#  include <stdint.h>
typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
#endif
/* XXH_read32() / XXH_read64(): load a 32- or 64-bit value, in native byte order,
 * from an address that may be unaligned. XXH_FORCE_MEMORY_ACCESS selects how:
 *   2                  -> plain pointer dereference
 *   1                  -> access through a packed union
 *   other or undefined -> memcpy() into a local variable
 */
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* packed accesses are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;

static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }

#else

/* portable and safe solution. Generally efficient.
 * see : http://stackoverflow.com/a/32095106/646947
 */
static U32 XXH_read32(const void* memPtr)
{
    U32 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

static U64 XXH_read64(const void* memPtr)
{
    U64 val;
    memcpy(&val, memPtr, sizeof(val));
    return val;
}

#endif /* XXH_FORCE_MEMORY_ACCESS */
/******************************************
* Compiler-specific Functions and Macros
******************************************/
/* Compiler version folded into one integer: major * 100 + minor (e.g. 403 for 4.3). */
#define GCC_VERSION ((__GNUC__) * 100 + (__GNUC_MINOR__))
/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
/* XXH_rotl32(x,r) / XXH_rotl64(x,r): rotate x left by r bits.
 * Visual Studio uses its rotate intrinsics; elsewhere the rotation is spelled
 * as two shifts. Every macro parameter is parenthesized so that an expression
 * argument (e.g. `a + b`) is rotated as a whole instead of being torn apart by
 * operator precedence. r must be in 1..31 (resp. 1..63) for the shift form. */
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
/* XXH_swap32 / XXH_swap64: reverse the byte order of a 32- or 64-bit value.
 * Mapped to a compiler intrinsic when one is selected below, otherwise
 * implemented with shifts and masks. */
#if defined(_MSC_VER)     /* Visual Studio */
#  define XXH_swap32 _byteswap_ulong
#  define XXH_swap64 _byteswap_uint64
#elif GCC_VERSION >= 403
#  define XXH_swap32 __builtin_bswap32
#  define XXH_swap64 __builtin_bswap64
#else
/* Portable fallback: move each byte to its mirrored position, then combine. */
static U32 XXH_swap32 (U32 x)
{
    const U32 toByte3 = (x << 24) & 0xff000000;
    const U32 toByte2 = (x <<  8) & 0x00ff0000;
    const U32 toByte1 = (x >>  8) & 0x0000ff00;
    const U32 toByte0 = (x >> 24) & 0x000000ff;
    return toByte3 | toByte2 | toByte1 | toByte0;
}

/* 64-bit counterpart of the fallback above. */
static U64 XXH_swap64 (U64 x)
{
    U64 swapped;
    swapped  = (x << 56) & 0xff00000000000000ULL;
    swapped |= (x << 40) & 0x00ff000000000000ULL;
    swapped |= (x << 24) & 0x0000ff0000000000ULL;
    swapped |= (x <<  8) & 0x000000ff00000000ULL;
    swapped |= (x >>  8) & 0x00000000ff000000ULL;
    swapped |= (x >> 24) & 0x0000000000ff0000ULL;
    swapped |= (x >> 40) & 0x000000000000ff00ULL;
    swapped |= (x >> 56) & 0x00000000000000ffULL;
    return swapped;
}
#endif
/***************************************
* Architecture Macros
***************************************/
/* Byte-order tag handed to the read helpers. The values are fixed at 0 and 1;
 * XXH32() casts the 0/1 result of XXH_CPU_LITTLE_ENDIAN directly to this type. */
typedef enum
{
    XXH_bigEndian    = 0,
    XXH_littleEndian = 1
} XXH_endianess;
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#if !defined(XXH_CPU_LITTLE_ENDIAN)
/* Run-time probe: the lowest-addressed byte of an int holding 1 reads as 1 on a
 * little-endian CPU and as 0 on a big-endian one. */
static const int one = 1;
#  define XXH_CPU_LITTLE_ENDIAN (((const char*)(&one))[0])
#endif
/*****************************
* Memory reads
*****************************/
/* Tells the *_align read helpers whether the pointer may be dereferenced directly
 * (XXH_aligned) or must go through XXH_read32()/XXH_read64() (XXH_unaligned). */
typedef enum
{
    XXH_aligned,
    XXH_unaligned
} XXH_alignment;
/* Load a 32-bit word from `ptr`.
 *   align  : XXH_unaligned -> load through XXH_read32(); otherwise dereference directly.
 *   endian : XXH_littleEndian -> return the word as loaded; otherwise return it byte-swapped.
 */
FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    const U32 raw = (align==XXH_unaligned) ? XXH_read32(ptr) : *(const U32*)ptr;
    return (endian==XXH_littleEndian) ? raw : XXH_swap32(raw);
}
/* Convenience form of XXH_readLE32_align() that always requests the unaligned path. */
FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
{
    const XXH_alignment assumedAlignment = XXH_unaligned;
    return XXH_readLE32_align(ptr, endian, assumedAlignment);
}
/* Load a 64-bit word from `ptr`.
 *   align  : XXH_unaligned -> load through XXH_read64(); otherwise dereference directly.
 *   endian : XXH_littleEndian -> return the word as loaded; otherwise return it byte-swapped.
 */
FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
    const U64 raw = (align==XXH_unaligned) ? XXH_read64(ptr) : *(const U64*)ptr;
    return (endian==XXH_littleEndian) ? raw : XXH_swap64(raw);
}
/* Convenience form of XXH_readLE64_align() that always requests the unaligned path. */
FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
{
    const XXH_alignment assumedAlignment = XXH_unaligned;
    return XXH_readLE64_align(ptr, endian, assumedAlignment);
}
/***************************************
* Macros
***************************************/
/* Compile-time check: when `c` is false the enumerator initializer becomes 1/0,
 * which is not a valid constant expression, so compilation fails.
 * The macro expands to a brace-enclosed block. */
#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1 / ((c) ? 1 : 0) }; }    /* use only *after* variable declarations */
/***************************************
* Constants
***************************************/
/* Multiplicative constants for the 32-bit hash (decimal value in the trailing comment). */
#define PRIME32_1   0x9E3779B1U   /* 2654435761 */
#define PRIME32_2   0x85EBCA77U   /* 2246822519 */
#define PRIME32_3   0xC2B2AE3DU   /* 3266489917 */
#define PRIME32_4   0x27D4EB2FU   /*  668265263 */
#define PRIME32_5   0x165667B1U   /*  374761393 */

/* Multiplicative constants for the 64-bit hash (decimal value in the trailing comment). */
#define PRIME64_1   0x9E3779B185EBCA87ULL   /* 11400714785074694791 */
#define PRIME64_2   0xC2B2AE3D27D4EB4FULL   /* 14029467366897019727 */
#define PRIME64_3   0x165667B19E3779F9ULL   /*  1609587929392839161 */
#define PRIME64_4   0x85EBCA77C2B2AE63ULL   /*  9650029242287828579 */
#define PRIME64_5   0x27D4EB2F165667C5ULL   /*  2870177450012600261 */
/*****************************
* Simple Hash Functions
*****************************/
/*
 * One-shot XXH32 core.
 *   input/len : bytes to hash
 *   seed      : value mixed into the initial accumulators
 *   endian    : forwarded to XXH_readLE32_align() for every 32-bit read
 *   align     : forwarded to XXH_readLE32_align() for every 32-bit read
 * Returns the 32-bit hash.
 *
 * Inputs of at least 16 bytes are consumed in 16-byte stripes by four
 * independent accumulators; what is left is folded in 4 bytes, then 1 byte,
 * at a time before the final mixing steps.
 */
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* cur = (const BYTE*)input;
    const BYTE* end = cur + len;
    U32 hash;
/* Deliberately left defined after this function: XXH64_endian_align uses it for its 4-byte tail read. */
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (cur==NULL)
    {
        /* Hash NULL as an empty input; cur==end so nothing is ever read. */
        len = 0;
        cur = (const BYTE*)(size_t)16;
        end = cur;
    }
#endif
    if (len < 16)
    {
        hash = seed + PRIME32_5;
    }
    else
    {
        const BYTE* const lastStripe = end - 16;
        U32 acc1 = seed + PRIME32_1 + PRIME32_2;
        U32 acc2 = seed + PRIME32_2;
        U32 acc3 = seed + 0;
        U32 acc4 = seed - PRIME32_1;
        do
        {
            /* One 16-byte stripe: each accumulator absorbs its own 4-byte word. */
            acc1 += XXH_get32bits(cur) * PRIME32_2;
            acc1 = XXH_rotl32(acc1, 13);
            acc1 *= PRIME32_1;

            acc2 += XXH_get32bits(cur+4) * PRIME32_2;
            acc2 = XXH_rotl32(acc2, 13);
            acc2 *= PRIME32_1;

            acc3 += XXH_get32bits(cur+8) * PRIME32_2;
            acc3 = XXH_rotl32(acc3, 13);
            acc3 *= PRIME32_1;

            acc4 += XXH_get32bits(cur+12) * PRIME32_2;
            acc4 = XXH_rotl32(acc4, 13);
            acc4 *= PRIME32_1;

            cur += 16;
        }
        while (cur <= lastStripe);
        hash = XXH_rotl32(acc1, 1) + XXH_rotl32(acc2, 7) + XXH_rotl32(acc3, 12) + XXH_rotl32(acc4, 18);
    }

    hash += (U32) len;

    /* Remaining whole 32-bit words. */
    for (; cur+4 <= end; cur += 4)
    {
        hash += XXH_get32bits(cur) * PRIME32_3;
        hash = XXH_rotl32(hash, 17) * PRIME32_4;
    }

    /* Remaining single bytes. */
    for (; cur < end; cur++)
    {
        hash += (*cur) * PRIME32_5;
        hash = XXH_rotl32(hash, 11) * PRIME32_1;
    }

    /* Final mixing. */
    hash ^= hash >> 15;
    hash *= PRIME32_2;
    hash ^= hash >> 13;
    hash *= PRIME32_3;
    hash ^= hash >> 16;
    return hash;
}
/*
 * Public one-shot 32-bit hash of `len` bytes at `input`, using `seed`.
 *
 * Calls the inlined core with constant endian/alignment arguments. A
 * reset/update/digest sequence on a state object is the simpler formulation,
 * but it is slower for small inputs, so it is not used here.
 *
 * Reads are taken without byte swapping when the CPU is detected as
 * little-endian or when XXH_FORCE_NATIVE_FORMAT is non-zero. Unless
 * XXH_USELESS_ALIGN_BRANCH is defined, a 4-byte aligned `input` takes the
 * XXH_aligned read path.
 */
unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int noSwap = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
# if !defined(XXH_USELESS_ALIGN_BRANCH)
    if ((((size_t)input) & 3) == 0)   /* Input is 4-bytes aligned, leverage the speed benefit */
    {
        return noSwap
            ? XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned)
            : XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
# endif
    return noSwap
        ? XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned)
        : XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
}
/*
 * One-shot XXH64 core.
 *   input/len : bytes to hash
 *   seed      : value mixed into the initial accumulators
 *   endian    : forwarded to the read helpers for every multi-byte read
 *   align     : forwarded to the read helpers for every multi-byte read
 * Returns the 64-bit hash.
 *
 * Inputs of at least 32 bytes are consumed in 32-byte stripes by four
 * accumulators, which are then merged into the hash one after another. What
 * is left is folded in 8 bytes, then at most one 4-byte word, then single
 * bytes, before the final mixing steps.
 *
 * NOTE: the 4-byte tail read uses the XXH_get32bits macro, which is defined
 * inside XXH32_endian_align earlier in this file and expands using this
 * function's own `endian` and `align` parameters.
 */
FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
    const BYTE* cur = (const BYTE*)input;
    const BYTE* end = cur + len;
    U64 hash;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (cur==NULL)
    {
        /* Hash NULL as an empty input; cur==end so nothing is ever read. */
        len = 0;
        cur = (const BYTE*)(size_t)32;
        end = cur;
    }
#endif
    if (len < 32)
    {
        hash = seed + PRIME64_5;
    }
    else
    {
        const BYTE* const lastStripe = end - 32;
        U64 acc1 = seed + PRIME64_1 + PRIME64_2;
        U64 acc2 = seed + PRIME64_2;
        U64 acc3 = seed + 0;
        U64 acc4 = seed - PRIME64_1;
        do
        {
            /* One 32-byte stripe: each accumulator absorbs its own 8-byte word. */
            acc1 += XXH_get64bits(cur) * PRIME64_2;
            acc1 = XXH_rotl64(acc1, 31);
            acc1 *= PRIME64_1;

            acc2 += XXH_get64bits(cur+8) * PRIME64_2;
            acc2 = XXH_rotl64(acc2, 31);
            acc2 *= PRIME64_1;

            acc3 += XXH_get64bits(cur+16) * PRIME64_2;
            acc3 = XXH_rotl64(acc3, 31);
            acc3 *= PRIME64_1;

            acc4 += XXH_get64bits(cur+24) * PRIME64_2;
            acc4 = XXH_rotl64(acc4, 31);
            acc4 *= PRIME64_1;

            cur += 32;
        }
        while (cur <= lastStripe);

        hash = XXH_rotl64(acc1, 1) + XXH_rotl64(acc2, 7) + XXH_rotl64(acc3, 12) + XXH_rotl64(acc4, 18);

        /* Merge each accumulator into the hash, in order. */
        acc1 *= PRIME64_2;
        acc1 = XXH_rotl64(acc1, 31);
        acc1 *= PRIME64_1;
        hash ^= acc1;
        hash = hash * PRIME64_1 + PRIME64_4;

        acc2 *= PRIME64_2;
        acc2 = XXH_rotl64(acc2, 31);
        acc2 *= PRIME64_1;
        hash ^= acc2;
        hash = hash * PRIME64_1 + PRIME64_4;

        acc3 *= PRIME64_2;
        acc3 = XXH_rotl64(acc3, 31);
        acc3 *= PRIME64_1;
        hash ^= acc3;
        hash = hash * PRIME64_1 + PRIME64_4;

        acc4 *= PRIME64_2;
        acc4 = XXH_rotl64(acc4, 31);
        acc4 *= PRIME64_1;
        hash ^= acc4;
        hash = hash * PRIME64_1 + PRIME64_4;
    }

    hash += (U64) len;

    /* Remaining whole 64-bit words. */
    for (; cur+8 <= end; cur += 8)
    {
        U64 k1 = XXH_get64bits(cur);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        hash ^= k1;
        hash = XXH_rotl64(hash,27) * PRIME64_1 + PRIME64_4;
    }

    /* At most one remaining 32-bit word. */
    if (cur+4 <= end)
    {
        hash ^= (U64)(XXH_get32bits(cur)) * PRIME64_1;
        hash = XXH_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
        cur += 4;
    }

    /* Remaining single bytes. */
    for (; cur < end; cur++)
    {
        hash ^= (*cur) * PRIME64_5;
        hash = XXH_rotl64(hash, 11) * PRIME64_1;
    }

    /* Final mixing. */
    hash ^= hash >> 33;
    hash *= PRIME64_2;
    hash ^= hash >> 29;
    hash *= PRIME64_3;
    hash ^= hash >> 32;
    return hash;
}
/*
 * Public one-shot 64-bit hash of `len` bytes at `input`, using `seed`.
 *
 * Calls the inlined core with constant endian/alignment arguments. A
 * reset/update/digest sequence on a state object is the simpler formulation,
 * but it is slower for small inputs, so it is not used here.
 *
 * Reads are taken without byte swapping when the CPU is detected as
 * little-endian or when XXH_FORCE_NATIVE_FORMAT is non-zero. Unless
 * XXH_USELESS_ALIGN_BRANCH is defined, an 8-byte aligned `input` takes the
 * XXH_aligned read path.
 */
unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    const int noSwap = (endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT;
# if !defined(XXH_USELESS_ALIGN_BRANCH)
    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
    {
        return noSwap
            ? XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned)
            : XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
    }
# endif
    return noSwap
        ? XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned)
        : XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
}
/****************************************************
* Advanced Hash Functions
****************************************************/
/*** Allocation ***/
/* Internal layout of the streaming XXH32 state. The public XXH32_state_t pointers handed to
   XXH32_reset/update/digest are cast to this type. */
typedef struct
{
U64 total_len; /* total number of input bytes passed to XXH32_update since the last reset */
U32 seed; /* seed given to XXH32_reset; digest uses it when fewer than 16 bytes were fed */
U32 v1; /* v1..v4 : the four running accumulators */
U32 v2;
U32 v3;
U32 v4;
U32 mem32[4]; /* defined as U32 for alignment */ /* holds input bytes that do not yet fill a 16-byte stripe */
U32 memsize; /* number of bytes currently held in mem32 */
} XXH_istate32_t;
/* Internal layout of the streaming XXH64 state. The public XXH64_state_t pointers handed to
   XXH64_reset/update/digest are cast to this type. */
typedef struct
{
U64 total_len; /* total number of input bytes passed to XXH64_update since the last reset */
U64 seed; /* seed given to XXH64_reset; digest uses it when fewer than 32 bytes were fed */
U64 v1; /* v1..v4 : the four running accumulators */
U64 v2;
U64 v3;
U64 v4;
U64 mem64[4]; /* defined as U64 for alignment */ /* holds input bytes that do not yet fill a 32-byte stripe */
U32 memsize; /* number of bytes currently held in mem64 */
} XXH_istate64_t;
/*
 * Allocates storage for one XXH32 state with XXH_malloc() and returns it
 * exactly as the allocator handed it back. The contents are not initialized
 * here; XXH32_reset() sets every field except the input buffer.
 */
XXH32_state_t* XXH32_createState(void)
{
    void* const storage = XXH_malloc(sizeof(XXH32_state_t));
    /* A compilation error here means XXH32_state_t is not large enough */
    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));
    return (XXH32_state_t*)storage;
}
/*
 * Hands `statePtr` to XXH_free(). Always reports XXH_OK.
 */
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
    const XXH_errorcode status = XXH_OK;
    XXH_free(statePtr);
    return status;
}
/*
 * Allocates storage for one XXH64 state with XXH_malloc() and returns it
 * exactly as the allocator handed it back. The contents are not initialized
 * here; XXH64_reset() sets every field except the input buffer.
 */
XXH64_state_t* XXH64_createState(void)
{
    void* const storage = XXH_malloc(sizeof(XXH64_state_t));
    /* A compilation error here means XXH64_state_t is not large enough */
    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));
    return (XXH64_state_t*)storage;
}
/*
 * Hands `statePtr` to XXH_free(). Always reports XXH_OK.
 */
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    const XXH_errorcode status = XXH_OK;
    XXH_free(statePtr);
    return status;
}
/*** Hash feed ***/
/*
 * Starts a new streaming XXH32 computation in `state_in` using `seed`.
 * Clears the byte counter and the buffered-byte count, records the seed and
 * sets the four accumulators to their seed-derived start values.
 * Always returns XXH_OK.
 */
XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed)
{
    XXH_istate32_t* const st = (XXH_istate32_t*) state_in;

    /* Nothing consumed, nothing buffered. */
    st->total_len = 0;
    st->memsize = 0;

    st->seed = seed;
    st->v1 = seed + PRIME32_1 + PRIME32_2;
    st->v2 = seed + PRIME32_2;
    st->v3 = seed + 0;
    st->v4 = seed - PRIME32_1;
    return XXH_OK;
}
/*
 * Starts a new streaming XXH64 computation in `state_in` using `seed`.
 * Clears the byte counter and the buffered-byte count, records the seed and
 * sets the four accumulators to their seed-derived start values.
 * Always returns XXH_OK.
 */
XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
{
    XXH_istate64_t* const st = (XXH_istate64_t*) state_in;

    /* Nothing consumed, nothing buffered. */
    st->total_len = 0;
    st->memsize = 0;

    st->seed = seed;
    st->v1 = seed + PRIME64_1 + PRIME64_2;
    st->v2 = seed + PRIME64_2;
    st->v3 = seed + 0;
    st->v4 = seed - PRIME64_1;
    return XXH_OK;
}
/*
 * Feeds `len` bytes at `input` into the streaming XXH32 state `state_in`.
 *   endian : forwarded to XXH_readLE32() for every 32-bit read.
 * Returns XXH_OK, or XXH_ERROR when XXH_ACCEPT_NULL_INPUT_POINTER is defined
 * and `input` is NULL.
 *
 * Steps:
 *   1. If the buffered bytes plus the new ones stay below 16, just buffer them.
 *   2. Otherwise complete the buffered stripe with the head of `input` and
 *      consume it.
 *   3. Consume every whole 16-byte stripe directly from `input`.
 *   4. Buffer whatever is left (fewer than 16 bytes) for the next call.
 */
FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif
    state->total_len += len;

    if (state->memsize + len < 16)   /* fill in tmp buffer */
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   /* some data left from previous update */
    {
        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
        {
            const U32* p32 = state->mem32;
            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v1 = XXH_rotl32(state->v1, 13);
            state->v1 *= PRIME32_1;
            p32++;
            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v2 = XXH_rotl32(state->v2, 13);
            state->v2 *= PRIME32_1;
            p32++;
            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v3 = XXH_rotl32(state->v3, 13);
            state->v3 *= PRIME32_1;
            p32++;
            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
            state->v4 = XXH_rotl32(state->v4, 13);
            state->v4 *= PRIME32_1;
        }
        /* Skip the input bytes that were used to complete the buffered stripe. */
        p += 16-state->memsize;
        state->memsize = 0;
    }

    /* Compare remaining length instead of computing bEnd-16: with fewer than 16
       bytes left that pointer would lie before the start of the input, which is
       undefined pointer arithmetic. p never exceeds bEnd here. */
    if ((size_t)(bEnd - p) >= 16)
    {
        const BYTE* const limit = bEnd - 16;
        U32 v1 = state->v1;
        U32 v2 = state->v2;
        U32 v3 = state->v3;
        U32 v4 = state->v4;
        do
        {
            v1 += XXH_readLE32(p, endian) * PRIME32_2;
            v1 = XXH_rotl32(v1, 13);
            v1 *= PRIME32_1;
            p+=4;
            v2 += XXH_readLE32(p, endian) * PRIME32_2;
            v2 = XXH_rotl32(v2, 13);
            v2 *= PRIME32_1;
            p+=4;
            v3 += XXH_readLE32(p, endian) * PRIME32_2;
            v3 = XXH_rotl32(v3, 13);
            v3 *= PRIME32_1;
            p+=4;
            v4 += XXH_readLE32(p, endian) * PRIME32_2;
            v4 = XXH_rotl32(v4, 13);
            v4 *= PRIME32_1;
            p+=4;
        }
        while (p<=limit);
        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        /* Fewer than 16 bytes remain: keep them for the next update/digest. */
        XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
        state->memsize = (U32)(bEnd-p);   /* memsize is a U32 field */
    }
    return XXH_OK;
}
/*
 * Public streaming entry point: feeds `len` bytes at `input` into `state_in`.
 * Selects the no-swap (XXH_littleEndian) variant of the inlined worker when
 * the CPU is detected as little-endian or XXH_FORCE_NATIVE_FORMAT is
 * non-zero, and the XXH_bigEndian variant otherwise. Returns the worker's
 * status code.
 */
XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    return ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        ? XXH32_update_endian(state_in, input, len, XXH_littleEndian)
        : XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
/*
 * Produces the 32-bit hash for everything fed into `state_in` so far.
 * The state is only read, never written.
 *   endian : forwarded to XXH_readLE32() for reads from the internal buffer.
 *
 * Starts from the combined accumulators when at least 16 bytes were fed in
 * total, otherwise from the stored seed; then folds in the bytes still held
 * in the state's buffer and applies the final mixing steps.
 */
FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
{
    const XXH_istate32_t* const st = (const XXH_istate32_t*) state_in;
    const BYTE* cur = (const BYTE*)st->mem32;
    const BYTE* const end = (const BYTE*)(st->mem32) + st->memsize;
    U32 hash;

    if (st->total_len < 16)
        hash = st->seed + PRIME32_5;
    else
        hash = XXH_rotl32(st->v1, 1) + XXH_rotl32(st->v2, 7) + XXH_rotl32(st->v3, 12) + XXH_rotl32(st->v4, 18);

    hash += (U32) st->total_len;

    /* Buffered whole 32-bit words. */
    for (; cur+4 <= end; cur += 4)
    {
        hash += XXH_readLE32(cur, endian) * PRIME32_3;
        hash = XXH_rotl32(hash, 17) * PRIME32_4;
    }

    /* Buffered single bytes. */
    for (; cur < end; cur++)
    {
        hash += (*cur) * PRIME32_5;
        hash = XXH_rotl32(hash, 11) * PRIME32_1;
    }

    /* Final mixing. */
    hash ^= hash >> 15;
    hash *= PRIME32_2;
    hash ^= hash >> 13;
    hash *= PRIME32_3;
    hash ^= hash >> 16;
    return hash;
}
/*
 * Public streaming entry point: returns the 32-bit hash of the data fed into
 * `state_in` so far. Selects the no-swap (XXH_littleEndian) variant of the
 * inlined worker when the CPU is detected as little-endian or
 * XXH_FORCE_NATIVE_FORMAT is non-zero, and the XXH_bigEndian variant
 * otherwise.
 */
unsigned int XXH32_digest (const XXH32_state_t* state_in)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    return ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        ? XXH32_digest_endian(state_in, XXH_littleEndian)
        : XXH32_digest_endian(state_in, XXH_bigEndian);
}
/*
 * Feeds `len` bytes at `input` into the streaming XXH64 state `state_in`.
 *   endian : forwarded to XXH_readLE64() for every 64-bit read.
 * Returns XXH_OK, or XXH_ERROR when XXH_ACCEPT_NULL_INPUT_POINTER is defined
 * and `input` is NULL.
 *
 * Steps:
 *   1. If the buffered bytes plus the new ones stay below 32, just buffer them.
 *   2. Otherwise complete the buffered stripe with the head of `input` and
 *      consume it.
 *   3. Consume every whole 32-byte stripe directly from `input`.
 *   4. Buffer whatever is left (fewer than 32 bytes) for the next call.
 */
FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif
    state->total_len += len;

    if (state->memsize + len < 32)   /* fill in tmp buffer */
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize)   /* some data left from previous update */
    {
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
        {
            const U64* p64 = state->mem64;
            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v1 = XXH_rotl64(state->v1, 31);
            state->v1 *= PRIME64_1;
            p64++;
            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v2 = XXH_rotl64(state->v2, 31);
            state->v2 *= PRIME64_1;
            p64++;
            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v3 = XXH_rotl64(state->v3, 31);
            state->v3 *= PRIME64_1;
            p64++;
            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
            state->v4 = XXH_rotl64(state->v4, 31);
            state->v4 *= PRIME64_1;
        }
        /* Skip the input bytes that were used to complete the buffered stripe. */
        p += 32-state->memsize;
        state->memsize = 0;
    }

    /* Compare remaining length instead of computing p+32: with fewer than 32
       bytes left that pointer would lie beyond one-past-the-end of the input,
       which is undefined pointer arithmetic. p never exceeds bEnd here. */
    if ((size_t)(bEnd - p) >= 32)
    {
        const BYTE* const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;
        do
        {
            v1 += XXH_readLE64(p, endian) * PRIME64_2;
            v1 = XXH_rotl64(v1, 31);
            v1 *= PRIME64_1;
            p+=8;
            v2 += XXH_readLE64(p, endian) * PRIME64_2;
            v2 = XXH_rotl64(v2, 31);
            v2 *= PRIME64_1;
            p+=8;
            v3 += XXH_readLE64(p, endian) * PRIME64_2;
            v3 = XXH_rotl64(v3, 31);
            v3 *= PRIME64_1;
            p+=8;
            v4 += XXH_readLE64(p, endian) * PRIME64_2;
            v4 = XXH_rotl64(v4, 31);
            v4 *= PRIME64_1;
            p+=8;
        }
        while (p<=limit);
        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd)
    {
        /* Fewer than 32 bytes remain: keep them for the next update/digest. */
        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
        state->memsize = (U32)(bEnd-p);   /* memsize is a U32 field */
    }
    return XXH_OK;
}
/*
 * Public streaming entry point: feeds `len` bytes at `input` into `state_in`.
 * Selects the no-swap (XXH_littleEndian) variant of the inlined worker when
 * the CPU is detected as little-endian or XXH_FORCE_NATIVE_FORMAT is
 * non-zero, and the XXH_bigEndian variant otherwise. Returns the worker's
 * status code.
 */
XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    return ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        ? XXH64_update_endian(state_in, input, len, XXH_littleEndian)
        : XXH64_update_endian(state_in, input, len, XXH_bigEndian);
}
/*
 * Produces the 64-bit hash for everything fed into `state_in` so far.
 * The state is only read, never written: the accumulators are copied into
 * locals before being merged.
 *   endian : forwarded to the read helpers for reads from the internal buffer.
 *
 * Starts from the merged accumulators when at least 32 bytes were fed in
 * total, otherwise from the stored seed; then folds in the bytes still held
 * in the state's buffer and applies the final mixing steps.
 */
FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
{
    const XXH_istate64_t* const st = (const XXH_istate64_t *) state_in;
    const BYTE* cur = (const BYTE*)st->mem64;
    const BYTE* const end = (const BYTE*)st->mem64 + st->memsize;
    U64 hash;

    if (st->total_len < 32)
    {
        hash = st->seed + PRIME64_5;
    }
    else
    {
        U64 acc1 = st->v1;
        U64 acc2 = st->v2;
        U64 acc3 = st->v3;
        U64 acc4 = st->v4;

        hash = XXH_rotl64(acc1, 1) + XXH_rotl64(acc2, 7) + XXH_rotl64(acc3, 12) + XXH_rotl64(acc4, 18);

        /* Merge each accumulator into the hash, in order. */
        acc1 *= PRIME64_2;
        acc1 = XXH_rotl64(acc1, 31);
        acc1 *= PRIME64_1;
        hash ^= acc1;
        hash = hash*PRIME64_1 + PRIME64_4;

        acc2 *= PRIME64_2;
        acc2 = XXH_rotl64(acc2, 31);
        acc2 *= PRIME64_1;
        hash ^= acc2;
        hash = hash*PRIME64_1 + PRIME64_4;

        acc3 *= PRIME64_2;
        acc3 = XXH_rotl64(acc3, 31);
        acc3 *= PRIME64_1;
        hash ^= acc3;
        hash = hash*PRIME64_1 + PRIME64_4;

        acc4 *= PRIME64_2;
        acc4 = XXH_rotl64(acc4, 31);
        acc4 *= PRIME64_1;
        hash ^= acc4;
        hash = hash*PRIME64_1 + PRIME64_4;
    }

    hash += (U64) st->total_len;

    /* Buffered whole 64-bit words. */
    for (; cur+8 <= end; cur += 8)
    {
        U64 k1 = XXH_readLE64(cur, endian);
        k1 *= PRIME64_2;
        k1 = XXH_rotl64(k1,31);
        k1 *= PRIME64_1;
        hash ^= k1;
        hash = XXH_rotl64(hash,27) * PRIME64_1 + PRIME64_4;
    }

    /* At most one buffered 32-bit word. */
    if (cur+4 <= end)
    {
        hash ^= (U64)(XXH_readLE32(cur, endian)) * PRIME64_1;
        hash = XXH_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
        cur += 4;
    }

    /* Buffered single bytes. */
    for (; cur < end; cur++)
    {
        hash ^= (*cur) * PRIME64_5;
        hash = XXH_rotl64(hash, 11) * PRIME64_1;
    }

    /* Final mixing. */
    hash ^= hash >> 33;
    hash *= PRIME64_2;
    hash ^= hash >> 29;
    hash *= PRIME64_3;
    hash ^= hash >> 32;
    return hash;
}
/*
 * Public streaming entry point: returns the 64-bit hash of the data fed into
 * `state_in` so far. Selects the no-swap (XXH_littleEndian) variant of the
 * inlined worker when the CPU is detected as little-endian or
 * XXH_FORCE_NATIVE_FORMAT is non-zero, and the XXH_bigEndian variant
 * otherwise.
 */
unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
    const XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
    return ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
        ? XXH64_digest_endian(state_in, XXH_littleEndian)
        : XXH64_digest_endian(state_in, XXH_bigEndian);
}

192
xxhash/xxhash.h Normal file
View File

@ -0,0 +1,192 @@
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
A 64-bits version, named XXH64, is available since r35.
It offers much better speed, but for 64-bits applications only.
Name Speed on 64 bits Speed on 32 bits
XXH64 13.8 GB/s 1.9 GB/s
XXH32 6.8 GB/s 6.0 GB/s
*/
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/*****************************
* Definitions
*****************************/
#include <stddef.h> /* size_t */
/* Status code returned by the state-management and streaming functions; XXH_OK is 0. */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
/*****************************
* Namespace Emulation
*****************************/
/* Motivations :
If you need to include xxHash in your library,
but wish to keep xxHash symbols out of your library's interface,
in order to avoid potential name collisions if another library also includes xxHash,
you can use XXH_NAMESPACE, which will automatically prefix every symbol from xxHash
with the value of XXH_NAMESPACE (so do not leave it empty, and avoid numeric values).
Note that no change is required within the calling program :
it can still call xxHash functions using their regular names.
They will be automatically translated by this header.
*/
#ifdef XXH_NAMESPACE
/* Two-level concatenation so that XXH_NAMESPACE is macro-expanded before being pasted. */
# define XXH_CAT(A,B) A##B
# define XXH_NAME2(A,B) XXH_CAT(A,B)
# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
#endif
/*****************************
* Simple Hash Functions
*****************************/
unsigned int XXH32 (const void* input, size_t length, unsigned seed);
unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
/*
XXH32() :
Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
This function successfully passes all SMHasher tests.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
XXH64() :
Calculate the 64-bit hash of a sequence of "length" bytes stored at memory address "input".
Faster on 64-bit systems. Slower on 32-bit systems.
*/
/*****************************
* Advanced Hash Functions
*****************************/
typedef struct { long long ll[ 6]; } XXH32_state_t;
typedef struct { long long ll[11]; } XXH64_state_t;
/*
These structures allow static allocation of XXH states.
They only reserve storage : the implementation casts them to its own internal state layout,
and checks at compile time that they are large enough to hold it.
States must then be initialized using XXHnn_reset() before first use.
If you prefer dynamic allocation, please refer to the functions below.
*/
XXH32_state_t* XXH32_createState(void);
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH64_state_t* XXH64_createState(void);
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
/*
These functions create and release memory for XXH state.
XXHnn_createState() returns the allocator's result unchanged
(presumably NULL when allocation fails - check before use).
XXHnn_freeState() always returns XXH_OK.
States must then be initialized using XXHnn_reset() before first use.
*/
XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed);
XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
unsigned int XXH32_digest (const XXH32_state_t* statePtr);
XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
/*
These functions calculate the xxHash of an input provided in multiple smaller packets,
as opposed to an input provided as a single block.
XXH state space must first be allocated, using either the static or the dynamic method provided above.
Start a new hash by initializing the state with a seed, using XXHnn_reset().
Then, feed the hash state by calling XXHnn_update() as many times as necessary.
Obviously, input must be valid, meaning allocated and read-accessible.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
Finally, you can produce a hash at any time, by using XXHnn_digest().
This function returns the final nn-bit hash and does not modify the state.
You can nonetheless continue feeding the hash state with more input,
and therefore get some new hashes, by calling XXHnn_digest() again.
When you are done, don't forget to free the XXH state space, typically using XXHnn_freeState().
*/
#if defined (__cplusplus)
}
#endif