Huge shader infrastructure refactor

Jack Andersen 2018-10-06 16:49:22 -10:00
parent 08d632a8bd
commit c29d837ab5
55 changed files with 10392 additions and 1472 deletions

.gitmodules
View File

@ -4,3 +4,9 @@
[submodule "logvisor"]
path = logvisor
url = ../logvisor.git
[submodule "lib/graphicsdev/NX/mesa"]
path = lib/graphicsdev/nx/mesa
url = ../mesa.git
[submodule "lib/graphicsdev/NX/libdrm_nouveau"]
path = lib/graphicsdev/nx/libdrm_nouveau
url = https://github.com/devkitPro/libdrm_nouveau.git

View File

@ -32,9 +32,14 @@ add_subdirectory(soxr)
set(BOO_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include CACHE PATH "boo include path" FORCE)
include_directories(include xxhash ${LOGVISOR_INCLUDE_DIR})
include_directories(include ${CMAKE_CURRENT_SOURCE_DIR} ${LOGVISOR_INCLUDE_DIR})
if(NOT GEKKO AND NOT CAFE AND NOT WINDOWS_STORE)
add_subdirectory(lib/graphicsdev/nx)
if(TARGET nx_compiler)
list(APPEND _BOO_SYS_DEFINES -DHECL_NOUVEAU_NX=1)
endif()
if(NOT GEKKO AND NOT CAFE AND NOT WINDOWS_STORE AND NOT NX)
list(APPEND PLAT_SRCS
lib/graphicsdev/GL.cpp
lib/graphicsdev/glew.c)
@ -59,7 +64,8 @@ if(WINDOWS_STORE)
lib/inputdev/HIDDeviceUWP.cpp
lib/graphicsdev/D3D11.cpp
lib/graphicsdev/D3D12.cpp
lib/audiodev/WASAPI.cpp)
lib/audiodev/WASAPI.cpp
lib/audiodev/AudioMatrixSSE.cpp)
list(APPEND PLAT_HDRS
include/boo/UWPViewProvider.hpp
@ -99,7 +105,8 @@ elseif(WIN32)
lib/inputdev/HIDListenerWinUSB.cpp
lib/inputdev/HIDDeviceWinUSB.cpp
lib/graphicsdev/D3D11.cpp
lib/audiodev/WASAPI.cpp)
lib/audiodev/WASAPI.cpp
lib/audiodev/AudioMatrixSSE.cpp)
list(APPEND PLAT_HDRS
include/boo/graphicsdev/D3D.hpp)
@ -116,7 +123,8 @@ elseif(APPLE)
lib/inputdev/HIDListenerIOKit.cpp
lib/inputdev/HIDDeviceIOKit.cpp
lib/graphicsdev/Metal.mm
lib/audiodev/AQS.cpp)
lib/audiodev/AQS.cpp
lib/audiodev/AudioMatrixSSE.cpp)
set_source_files_properties(lib/mac/ApplicationCocoa.mm
lib/mac/WindowCocoa.mm
lib/graphicsdev/Metal.mm
@ -147,6 +155,16 @@ elseif(APPLE)
${QUARTZCORE_LIBRARY} ${COREVIDEO_LIBRARY} ${AUDIOTOOLBOX_LIBRARY}
${COREAUDIO_LIBRARY} ${COREMIDI_LIBRARY})
elseif(NX)
list(APPEND _BOO_SYS_DEFINES -DBOO_HAS_NX=1)
list(APPEND PLAT_SRCS
lib/nx/ApplicationNX.cpp
lib/nx/WindowNX.cpp
lib/audiodev/AudioMatrix.cpp
lib/inputdev/HIDListenerNX.cpp
lib/inputdev/HIDDeviceNX.cpp)
list(APPEND _BOO_SYS_LIBS nx_runtime)
else(NOT GEKKO)
list(APPEND PLAT_SRCS
lib/x11/XlibCommon.hpp
@ -226,38 +244,42 @@ else(NOT GEKKO)
endif()
list(APPEND PLAT_SRCS
lib/inputdev/HIDListenerUdev.cpp
lib/inputdev/HIDDeviceUdev.cpp)
lib/inputdev/HIDDeviceUdev.cpp
lib/audiodev/AudioMatrixSSE.cpp)
list(APPEND _BOO_SYS_LIBS xcb X11-xcb dl udev)
else()
list(APPEND PLAT_SRCS
lib/inputdev/HIDListenerBSD.cpp
lib/inputdev/HIDDeviceBSD.cpp)
lib/inputdev/HIDDeviceBSD.cpp
lib/audiodev/AudioMatrixSSE.cpp)
list(APPEND _BOO_SYS_LIBS execinfo)
endif()
endif()
# For some reason, clang takes forever if glew.c is not built with -Os
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
set_source_files_properties(lib/graphicsdev/glew.c PROPERTIES COMPILE_FLAGS -Os)
if(NOT NX)
# For some reason, clang takes forever if glew.c is not built with -Os
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
set_source_files_properties(lib/graphicsdev/glew.c PROPERTIES COMPILE_FLAGS -Os)
endif()
# Empty link args for boo's use
function(glslang_set_link_args TARGET)
endfunction(glslang_set_link_args)
add_definitions("-DENABLE_OPT=0")
add_subdirectory(glslang/glslang)
add_subdirectory(glslang/OGLCompilersDLL)
add_subdirectory(glslang/SPIRV)
add_subdirectory(glslang/StandAlone)
target_include_directories(glslang-default-resource-limits
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/glslang
)
list(APPEND _BOO_SYS_LIBS glslang soxr xxhash OSDependent OGLCompiler SPIRV glslang-default-resource-limits)
endif()
# Empty link args for boo's use
function(glslang_set_link_args TARGET)
endfunction(glslang_set_link_args)
add_definitions("-DENABLE_OPT=0")
add_subdirectory(glslang/glslang)
add_subdirectory(glslang/OGLCompilersDLL)
add_subdirectory(glslang/SPIRV)
add_subdirectory(glslang/StandAlone)
target_include_directories(glslang-default-resource-limits
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/glslang
)
list(APPEND _BOO_SYS_LIBS glslang soxr xxhash OSDependent OGLCompiler SPIRV glslang-default-resource-limits)
set(BOO_SYS_LIBS ${_BOO_SYS_LIBS} CACHE PATH "boo system libraries" FORCE)
set(BOO_SYS_DEFINES ${_BOO_SYS_DEFINES} CACHE STRING "boo system defines" FORCE)
set(BOO_SYS_INCLUDES ${_BOO_SYS_INCLUDES} CACHE PATH "boo system includes" FORCE)
@ -281,8 +303,6 @@ add_library(boo
lib/audiodev/Common.hpp
lib/audiodev/WAVOut.cpp
lib/audiodev/AudioMatrix.hpp
#lib/audiodev/AudioMatrix.cpp
lib/audiodev/AudioMatrixSSE.cpp
lib/audiodev/AudioVoiceEngine.hpp
lib/audiodev/AudioVoiceEngine.cpp
lib/audiodev/AudioVoice.hpp

View File

@ -3,6 +3,7 @@
#include <atomic>
#include <mutex>
#include "nxstl/mutex"
namespace boo
{

View File

@ -4,6 +4,7 @@
#include <boo/boo.hpp>
#include <mutex>
#include <condition_variable>
#include "nxstl/condition_variable"
namespace boo
{

View File

@ -41,7 +41,8 @@ public:
UWP = 7,
Revolution = 8,
Cafe = 9,
Qt = 10
NX = 10,
Qt = 11
};
virtual EPlatformType getPlatformType() const=0;

View File

@ -26,7 +26,8 @@ public:
D3D11 = 4,
Metal = 6,
GX = 7,
GX2 = 8
GX2 = 8,
NX = 9
};
enum class EPixelFormat
@ -39,7 +40,7 @@ public:
RGBAF32_Z24 = 5
};
virtual ~IGraphicsContext() {}
virtual ~IGraphicsContext() = default;
virtual EGraphicsAPI getAPI() const=0;
virtual EPixelFormat getPixelFormat() const=0;

View File

@ -60,15 +60,15 @@ namespace boo
using SystemString = std::wstring;
using SystemStringView = std::wstring_view;
using SystemChar = wchar_t;
# ifndef _S
# define _S(val) L ## val
# ifndef _SYS_STR
# define _SYS_STR(val) L ## val
# endif
#else
using SystemString = std::string;
using SystemStringView = std::string_view;
using SystemChar = char;
# ifndef _S
# define _S(val) val
# ifndef _SYS_STR
# define _SYS_STR(val) val
# endif
#endif
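
The _S literal macro is renamed to _SYS_STR throughout this commit. A minimal usage sketch of the renamed macro; the variable names here are illustrative only:

// _SYS_STR (formerly _S) wraps literals so the same code compiles as wchar_t
// strings on Windows and plain char strings elsewhere.
boo::SystemString title = _SYS_STR("boo window");
const boo::SystemChar* api = _SYS_STR("OpenGL");   // mirrors the platformName() changes below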

View File

@ -1,6 +1,8 @@
#ifndef BOO_THREADLOCALPTR_HPP
#define BOO_THREADLOCALPTR_HPP
#ifndef __SWITCH__
#if _WIN32
#else
#include <pthread.h>
@ -28,4 +30,6 @@ public:
T* operator->() {return get();}
};
#endif
#endif // BOO_THREADLOCALPTR_HPP

View File

@ -23,7 +23,7 @@ class D3DDataFactory : public IGraphicsDataFactory
public:
virtual ~D3DDataFactory() {}
class Context : public IGraphicsDataFactory::Context
class Context final : public IGraphicsDataFactory::Context
{
public:
bool bindingNeedsVertexFormat() const {return false;}

View File

@ -21,7 +21,7 @@ struct GLContext
class GLDataFactory : public IGraphicsDataFactory
{
public:
class Context : public IGraphicsDataFactory::Context
class Context final : public IGraphicsDataFactory::Context
{
friend class GLDataFactoryImpl;
GLDataFactory& m_parent;
@ -30,7 +30,7 @@ public:
~Context();
public:
Platform platform() const { return Platform::OpenGL; }
const SystemChar* platformName() const { return _S("OpenGL"); }
const SystemChar* platformName() const { return _SYS_STR("OpenGL"); }
ObjToken<IGraphicsBufferS> newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count);
ObjToken<IGraphicsBufferD> newDynamicBuffer(BufferUse use, size_t stride, size_t count);
@ -43,28 +43,17 @@ public:
ObjToken<ITextureR> newRenderTexture(size_t width, size_t height, TextureClampMode clampMode,
size_t colorBindingCount, size_t depthBindingCount);
bool bindingNeedsVertexFormat() const { return true; }
ObjToken<IVertexFormat> newVertexFormat(size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert = 0, size_t baseInst = 0);
ObjToken<IShaderStage>
newShaderStage(const uint8_t* data, size_t size, PipelineStage stage);
ObjToken<IShaderPipeline> newShaderPipeline(const char* vertSource, const char* fragSource,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha = true);
ObjToken<IShaderPipeline> newTessellationShaderPipeline(const char* vertSource, const char* fragSource,
const char* controlSource, const char* evaluationSource,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha = true);
ObjToken<IShaderPipeline>
newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo);
ObjToken<IShaderDataBinding>
newShaderDataBinding(const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IVertexFormat>& vtxFormat,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,

View File

@ -8,6 +8,10 @@
#include "boo/ThreadLocalPtr.hpp"
#include "boo/BooObject.hpp"
#ifdef __SWITCH__
#include <ctype.h>
#endif
namespace boo
{
struct IGraphicsCommandQueue;
@ -120,11 +124,6 @@ protected:
ITextureR() : ITexture(TextureType::Render) {}
};
/** Opaque token for representing the data layout of a vertex
* in a VBO. Also able to reference buffers for platforms like
* OpenGL that cache object refs */
struct IVertexFormat : IObj {};
/** Types of vertex attributes */
enum class VertexSemantic
{
@ -147,16 +146,35 @@ ENABLE_BITWISE_ENUM(VertexSemantic)
/** Used to create IVertexFormat */
struct VertexElementDescriptor
{
ObjToken<IGraphicsBuffer> vertBuffer;
ObjToken<IGraphicsBuffer> indexBuffer;
VertexSemantic semantic;
int semanticIdx = 0;
VertexElementDescriptor() = default;
VertexElementDescriptor(const ObjToken<IGraphicsBuffer>& v, const ObjToken<IGraphicsBuffer>& i,
VertexSemantic s, int idx=0)
: vertBuffer(v), indexBuffer(i), semantic(s), semanticIdx(idx) {}
VertexElementDescriptor(VertexSemantic s, int idx=0)
: semantic(s), semanticIdx(idx) {}
};
/** Structure for passing vertex format info for pipeline construction */
struct VertexFormatInfo
{
size_t elementCount = 0;
const VertexElementDescriptor* elements = nullptr;
VertexFormatInfo() = default;
VertexFormatInfo(size_t sz, const VertexElementDescriptor* elem)
: elementCount(sz), elements(elem) {}
template<typename T>
VertexFormatInfo(const T& tp)
: elementCount(std::extent_v<T>), elements(tp) {}
VertexFormatInfo(const std::initializer_list<VertexElementDescriptor>& l)
: elementCount(l.size()), elements(l.begin()) {}
};
/** Opaque token for referencing a shader stage usable in a graphics pipeline */
struct IShaderStage : IObj {};
/** Opaque token for referencing a complete graphics pipeline state necessary
* to rasterize geometry (shaders and blending modes mainly) */
struct IShaderPipeline : IObj {};
@ -169,8 +187,12 @@ struct IShaderDataBinding : IObj {};
/** Used wherever distinction of pipeline stages is needed */
enum class PipelineStage
{
Null,
Vertex,
Fragment
Fragment,
Geometry,
Control,
Evaluation
};
/** Used by platform shader pipeline constructors */
@ -178,7 +200,7 @@ enum class Primitive
{
Triangles,
TriStrips,
Patches /* Do not use directly, construct a tessellation pipeline instead */
Patches
};
/** Used by platform shader pipeline constructors */
@ -219,6 +241,22 @@ enum class BlendFactor
Subtract
};
/** Structure for passing additional pipeline construction information */
struct AdditionalPipelineInfo
{
BlendFactor srcFac = BlendFactor::One;
BlendFactor dstFac = BlendFactor::Zero;
Primitive prim = Primitive::TriStrips;
ZTest depthTest = ZTest::LEqual;
bool depthWrite = true;
bool colorWrite = true;
bool alphaWrite = false;
CullMode culling = CullMode::Backface;
uint32_t patchSize = 0;
bool overwriteAlpha = true;
bool depthAttachment = true;
};
/** Factory object for creating batches of resources as an IGraphicsData token */
struct IGraphicsDataFactory
{
@ -232,7 +270,7 @@ struct IGraphicsDataFactory
Metal,
Vulkan,
GX,
GX2
NX
};
virtual Platform platform() const=0;
virtual const SystemChar* platformName() const=0;
@ -259,14 +297,30 @@ struct IGraphicsDataFactory
newRenderTexture(size_t width, size_t height, TextureClampMode clampMode,
size_t colorBindingCount, size_t depthBindingCount)=0;
virtual bool bindingNeedsVertexFormat() const=0;
virtual ObjToken<IVertexFormat>
newVertexFormat(size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert = 0, size_t baseInst = 0)=0;
virtual ObjToken<IShaderStage>
newShaderStage(const uint8_t* data, size_t size, PipelineStage stage)=0;
ObjToken<IShaderStage>
newShaderStage(const std::vector<uint8_t>& data, PipelineStage stage)
{
return newShaderStage(data.data(), data.size(), stage);
}
virtual ObjToken<IShaderPipeline>
newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo)=0;
ObjToken<IShaderPipeline>
newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
const VertexFormatInfo& vtxFmt, const AdditionalPipelineInfo& additionalInfo)
{
return newShaderPipeline(vertex, fragment, {}, {}, {}, vtxFmt, additionalInfo);
}
virtual ObjToken<IShaderDataBinding>
newShaderDataBinding(const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IVertexFormat>& vtxFormat,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,
@ -278,7 +332,6 @@ struct IGraphicsDataFactory
ObjToken<IShaderDataBinding>
newShaderDataBinding(const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IVertexFormat>& vtxFormat,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,
@ -287,7 +340,7 @@ struct IGraphicsDataFactory
const int* texBindIdx, const bool* depthBind,
size_t baseVert = 0, size_t baseInst = 0)
{
return newShaderDataBinding(pipeline, vtxFormat, vbo, instVbo, ibo,
return newShaderDataBinding(pipeline, vbo, instVbo, ibo,
ubufCount, ubufs, ubufStages, nullptr,
nullptr, texCount, texs, texBindIdx, depthBind,
baseVert, baseInst);
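
The hunks above replace the source-string pipeline constructors and the standalone IVertexFormat with a two-step stage/pipeline API. A rough usage sketch against the new interface, modeled on the gamma-correction setup later in this commit; ctx is an IGraphicsDataFactory::Context inside a commitTransaction, and VS_SRC, FS_SRC, vbo, and the texture tokens are placeholders:

// Stages are created first; for the GL backend the "data" is null-terminated
// GLSL source, so a size of 0 is passed (per the GL.cpp changes in this commit).
auto vertex   = ctx.newShaderStage((const uint8_t*)VS_SRC, 0, boo::PipelineStage::Vertex);
auto fragment = ctx.newShaderStage((const uint8_t*)FS_SRC, 0, boo::PipelineStage::Fragment);

// The vertex layout now rides on the pipeline; no separate IVertexFormat object.
const boo::VertexElementDescriptor elems[] = {
    {boo::VertexSemantic::Position4},
    {boo::VertexSemantic::UV4}
};

// Fixed-function state is grouped into AdditionalPipelineInfo.
boo::AdditionalPipelineInfo info = {
    boo::BlendFactor::One, boo::BlendFactor::Zero,
    boo::Primitive::TriStrips, boo::ZTest::None, false, true, false, boo::CullMode::None
};

auto pipeline = ctx.newShaderPipeline(vertex, fragment, elems, info);

// Data bindings drop the IVertexFormat argument; buffers are passed directly.
boo::ObjToken<boo::ITexture> texs[] = {tex0, tex1};
auto binding = ctx.newShaderDataBinding(pipeline, vbo, {}, {},
                                        0, nullptr, nullptr, 2, texs, nullptr, nullptr);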

View File

@ -14,7 +14,7 @@ struct BaseGraphicsData;
class MetalDataFactory : public IGraphicsDataFactory
{
public:
class Context : public IGraphicsDataFactory::Context
class Context final : public IGraphicsDataFactory::Context
{
friend class MetalDataFactoryImpl;
MetalDataFactory& m_parent;

View File

@ -0,0 +1,93 @@
#pragma once
#if BOO_HAS_NX
#include "IGraphicsDataFactory.hpp"
#include "IGraphicsCommandQueue.hpp"
#include "nx_compiler.hpp"
#include <switch/nvidia/fence.h>
struct pipe_screen;
struct pipe_context;
struct st_context;
struct pipe_surface;
namespace boo
{
struct BaseGraphicsData;
struct NXContext
{
struct pipe_surface* m_windowSurfaces[2];
NvFence m_fences[2];
bool m_fence_swap;
bool initialize();
bool terminate();
bool _resizeWindowSurfaces();
unsigned m_sampleCount = 1;
struct pipe_screen* m_screen = nullptr;
struct pipe_context* m_pctx = nullptr;
struct st_context* m_st = nullptr;
nx_compiler m_compiler;
std::unordered_map<uint32_t, void*> m_samplers;
std::unordered_map<uint32_t, void*> m_blendStates;
std::unordered_map<uint32_t, void*> m_rasStates;
std::unordered_map<uint32_t, void*> m_dsStates;
std::unordered_map<uint64_t, void*> m_vtxElemStates;
};
class NXDataFactory : public IGraphicsDataFactory
{
public:
class Context final : public IGraphicsDataFactory::Context
{
friend class NXDataFactoryImpl;
NXDataFactory& m_parent;
boo::ObjToken<BaseGraphicsData> m_data;
Context(NXDataFactory& parent __BooTraceArgs);
~Context();
public:
Platform platform() const {return Platform::NX;}
const SystemChar* platformName() const {return _SYS_STR("NX");}
boo::ObjToken<IGraphicsBufferS> newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count);
boo::ObjToken<IGraphicsBufferD> newDynamicBuffer(BufferUse use, size_t stride, size_t count);
boo::ObjToken<ITextureS> newStaticTexture(size_t width, size_t height, size_t mips, TextureFormat fmt,
TextureClampMode clampMode, const void* data, size_t sz);
boo::ObjToken<ITextureSA> newStaticArrayTexture(size_t width, size_t height, size_t layers, size_t mips,
TextureFormat fmt, TextureClampMode clampMode,
const void* data, size_t sz);
boo::ObjToken<ITextureD> newDynamicTexture(size_t width, size_t height, TextureFormat fmt, TextureClampMode clampMode);
boo::ObjToken<ITextureR> newRenderTexture(size_t width, size_t height, TextureClampMode clampMode,
size_t colorBindCount, size_t depthBindCount);
ObjToken<IShaderStage>
newShaderStage(const uint8_t* data, size_t size, PipelineStage stage);
ObjToken<IShaderPipeline>
newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo);
boo::ObjToken<IShaderDataBinding>
newShaderDataBinding(const boo::ObjToken<IShaderPipeline>& pipeline,
const boo::ObjToken<IGraphicsBuffer>& vbo,
const boo::ObjToken<IGraphicsBuffer>& instVbo,
const boo::ObjToken<IGraphicsBuffer>& ibo,
size_t ubufCount, const boo::ObjToken<IGraphicsBuffer>* ubufs, const PipelineStage* ubufStages,
const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, const boo::ObjToken<ITexture>* texs,
const int* bindIdxs, const bool* bindDepth,
size_t baseVert = 0, size_t baseInst = 0);
};
};
}
#endif

View File

@ -134,7 +134,7 @@ extern VulkanContext g_VulkanContext;
class VulkanDataFactory : public IGraphicsDataFactory
{
public:
class Context : public IGraphicsDataFactory::Context
class Context final : public IGraphicsDataFactory::Context
{
friend class VulkanDataFactoryImpl;
VulkanDataFactory& m_parent;
@ -143,7 +143,7 @@ public:
~Context();
public:
Platform platform() const {return Platform::Vulkan;}
const SystemChar* platformName() const {return _S("Vulkan");}
const SystemChar* platformName() const {return _SYS_STR("Vulkan");}
boo::ObjToken<IGraphicsBufferS> newStaticBuffer(BufferUse use, const void* data, size_t stride, size_t count);
boo::ObjToken<IGraphicsBufferD> newDynamicBuffer(BufferUse use, size_t stride, size_t count);
@ -157,58 +157,17 @@ public:
boo::ObjToken<ITextureR> newRenderTexture(size_t width, size_t height, TextureClampMode clampMode,
size_t colorBindCount, size_t depthBindCount);
bool bindingNeedsVertexFormat() const {return false;}
boo::ObjToken<IVertexFormat> newVertexFormat(size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert = 0, size_t baseInst = 0);
ObjToken<IShaderStage>
newShaderStage(const uint8_t* data, size_t size, PipelineStage stage);
boo::ObjToken<IShaderPipeline> newShaderPipeline(const char* vertSource, const char* fragSource,
std::vector<unsigned int>* vertBlobOut,
std::vector<unsigned int>* fragBlobOut,
std::vector<unsigned char>* pipelineBlob,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha = true);
boo::ObjToken<IShaderPipeline> newShaderPipeline(const char* vertSource, const char* fragSource,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling)
{
return newShaderPipeline(vertSource, fragSource, nullptr, nullptr, nullptr,
vtxFmt, srcFac, dstFac, prim, depthTest, depthWrite,
colorWrite, alphaWrite, culling);
}
boo::ObjToken<IShaderPipeline> newTessellationShaderPipeline(const char* vertSource, const char* fragSource,
const char* controlSource, const char* evaluationSource,
std::vector<unsigned int>* vertBlobOut,
std::vector<unsigned int>* fragBlobOut,
std::vector<unsigned int>* controlBlobOut,
std::vector<unsigned int>* evaluationBlobOut,
std::vector<unsigned char>* pipelineBlob,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha = true);
boo::ObjToken<IShaderPipeline> newTessellationShaderPipeline(const char* vertSource, const char* fragSource,
const char* controlSource, const char* evaluationSource,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling)
{
return newTessellationShaderPipeline(vertSource, fragSource, controlSource, evaluationSource,
nullptr, nullptr, nullptr, nullptr, nullptr,
vtxFmt, srcFac, dstFac, patchSize, depthTest, depthWrite,
colorWrite, alphaWrite, culling);
}
ObjToken<IShaderPipeline>
newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo);
boo::ObjToken<IShaderDataBinding>
newShaderDataBinding(const boo::ObjToken<IShaderPipeline>& pipeline,
const boo::ObjToken<IVertexFormat>& vtxFormat,
const boo::ObjToken<IGraphicsBuffer>& vbo,
const boo::ObjToken<IGraphicsBuffer>& instVbo,
const boo::ObjToken<IGraphicsBuffer>& ibo,
@ -218,6 +177,8 @@ public:
const int* bindIdxs, const bool* bindDepth,
size_t baseVert = 0, size_t baseInst = 0);
};
static std::vector<uint8_t> CompileGLSL(const char* source, PipelineStage stage);
};
}
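
The new static CompileGLSL helper declared above pairs with newShaderStage, so callers hand the Vulkan backend a precompiled blob rather than source strings. A brief sketch, assuming a factory Context ctx and a GLSL vertex source VS_SRC:

// Compile GLSL once, then wrap the resulting bytes in an IShaderStage token.
std::vector<uint8_t> blob = boo::VulkanDataFactory::CompileGLSL(VS_SRC, boo::PipelineStage::Vertex);
auto vertex = ctx.newShaderStage(blob, boo::PipelineStage::Vertex);  // vector overload from IGraphicsDataFactory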

View File

@ -0,0 +1,81 @@
#pragma once
#include <memory>
#include <string>
/* These match mesa's internal stages */
enum class nx_shader_stage
{
NONE = -1,
VERTEX = 0,
TESS_CTRL = 1,
TESS_EVAL = 2,
GEOMETRY = 3,
FRAGMENT = 4,
COMPUTE = 5,
};
struct standalone_options
{
int glsl_version;
int dump_ast;
int dump_hir;
int dump_lir;
int dump_builder;
int do_link;
int just_log;
};
class nx_compiler;
class nx_shader_stage_object
{
friend class nx_compiler;
nx_compiler* m_parent = nullptr;
struct gl_shader *m_shader = nullptr;
nx_shader_stage_object(nx_compiler& parent) : m_parent(&parent) {}
public:
nx_shader_stage_object() = default;
nx_shader_stage_object(const nx_shader_stage_object&);
nx_shader_stage_object& operator=(const nx_shader_stage_object&);
~nx_shader_stage_object() { reset(); }
void reset();
operator bool() const;
nx_shader_stage stage() const;
const char* info_log() const;
};
class nx_linked_shader
{
friend class nx_compiler;
nx_compiler* m_parent = nullptr;
struct gl_shader_program* m_program = nullptr;
nx_linked_shader(nx_compiler& parent) : m_parent(&parent) {}
public:
nx_linked_shader() = default;
nx_linked_shader(const nx_linked_shader&);
nx_linked_shader& operator=(const nx_linked_shader&);
~nx_linked_shader() { reset(); }
void reset();
operator bool() const { return m_program != nullptr; }
const struct gl_shader_program* program() const { return m_program; }
};
class nx_compiler
{
friend class nx_shader_stage_object;
friend class nx_linked_shader;
struct pipe_screen *m_screen = nullptr;
struct st_context *m_st = nullptr;
struct standalone_options m_options = {};
bool m_ownsCtx = false;
void compile_shader(struct gl_context *ctx, struct gl_shader *shader);
public:
nx_compiler();
~nx_compiler();
bool initialize(struct pipe_screen *screen, struct st_context *st,
const struct standalone_options *o = nullptr);
bool initialize(const struct standalone_options *o = nullptr);
nx_shader_stage_object compile(nx_shader_stage type, const char *source);
nx_linked_shader link(unsigned num_stages, const nx_shader_stage_object **stages, std::string* infoLog = nullptr);
std::pair<std::shared_ptr<uint8_t[]>, size_t>
offline_link(unsigned num_stages, const nx_shader_stage_object **stages, std::string* infoLog = nullptr);
};
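
A rough sketch of driving the new standalone compiler: initialize it, compile a stage per source string, then link. VS_SRC and FS_SRC are placeholder GLSL sources, and error handling is kept minimal:

#include "nx_compiler.hpp"   // as included by NX.hpp above
#include <string>

bool build_program(const char* VS_SRC, const char* FS_SRC)
{
    nx_compiler compiler;
    compiler.initialize();   // standalone mode; the other overload reuses an existing pipe_screen/st_context

    nx_shader_stage_object vs = compiler.compile(nx_shader_stage::VERTEX, VS_SRC);
    nx_shader_stage_object fs = compiler.compile(nx_shader_stage::FRAGMENT, FS_SRC);
    if (!vs || !fs)
        return false;        // per-stage errors available via info_log()

    const nx_shader_stage_object* stages[] = {&vs, &fs};
    std::string log;
    nx_linked_shader linked = compiler.link(2, stages, &log);   // in-memory gl_shader_program
    return bool(linked);
}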

View File

@ -7,6 +7,7 @@
#include <memory>
#include <vector>
#include <mutex>
#include "nxstl/mutex"
#include "boo/System.hpp"
#if _WIN32
@ -31,13 +32,17 @@ class DeviceBase : public std::enable_shared_from_this<DeviceBase>
friend struct DeviceSignature;
friend class HIDDeviceIOKit;
uint64_t m_typeHash;
class DeviceToken* m_token;
std::shared_ptr<IHIDDevice> m_hidDev;
void _deviceDisconnected();
public:
DeviceBase(DeviceToken* token);
DeviceBase(uint64_t typeHash, DeviceToken* token);
virtual ~DeviceBase() = default;
uint64_t getTypeHash() const { return m_typeHash; }
void closeDevice();
/* Callbacks */
@ -76,7 +81,7 @@ protected:
std::mutex m_callbackLock;
CB* m_callback = nullptr;
public:
TDeviceBase(DeviceToken* token) : DeviceBase(token) {}
TDeviceBase(uint64_t typeHash, DeviceToken* token) : DeviceBase(typeHash, token) {}
void setCallback(CB* cb)
{
std::lock_guard<std::mutex> lk(m_callbackLock);

View File

@ -93,7 +93,7 @@ public:
};
/* Application must specify its interested device-types */
DeviceFinder(std::unordered_set<std::type_index> types)
DeviceFinder(std::unordered_set<uint64_t> types)
{
if (skDevFinder)
{
@ -101,12 +101,12 @@ public:
abort();
}
skDevFinder = this;
for (const std::type_index& typeIdx : types)
for (const uint64_t& typeHash : types)
{
const DeviceSignature* sigIter = BOO_DEVICE_SIGS;
while (sigIter->m_name)
{
if (sigIter->m_typeIdx == typeIdx)
if (sigIter->m_typeHash == typeHash)
m_types.push_back(sigIter);
++sigIter;
}

View File

@ -21,26 +21,28 @@ enum class DeviceType
class DeviceToken;
class DeviceBase;
#define dev_typeid(type) std::hash<std::string>()(#type)
struct DeviceSignature
{
typedef std::vector<const DeviceSignature*> TDeviceSignatureSet;
typedef std::function<std::shared_ptr<DeviceBase>(DeviceToken*)> TFactoryLambda;
const char* m_name;
std::type_index m_typeIdx;
uint64_t m_typeHash;
unsigned m_vid, m_pid;
TFactoryLambda m_factory;
DeviceType m_type;
DeviceSignature() : m_name(NULL), m_typeIdx(typeid(DeviceSignature)) {} /* Sentinel constructor */
DeviceSignature(const char* name, std::type_index&& typeIdx, unsigned vid, unsigned pid,
DeviceSignature() : m_name(NULL), m_typeHash(dev_typeid(DeviceSignature)) {} /* Sentinel constructor */
DeviceSignature(const char* name, uint64_t typeHash, unsigned vid, unsigned pid,
TFactoryLambda&& factory, DeviceType type=DeviceType::None)
: m_name(name), m_typeIdx(typeIdx), m_vid(vid), m_pid(pid),
: m_name(name), m_typeHash(typeHash), m_vid(vid), m_pid(pid),
m_factory(factory), m_type(type) {}
static bool DeviceMatchToken(const DeviceToken& token, const TDeviceSignatureSet& sigSet);
static std::shared_ptr<DeviceBase> DeviceNew(DeviceToken& token);
};
#define DEVICE_SIG(name, vid, pid, type) \
DeviceSignature(#name, typeid(name), vid, pid,\
DeviceSignature(#name, dev_typeid(name), vid, pid,\
[](DeviceToken* tok) -> std::shared_ptr<DeviceBase> {return std::make_shared<name>(tok);}, type)
#define DEVICE_SIG_SENTINEL() DeviceSignature()
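
Device signatures now key on a string hash (dev_typeid) instead of std::type_index, and TDeviceBase carries that hash explicitly, as the XInputPad change below shows. A small sketch of how a device class, its signature entry, and a DeviceFinder line up under the new scheme; GenericPad, its callback interface, and the VID/PID are hypothetical:

// dev_typeid(#type) hashes the class name, so the DEVICE_SIG table entry and
// the TDeviceBase constructor agree without RTTI:
//   DEVICE_SIG(GenericPad, 0x057e, 0x2009, DeviceType::None)
class GenericPad final : public TDeviceBase<IGenericPadCallback>
{
public:
    GenericPad(DeviceToken* tok)
    : TDeviceBase<IGenericPadCallback>(dev_typeid(GenericPad), tok) {}
    void deviceDisconnected() {}
};

// DeviceFinder now takes the same hashes rather than std::type_index values.
boo::DeviceFinder finder({dev_typeid(GenericPad)});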

View File

@ -68,7 +68,7 @@ class DolphinSmashAdapter final : public TDeviceBase<IDolphinSmashAdapterCallbac
uint8_t m_knownControllers = 0;
uint8_t m_rumbleRequest = 0;
bool m_hardStop[4] = {false};
uint8_t m_rumbleState = 0;
uint8_t m_rumbleState = 0xf; /* Force initial send of stop-rumble command */
void deviceDisconnected();
void initialCycle();
void transferCycle();

View File

@ -2,6 +2,7 @@
#define XINPUTPAD_HPP
#include "DeviceBase.hpp"
#include "DeviceSignature.hpp"
#include "boo/System.hpp"
namespace boo
@ -39,7 +40,7 @@ class XInputPad final : public TDeviceBase<IXInputPadCallback>
uint16_t m_rumbleRequest[2] = {};
uint16_t m_rumbleState[2] = {};
public:
XInputPad(DeviceToken* token) : TDeviceBase<IXInputPadCallback>(token) {}
XInputPad(DeviceToken* token) : TDeviceBase<IXInputPadCallback>(dev_typeid(XInputPad), token) {}
void deviceDisconnected()
{
std::lock_guard<std::mutex> lk(m_callbackLock);

View File

@ -134,8 +134,8 @@ public:
m_oldCoefs.v[i][0] = m_coefs.v[i][0];
m_oldCoefs.v[i][1] = m_coefs.v[i][1];
}
m_coefs.v[i][0] = coefs.v[i][0];
m_coefs.v[i][1] = coefs.v[i][1];
m_coefs.v[i][0] = coefs[i][0];
m_coefs.v[i][1] = coefs[i][1];
}
#endif
m_curSlewFrame = 0;

View File

@ -45,6 +45,7 @@ struct BaseGraphicsData : ListNode<BaseGraphicsData, GraphicsDataFactoryHead*>
__BooTraceFields
GraphicsDataNode<IShaderStage, BaseGraphicsData>* m_Ss = nullptr;
GraphicsDataNode<IShaderPipeline, BaseGraphicsData>* m_SPs = nullptr;
GraphicsDataNode<IShaderDataBinding, BaseGraphicsData>* m_SBinds = nullptr;
GraphicsDataNode<IGraphicsBufferS, BaseGraphicsData>* m_SBufs = nullptr;
@ -53,7 +54,6 @@ struct BaseGraphicsData : ListNode<BaseGraphicsData, GraphicsDataFactoryHead*>
GraphicsDataNode<ITextureSA, BaseGraphicsData>* m_SATexs = nullptr;
GraphicsDataNode<ITextureD, BaseGraphicsData>* m_DTexs = nullptr;
GraphicsDataNode<ITextureR, BaseGraphicsData>* m_RTexs = nullptr;
GraphicsDataNode<IVertexFormat, BaseGraphicsData>* m_VFmts = nullptr;
template<class T> GraphicsDataNode<T, BaseGraphicsData>*& getHead();
template<class T> size_t countForward()
{ auto* head = getHead<T>(); return head ? head->countForward() : 0; }
@ -65,6 +65,8 @@ struct BaseGraphicsData : ListNode<BaseGraphicsData, GraphicsDataFactoryHead*>
{}
};
template <> inline GraphicsDataNode<IShaderStage, BaseGraphicsData>*&
BaseGraphicsData::getHead<IShaderStage>() { return m_Ss; }
template <> inline GraphicsDataNode<IShaderPipeline, BaseGraphicsData>*&
BaseGraphicsData::getHead<IShaderPipeline>() { return m_SPs; }
template <> inline GraphicsDataNode<IShaderDataBinding, BaseGraphicsData>*&
@ -81,8 +83,6 @@ template <> inline GraphicsDataNode<ITextureD, BaseGraphicsData>*&
BaseGraphicsData::getHead<ITextureD>() { return m_DTexs; }
template <> inline GraphicsDataNode<ITextureR, BaseGraphicsData>*&
BaseGraphicsData::getHead<ITextureR>() { return m_RTexs; }
template <> inline GraphicsDataNode<IVertexFormat, BaseGraphicsData>*&
BaseGraphicsData::getHead<IVertexFormat>() { return m_VFmts; }
/** Private generalized pool container class.
* Keeps head pointer to exactly one dynamic buffer while otherwise conforming to BaseGraphicsData
@ -159,46 +159,6 @@ struct GraphicsDataNode : ListNode<GraphicsDataNode<NodeCls, DataCls>, ObjToken<
}
};
/** Hash table entry for owning sharable shader objects */
template <class FactoryImpl, class ShaderImpl>
class IShareableShader
{
std::atomic_int m_refCount = {0};
FactoryImpl& m_factory;
uint64_t m_srckey, m_binKey;
public:
IShareableShader(FactoryImpl& factory, uint64_t srcKey, uint64_t binKey)
: m_factory(factory), m_srckey(srcKey), m_binKey(binKey) {}
void increment() { m_refCount++; }
void decrement()
{
if (m_refCount.fetch_sub(1) == 1)
m_factory._unregisterShareableShader(m_srckey, m_binKey);
}
class Token
{
IShareableShader<FactoryImpl, ShaderImpl>* m_parent = nullptr;
public:
Token() = default;
Token(IShareableShader* p)
: m_parent(p)
{ m_parent->increment(); }
Token& operator=(const Token&) = delete;
Token(const Token&) = delete;
Token& operator=(Token&& other)
{ m_parent = other.m_parent; other.m_parent = nullptr; return *this; }
Token(Token&& other)
{ m_parent = other.m_parent; other.m_parent = nullptr; }
void reset() { if (m_parent) m_parent->decrement(); m_parent = nullptr; }
~Token() { if (m_parent) m_parent->decrement(); }
operator bool() const { return m_parent != nullptr; }
ShaderImpl& get() const { return static_cast<ShaderImpl&>(*m_parent); }
};
Token lock() { return Token(this); }
};
void UpdateGammaLUT(ITextureD* tex, float gamma);
}

View File

@ -12,7 +12,7 @@
#include <algorithm>
#include <atomic>
#include <forward_list>
#include "xxhash.h"
#include "xxhash/xxhash.h"
#undef min
#undef max

View File

@ -7,7 +7,10 @@
#include <array>
#include <unordered_map>
#include <unordered_set>
#include "xxhash.h"
#include "xxhash/xxhash.h"
#include "glslang/Public/ShaderLang.h"
#include "glslang/Include/Types.h"
#include "StandAlone/ResourceLimits.h"
#if _WIN32
#include "../win/WinCommon.hpp"
@ -60,21 +63,12 @@ namespace boo
static logvisor::Module Log("boo::GL");
class GLDataFactoryImpl;
struct GLShareableShader : IShareableShader<GLDataFactoryImpl, GLShareableShader>
{
GLuint m_shader = 0;
GLShareableShader(GLDataFactoryImpl& fac, uint64_t srcKey, GLuint s)
: IShareableShader(fac, srcKey, 0), m_shader(s) {}
~GLShareableShader() { glDeleteShader(m_shader); }
};
class GLDataFactoryImpl : public GLDataFactory, public GraphicsDataFactoryHead
{
friend struct GLCommandQueue;
friend class GLDataFactory::Context;
IGraphicsContext* m_parent;
GLContext* m_glCtx;
std::unordered_map<uint64_t, std::unique_ptr<GLShareableShader>> m_sharedShaders;
bool m_hasTessellation = false;
uint32_t m_maxPatchSize = 0;
@ -83,10 +77,13 @@ class GLDataFactoryImpl : public GLDataFactory, public GraphicsDataFactoryHead
ObjToken<IShaderPipeline> m_gammaShader;
ObjToken<ITextureD> m_gammaLUT;
ObjToken<IGraphicsBufferS> m_gammaVBO;
ObjToken<IVertexFormat> m_gammaVFMT;
ObjToken<IShaderDataBinding> m_gammaBinding;
void SetupGammaResources()
{
/* Good enough place for this */
if (!glslang::InitializeProcess())
Log.report(logvisor::Error, "unable to initialize glslang");
if (GLEW_ARB_tessellation_shader)
{
m_hasTessellation = true;
@ -97,10 +94,18 @@ class GLDataFactoryImpl : public GLDataFactory, public GraphicsDataFactoryHead
commitTransaction([this](IGraphicsDataFactory::Context& ctx)
{
const char* texNames[] = {"screenTex", "gammaLUT"};
m_gammaShader = static_cast<Context&>(ctx).newShaderPipeline(GammaVS, GammaFS,
2, texNames, 0, nullptr, BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None);
auto vertex = ctx.newShaderStage((uint8_t*)GammaVS, 0, PipelineStage::Vertex);
auto fragment = ctx.newShaderStage((uint8_t*)GammaFS, 0, PipelineStage::Fragment);
AdditionalPipelineInfo info =
{
BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None
};
const VertexElementDescriptor vfmt[] = {
{VertexSemantic::Position4},
{VertexSemantic::UV4}
};
m_gammaShader = ctx.newShaderPipeline(vertex, fragment, vfmt, info);
m_gammaLUT = ctx.newDynamicTexture(256, 256, TextureFormat::I16, TextureClampMode::ClampToEdge);
const struct Vert {
float pos[4];
@ -112,11 +117,9 @@ class GLDataFactoryImpl : public GLDataFactory, public GraphicsDataFactoryHead
{{ 1.f, 1.f, 0.f, 1.f}, {1.f, 1.f, 0.f, 0.f}}
};
m_gammaVBO = ctx.newStaticBuffer(BufferUse::Vertex, verts, 32, 4);
const VertexElementDescriptor vfmt[] = {
{m_gammaVBO.get(), nullptr, VertexSemantic::Position4},
{m_gammaVBO.get(), nullptr, VertexSemantic::UV4}
};
m_gammaVFMT = ctx.newVertexFormat(2, vfmt);
ObjToken<ITexture> texs[] = {{}, m_gammaLUT.get()};
m_gammaBinding = ctx.newShaderDataBinding(m_gammaShader, m_gammaVBO.get(), {}, {},
0, nullptr, nullptr, 2, texs, nullptr, nullptr);
return true;
} BooTrace);
}
@ -126,10 +129,9 @@ public:
: m_parent(parent), m_glCtx(glCtx) {}
Platform platform() const { return Platform::OpenGL; }
const SystemChar* platformName() const { return _S("OpenGL"); }
const SystemChar* platformName() const { return _SYS_STR("OpenGL"); }
void commitTransaction(const FactoryCommitFunc& trans __BooTraceArgs);
ObjToken<IGraphicsBufferD> newPoolBuffer(BufferUse use, size_t stride, size_t count __BooTraceArgs);
void _unregisterShareableShader(uint64_t srcKey, uint64_t binKey) { m_sharedShaders.erase(srcKey); }
void setDisplayGamma(float gamma)
{
@ -143,8 +145,6 @@ public:
maxPatchSizeOut = m_maxPatchSize;
return m_hasTessellation;
}
GLShareableShader::Token PrepareShaderStage(const char* source, GLenum stage);
};
static const GLenum USE_TABLE[] =
@ -705,14 +705,120 @@ static const GLenum BLEND_FACTOR_TABLE[] =
GL_ONE_MINUS_SRC1_COLOR
};
static const GLenum SHADER_STAGE_TABLE[] =
{
0,
GL_VERTEX_SHADER,
GL_FRAGMENT_SHADER,
GL_GEOMETRY_SHADER,
GL_TESS_CONTROL_SHADER,
GL_TESS_EVALUATION_SHADER
};
class GLShaderStage : public GraphicsDataNode<IShaderStage>
{
friend class GLDataFactory;
GLuint m_shad = 0;
std::vector<std::pair<std::string, int>> m_texNames;
std::vector<std::string> m_blockNames;
static constexpr EShLanguage ShaderTypes[] =
{
EShLangVertex,
EShLangVertex,
EShLangFragment,
EShLangGeometry,
EShLangTessControl,
EShLangTessEvaluation
};
/* Use glslang's reflection API to pull out uniform indices from Vulkan
* version of shader. Aids in glGetUniformBlockIndex and glGetUniformLocation calls */
void BuildNameLists(const char* source, PipelineStage stage)
{
EShLanguage lang = ShaderTypes[int(stage)];
const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
glslang::TShader shader(lang);
shader.setStrings(&source, 1);
if (!shader.parse(&glslang::DefaultTBuiltInResource, 110, false, messages))
{
printf("%s\n", source);
Log.report(logvisor::Fatal, "unable to compile shader\n%s", shader.getInfoLog());
}
glslang::TProgram prog;
prog.addShader(&shader);
if (!prog.link(messages))
{
Log.report(logvisor::Fatal, "unable to link shader program\n%s", prog.getInfoLog());
}
prog.buildReflection();
int count = prog.getNumLiveUniformVariables();
for (int i = 0; i < count; ++i)
{
const glslang::TType* tp = prog.getUniformTType(i);
if (tp->getBasicType() != glslang::TBasicType::EbtSampler)
continue;
const auto& qual = tp->getQualifier();
if (!qual.hasBinding())
Log.report(logvisor::Fatal, "shader uniform %s does not have layout binding", prog.getUniformName(i));
m_texNames.emplace_back(std::make_pair(prog.getUniformName(i),
qual.layoutBinding - BOO_GLSL_MAX_UNIFORM_COUNT));
}
count = prog.getNumLiveUniformBlocks();
m_blockNames.reserve(count);
for (int i = 0; i < count; ++i)
m_blockNames.emplace_back(prog.getUniformBlockName(i));
}
GLShaderStage(const ObjToken<BaseGraphicsData>& parent, const char* source, PipelineStage stage)
: GraphicsDataNode<IShaderStage>(parent)
{
BuildNameLists(source, stage);
m_shad = glCreateShader(SHADER_STAGE_TABLE[int(stage)]);
if (!m_shad)
{
Log.report(logvisor::Fatal, "unable to create shader");
return;
}
glShaderSource(m_shad, 1, &source, nullptr);
glCompileShader(m_shad);
GLint status;
glGetShaderiv(m_shad, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE)
{
GLint logLen;
glGetShaderiv(m_shad, GL_INFO_LOG_LENGTH, &logLen);
std::unique_ptr<char[]> log(new char[logLen]);
glGetShaderInfoLog(m_shad, logLen, nullptr, log.get());
Log.report(logvisor::Fatal, "unable to compile source\n%s\n%s\n", log.get(), source);
return;
}
}
public:
~GLShaderStage() { if (m_shad) glDeleteShader(m_shad); }
GLuint getShader() const { return m_shad; }
const std::vector<std::pair<std::string, int>>& getTexNames() const { return m_texNames; }
const std::vector<std::string>& getBlockNames() const { return m_blockNames; }
};
class GLShaderPipeline : public GraphicsDataNode<IShaderPipeline>
{
protected:
friend class GLDataFactory;
friend struct GLCommandQueue;
friend struct GLShaderDataBinding;
mutable GLShareableShader::Token m_vert;
mutable GLShareableShader::Token m_frag;
mutable ObjToken<IShaderStage> m_vertex;
mutable ObjToken<IShaderStage> m_fragment;
mutable ObjToken<IShaderStage> m_geometry;
mutable ObjToken<IShaderStage> m_control;
mutable ObjToken<IShaderStage> m_evaluation;
std::vector<VertexElementDescriptor> m_elements;
size_t baseVert = 0;
size_t baseInst = 0;
mutable GLuint m_prog = 0;
GLenum m_sfactor = GL_ONE;
GLenum m_dfactor = GL_ZERO;
@ -724,26 +830,12 @@ protected:
bool m_subtractBlend = false;
bool m_overwriteAlpha = false;
CullMode m_culling;
uint32_t m_patchSize = 0;
mutable std::vector<GLint> m_uniLocs;
mutable std::vector<std::string> m_texNames;
mutable std::vector<std::string> m_blockNames;
GLShaderPipeline(const ObjToken<BaseGraphicsData>& parent,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
GLShaderPipeline(const ObjToken<BaseGraphicsData>& parent, const AdditionalPipelineInfo& info)
: GraphicsDataNode<IShaderPipeline>(parent)
{
m_texNames.reserve(texCount);
for (int i=0 ; i<texCount ; ++i)
m_texNames.emplace_back(texNames[i]);
m_blockNames.reserve(uniformBlockCount);
for (int i=0 ; i<uniformBlockCount ; ++i)
m_blockNames.emplace_back(uniformBlockNames[i]);
if (srcFac == BlendFactor::Subtract || dstFac == BlendFactor::Subtract)
if (info.srcFac == BlendFactor::Subtract || info.dstFac == BlendFactor::Subtract)
{
m_sfactor = GL_SRC_ALPHA;
m_dfactor = GL_ONE;
@ -751,26 +843,23 @@ protected:
}
else
{
m_sfactor = BLEND_FACTOR_TABLE[int(srcFac)];
m_dfactor = BLEND_FACTOR_TABLE[int(dstFac)];
m_sfactor = BLEND_FACTOR_TABLE[int(info.srcFac)];
m_dfactor = BLEND_FACTOR_TABLE[int(info.dstFac)];
m_subtractBlend = false;
}
m_depthTest = depthTest;
m_depthWrite = depthWrite;
m_colorWrite = colorWrite;
m_alphaWrite = alphaWrite;
m_overwriteAlpha = overwriteAlpha;
m_culling = culling;
m_drawPrim = PRIMITIVE_TABLE[int(prim)];
m_depthTest = info.depthTest;
m_depthWrite = info.depthWrite;
m_colorWrite = info.colorWrite;
m_alphaWrite = info.alphaWrite;
m_overwriteAlpha = info.overwriteAlpha;
m_culling = info.culling;
m_drawPrim = PRIMITIVE_TABLE[int(info.prim)];
m_patchSize = info.patchSize;
}
public:
~GLShaderPipeline() { if (m_prog) glDeleteProgram(m_prog); }
virtual void attachExtraStages() const {}
virtual void resetExtraStages() const {}
virtual void setExtraParameters() const {}
GLuint bind() const
{
if (!m_prog)
@ -782,18 +871,29 @@ public:
return 0;
}
glAttachShader(m_prog, m_vert.get().m_shader);
glAttachShader(m_prog, m_frag.get().m_shader);
attachExtraStages();
if (m_vertex)
glAttachShader(m_prog, m_vertex.cast<GLShaderStage>()->getShader());
if (m_fragment)
glAttachShader(m_prog, m_fragment.cast<GLShaderStage>()->getShader());
if (m_geometry)
glAttachShader(m_prog, m_geometry.cast<GLShaderStage>()->getShader());
if (m_control)
glAttachShader(m_prog, m_control.cast<GLShaderStage>()->getShader());
if (m_evaluation)
glAttachShader(m_prog, m_evaluation.cast<GLShaderStage>()->getShader());
glLinkProgram(m_prog);
glDetachShader(m_prog, m_vert.get().m_shader);
glDetachShader(m_prog, m_frag.get().m_shader);
m_vert.reset();
m_frag.reset();
resetExtraStages();
if (m_vertex)
glDetachShader(m_prog, m_vertex.cast<GLShaderStage>()->getShader());
if (m_fragment)
glDetachShader(m_prog, m_fragment.cast<GLShaderStage>()->getShader());
if (m_geometry)
glDetachShader(m_prog, m_geometry.cast<GLShaderStage>()->getShader());
if (m_control)
glDetachShader(m_prog, m_control.cast<GLShaderStage>()->getShader());
if (m_evaluation)
glDetachShader(m_prog, m_evaluation.cast<GLShaderStage>()->getShader());
GLint status;
glGetProgramiv(m_prog, GL_LINK_STATUS, &status);
@ -809,31 +909,33 @@ public:
glUseProgram(m_prog);
if (m_blockNames.size())
for (const auto& shader : {m_vertex, m_fragment, m_geometry, m_control, m_evaluation})
{
m_uniLocs.reserve(m_blockNames.size());
for (size_t i=0 ; i<m_blockNames.size() ; ++i)
if (const GLShaderStage* stage = shader.cast<GLShaderStage>())
{
GLint uniLoc = glGetUniformBlockIndex(m_prog, m_blockNames[i].c_str());
//if (uniLoc < 0)
// Log.report(logvisor::Warning, "unable to find uniform block '%s'", uniformBlockNames[i]);
m_uniLocs.push_back(uniLoc);
for (const auto& name : stage->getBlockNames())
{
GLint uniLoc = glGetUniformBlockIndex(m_prog, name.c_str());
//if (uniLoc < 0)
// Log.report(logvisor::Warning, "unable to find uniform block '%s'", uniformBlockNames[i]);
m_uniLocs.push_back(uniLoc);
}
for (const auto& name : stage->getTexNames())
{
GLint texLoc = glGetUniformLocation(m_prog, name.first.c_str());
if (texLoc < 0)
{ /* Log.report(logvisor::Warning, "unable to find sampler variable '%s'", texNames[i]); */ }
else
glUniform1i(texLoc, name.second);
}
}
m_blockNames = std::vector<std::string>();
}
if (m_texNames.size())
{
for (int i=0 ; i<m_texNames.size() ; ++i)
{
GLint texLoc = glGetUniformLocation(m_prog, m_texNames[i].c_str());
if (texLoc < 0)
{ /* Log.report(logvisor::Warning, "unable to find sampler variable '%s'", texNames[i]); */ }
else
glUniform1i(texLoc, i);
}
m_texNames = std::vector<std::string>();
}
m_vertex.reset();
m_fragment.reset();
m_geometry.reset();
m_control.reset();
m_evaluation.reset();
}
else
{
@ -889,161 +991,65 @@ public:
else
glDisable(GL_CULL_FACE);
setExtraParameters();
glPatchParameteri(GL_PATCH_VERTICES, m_patchSize);
return m_prog;
}
};
class GLTessellationShaderPipeline : public GLShaderPipeline
{
friend class GLDataFactory;
friend struct GLCommandQueue;
friend struct GLShaderDataBinding;
GLint m_patchSize;
mutable GLShareableShader::Token m_control;
mutable GLShareableShader::Token m_evaluation;
GLTessellationShaderPipeline(const ObjToken<BaseGraphicsData>& parent,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
: GLShaderPipeline(parent, texCount, texNames, uniformBlockCount, uniformBlockNames,
srcFac, dstFac, Primitive::Patches, depthTest, depthWrite, colorWrite,
alphaWrite, culling, overwriteAlpha), m_patchSize(patchSize)
{}
public:
~GLTessellationShaderPipeline() = default;
void attachExtraStages() const
{
glAttachShader(m_prog, m_control.get().m_shader);
glAttachShader(m_prog, m_evaluation.get().m_shader);
}
void resetExtraStages() const
{
glDetachShader(m_prog, m_control.get().m_shader);
glDetachShader(m_prog, m_evaluation.get().m_shader);
m_control.reset();
m_evaluation.reset();
}
void setExtraParameters() const
{
glPatchParameteri(GL_PATCH_VERTICES, m_patchSize);
}
};
GLShareableShader::Token GLDataFactoryImpl::PrepareShaderStage(const char* source, GLenum stage)
{
XXH64_state_t hashState;
XXH64_reset(&hashState, 0);
XXH64_update(&hashState, source, strlen(source));
uint64_t hash = XXH64_digest(&hashState);
GLint status;
auto search = m_sharedShaders.find(hash);
if (search != m_sharedShaders.end())
{
return search->second->lock();
}
else
{
GLuint sobj = glCreateShader(stage);
if (!sobj)
{
Log.report(logvisor::Fatal, "unable to create shader");
return {};
}
glShaderSource(sobj, 1, &source, nullptr);
glCompileShader(sobj);
glGetShaderiv(sobj, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE)
{
GLint logLen;
glGetShaderiv(sobj, GL_INFO_LOG_LENGTH, &logLen);
std::unique_ptr<char[]> log(new char[logLen]);
glGetShaderInfoLog(sobj, logLen, nullptr, log.get());
Log.report(logvisor::Fatal, "unable to compile source\n%s\n%s\n", log.get(), source);
return {};
}
auto it =
m_sharedShaders.emplace(std::make_pair(hash,
std::make_unique<GLShareableShader>(*this, hash, sobj))).first;
return it->second->lock();
}
}
ObjToken<IShaderPipeline> GLDataFactory::Context::newShaderPipeline
(const char* vertSource, const char* fragSource,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
ObjToken<IShaderStage>
GLDataFactory::Context::newShaderStage(const uint8_t* data, size_t size, PipelineStage stage)
{
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
ObjToken<IShaderPipeline> retval(new GLShaderPipeline(
m_data, texCount, texNames, uniformBlockCount, uniformBlockNames, srcFac, dstFac, prim,
depthTest, depthWrite, colorWrite, alphaWrite, culling, overwriteAlpha));
if (stage == PipelineStage::Control || stage == PipelineStage::Evaluation)
{
if (!factory.m_hasTessellation)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
}
return {new GLShaderStage(m_data, (char*)data, stage)};
}
ObjToken<IShaderPipeline>
GLDataFactory::Context::newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo)
{
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
if (control || evaluation)
{
if (!factory.m_hasTessellation)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
if (additionalInfo.patchSize > factory.m_maxPatchSize)
Log.report(logvisor::Fatal, "Device supports %d patch vertices, %d requested",
int(factory.m_maxPatchSize), int(additionalInfo.patchSize));
}
ObjToken<IShaderPipeline> retval(new GLShaderPipeline(m_data, additionalInfo));
GLShaderPipeline& shader = *retval.cast<GLShaderPipeline>();
shader.m_vert = factory.PrepareShaderStage(vertSource, GL_VERTEX_SHADER);
shader.m_frag = factory.PrepareShaderStage(fragSource, GL_FRAGMENT_SHADER);
shader.m_vertex = vertex;
shader.m_fragment = fragment;
shader.m_geometry = geometry;
shader.m_control = control;
shader.m_evaluation = evaluation;
shader.m_elements.reserve(vtxFmt.elementCount);
for (size_t i=0 ; i<vtxFmt.elementCount ; ++i)
shader.m_elements.push_back(vtxFmt.elements[i]);
return retval;
}
ObjToken<IShaderPipeline> GLDataFactory::Context::newTessellationShaderPipeline
(const char* vertSource, const char* fragSource,
const char* controlSource, const char* evaluationSource,
size_t texCount, const char** texNames,
size_t uniformBlockCount, const char** uniformBlockNames,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
{
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
if (!factory.m_hasTessellation)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
if (patchSize > factory.m_maxPatchSize)
Log.report(logvisor::Fatal, "Device supports %d patch vertices, %d requested",
int(factory.m_maxPatchSize), int(patchSize));
ObjToken<IShaderPipeline> retval(new GLTessellationShaderPipeline(
m_data, texCount, texNames, uniformBlockCount, uniformBlockNames, srcFac, dstFac, patchSize,
depthTest, depthWrite, colorWrite, alphaWrite, culling, overwriteAlpha));
GLTessellationShaderPipeline& shader = *retval.cast<GLTessellationShaderPipeline>();
shader.m_vert = factory.PrepareShaderStage(vertSource, GL_VERTEX_SHADER);
shader.m_frag = factory.PrepareShaderStage(fragSource, GL_FRAGMENT_SHADER);
shader.m_control = factory.PrepareShaderStage(controlSource, GL_TESS_CONTROL_SHADER);
shader.m_evaluation = factory.PrepareShaderStage(evaluationSource, GL_TESS_EVALUATION_SHADER);
return retval;
}
struct GLVertexFormat : GraphicsDataNode<IVertexFormat>
{
GLuint m_vao[3] = {};
GLuint m_baseVert, m_baseInst;
std::vector<VertexElementDescriptor> m_elements;
GLVertexFormat(const ObjToken<BaseGraphicsData>& parent, GLCommandQueue* q,
size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert, size_t baseInst);
~GLVertexFormat() { glDeleteVertexArrays(3, m_vao); }
void bind(int idx) const { glBindVertexArray(m_vao[idx]); }
};
struct GLShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
{
ObjToken<IShaderPipeline> m_pipeline;
ObjToken<IVertexFormat> m_vtxFormat;
ObjToken<IGraphicsBuffer> m_vbo;
ObjToken<IGraphicsBuffer> m_instVbo;
ObjToken<IGraphicsBuffer> m_ibo;
std::vector<ObjToken<IGraphicsBuffer>> m_ubufs;
std::vector<std::pair<size_t,size_t>> m_ubufOffs;
struct BoundTex
@ -1053,18 +1059,22 @@ struct GLShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
bool depth;
};
std::vector<BoundTex> m_texs;
size_t m_baseVert;
size_t m_baseInst;
GLuint m_vao[3] = {};
GLShaderDataBinding(const ObjToken<BaseGraphicsData>& d,
const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IVertexFormat>& vtxFormat,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,
size_t ubufCount, const ObjToken<IGraphicsBuffer>* ubufs,
const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, const ObjToken<ITexture>* texs,
const int* bindTexIdx,
const bool* depthBind)
const int* bindTexIdx, const bool* depthBind,
size_t baseVert, size_t baseInst)
: GraphicsDataNode<IShaderDataBinding>(d),
m_pipeline(pipeline),
m_vtxFormat(vtxFormat)
m_pipeline(pipeline), m_vbo(vbo), m_instVbo(instVbo), m_ibo(ibo), m_baseVert(baseVert), m_baseInst(baseInst)
{
if (ubufOffs && ubufSizes)
{
@ -1093,11 +1103,17 @@ struct GLShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
m_texs.push_back({texs[i], bindTexIdx ? bindTexIdx[i] : 0, depthBind ? depthBind[i] : false});
}
}
~GLShaderDataBinding()
{
glDeleteVertexArrays(3, m_vao);
}
void bind(int b) const
{
GLShaderPipeline& pipeline = *m_pipeline.cast<GLShaderPipeline>();
GLuint prog = pipeline.bind();
m_vtxFormat.cast<GLVertexFormat>()->bind(b);
glBindVertexArray(m_vao[b]);
if (m_ubufOffs.size())
{
for (size_t i=0 ; i<m_ubufs.size() && i<pipeline.m_uniLocs.size() ; ++i)
@ -1155,23 +1171,6 @@ struct GLShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
}
};
ObjToken<IShaderDataBinding>
GLDataFactory::Context::newShaderDataBinding(const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IVertexFormat>& vtxFormat,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,
size_t ubufCount, const ObjToken<IGraphicsBuffer>* ubufs,
const PipelineStage* ubufStages,
const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, const ObjToken<ITexture>* texs,
const int* texBindIdx, const bool* depthBind,
size_t baseVert, size_t baseInst)
{
return {new GLShaderDataBinding(m_data, pipeline, vtxFormat, ubufCount, ubufs,
ubufOffs, ubufSizes, texCount, texs, texBindIdx, depthBind)};
}
GLDataFactory::Context::Context(GLDataFactory& parent __BooTraceArgs)
: m_parent(parent), m_data(new BaseGraphicsData(static_cast<GLDataFactoryImpl&>(parent) __BooTraceArgsUse))
{}
@ -1244,7 +1243,7 @@ static const GLenum SEMANTIC_TYPE_TABLE[] =
struct GLCommandQueue : IGraphicsCommandQueue
{
Platform platform() const { return IGraphicsDataFactory::Platform::OpenGL; }
const SystemChar* platformName() const { return _S("OpenGL"); }
const SystemChar* platformName() const { return _SYS_STR("OpenGL"); }
IGraphicsContext* m_parent = nullptr;
GLContext* m_glCtx = nullptr;
@ -1318,18 +1317,19 @@ struct GLCommandQueue : IGraphicsCommandQueue
std::vector<RenderTextureResize> m_pendingResizes;
std::vector<std::function<void(void)>> m_pendingPosts1;
std::vector<std::function<void(void)>> m_pendingPosts2;
std::vector<ObjToken<IVertexFormat>> m_pendingFmtAdds;
std::vector<ObjToken<IShaderDataBinding>> m_pendingFmtAdds;
std::vector<ObjToken<ITextureR>> m_pendingFboAdds;
static void ConfigureVertexFormat(GLVertexFormat* fmt)
static void ConfigureVertexFormat(GLShaderDataBinding* fmt)
{
glGenVertexArrays(3, fmt->m_vao);
size_t stride = 0;
size_t instStride = 0;
for (size_t i=0 ; i<fmt->m_elements.size() ; ++i)
auto pipeline = fmt->m_pipeline.cast<GLShaderPipeline>();
for (size_t i=0 ; i<pipeline->m_elements.size() ; ++i)
{
const VertexElementDescriptor& desc = fmt->m_elements[i];
const VertexElementDescriptor& desc = pipeline->m_elements[i];
if ((desc.semantic & VertexSemantic::Instanced) != VertexSemantic::None)
instStride += SEMANTIC_SIZE_TABLE[int(desc.semantic & VertexSemantic::SemanticMask)];
else
@ -1343,20 +1343,23 @@ struct GLCommandQueue : IGraphicsCommandQueue
glBindVertexArray(fmt->m_vao[b]);
IGraphicsBuffer* lastVBO = nullptr;
IGraphicsBuffer* lastEBO = nullptr;
for (size_t i=0 ; i<fmt->m_elements.size() ; ++i)
for (size_t i=0 ; i<pipeline->m_elements.size() ; ++i)
{
const VertexElementDescriptor& desc = fmt->m_elements[i];
if (desc.vertBuffer.get() != lastVBO)
const VertexElementDescriptor& desc = pipeline->m_elements[i];
IGraphicsBuffer* vbo = (desc.semantic & VertexSemantic::Instanced) != VertexSemantic::None
? fmt->m_instVbo.get() : fmt->m_vbo.get();
IGraphicsBuffer* ebo = fmt->m_ibo.get();
if (vbo != lastVBO)
{
lastVBO = desc.vertBuffer.get();
lastVBO = vbo;
if (lastVBO->dynamic())
static_cast<GLGraphicsBufferD<BaseGraphicsData>*>(lastVBO)->bindVertex(b);
else
static_cast<GLGraphicsBufferS*>(lastVBO)->bindVertex();
}
if (desc.indexBuffer.get() != lastEBO)
if (ebo != lastEBO)
{
lastEBO = desc.indexBuffer.get();
lastEBO = ebo;
if (lastEBO->dynamic())
static_cast<GLGraphicsBufferD<BaseGraphicsData>*>(lastEBO)->bindIndex(b);
else
@ -1474,8 +1477,8 @@ struct GLCommandQueue : IGraphicsCommandQueue
if (self->m_pendingFmtAdds.size())
{
for (ObjToken<IVertexFormat>& fmt : self->m_pendingFmtAdds)
if (fmt) ConfigureVertexFormat(fmt.cast<GLVertexFormat>());
for (ObjToken<IShaderDataBinding>& fmt : self->m_pendingFmtAdds)
if (fmt) ConfigureVertexFormat(fmt.cast<GLShaderDataBinding>());
self->m_pendingFmtAdds.clear();
}
@ -1628,9 +1631,7 @@ struct GLCommandQueue : IGraphicsCommandQueue
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
dataFactory->m_gammaShader.cast<GLShaderPipeline>()->bind();
dataFactory->m_gammaVFMT.cast<GLVertexFormat>()->bind(self->m_drawBuf);
dataFactory->m_gammaLUT.cast<GLTextureD>()->bind(1, self->m_drawBuf);
dataFactory->m_gammaBinding.cast<GLShaderDataBinding>()->bind(self->m_drawBuf);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
else
@ -1809,7 +1810,7 @@ struct GLCommandQueue : IGraphicsCommandQueue
cmds.back().source = source;
}
void addVertexFormat(const ObjToken<IVertexFormat>& fmt)
void addVertexFormat(const ObjToken<IShaderDataBinding>& fmt)
{
std::unique_lock<std::mutex> lk(m_mt);
m_pendingFmtAdds.push_back(fmt);
@ -1955,25 +1956,24 @@ GLDataFactory::Context::newRenderTexture(size_t width, size_t height, TextureCla
return retval;
}
GLVertexFormat::GLVertexFormat(const ObjToken<BaseGraphicsData>& parent, GLCommandQueue* q,
size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert, size_t baseInst)
: GraphicsDataNode<IVertexFormat>(parent),
m_baseVert(baseVert), m_baseInst(baseInst)
{
m_elements.reserve(elementCount);
for (size_t i=0 ; i<elementCount ; ++i)
m_elements.push_back(elements[i]);
q->addVertexFormat(this);
}
ObjToken<IVertexFormat> GLDataFactory::Context::newVertexFormat
(size_t elementCount, const VertexElementDescriptor* elements,
size_t baseVert, size_t baseInst)
ObjToken<IShaderDataBinding>
GLDataFactory::Context::newShaderDataBinding(const ObjToken<IShaderPipeline>& pipeline,
const ObjToken<IGraphicsBuffer>& vbo,
const ObjToken<IGraphicsBuffer>& instVbo,
const ObjToken<IGraphicsBuffer>& ibo,
size_t ubufCount, const ObjToken<IGraphicsBuffer>* ubufs,
const PipelineStage* ubufStages,
const size_t* ubufOffs, const size_t* ubufSizes,
size_t texCount, const ObjToken<ITexture>* texs,
const int* texBindIdx, const bool* depthBind,
size_t baseVert, size_t baseInst)
{
GLDataFactoryImpl& factory = static_cast<GLDataFactoryImpl&>(m_parent);
GLCommandQueue* q = static_cast<GLCommandQueue*>(factory.m_parent->getCommandQueue());
return {new GLVertexFormat(m_data, q, elementCount, elements, baseVert, baseInst)};
ObjToken<GLShaderDataBinding> ret = {new GLShaderDataBinding(m_data, pipeline, vbo, instVbo, ibo, ubufCount, ubufs,
ubufOffs, ubufSizes, texCount, texs, texBindIdx, depthBind, baseVert, baseInst)};
q->addVertexFormat(ret.get());
return ret.get();
}
std::unique_ptr<IGraphicsCommandQueue> _NewGLCommandQueue(IGraphicsContext* parent, GLContext* glCtx)
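(For reference, a minimal caller-side sketch of the refactored GL binding path -- illustrative only, not part of this commit. With the separate IVertexFormat object gone, vertex/instance/index buffers are handed straight to the binding; `ctx` is assumed to be a GLDataFactory::Context inside a transaction, and `pipeline`, `vbo`, `ibo` and `tex` are assumed to come from earlier calls in that same transaction.)

// Hypothetical usage, mirroring the newShaderDataBinding signature above:
boo::ObjToken<boo::ITexture> texs[] = { tex.get() };
auto binding = ctx.newShaderDataBinding(pipeline,
                                        vbo.get(), /*instVbo*/ {}, ibo.get(),
                                        /*ubufCount*/ 0, /*ubufs*/ nullptr, /*ubufStages*/ nullptr,
                                        /*ubufOffs*/ nullptr, /*ubufSizes*/ nullptr,
                                        /*texCount*/ 1, texs, /*texBindIdx*/ nullptr, /*depthBind*/ nullptr,
                                        /*baseVert*/ 0, /*baseInst*/ 0);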

View File

@ -8,7 +8,7 @@
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include "xxhash.h"
#include "xxhash/xxhash.h"
#if !__has_feature(objc_arc)
#error ARC Required

View File

@ -9,7 +9,7 @@
#include <SPIRV/disassemble.h>
#include "boo/graphicsdev/GLSLMacros.hpp"
#include "Common.hpp"
#include "xxhash.h"
#include "xxhash/xxhash.h"
#define AMD_PAL_HACK 1
@ -70,15 +70,6 @@ class VulkanDataFactoryImpl;
struct VulkanCommandQueue;
struct VulkanDescriptorPool;
struct VulkanShareableShader : IShareableShader<VulkanDataFactoryImpl, VulkanShareableShader>
{
VkShaderModule m_shader;
VulkanShareableShader(VulkanDataFactoryImpl& fac, uint64_t srcKey, uint64_t binKey,
VkShaderModule s)
: IShareableShader(fac, srcKey, binKey), m_shader(s) {}
~VulkanShareableShader() { vk::DestroyShaderModule(g_VulkanContext.m_dev, m_shader, nullptr); }
};
class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFactoryHead
{
friend struct VulkanCommandQueue;
@ -90,28 +81,30 @@ class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFacto
IGraphicsContext* m_parent;
VulkanContext* m_ctx;
VulkanDescriptorPool* m_descPoolHead = nullptr;
std::unordered_map<uint64_t, std::unique_ptr<VulkanShareableShader>> m_sharedShaders;
std::unordered_map<uint64_t, uint64_t> m_sourceToBinary;
std::vector<int> m_texUnis;
float m_gamma = 1.f;
ObjToken<IShaderPipeline> m_gammaShader;
ObjToken<ITextureD> m_gammaLUT;
ObjToken<IGraphicsBufferS> m_gammaVBO;
ObjToken<IVertexFormat> m_gammaVFMT;
ObjToken<IShaderDataBinding> m_gammaBinding;
void SetupGammaResources()
{
commitTransaction([this](IGraphicsDataFactory::Context& ctx)
{
auto vertexSpirv = VulkanDataFactory::CompileGLSL(GammaVS, PipelineStage::Vertex);
auto vertexShader = ctx.newShaderStage(vertexSpirv, PipelineStage::Vertex);
auto fragmentSpirv = VulkanDataFactory::CompileGLSL(GammaFS, PipelineStage::Fragment);
auto fragmentShader = ctx.newShaderStage(fragmentSpirv, PipelineStage::Fragment);
const VertexElementDescriptor vfmt[] = {
{nullptr, nullptr, VertexSemantic::Position4},
{nullptr, nullptr, VertexSemantic::UV4}
{VertexSemantic::Position4},
{VertexSemantic::UV4}
};
m_gammaVFMT = ctx.newVertexFormat(2, vfmt);
m_gammaShader = static_cast<Context&>(ctx).newShaderPipeline(GammaVS, GammaFS,
m_gammaVFMT, BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None);
AdditionalPipelineInfo info =
{
BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, ZTest::None, false, true, false, CullMode::None
};
m_gammaShader = ctx.newShaderPipeline(vertexShader, fragmentShader, vfmt, info);
m_gammaLUT = ctx.newDynamicTexture(256, 256, TextureFormat::I16, TextureClampMode::ClampToEdge);
setDisplayGamma(1.f);
const struct Vert {
@ -125,7 +118,7 @@ class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFacto
};
m_gammaVBO = ctx.newStaticBuffer(BufferUse::Vertex, verts, 32, 4);
ObjToken<ITexture> texs[] = {{}, m_gammaLUT.get()};
m_gammaBinding = ctx.newShaderDataBinding(m_gammaShader, m_gammaVFMT, m_gammaVBO.get(), {}, {},
m_gammaBinding = ctx.newShaderDataBinding(m_gammaShader, m_gammaVBO.get(), {}, {},
0, nullptr, nullptr, 2, texs, nullptr, nullptr);
return true;
} BooTrace);
@ -134,7 +127,6 @@ class VulkanDataFactoryImpl : public VulkanDataFactory, public GraphicsDataFacto
void DestroyGammaResources()
{
m_gammaBinding.reset();
m_gammaVFMT.reset();
m_gammaVBO.reset();
m_gammaLUT.reset();
m_gammaShader.reset();
@ -148,7 +140,7 @@ public:
}
Platform platform() const {return Platform::Vulkan;}
const SystemChar* platformName() const {return _S("Vulkan");}
const SystemChar* platformName() const {return _SYS_STR("Vulkan");}
boo::ObjToken<VulkanDescriptorPool> allocateDescriptorSets(VkDescriptorSet* out);
@ -156,13 +148,6 @@ public:
boo::ObjToken<IGraphicsBufferD> newPoolBuffer(BufferUse use, size_t stride, size_t count __BooTraceArgs);
void _unregisterShareableShader(uint64_t srcKey, uint64_t binKey)
{
if (srcKey)
m_sourceToBinary.erase(srcKey);
m_sharedShaders.erase(binKey);
}
void setDisplayGamma(float gamma)
{
m_gamma = gamma;
@ -178,10 +163,6 @@ public:
maxPatchSizeOut = m_ctx->m_gpuProps.limits.maxTessellationPatchSize;
return true;
}
VulkanShareableShader::Token PrepareShaderStage(const char* source, std::vector<unsigned int>* blobOut,
EShLanguage lang);
uint64_t Compile(std::vector<unsigned int>& out, const char* source, uint64_t srcKey, EShLanguage lang);
};
static inline void ThrowIfFailed(VkResult res)
@ -433,6 +414,7 @@ bool VulkanContext::initVulkan(std::string_view appName, PFN_vkGetInstanceProcAd
#ifndef NDEBUG
m_layerNames.push_back("VK_LAYER_LUNARG_standard_validation");
//m_layerNames.push_back("VK_LAYER_RENDERDOC_Capture");
//m_layerNames.push_back("VK_LAYER_LUNARG_api_dump");
//m_layerNames.push_back("VK_LAYER_LUNARG_core_validation");
//m_layerNames.push_back("VK_LAYER_LUNARG_object_tracker");
@ -1282,7 +1264,7 @@ class VulkanGraphicsBufferS : public GraphicsDataNode<IGraphicsBufferS>
VulkanGraphicsBufferS(const boo::ObjToken<BaseGraphicsData>& parent, BufferUse use,
VulkanContext* ctx, const void* data, size_t stride, size_t count)
: GraphicsDataNode<IGraphicsBufferS>(parent),
m_ctx(ctx), m_stride(stride), m_count(count), m_sz(stride * count),
m_ctx(ctx), m_sz(stride * count),
m_stagingBuf(new uint8_t[m_sz]), m_use(use)
{
memmove(m_stagingBuf.get(), data, m_sz);
@ -1290,8 +1272,6 @@ class VulkanGraphicsBufferS : public GraphicsDataNode<IGraphicsBufferS>
}
public:
size_t size() const {return m_sz;}
size_t m_stride;
size_t m_count;
VkDescriptorBufferInfo m_bufferInfo;
BufferUse m_use;
@ -1331,8 +1311,7 @@ class VulkanGraphicsBufferD : public GraphicsDataNode<IGraphicsBufferD, DataCls>
VulkanGraphicsBufferD(const boo::ObjToken<DataCls>& parent, BufferUse use,
VulkanContext* ctx, size_t stride, size_t count)
: GraphicsDataNode<IGraphicsBufferD, DataCls>(parent),
m_ctx(ctx), m_stride(stride), m_count(count),
m_cpuSz(stride * count), m_cpuBuf(new uint8_t[m_cpuSz]), m_use(use)
m_ctx(ctx), m_cpuSz(stride * count), m_cpuBuf(new uint8_t[m_cpuSz]), m_use(use)
{
m_bufferInfo[0].range = m_cpuSz;
m_bufferInfo[1].range = m_cpuSz;
@ -1340,8 +1319,6 @@ class VulkanGraphicsBufferD : public GraphicsDataNode<IGraphicsBufferD, DataCls>
void update(int b);
public:
size_t m_stride;
size_t m_count;
VkDescriptorBufferInfo m_bufferInfo[2];
uint8_t* m_bufferPtrs[2] = {};
BufferUse m_use;
@ -2105,7 +2082,7 @@ static const VkFormat SEMANTIC_TYPE_TABLE[] =
VK_FORMAT_R32G32B32A32_SFLOAT
};
struct VulkanVertexFormat : GraphicsDataNode<IVertexFormat>
struct VulkanVertexFormat
{
VkVertexInputBindingDescription m_bindings[2];
std::unique_ptr<VkVertexInputAttributeDescription[]> m_attributes;
@ -2113,22 +2090,20 @@ struct VulkanVertexFormat : GraphicsDataNode<IVertexFormat>
size_t m_stride = 0;
size_t m_instStride = 0;
VulkanVertexFormat(const boo::ObjToken<BaseGraphicsData>& parent, size_t elementCount,
const VertexElementDescriptor* elements)
: GraphicsDataNode<IVertexFormat>(parent),
m_attributes(new VkVertexInputAttributeDescription[elementCount])
VulkanVertexFormat(const VertexFormatInfo& info)
: m_attributes(new VkVertexInputAttributeDescription[info.elementCount])
{
m_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
m_info.pNext = nullptr;
m_info.flags = 0;
m_info.vertexBindingDescriptionCount = 0;
m_info.pVertexBindingDescriptions = m_bindings;
m_info.vertexAttributeDescriptionCount = elementCount;
m_info.vertexAttributeDescriptionCount = info.elementCount;
m_info.pVertexAttributeDescriptions = m_attributes.get();
for (size_t i=0 ; i<elementCount ; ++i)
for (size_t i=0 ; i<info.elementCount ; ++i)
{
const VertexElementDescriptor* elemin = &elements[i];
const VertexElementDescriptor* elemin = &info.elements[i];
VkVertexInputAttributeDescription& attribute = m_attributes[i];
int semantic = int(elemin->semantic & boo::VertexSemantic::SemanticMask);
attribute.location = i;
@ -2187,6 +2162,32 @@ static const VkBlendFactor BLEND_FACTOR_TABLE[] =
VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR
};
class VulkanShaderStage : public GraphicsDataNode<IShaderStage>
{
friend class VulkanDataFactory;
VulkanContext* m_ctx;
VkShaderModule m_module;
VulkanShaderStage(const boo::ObjToken<BaseGraphicsData>& parent, VulkanContext* ctx,
const uint8_t* data, size_t size, PipelineStage stage)
: GraphicsDataNode<IShaderStage>(parent), m_ctx(ctx)
{
VkShaderModuleCreateInfo smCreateInfo = {};
smCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
smCreateInfo.pNext = nullptr;
smCreateInfo.flags = 0;
smCreateInfo.codeSize = size;
smCreateInfo.pCode = (uint32_t*)data;
ThrowIfFailed(vk::CreateShaderModule(m_ctx->m_dev, &smCreateInfo, nullptr, &m_module));
}
public:
~VulkanShaderStage()
{
vk::DestroyShaderModule(m_ctx->m_dev, m_module, nullptr);
}
VkShaderModule shader() const { return m_module; }
};
class VulkanShaderPipeline : public GraphicsDataNode<IShaderPipeline>
{
protected:
@ -2194,9 +2195,12 @@ protected:
friend struct VulkanShaderDataBinding;
VulkanContext* m_ctx;
VkPipelineCache m_pipelineCache;
boo::ObjToken<IVertexFormat> m_vtxFmt;
mutable VulkanShareableShader::Token m_vert;
mutable VulkanShareableShader::Token m_frag;
mutable VulkanVertexFormat m_vtxFmt;
mutable ObjToken<IShaderStage> m_vertex;
mutable ObjToken<IShaderStage> m_fragment;
mutable ObjToken<IShaderStage> m_geometry;
mutable ObjToken<IShaderStage> m_control;
mutable ObjToken<IShaderStage> m_evaluation;
BlendFactor m_srcFac;
BlendFactor m_dstFac;
Primitive m_prim;
@ -2206,22 +2210,27 @@ protected:
bool m_alphaWrite;
bool m_overwriteAlpha;
CullMode m_culling;
uint32_t m_patchSize;
mutable VkPipeline m_pipeline = VK_NULL_HANDLE;
VulkanShaderPipeline(const boo::ObjToken<BaseGraphicsData>& parent,
VulkanContext* ctx,
VulkanShareableShader::Token&& vert,
VulkanShareableShader::Token&& frag,
ObjToken<IShaderStage> vertex,
ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry,
ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation,
VkPipelineCache pipelineCache,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, bool overwriteAlpha, CullMode culling)
const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& info)
: GraphicsDataNode<IShaderPipeline>(parent),
m_ctx(ctx), m_pipelineCache(pipelineCache), m_vtxFmt(vtxFmt),
m_vert(std::move(vert)), m_frag(std::move(frag)), m_srcFac(srcFac), m_dstFac(dstFac),
m_prim(prim), m_depthTest(depthTest), m_depthWrite(depthWrite), m_colorWrite(colorWrite),
m_alphaWrite(alphaWrite), m_overwriteAlpha(overwriteAlpha), m_culling(culling)
m_vertex(vertex), m_fragment(fragment), m_geometry(geometry), m_control(control), m_evaluation(evaluation),
m_srcFac(info.srcFac), m_dstFac(info.dstFac), m_prim(info.prim),
m_depthTest(info.depthTest), m_depthWrite(info.depthWrite),
m_colorWrite(info.colorWrite), m_alphaWrite(info.alphaWrite),
m_overwriteAlpha(info.overwriteAlpha), m_culling(info.culling),
m_patchSize(info.patchSize)
{}
public:
~VulkanShaderPipeline()
@ -2233,9 +2242,6 @@ public:
}
VulkanShaderPipeline& operator=(const VulkanShaderPipeline&) = delete;
VulkanShaderPipeline(const VulkanShaderPipeline&) = delete;
virtual uint32_t defineExtraStages(VkPipelineShaderStageCreateInfo* stages) const { return 0; }
virtual const VkPipelineTessellationStateCreateInfo* getTessellationInfo() const { return nullptr; }
virtual void resetExtraStages() const {}
VkPipeline bind(VkRenderPass rPass = 0) const
{
if (!m_pipeline)
@ -2265,25 +2271,63 @@ public:
dynamicState.pDynamicStates = dynamicStateEnables;
dynamicState.dynamicStateCount = 0;
VkPipelineShaderStageCreateInfo stages[4] = {};
VkPipelineShaderStageCreateInfo stages[5] = {};
uint32_t numStages = 0;
stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[0].pNext = nullptr;
stages[0].flags = 0;
stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
stages[0].module = m_vert.get().m_shader;
stages[0].pName = "main";
stages[0].pSpecializationInfo = nullptr;
if (m_vertex)
{
stages[numStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[numStages].pNext = nullptr;
stages[numStages].flags = 0;
stages[numStages].stage = VK_SHADER_STAGE_VERTEX_BIT;
stages[numStages].module = m_vertex.cast<VulkanShaderStage>()->shader();
stages[numStages].pName = "main";
stages[numStages++].pSpecializationInfo = nullptr;
}
stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[1].pNext = nullptr;
stages[1].flags = 0;
stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
stages[1].module = m_frag.get().m_shader;
stages[1].pName = "main";
stages[1].pSpecializationInfo = nullptr;
if (m_fragment)
{
stages[numStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[numStages].pNext = nullptr;
stages[numStages].flags = 0;
stages[numStages].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
stages[numStages].module = m_fragment.cast<VulkanShaderStage>()->shader();
stages[numStages].pName = "main";
stages[numStages++].pSpecializationInfo = nullptr;
}
uint32_t extraStages = defineExtraStages(&stages[2]);
if (m_geometry)
{
stages[numStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[numStages].pNext = nullptr;
stages[numStages].flags = 0;
stages[numStages].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
stages[numStages].module = m_geometry.cast<VulkanShaderStage>()->shader();
stages[numStages].pName = "main";
stages[numStages++].pSpecializationInfo = nullptr;
}
if (m_control)
{
stages[numStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[numStages].pNext = nullptr;
stages[numStages].flags = 0;
stages[numStages].stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
stages[numStages].module = m_control.cast<VulkanShaderStage>()->shader();
stages[numStages].pName = "main";
stages[numStages++].pSpecializationInfo = nullptr;
}
if (m_evaluation)
{
stages[numStages].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[numStages].pNext = nullptr;
stages[numStages].flags = 0;
stages[numStages].stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
stages[numStages].module = m_evaluation.cast<VulkanShaderStage>()->shader();
stages[numStages].pName = "main";
stages[numStages++].pSpecializationInfo = nullptr;
}
VkPipelineInputAssemblyStateCreateInfo assemblyInfo = {};
assemblyInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
@ -2292,6 +2336,12 @@ public:
assemblyInfo.topology = PRIMITIVE_TABLE[int(m_prim)];
assemblyInfo.primitiveRestartEnable = VK_TRUE;
VkPipelineTessellationStateCreateInfo tessInfo = {};
tessInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
tessInfo.pNext = nullptr;
tessInfo.flags = 0;
tessInfo.patchControlPoints = m_patchSize;
VkPipelineViewportStateCreateInfo viewportInfo = {};
viewportInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewportInfo.pNext = nullptr;
@ -2408,11 +2458,11 @@ public:
pipelineCreateInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipelineCreateInfo.pNext = nullptr;
pipelineCreateInfo.flags = 0;
pipelineCreateInfo.stageCount = 2 + extraStages;
pipelineCreateInfo.stageCount = numStages;
pipelineCreateInfo.pStages = stages;
pipelineCreateInfo.pVertexInputState = &m_vtxFmt.cast<VulkanVertexFormat>()->m_info;
pipelineCreateInfo.pVertexInputState = &m_vtxFmt.m_info;
pipelineCreateInfo.pInputAssemblyState = &assemblyInfo;
pipelineCreateInfo.pTessellationState = getTessellationInfo();
pipelineCreateInfo.pTessellationState = &tessInfo;
pipelineCreateInfo.pViewportState = &viewportInfo;
pipelineCreateInfo.pRasterizationState = &rasterizationInfo;
pipelineCreateInfo.pMultisampleState = &multisampleInfo;
@ -2425,81 +2475,16 @@ public:
ThrowIfFailed(vk::CreateGraphicsPipelines(m_ctx->m_dev, m_pipelineCache, 1, &pipelineCreateInfo,
nullptr, &m_pipeline));
m_vert.reset();
m_frag.reset();
resetExtraStages();
m_vertex.reset();
m_fragment.reset();
m_geometry.reset();
m_control.reset();
m_evaluation.reset();
}
return m_pipeline;
}
};
class VulkanTessellationShaderPipeline : public VulkanShaderPipeline
{
friend class VulkanDataFactory;
friend struct VulkanShaderDataBinding;
mutable VulkanShareableShader::Token m_control;
mutable VulkanShareableShader::Token m_evaluation;
VkPipelineTessellationStateCreateInfo m_tessInfo;
VulkanTessellationShaderPipeline(const boo::ObjToken<BaseGraphicsData>& parent,
VulkanContext* ctx,
VulkanShareableShader::Token&& vert,
VulkanShareableShader::Token&& frag,
VulkanShareableShader::Token&& control,
VulkanShareableShader::Token&& evaluation,
VkPipelineCache pipelineCache,
const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, bool overwriteAlpha, CullMode culling)
: VulkanShaderPipeline(parent, ctx, std::move(vert), std::move(frag), pipelineCache, vtxFmt, srcFac, dstFac,
Primitive::Patches, depthTest, depthWrite, colorWrite, alphaWrite, overwriteAlpha, culling),
m_control(std::move(control)), m_evaluation(std::move(evaluation))
{
m_tessInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
m_tessInfo.pNext = nullptr;
m_tessInfo.flags = 0;
m_tessInfo.patchControlPoints = patchSize;
}
public:
~VulkanTessellationShaderPipeline() = default;
uint32_t defineExtraStages(VkPipelineShaderStageCreateInfo* stages) const
{
stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[0].pNext = nullptr;
stages[0].flags = 0;
stages[0].stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
stages[0].module = m_control.get().m_shader;
stages[0].pName = "main";
stages[0].pSpecializationInfo = nullptr;
stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[1].pNext = nullptr;
stages[1].flags = 0;
stages[1].stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
stages[1].module = m_evaluation.get().m_shader;
stages[1].pName = "main";
stages[1].pSpecializationInfo = nullptr;
return 2;
}
const VkPipelineTessellationStateCreateInfo* getTessellationInfo() const
{
return &m_tessInfo;
}
void resetExtraStages() const
{
m_control.reset();
m_evaluation.reset();
}
};
static const VkDescriptorBufferInfo* GetBufferGPUResource(const IGraphicsBuffer* buf, int idx)
{
if (buf->dynamic())
@ -2596,9 +2581,9 @@ struct VulkanShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
m_ibuf(ibuf)
{
VulkanShaderPipeline* cpipeline = m_pipeline.cast<VulkanShaderPipeline>();
VulkanVertexFormat* vtxFmt = cpipeline->m_vtxFmt.cast<VulkanVertexFormat>();
m_vertOffset = baseVert * vtxFmt->m_stride;
m_instOffset = baseInst * vtxFmt->m_instStride;
VulkanVertexFormat& vtxFmt = cpipeline->m_vtxFmt;
m_vertOffset = baseVert * vtxFmt.m_stride;
m_instOffset = baseInst * vtxFmt.m_instStride;
if (ubufOffs && ubufSizes)
{
@ -2798,7 +2783,7 @@ struct VulkanShaderDataBinding : GraphicsDataNode<IShaderDataBinding>
struct VulkanCommandQueue : IGraphicsCommandQueue
{
Platform platform() const {return IGraphicsDataFactory::Platform::Vulkan;}
const SystemChar* platformName() const {return _S("Vulkan");}
const SystemChar* platformName() const {return _SYS_STR("Vulkan");}
VulkanContext* m_ctx;
VulkanContext::Window* m_windowCtx;
IGraphicsContext* m_parent;
@ -3573,203 +3558,6 @@ void VulkanTextureD::unmap()
VulkanDataFactoryImpl::VulkanDataFactoryImpl(IGraphicsContext* parent, VulkanContext* ctx)
: m_parent(parent), m_ctx(ctx) {}
VulkanShareableShader::Token VulkanDataFactoryImpl::PrepareShaderStage(const char* source,
std::vector<unsigned int>* blobOut,
EShLanguage lang)
{
uint64_t srcHash = 0;
uint64_t binHash = 0;
XXH64_state_t hashState;
XXH64_reset(&hashState, 0);
if (source)
{
XXH64_update(&hashState, source, strlen(source));
srcHash = XXH64_digest(&hashState);
auto binSearch = m_sourceToBinary.find(srcHash);
if (binSearch != m_sourceToBinary.cend())
binHash = binSearch->second;
}
else if (blobOut && blobOut->size())
{
XXH64_update(&hashState, blobOut->data(), blobOut->size() * sizeof(unsigned int));
binHash = XXH64_digest(&hashState);
}
if (blobOut && blobOut->empty())
binHash = Compile(*blobOut, source, srcHash, lang);
auto search = binHash ? m_sharedShaders.find(binHash) : m_sharedShaders.end();
if (search != m_sharedShaders.end())
{
return search->second->lock();
}
else
{
std::vector<unsigned int> blob;
const std::vector<unsigned int>* useBlob;
if (blobOut)
{
useBlob = blobOut;
}
else
{
useBlob = &blob;
binHash = Compile(blob, source, srcHash, lang);
}
VkShaderModuleCreateInfo smCreateInfo = {};
smCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
smCreateInfo.pNext = nullptr;
smCreateInfo.flags = 0;
VkShaderModule module;
smCreateInfo.codeSize = useBlob->size() * sizeof(unsigned int);
smCreateInfo.pCode = useBlob->data();
ThrowIfFailed(vk::CreateShaderModule(m_ctx->m_dev, &smCreateInfo, nullptr, &module));
auto it =
m_sharedShaders.emplace(std::make_pair(binHash,
std::make_unique<VulkanShareableShader>(*this, srcHash, binHash, module))).first;
return it->second->lock();
}
}
uint64_t VulkanDataFactoryImpl::Compile(std::vector<unsigned int>& out, const char* source,
uint64_t srcKey, EShLanguage lang)
{
const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
glslang::TShader shader(lang);
shader.setStrings(&source, 1);
if (!shader.parse(&glslang::DefaultTBuiltInResource, 110, false, messages))
{
printf("%s\n", source);
Log.report(logvisor::Fatal, "unable to compile shader\n%s", shader.getInfoLog());
}
glslang::TProgram prog;
prog.addShader(&shader);
if (!prog.link(messages))
{
Log.report(logvisor::Fatal, "unable to link shader program\n%s", prog.getInfoLog());
}
glslang::GlslangToSpv(*prog.getIntermediate(lang), out);
//spv::Disassemble(std::cerr, out);
XXH64_state_t hashState;
XXH64_reset(&hashState, 0);
XXH64_update(&hashState, out.data(), out.size() * sizeof(unsigned int));
uint64_t binKey = XXH64_digest(&hashState);
m_sourceToBinary[srcKey] = binKey;
return binKey;
}
boo::ObjToken<IShaderPipeline> VulkanDataFactory::Context::newShaderPipeline
(const char* vertSource, const char* fragSource,
std::vector<unsigned int>* vertBlobOut, std::vector<unsigned int>* fragBlobOut,
std::vector<unsigned char>* pipelineBlob, const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, Primitive prim,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
{
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
VulkanShareableShader::Token vertShader = factory.PrepareShaderStage(vertSource, vertBlobOut, EShLangVertex);
VulkanShareableShader::Token fragShader = factory.PrepareShaderStage(fragSource, fragBlobOut, EShLangFragment);
VkPipelineCache pipelineCache = VK_NULL_HANDLE;
if (pipelineBlob)
{
VkPipelineCacheCreateInfo cacheDataInfo = {};
cacheDataInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
cacheDataInfo.pNext = nullptr;
cacheDataInfo.initialDataSize = pipelineBlob->size();
if (cacheDataInfo.initialDataSize)
cacheDataInfo.pInitialData = pipelineBlob->data();
ThrowIfFailed(vk::CreatePipelineCache(factory.m_ctx->m_dev, &cacheDataInfo, nullptr, &pipelineCache));
}
VulkanShaderPipeline* retval = new VulkanShaderPipeline(m_data, factory.m_ctx, std::move(vertShader),
std::move(fragShader), pipelineCache, vtxFmt, srcFac,
dstFac, prim, depthTest, depthWrite, colorWrite,
alphaWrite, overwriteAlpha, culling);
if (pipelineBlob && pipelineBlob->empty())
{
size_t cacheSz = 0;
ThrowIfFailed(vk::GetPipelineCacheData(factory.m_ctx->m_dev, pipelineCache, &cacheSz, nullptr));
if (cacheSz)
{
pipelineBlob->resize(cacheSz);
ThrowIfFailed(vk::GetPipelineCacheData(factory.m_ctx->m_dev, pipelineCache,
&cacheSz, pipelineBlob->data()));
pipelineBlob->resize(cacheSz);
}
}
return {retval};
}
boo::ObjToken<IShaderPipeline> VulkanDataFactory::Context::newTessellationShaderPipeline
(const char* vertSource, const char* fragSource, const char* controlSource, const char* evaluationSource,
std::vector<unsigned int>* vertBlobOut, std::vector<unsigned int>* fragBlobOut,
std::vector<unsigned int>* controlBlobOut, std::vector<unsigned int>* evaluationBlobOut,
std::vector<unsigned char>* pipelineBlob, const boo::ObjToken<IVertexFormat>& vtxFmt,
BlendFactor srcFac, BlendFactor dstFac, uint32_t patchSize,
ZTest depthTest, bool depthWrite, bool colorWrite,
bool alphaWrite, CullMode culling, bool overwriteAlpha)
{
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
if (!factory.m_ctx->m_features.tessellationShader)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
if (patchSize > factory.m_ctx->m_gpuProps.limits.maxTessellationPatchSize)
Log.report(logvisor::Fatal, "Device supports %d patch vertices, %d requested",
int(factory.m_ctx->m_gpuProps.limits.maxTessellationPatchSize), int(patchSize));
VulkanShareableShader::Token vertShader = factory.PrepareShaderStage(vertSource, vertBlobOut, EShLangVertex);
VulkanShareableShader::Token fragShader = factory.PrepareShaderStage(fragSource, fragBlobOut, EShLangFragment);
VulkanShareableShader::Token controlShader = factory.PrepareShaderStage(controlSource, controlBlobOut, EShLangTessControl);
VulkanShareableShader::Token evaluationShader = factory.PrepareShaderStage(evaluationSource, evaluationBlobOut, EShLangTessEvaluation);
VkPipelineCache pipelineCache = VK_NULL_HANDLE;
if (pipelineBlob)
{
VkPipelineCacheCreateInfo cacheDataInfo = {};
cacheDataInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
cacheDataInfo.pNext = nullptr;
cacheDataInfo.initialDataSize = pipelineBlob->size();
if (cacheDataInfo.initialDataSize)
cacheDataInfo.pInitialData = pipelineBlob->data();
ThrowIfFailed(vk::CreatePipelineCache(factory.m_ctx->m_dev, &cacheDataInfo, nullptr, &pipelineCache));
}
VulkanShaderPipeline* retval =
new VulkanTessellationShaderPipeline(m_data, factory.m_ctx, std::move(vertShader),
std::move(fragShader), std::move(controlShader),
std::move(evaluationShader), pipelineCache, vtxFmt, srcFac,
dstFac, patchSize, depthTest, depthWrite, colorWrite,
alphaWrite, overwriteAlpha, culling);
if (pipelineBlob && pipelineBlob->empty())
{
size_t cacheSz = 0;
ThrowIfFailed(vk::GetPipelineCacheData(factory.m_ctx->m_dev, pipelineCache, &cacheSz, nullptr));
if (cacheSz)
{
pipelineBlob->resize(cacheSz);
ThrowIfFailed(vk::GetPipelineCacheData(factory.m_ctx->m_dev, pipelineCache,
&cacheSz, pipelineBlob->data()));
pipelineBlob->resize(cacheSz);
}
}
return {retval};
}
VulkanDataFactory::Context::Context(VulkanDataFactory& parent __BooTraceArgs)
: m_parent(parent), m_data(new VulkanData(static_cast<VulkanDataFactoryImpl&>(parent) __BooTraceArgsUse)) {}
VulkanDataFactory::Context::~Context() {}
@ -3785,7 +3573,6 @@ boo::ObjToken<IGraphicsBufferD>
VulkanDataFactory::Context::newDynamicBuffer(BufferUse use, size_t stride, size_t count)
{
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
VulkanCommandQueue* q = static_cast<VulkanCommandQueue*>(factory.m_parent->getCommandQueue());
return {new VulkanGraphicsBufferD<BaseGraphicsData>(m_data, use, factory.m_ctx, stride, count)};
}
@ -3825,18 +3612,44 @@ VulkanDataFactory::Context::newRenderTexture(size_t width, size_t height, Textur
return {new VulkanTextureR(m_data, q, width, height, clampMode, colorBindCount, depthBindCount)};
}
boo::ObjToken<IVertexFormat>
VulkanDataFactory::Context::newVertexFormat(size_t elementCount,
const VertexElementDescriptor* elements,
size_t baseVert, size_t baseInst)
ObjToken<IShaderStage>
VulkanDataFactory::Context::newShaderStage(const uint8_t* data, size_t size, PipelineStage stage)
{
return {new struct VulkanVertexFormat(m_data, elementCount, elements)};
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
if (stage == PipelineStage::Control || stage == PipelineStage::Evaluation)
{
if (!factory.m_ctx->m_features.tessellationShader)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
}
return {new VulkanShaderStage(m_data, factory.m_ctx, data, size, stage)};
}
ObjToken<IShaderPipeline>
VulkanDataFactory::Context::newShaderPipeline(ObjToken<IShaderStage> vertex, ObjToken<IShaderStage> fragment,
ObjToken<IShaderStage> geometry, ObjToken<IShaderStage> control,
ObjToken<IShaderStage> evaluation, const VertexFormatInfo& vtxFmt,
const AdditionalPipelineInfo& additionalInfo)
{
VulkanDataFactoryImpl& factory = static_cast<VulkanDataFactoryImpl&>(m_parent);
if (control || evaluation)
{
if (!factory.m_ctx->m_features.tessellationShader)
Log.report(logvisor::Fatal, "Device does not support tessellation shaders");
if (additionalInfo.patchSize > factory.m_ctx->m_gpuProps.limits.maxTessellationPatchSize)
Log.report(logvisor::Fatal, "Device supports %d patch vertices, %d requested",
int(factory.m_ctx->m_gpuProps.limits.maxTessellationPatchSize), int(additionalInfo.patchSize));
}
return {new VulkanShaderPipeline(m_data, factory.m_ctx, vertex, fragment, geometry,
control, evaluation, VK_NULL_HANDLE, vtxFmt, additionalInfo)};
}
boo::ObjToken<IShaderDataBinding>
VulkanDataFactory::Context::newShaderDataBinding(
const boo::ObjToken<IShaderPipeline>& pipeline,
const boo::ObjToken<IVertexFormat>& /*vtxFormat*/,
const boo::ObjToken<IGraphicsBuffer>& vbuf,
const boo::ObjToken<IGraphicsBuffer>& instVbuf,
const boo::ObjToken<IGraphicsBuffer>& ibuf,
@ -4165,4 +3978,42 @@ std::unique_ptr<IGraphicsDataFactory> _NewVulkanDataFactory(IGraphicsContext* pa
return std::make_unique<VulkanDataFactoryImpl>(parent, ctx);
}
static const EShLanguage ShaderTypes[] =
{
EShLangVertex,
EShLangVertex,
EShLangFragment,
EShLangGeometry,
EShLangTessControl,
EShLangTessEvaluation
};
std::vector<uint8_t> VulkanDataFactory::CompileGLSL(const char* source, PipelineStage stage)
{
EShLanguage lang = ShaderTypes[int(stage)];
const EShMessages messages = EShMessages(EShMsgSpvRules | EShMsgVulkanRules);
glslang::TShader shader(lang);
shader.setStrings(&source, 1);
if (!shader.parse(&glslang::DefaultTBuiltInResource, 110, false, messages))
{
printf("%s\n", source);
Log.report(logvisor::Fatal, "unable to compile shader\n%s", shader.getInfoLog());
}
glslang::TProgram prog;
prog.addShader(&shader);
if (!prog.link(messages))
{
Log.report(logvisor::Fatal, "unable to link shader program\n%s", prog.getInfoLog());
}
std::vector<unsigned int> out;
glslang::GlslangToSpv(*prog.getIntermediate(lang), out);
//spv::Disassemble(std::cerr, out);
std::vector<uint8_t> ret(out.size() * 4);
memcpy(ret.data(), out.data(), ret.size());
return ret;
}
}
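(For reference, a hedged caller-side sketch of the new Vulkan stage/pipeline path -- illustrative only, not part of this commit. It mirrors SetupGammaResources above; `MyVS`/`MyFS` stand in for hypothetical GLSL source strings, `ctx` is the transaction Context, and the usual boo headers are assumed to be included.)

// Hypothetical usage of CompileGLSL + newShaderStage + newShaderPipeline:
auto vertSpirv = boo::VulkanDataFactory::CompileGLSL(MyVS, boo::PipelineStage::Vertex);
auto fragSpirv = boo::VulkanDataFactory::CompileGLSL(MyFS, boo::PipelineStage::Fragment);
auto vertex = ctx.newShaderStage(vertSpirv, boo::PipelineStage::Vertex);
auto fragment = ctx.newShaderStage(fragSpirv, boo::PipelineStage::Fragment);
const boo::VertexElementDescriptor elems[] = {
    {boo::VertexSemantic::Position4},
    {boo::VertexSemantic::UV4}
};
boo::AdditionalPipelineInfo info = {
    boo::BlendFactor::One, boo::BlendFactor::Zero,
    boo::Primitive::TriStrips, boo::ZTest::None,
    false, true, false, boo::CullMode::None
};
auto pipeline = ctx.newShaderPipeline(vertex, fragment, elems, info);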

View File

@ -0,0 +1,153 @@
find_program(MESON_PROG meson)
find_program(NINJA_PROG ninja)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mesa/meson.build AND
EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/libdrm_nouveau/Makefile AND
MESON_PROG AND NINJA_PROG)
message(STATUS "Enabling NX support")
set(LIBDRM_DIR ${CMAKE_CURRENT_SOURCE_DIR}/libdrm_nouveau)
set(MESA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mesa)
set(MESA_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/mesa)
file(MAKE_DIRECTORY ${MESA_BUILD_DIR})
if(NOT NX)
set(PLAT_MESA_TARGETS src/mesa/libmesa_sse41.a)
else()
set(PLAT_MESA_TARGETS src/gallium/winsys/nouveau/switch/libnouveauwinsys.a)
endif()
set(MESA_TARGETS
src/compiler/libcompiler.a
src/compiler/glsl/libglsl.a
src/compiler/glsl/glcpp/libglcpp.a
src/compiler/nir/libnir.a
src/libglsl_util.a
src/util/libmesa_util.a
src/mesa/libmesa_gallium.a
${PLAT_MESA_TARGETS}
src/gallium/auxiliary/libgallium.a
src/gallium/auxiliary/libgalliumvl.a
src/gallium/drivers/nouveau/libnouveau.a)
include_directories(
${MESA_DIR}/include
${MESA_DIR}/src
${MESA_DIR}/src/mesa
${MESA_DIR}/src/mapi
${MESA_DIR}/src/compiler/glsl
${MESA_BUILD_DIR}/src/compiler
${MESA_DIR}/src/mesa/state_tracker
${MESA_DIR}/src/gallium/include
${MESA_DIR}/src/gallium/auxiliary
${MESA_DIR}/src/gallium/drivers/nouveau
${LIBDRM_DIR}/include)
if(${CMAKE_BUILD_TYPE} STREQUAL Release OR ${CMAKE_BUILD_TYPE} STREQUAL RelWithDebInfo)
set(MESON_BUILD_TYPE release)
set(MESON_SANITIZE_ARGS "")
else()
set(MESON_BUILD_TYPE debug)
set(MESON_SANITIZE_ARGS "-fsanitize=address")
endif()
if(NX)
configure_file(switch_cross_file.txt.in switch_cross_file.txt)
set(MESON_CROSS --cross-file ${CMAKE_CURRENT_BINARY_DIR}/switch_cross_file.txt -D c_std=gnu11 -D cpp_std=gnu++17)
set(MESA_PLATFORMS switch)
endif()
if(NOT EXISTS ${MESA_BUILD_DIR}/build.ninja)
message(STATUS "Preparing mesa build system")
set(ENV{CC} "")
set(ENV{CXX} "")
execute_process(COMMAND ${MESON_PROG} setup -D buildtype=${MESON_BUILD_TYPE} ${MESON_CROSS}
-D gallium-drivers=nouveau -D dri-drivers= -D vulkan-drivers= -D llvm=false
-D shared-glapi=true -D gles1=false -D gles2=false -D gbm=false
-D shader-cache=false -D boo-offline-mode=true -D "platforms=${MESA_PLATFORMS}" -D glx=disabled
-D "c_args=${MESON_SANITIZE_ARGS} -I${LIBDRM_DIR}/include -DDEBUG=1 -DHAVE_LIBDRM"
-D "cpp_args=${MESON_SANITIZE_ARGS} -I${LIBDRM_DIR}/include -DDEBUG=1 -DHAVE_LIBDRM"
-D "c_link_args=${MESON_SANITIZE_ARGS}"
-D "cpp_link_args=${MESON_SANITIZE_ARGS}"
${MESA_DIR} ${MESA_BUILD_DIR}
RESULT_VARIABLE MESON_RESULT)
if(NOT MESON_RESULT EQUAL 0)
message(FATAL_ERROR "meson failed with error code ${MESON_RESULT}")
endif()
endif()
message(STATUS "Invoking mesa build system")
execute_process(COMMAND ${NINJA_PROG} -C ${MESA_BUILD_DIR} ${MESA_TARGETS} RESULT_VARIABLE NINJA_RESULT)
if(NOT NINJA_RESULT EQUAL 0)
message(FATAL_ERROR "ninja failed with error code ${NINJA_RESULT}")
endif()
if(NOT WIN32)
add_definitions("-DHAVE_PTHREAD -DHAVE_TIMESPEC_GET")
if(${CMAKE_SYSTEM_NAME} STREQUAL Linux)
add_definitions("-DHAVE_LINUX_FUTEX_H")
endif()
endif()
add_definitions("-DHAVE_ZLIB -DDEBUG=1 -DHAVE_LIBDRM")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=return-type")
add_subdirectory(fake_libdrm_nouveau)
add_library(nx_compiler nx_compiler.cpp
${MESA_DIR}/src/compiler/glsl/ir_builder_print_visitor.cpp)
target_link_libraries(nx_compiler
${MESA_BUILD_DIR}/src/mesa/libmesa_gallium.a
${MESA_BUILD_DIR}/src/mesa/libmesa_sse41.a
${MESA_BUILD_DIR}/src/compiler/nir/libnir.a
${MESA_BUILD_DIR}/src/compiler/glsl/libglsl.a
${MESA_BUILD_DIR}/src/libglsl_util.a
${MESA_BUILD_DIR}/src/compiler/glsl/glcpp/libglcpp.a
${MESA_BUILD_DIR}/src/compiler/libcompiler.a
${MESA_BUILD_DIR}/src/gallium/drivers/nouveau/libnouveau.a
${MESA_BUILD_DIR}/src/gallium/auxiliary/libgallium.a
${MESA_BUILD_DIR}/src/gallium/auxiliary/libgalliumvl.a
${MESA_BUILD_DIR}/src/util/libmesa_util.a
fake_libdrm_nouveau
unwind dl pthread z)
add_executable(nx_compiler_driver nx_compiler_driver.cpp)
target_link_libraries(nx_compiler_driver nx_compiler)
if(COMMAND add_sanitizers)
add_sanitizers(nx_compiler nx_compiler_driver)
endif()
if(NX)
include_directories(${DEVKITPRO}/libnx/include)
add_library(libdrm_nouveau
libdrm_nouveau/source/bomap.c
libdrm_nouveau/source/bufctx.c
libdrm_nouveau/source/nouveau.c
libdrm_nouveau/source/pushbuf.c)
add_definitions(-DBOO_HAS_NX=1)
add_library(nx_runtime NX.cpp nx_compiler.cpp
${MESA_DIR}/src/compiler/glsl/ir_builder_print_visitor.cpp)
target_link_libraries(nx_runtime xxhash
${MESA_BUILD_DIR}/src/mesa/libmesa_gallium.a
${MESA_BUILD_DIR}/src/compiler/nir/libnir.a
${MESA_BUILD_DIR}/src/compiler/glsl/libglsl.a
${MESA_BUILD_DIR}/src/libglsl_util.a
${MESA_BUILD_DIR}/src/compiler/glsl/glcpp/libglcpp.a
${MESA_BUILD_DIR}/src/compiler/libcompiler.a
${MESA_BUILD_DIR}/src/gallium/drivers/nouveau/libnouveau.a
${MESA_BUILD_DIR}/src/gallium/winsys/nouveau/switch/libnouveauwinsys.a
${MESA_BUILD_DIR}/src/gallium/auxiliary/libgallium.a
${MESA_BUILD_DIR}/src/gallium/auxiliary/libgalliumvl.a
${MESA_BUILD_DIR}/src/util/libmesa_util.a
libdrm_nouveau nx)
if(COMMAND add_sanitizers)
add_sanitizers(nx_runtime libdrm_nouveau)
endif()
endif()
else()
if(NX)
message(FATAL_ERROR "Unable to find meson or ninja or mesa submodules; this is required for NX.")
else()
message(STATUS "Unable to find meson or ninja or mesa submodules; skipping NX support.")
endif()
endif()

lib/graphicsdev/nx/NX.cpp (new file, 2173 lines; diff suppressed because it is too large)

View File

@ -0,0 +1 @@
add_library(fake_libdrm_nouveau nouveau.c pushbuf.c bufctx.c bomap.c)

View File

@ -0,0 +1,123 @@
#include <stdio.h>
#include <stdlib.h>
#include "private.h"
#ifdef DEBUG
# define TRACE(x...) printf("nouveau: " x)
# define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__)
#else
# define TRACE(x...)
# define CALLED()
#endif
static inline unsigned bo_map_hash(struct nouveau_bo *bo)
{
return bo->handle % BO_MAP_NUM_BUCKETS;
}
static inline struct nouveau_client_bo_map_entry *bo_map_lookup(struct nouveau_client_bo_map *bomap, struct nouveau_bo *bo)
{
struct nouveau_client_bo_map_entry *ent;
for (ent = bomap->buckets[bo_map_hash(bo)]; ent; ent = ent->next)
if (ent->bo_handle == bo->handle)
break;
return ent;
}
void
cli_map_free(struct nouveau_client *client)
{
struct nouveau_client_bo_map *bomap = &nouveau_client(client)->bomap;
unsigned i;
// Free all buckets
for (i = 0; i < BO_MAP_NUM_BUCKETS+1; i ++) {
struct nouveau_client_bo_map_entry *ent, *next;
for (ent = bomap->buckets[i]; ent; ent = next) {
next = ent->next;
free(ent);
}
}
}
struct drm_nouveau_gem_pushbuf_bo *
cli_kref_get(struct nouveau_client *client, struct nouveau_bo *bo)
{
struct nouveau_client_bo_map *bomap = &nouveau_client(client)->bomap;
struct nouveau_client_bo_map_entry *ent = bo_map_lookup(bomap, bo);
struct drm_nouveau_gem_pushbuf_bo *kref = NULL;
if (ent)
kref = ent->kref;
return kref;
}
struct nouveau_pushbuf *
cli_push_get(struct nouveau_client *client, struct nouveau_bo *bo)
{
struct nouveau_client_bo_map *bomap = &nouveau_client(client)->bomap;
struct nouveau_client_bo_map_entry *ent = bo_map_lookup(bomap, bo);
struct nouveau_pushbuf *push = NULL;
if (ent)
push = ent->push;
return push;
}
static struct nouveau_client_bo_map_entry *bo_map_get_free(struct nouveau_client_bo_map *bomap)
{
// Try to find an entry first in the bucket of free entries,
// and if said bucket is empty then allocate a new entry
struct nouveau_client_bo_map_entry *ent = bomap->buckets[BO_MAP_NUM_BUCKETS];
if (ent)
bomap->buckets[BO_MAP_NUM_BUCKETS] = ent->next;
else
ent = malloc(sizeof(*ent));
return ent;
}
void
cli_kref_set(struct nouveau_client *client, struct nouveau_bo *bo,
struct drm_nouveau_gem_pushbuf_bo *kref,
struct nouveau_pushbuf *push)
{
struct nouveau_client_bo_map *bomap = &nouveau_client(client)->bomap;
struct nouveau_client_bo_map_entry *ent = bo_map_lookup(bomap, bo);
TRACE("setting 0x%x <-- {%p,%p}\n", bo->handle, kref, push);
if (!ent) {
// Do nothing if the user wanted to free the entry anyway
if (!kref && !push)
return;
// Try to get a free entry for this bo
ent = bo_map_get_free(bomap);
if (!ent) {
// Shouldn't we panic here?
TRACE("panic: out of memory\n");
return;
}
// Add entry to bucket list
unsigned hash = bo_map_hash(bo);
ent->next = bomap->buckets[hash];
if (ent->next)
ent->next->prev_next = &ent->next;
ent->prev_next = &bomap->buckets[hash];
ent->bo_handle = bo->handle;
bomap->buckets[hash] = ent;
}
if (kref || push) {
// Update the entry
ent->kref = kref;
ent->push = push;
}
else {
// Unlink the entry, and put it in the bucket of free entries
*ent->prev_next = ent->next;
if (ent->next)
ent->next->prev_next = ent->prev_next;
ent->next = bomap->buckets[BO_MAP_NUM_BUCKETS];
bomap->buckets[BO_MAP_NUM_BUCKETS] = ent;
}
}
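(A brief hedged usage note for the per-client bo map above -- illustrative only, not part of this commit. `client`, `bo`, `kref` and `push` are assumed to come from the surrounding pushbuf machinery; the real call sites live in pushbuf.c, and private.h from this directory is assumed to be included.)

/* Hypothetical usage of the cli_* helpers: */
cli_kref_set(client, bo, kref, push);                             /* insert/update the bo's entry */
struct drm_nouveau_gem_pushbuf_bo *k = cli_kref_get(client, bo);  /* look up its kref */
struct nouveau_pushbuf *p = cli_push_get(client, bo);             /* look up its pushbuf */
cli_kref_set(client, bo, NULL, NULL);                             /* recycle the entry into the free bucket */
cli_map_free(client);                                             /* tear down all entries at client destruction */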

View File

@ -0,0 +1,163 @@
/*
* Copyright 2012 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include <errno.h>
#include "libdrm_lists.h"
#include "nouveau.h"
#include "private.h"
struct nouveau_bufref_priv {
struct nouveau_bufref base;
struct nouveau_bufref_priv *next;
struct nouveau_bufctx *bufctx;
};
struct nouveau_bufbin_priv {
struct nouveau_bufref_priv *list;
int relocs;
};
struct nouveau_bufctx_priv {
struct nouveau_bufctx base;
struct nouveau_bufref_priv *free;
int nr_bins;
struct nouveau_bufbin_priv bins[];
};
static inline struct nouveau_bufctx_priv *
nouveau_bufctx(struct nouveau_bufctx *bctx)
{
return (struct nouveau_bufctx_priv *)bctx;
}
int
nouveau_bufctx_new(struct nouveau_client *client, int bins,
struct nouveau_bufctx **pbctx)
{
struct nouveau_bufctx_priv *priv;
priv = calloc(1, sizeof(*priv) + sizeof(priv->bins[0]) * bins);
if (priv) {
DRMINITLISTHEAD(&priv->base.head);
DRMINITLISTHEAD(&priv->base.pending);
DRMINITLISTHEAD(&priv->base.current);
priv->base.client = client;
priv->nr_bins = bins;
*pbctx = &priv->base;
return 0;
}
return -ENOMEM;
}
void
nouveau_bufctx_del(struct nouveau_bufctx **pbctx)
{
struct nouveau_bufctx_priv *pctx = nouveau_bufctx(*pbctx);
struct nouveau_bufref_priv *pref;
if (pctx) {
while (pctx->nr_bins--)
nouveau_bufctx_reset(&pctx->base, pctx->nr_bins);
while ((pref = pctx->free)) {
pctx->free = pref->next;
free(pref);
}
free(pctx);
*pbctx = NULL;
}
}
void
nouveau_bufctx_reset(struct nouveau_bufctx *bctx, int bin)
{
struct nouveau_bufctx_priv *pctx = nouveau_bufctx(bctx);
struct nouveau_bufbin_priv *pbin = &pctx->bins[bin];
struct nouveau_bufref_priv *pref;
while ((pref = pbin->list)) {
DRMLISTDELINIT(&pref->base.thead);
pbin->list = pref->next;
pref->next = pctx->free;
pctx->free = pref;
}
bctx->relocs -= pbin->relocs;
pbin->relocs = 0;
}
struct nouveau_bufref *
nouveau_bufctx_refn(struct nouveau_bufctx *bctx, int bin,
struct nouveau_bo *bo, uint32_t flags)
{
struct nouveau_bufctx_priv *pctx = nouveau_bufctx(bctx);
struct nouveau_bufbin_priv *pbin = &pctx->bins[bin];
struct nouveau_bufref_priv *pref = pctx->free;
if (!pref)
pref = malloc(sizeof(*pref));
else
pctx->free = pref->next;
if (pref) {
pref->base.bo = bo;
pref->base.flags = flags;
pref->base.packet = 0;
DRMLISTADDTAIL(&pref->base.thead, &bctx->pending);
pref->bufctx = bctx;
pref->next = pbin->list;
pbin->list = pref;
}
return &pref->base;
}
struct nouveau_bufref *
nouveau_bufctx_mthd(struct nouveau_bufctx *bctx, int bin, uint32_t packet,
struct nouveau_bo *bo, uint64_t data, uint32_t flags,
uint32_t vor, uint32_t tor)
{
struct nouveau_bufctx_priv *pctx = nouveau_bufctx(bctx);
struct nouveau_bufbin_priv *pbin = &pctx->bins[bin];
struct nouveau_bufref *bref = nouveau_bufctx_refn(bctx, bin, bo, flags);
if (bref) {
bref->packet = packet;
bref->data = data;
bref->vor = vor;
bref->tor = tor;
pbin->relocs++;
bctx->relocs++;
}
return bref;
}
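(A hedged sketch of how the bufctx API above is typically driven -- illustrative only, not part of this commit. `client`, `bo` and the single-bin layout are assumptions; the NOUVEAU_BO_* placement flags come from nouveau.h.)

/* Hypothetical usage: one bufctx with a single bin of buffer references. */
struct nouveau_bufctx *bctx;
if (nouveau_bufctx_new(client, 1, &bctx) == 0) {
    nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
    /* ... submit work referencing bin 0 ... */
    nouveau_bufctx_reset(bctx, 0);   /* return refs in bin 0 to the free list */
    nouveau_bufctx_del(&bctx);
}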

View File

@ -0,0 +1,17 @@
#ifndef LIBDRM_ATOMICS_H
#define LIBDRM_ATOMICS_H
typedef struct {
int atomic;
} atomic_t;
# define atomic_read(x) ((x)->atomic)
# define atomic_set(x, val) ((x)->atomic = (val))
# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
# define atomic_inc_return(x) (__sync_add_and_fetch (&(x)->atomic, 1))
# define atomic_dec_and_test(x) (__sync_add_and_fetch (&(x)->atomic, -1) == 0)
# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
#endif

View File

@ -0,0 +1,118 @@
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/*
* List macros heavily inspired by the Linux kernel
* list handling. No list looping yet.
*/
#include <stddef.h>
typedef struct _drmMMListHead
{
struct _drmMMListHead *prev;
struct _drmMMListHead *next;
} drmMMListHead;
#define DRMINITLISTHEAD(__item) \
do{ \
(__item)->prev = (__item); \
(__item)->next = (__item); \
} while (0)
#define DRMLISTADD(__item, __list) \
do { \
(__item)->prev = (__list); \
(__item)->next = (__list)->next; \
(__list)->next->prev = (__item); \
(__list)->next = (__item); \
} while (0)
#define DRMLISTADDTAIL(__item, __list) \
do { \
(__item)->next = (__list); \
(__item)->prev = (__list)->prev; \
(__list)->prev->next = (__item); \
(__list)->prev = (__item); \
} while(0)
#define DRMLISTDEL(__item) \
do { \
(__item)->prev->next = (__item)->next; \
(__item)->next->prev = (__item)->prev; \
} while(0)
#define DRMLISTDELINIT(__item) \
do { \
(__item)->prev->next = (__item)->next; \
(__item)->next->prev = (__item)->prev; \
(__item)->next = (__item); \
(__item)->prev = (__item); \
} while(0)
#define DRMLISTENTRY(__type, __item, __field) \
((__type *)(((char *) (__item)) - offsetof(__type, __field)))
#define DRMLISTEMPTY(__item) ((__item)->next == (__item))
#define DRMLISTSINGLE(__list) \
(!DRMLISTEMPTY(__list) && ((__list)->next == (__list)->prev))
#define DRMLISTFOREACH(__item, __list) \
for ((__item) = (__list)->next; \
(__item) != (__list); (__item) = (__item)->next)
#define DRMLISTFOREACHSAFE(__item, __temp, __list) \
for ((__item) = (__list)->next, (__temp) = (__item)->next; \
(__item) != (__list); \
(__item) = (__temp), (__temp) = (__item)->next)
#define DRMLISTFOREACHSAFEREVERSE(__item, __temp, __list) \
for ((__item) = (__list)->prev, (__temp) = (__item)->prev; \
(__item) != (__list); \
(__item) = (__temp), (__temp) = (__item)->prev)
#define DRMLISTFOREACHENTRY(__item, __list, __head) \
for ((__item) = DRMLISTENTRY(typeof(*__item), (__list)->next, __head); \
&(__item)->__head != (__list); \
(__item) = DRMLISTENTRY(typeof(*__item), \
(__item)->__head.next, __head))
#define DRMLISTFOREACHENTRYSAFE(__item, __temp, __list, __head) \
for ((__item) = DRMLISTENTRY(typeof(*__item), (__list)->next, __head), \
(__temp) = DRMLISTENTRY(typeof(*__item), \
(__item)->__head.next, __head); \
&(__item)->__head != (__list); \
(__item) = (__temp), \
(__temp) = DRMLISTENTRY(typeof(*__item), \
(__temp)->__head.next, __head))
#define DRMLISTJOIN(__list, __join) if (!DRMLISTEMPTY(__list)) { \
(__list)->next->prev = (__join); \
(__list)->prev->next = (__join)->next; \
(__join)->next->prev = (__list)->prev; \
(__join)->next = (__list)->next; \
}
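(A hedged usage note for the list macros above -- illustrative only, not part of this commit; `my_item` and its embedded `head` member are assumptions.)

/* Hypothetical usage: intrusive list built from the DRMLIST* macros. */
struct my_item { int value; drmMMListHead head; };
drmMMListHead list;
DRMINITLISTHEAD(&list);
struct my_item *item = (struct my_item *)malloc(sizeof(*item));
DRMLISTADDTAIL(&item->head, &list);
struct my_item *it;
DRMLISTFOREACHENTRY(it, &list, head) {
    /* visit each my_item in insertion order */
}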

View File

@ -0,0 +1,419 @@
/*
* Copyright 2012 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdbool.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include "libdrm_lists.h"
#include "libdrm_atomics.h"
#include "nouveau_drm.h"
#include "nouveau.h"
#include "private.h"
#include "nvif/class.h"
#include "nvif/cl0080.h"
#include "nvif/ioctl.h"
#include "nvif/unpack.h"
#ifdef DEBUG
# define TRACE(x...) printf("nouveau: " x)
# define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__)
#else
# define TRACE(x...)
# define CALLED()
#endif
/* Unused
int
nouveau_object_mthd(struct nouveau_object *obj,
uint32_t mthd, void *data, uint32_t size)
{
return 0;
}
*/
/* Unused
void
nouveau_object_sclass_put(struct nouveau_sclass **psclass)
{
}
*/
/* Unused
int
nouveau_object_sclass_get(struct nouveau_object *obj,
struct nouveau_sclass **psclass)
{
return 0;
}
*/
int
nouveau_object_mclass(struct nouveau_object *obj,
const struct nouveau_mclass *mclass)
{
// TODO: Only used for VP3 firmware upload
CALLED();
return 0;
}
/* NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX */
int
nouveau_object_new(struct nouveau_object *parent, uint64_t handle,
uint32_t oclass, void *data, uint32_t length,
struct nouveau_object **pobj)
{
struct nouveau_object *obj;
CALLED();
if (!(obj = calloc(1, sizeof(*obj))))
return -ENOMEM;
if (oclass == NOUVEAU_FIFO_CHANNEL_CLASS)
{
struct nouveau_fifo *fifo;
if (!(fifo = calloc(1, sizeof(*fifo)))) {
free(obj);
return -ENOMEM;
}
fifo->object = parent;
fifo->channel = 0;
fifo->pushbuf = 0;
obj->data = fifo;
obj->length = sizeof(*fifo);
}
obj->parent = parent;
obj->oclass = oclass;
*pobj = obj;
return 0;
}
/* NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX */
void
nouveau_object_del(struct nouveau_object **pobj)
{
CALLED();
if (!pobj)
return;
struct nouveau_object *obj = *pobj;
if (!obj)
return;
if (obj->data)
free(obj->data);
free(obj);
*pobj = NULL;
}
void
nouveau_drm_del(struct nouveau_drm **pdrm)
{
CALLED();
struct nouveau_drm *drm = *pdrm;
free(drm);
*pdrm = NULL;
}
int
nouveau_drm_new(int fd, struct nouveau_drm **pdrm)
{
CALLED();
struct nouveau_drm *drm;
if (!(drm = calloc(1, sizeof(*drm)))) {
return -ENOMEM;
}
drm->fd = fd;
*pdrm = drm;
return 0;
}
int
nouveau_device_new(struct nouveau_object *parent, int32_t oclass,
void *data, uint32_t size, struct nouveau_device **pdev)
{
struct nouveau_drm *drm = nouveau_drm(parent);
struct nouveau_device_priv *nvdev;
//Result rc;
CALLED();
if (!(nvdev = calloc(1, sizeof(*nvdev))))
return -ENOMEM;
*pdev = &nvdev->base;
nvdev->base.object.parent = &drm->client;
nvdev->base.object.handle = ~0ULL;
nvdev->base.object.oclass = NOUVEAU_DEVICE_CLASS;
nvdev->base.object.length = ~0;
nvdev->base.chipset = 0x120; // NVGPU_GPU_ARCH_GM200
return 0;
}
void
nouveau_device_del(struct nouveau_device **pdev)
{
CALLED();
struct nouveau_device_priv *nvdev = nouveau_device(*pdev);
if (nvdev) {
free(nvdev->client);
free(nvdev);
*pdev = NULL;
}
}
int
nouveau_getparam(struct nouveau_device *dev, uint64_t param, uint64_t *value)
{
/* NOUVEAU_GETPARAM_PTIMER_TIME = NVGPU_GPU_IOCTL_GET_GPU_TIME */
return 0;
}
/* Unused
int
nouveau_setparam(struct nouveau_device *dev, uint64_t param, uint64_t value)
{
return 0;
}
*/
int
nouveau_client_new(struct nouveau_device *dev, struct nouveau_client **pclient)
{
struct nouveau_device_priv *nvdev = nouveau_device(dev);
struct nouveau_client_priv *pcli;
int id = 0, i, ret = -ENOMEM;
uint32_t *clients;
CALLED();
for (i = 0; i < nvdev->nr_client; i++) {
id = ffs(nvdev->client[i]) - 1;
if (id >= 0)
goto out;
}
clients = realloc(nvdev->client, sizeof(uint32_t) * (i + 1));
if (!clients)
goto unlock;
nvdev->client = clients;
nvdev->client[i] = 0;
nvdev->nr_client++;
out:
pcli = calloc(1, sizeof(*pcli));
if (pcli) {
nvdev->client[i] |= (1 << id);
pcli->base.device = dev;
pcli->base.id = (i * 32) + id;
ret = 0;
}
*pclient = &pcli->base;
unlock:
return ret;
}
void
nouveau_client_del(struct nouveau_client **pclient)
{
struct nouveau_client_priv *pcli = nouveau_client(*pclient);
struct nouveau_device_priv *nvdev;
CALLED();
if (pcli) {
int id = pcli->base.id;
nvdev = nouveau_device(pcli->base.device);
nvdev->client[id / 32] &= ~(1 << (id % 32));
cli_map_free(&pcli->base);
free(pcli);
}
}
static void
nouveau_bo_del(struct nouveau_bo *bo)
{
CALLED();
struct nouveau_bo_priv *nvbo = nouveau_bo(bo);
free(nvbo);
}
/* Fake mapped data to dereference without crashing
* Value of 1 to indicate signalled fence sequence counter */
static uint32_t MapData = 1;
int
nouveau_bo_new(struct nouveau_device *dev, uint32_t flags, uint32_t align,
uint64_t size, union nouveau_bo_config *config,
struct nouveau_bo **pbo)
{
CALLED();
struct nouveau_device_priv *nvdev = nouveau_device(dev);
struct nouveau_bo_priv *nvbo = calloc(1, sizeof(*nvbo));
struct nouveau_bo *bo = &nvbo->base;
nvbo->map_addr = &MapData;
atomic_set(&nvbo->refcnt, 1);
*pbo = bo;
return 0;
}
/* Unused
static int
nouveau_bo_wrap_locked(struct nouveau_device *dev, uint32_t handle,
struct nouveau_bo **pbo, int name)
{
return 0;
}
static void
nouveau_bo_make_global(struct nouveau_bo_priv *nvbo)
{
}
*/
int
nouveau_bo_wrap(struct nouveau_device *dev, uint32_t handle,
struct nouveau_bo **pbo)
{
// TODO: NV30-only
CALLED();
return 0;
}
int
nouveau_bo_name_ref(struct nouveau_device *dev, uint32_t name,
struct nouveau_bo **pbo)
{
CALLED();
struct nouveau_device_priv *nvdev = nouveau_device(dev);
struct nouveau_bo_priv *nvbo = calloc(1, sizeof(*nvbo));
struct nouveau_bo *bo = &nvbo->base;
atomic_set(&nvbo->refcnt, 1);
*pbo = bo;
return 0;
}
int
nouveau_bo_name_get(struct nouveau_bo *bo, uint32_t *name)
{
// TODO: Unimplemented
CALLED();
return 0;
}
void
nouveau_bo_ref(struct nouveau_bo *bo, struct nouveau_bo **pref)
{
CALLED();
struct nouveau_bo *ref = *pref;
if (bo) {
atomic_inc(&nouveau_bo(bo)->refcnt);
}
if (ref) {
if (atomic_dec_and_test(&nouveau_bo(ref)->refcnt))
nouveau_bo_del(ref);
}
*pref = bo;
}
int
nouveau_bo_prime_handle_ref(struct nouveau_device *dev, int prime_fd,
struct nouveau_bo **bo)
{
// TODO: Unimplemented
CALLED();
return 0;
}
int
nouveau_bo_set_prime(struct nouveau_bo *bo, int *prime_fd)
{
// TODO: Unimplemented
CALLED();
return 0;
}
int
nouveau_bo_get_syncpoint(struct nouveau_bo *bo, unsigned int *out_threshold)
{
CALLED();
struct nouveau_bo_priv *nvbo = nouveau_bo(bo);
return 0;
}
int
nouveau_bo_wait(struct nouveau_bo *bo, uint32_t access,
struct nouveau_client *client)
{
CALLED();
struct nouveau_bo_priv *nvbo = nouveau_bo(bo);
struct nouveau_pushbuf *push;
int ret = 0;
return ret;
}
int
nouveau_bo_map(struct nouveau_bo *bo, uint32_t access,
struct nouveau_client *client)
{
CALLED();
struct nouveau_bo_priv *nvbo = nouveau_bo(bo);
bo->map = nvbo->map_addr;
return nouveau_bo_wait(bo, access, client);
}
void
nouveau_bo_unmap(struct nouveau_bo *bo)
{
CALLED();
bo->map = NULL;
}
struct nouveau_screen;
bool nouveau_drm_screen_unref(struct nouveau_screen *screen)
{
return true;
}


@ -0,0 +1,86 @@
#ifndef __NOUVEAU_LIBDRM_PRIVATE_H__
#define __NOUVEAU_LIBDRM_PRIVATE_H__
#include "libdrm_atomics.h"
#include "nouveau_drm.h"
#include "nouveau.h"
#include <switch.h>
#define BO_MAP_NUM_BUCKETS 31
struct nouveau_client_bo_map_entry {
struct nouveau_client_bo_map_entry *next;
struct nouveau_client_bo_map_entry **prev_next;
struct drm_nouveau_gem_pushbuf_bo *kref;
struct nouveau_pushbuf *push;
uint32_t bo_handle;
};
struct nouveau_client_bo_map {
struct nouveau_client_bo_map_entry *buckets[BO_MAP_NUM_BUCKETS+1];
};
struct nouveau_client_priv {
struct nouveau_client base;
struct nouveau_client_bo_map bomap;
};
static inline struct nouveau_client_priv *
nouveau_client(struct nouveau_client *client)
{
return (struct nouveau_client_priv *)client;
}
void
cli_map_free(struct nouveau_client *);
struct drm_nouveau_gem_pushbuf_bo *
cli_kref_get(struct nouveau_client *, struct nouveau_bo *bo);
struct nouveau_pushbuf *
cli_push_get(struct nouveau_client *, struct nouveau_bo *bo);
void
cli_kref_set(struct nouveau_client *, struct nouveau_bo *bo,
struct drm_nouveau_gem_pushbuf_bo *kref,
struct nouveau_pushbuf *push);
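/* Illustrative sketch only (the real implementation lives in nouveau.c and may
 * differ): with the bucket array above, a lookup would typically hash the bo
 * handle onto one of the BO_MAP_NUM_BUCKETS chains and walk it, e.g.
 *
 *     static inline struct nouveau_client_bo_map_entry *
 *     bo_map_lookup(struct nouveau_client_bo_map *bomap, uint32_t handle)
 *     {
 *         struct nouveau_client_bo_map_entry *ent;
 *         for (ent = bomap->buckets[handle % BO_MAP_NUM_BUCKETS]; ent; ent = ent->next)
 *             if (ent->bo_handle == handle)
 *                 return ent;
 *         return NULL;
 *     }
 *
 * The extra (BO_MAP_NUM_BUCKETS + 1)th bucket presumably collects entries for
 * buffers that have no handle assigned yet. */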
struct nouveau_bo_priv {
struct nouveau_bo base;
struct nouveau_list head;
atomic_t refcnt;
void* map_addr;
uint32_t name;
uint32_t access;
NvBuffer buffer;
NvFence fence;
};
static inline struct nouveau_bo_priv *
nouveau_bo(struct nouveau_bo *bo)
{
return (struct nouveau_bo_priv *)bo;
}
struct nouveau_device_priv {
struct nouveau_device base;
int close;
struct nouveau_list bo_list;
uint32_t *client;
int nr_client;
bool have_bo_usage;
int gart_limit_percent, vram_limit_percent;
uint64_t allocspace_offset;
Mutex lock;
NvGpu gpu;
};
static inline struct nouveau_device_priv *
nouveau_device(struct nouveau_device *dev)
{
return (struct nouveau_device_priv *)dev;
}
#endif


@ -0,0 +1,457 @@
/*
* Copyright 2012 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "libdrm_lists.h"
#include "nouveau_drm.h"
#include "nouveau.h"
#include "private.h"
#ifdef DEBUG
# define TRACE(x...) printf("nouveau: " x)
# define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__)
#else
# define TRACE(x...)
# define CALLED()
#endif
struct nouveau_pushbuf_krec {
struct nouveau_pushbuf_krec *next;
struct drm_nouveau_gem_pushbuf_bo buffer[NOUVEAU_GEM_MAX_BUFFERS];
struct drm_nouveau_gem_pushbuf_push push[NOUVEAU_GEM_MAX_PUSH];
int nr_buffer;
int nr_push;
};
struct nouveau_pushbuf_priv {
struct nouveau_pushbuf base;
struct nouveau_pushbuf_krec *list;
struct nouveau_pushbuf_krec *krec;
struct nouveau_list bctx_list;
struct nouveau_bo *bo;
//NvBuffer fence_buf;
//u32 fence_num_cmds;
uint32_t type;
uint32_t *ptr;
uint32_t *bgn;
int bo_next;
int bo_nr;
struct nouveau_bo *bos[];
};
static inline struct nouveau_pushbuf_priv *
nouveau_pushbuf(struct nouveau_pushbuf *push)
{
return (struct nouveau_pushbuf_priv *)push;
}
static int pushbuf_validate(struct nouveau_pushbuf *, bool);
static int pushbuf_flush(struct nouveau_pushbuf *);
static bool
pushbuf_kref_fits(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
uint32_t *domains)
{
CALLED();
// Note: We assume we always have enough memory for the bo.
return true;
}
static struct drm_nouveau_gem_pushbuf_bo *
pushbuf_kref(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
uint32_t flags)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct nouveau_pushbuf *fpush;
struct drm_nouveau_gem_pushbuf_bo *kref;
uint32_t domains, domains_wr, domains_rd;
domains = NOUVEAU_GEM_DOMAIN_GART;
domains_wr = domains * !!(flags & NOUVEAU_BO_WR);
domains_rd = domains * !!(flags & NOUVEAU_BO_RD);
/* if buffer is referenced on another pushbuf that is owned by the
* same client, we need to flush the other pushbuf first to ensure
* the correct ordering of commands
*/
fpush = cli_push_get(push->client, bo);
if (fpush && fpush != push)
pushbuf_flush(fpush);
kref = cli_kref_get(push->client, bo);
if (kref) {
kref->write_domains |= domains_wr;
kref->read_domains |= domains_rd;
} else {
if (krec->nr_buffer == NOUVEAU_GEM_MAX_BUFFERS ||
!pushbuf_kref_fits(push, bo, &domains))
return NULL;
kref = &krec->buffer[krec->nr_buffer++];
kref->bo = bo;
kref->handle = bo->handle;
kref->write_domains = domains_wr;
kref->read_domains = domains_rd;
cli_kref_set(push->client, bo, kref, push);
atomic_inc(&nouveau_bo(bo)->refcnt);
}
return kref;
}
#if 0
static uint32_t
pushbuf_krel(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
uint32_t data, uint32_t flags, uint32_t vor, uint32_t tor)
{
CALLED();
// Unneeded
return 0;
}
#endif
static void
pushbuf_dump(struct nouveau_pushbuf_krec *krec, int krec_id, int chid)
{
struct drm_nouveau_gem_pushbuf_push *kpsh;
struct drm_nouveau_gem_pushbuf_bo *kref;
struct nouveau_bo *bo;
uint32_t *bgn, *end;
int i;
TRACE("ch%d: krec %d pushes %d bufs %d\n", chid,
krec_id, krec->nr_push, krec->nr_buffer);
kref = krec->buffer;
for (i = 0; i < krec->nr_buffer; i++, kref++) {
TRACE("ch%d: buf %08x %08x %08x %08x\n", chid, i,
kref->handle, kref->read_domains, kref->write_domains);
}
kpsh = krec->push;
for (i = 0; i < krec->nr_push; i++, kpsh++) {
kref = krec->buffer + kpsh->bo_index;
bo = kref->bo;
bgn = (uint32_t *)((char *)bo->map + kpsh->offset);
end = bgn + (kpsh->length /4);
TRACE("ch%d: psh %08x %010llx %010llx\n", chid, kpsh->bo_index,
(unsigned long long)kpsh->offset,
(unsigned long long)(kpsh->offset + kpsh->length));
while (bgn < end)
TRACE("\t0x%08x\n", *bgn++);
}
}
static int
pushbuf_submit(struct nouveau_pushbuf *push, struct nouveau_object *chan)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->list;
struct nouveau_device *dev = push->client->device;
struct nouveau_device_priv *nvdev = nouveau_device(dev);
struct drm_nouveau_gem_pushbuf_bo *kref;
struct drm_nouveau_gem_pushbuf_push *kpsh;
struct nouveau_fifo *fifo = chan->data;
struct nouveau_bo *bo;
struct nouveau_bo_priv *nvbo;
int krec_id = 0;
int ret = 0, i;
/* Submission is stubbed out in this port; report success */
return ret;
}
static int
pushbuf_flush(struct nouveau_pushbuf *push)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct drm_nouveau_gem_pushbuf_bo *kref;
struct nouveau_bufctx *bctx, *btmp;
struct nouveau_bo *bo;
int ret = 0, i;
/* Flushing is stubbed out in this port; report success */
return ret;
}
static void
pushbuf_refn_fail(struct nouveau_pushbuf *push, int sref)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct drm_nouveau_gem_pushbuf_bo *kref;
kref = krec->buffer + sref;
while (krec->nr_buffer-- > sref) {
struct nouveau_bo *bo = kref->bo;
cli_kref_set(push->client, bo, NULL, NULL);
nouveau_bo_ref(NULL, &bo);
kref++;
}
krec->nr_buffer = sref;
}
static int
pushbuf_refn(struct nouveau_pushbuf *push, bool retry,
struct nouveau_pushbuf_refn *refs, int nr)
{
CALLED();
return 0; /* Stubbed: the reference-tracking code below is currently unreachable */
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct drm_nouveau_gem_pushbuf_bo *kref;
int sref = krec->nr_buffer;
int ret = 0, i;
for (i = 0; i < nr; i++) {
kref = pushbuf_kref(push, refs[i].bo, refs[i].flags);
if (!kref) {
ret = -ENOSPC;
break;
}
}
if (ret) {
pushbuf_refn_fail(push, sref);
if (retry) {
pushbuf_flush(push);
nouveau_pushbuf_space(push, 0, 0, 0);
return pushbuf_refn(push, false, refs, nr);
}
}
return ret;
}
static int
pushbuf_validate(struct nouveau_pushbuf *push, bool retry)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct drm_nouveau_gem_pushbuf_bo *kref;
struct nouveau_bufctx *bctx = push->bufctx;
struct nouveau_bufref *bref;
int relocs = bctx ? bctx->relocs * 2: 0;
int sref, ret;
ret = nouveau_pushbuf_space(push, relocs, relocs, 0);
if (ret || bctx == NULL)
return ret;
sref = krec->nr_buffer;
DRMLISTDEL(&bctx->head);
DRMLISTADD(&bctx->head, &nvpb->bctx_list);
DRMLISTFOREACHENTRY(bref, &bctx->pending, thead) {
kref = pushbuf_kref(push, bref->bo, bref->flags);
if (!kref) {
ret = -ENOSPC;
break;
}
}
DRMLISTJOIN(&bctx->pending, &bctx->current);
DRMINITLISTHEAD(&bctx->pending);
if (ret) {
pushbuf_refn_fail(push, sref);
if (retry) {
pushbuf_flush(push);
return pushbuf_validate(push, false);
}
}
return ret;
}
typedef uint32_t u32;
static u32
generate_fence_cmdlist(u32* fence_buf, u32 syncpt_id)
{
u32* cmd = fence_buf;
*cmd++ = 0x451 | (0 << 13) | (0 << 16) | (4 << 29);
*cmd++ = 0x0B2 | (0 << 13) | (1 << 16) | (1 << 29);
*cmd++ = syncpt_id | (1 << 20);
*cmd++ = 0x451 | (0 << 13) | (0 << 16) | (4 << 29);
*cmd++ = 0x3E0 | (0 << 13) | (0 << 16) | (4 << 29);
return cmd - fence_buf;
}
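/* Annotation (not original): the words above appear to follow the Fermi+
 * pushbuffer header layout: bits 12:0 method address in dwords, bits 15:13
 * subchannel, bits 28:16 immediate data or dword count, bits 31:29 opcode
 * (1 = incrementing methods, 4 = immediate data). Read that way, the second
 * word starts a one-dword incrementing write whose payload carries syncpt_id,
 * i.e. a syncpoint operation, while the surrounding immediate writes look like
 * housekeeping; treat the exact method numbers as device-specific. */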
int
nouveau_pushbuf_new(struct nouveau_client *client, struct nouveau_object *chan,
int nr, uint32_t size, bool immediate,
struct nouveau_pushbuf **ppush)
{
CALLED();
struct nouveau_device_priv *nvdev = nouveau_device(client->device);
struct nouveau_pushbuf_priv *nvpb;
struct nouveau_pushbuf *push;
int ret;
nvpb = calloc(1, sizeof(*nvpb) + nr * sizeof(*nvpb->bos));
if (!nvpb)
return -ENOMEM;
push = &nvpb->base;
push->client = client;
push->channel = immediate ? chan : NULL;
push->flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
nvpb->type = NOUVEAU_BO_GART;
*ppush = push;
return 0;
}
void
nouveau_pushbuf_del(struct nouveau_pushbuf **ppush)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(*ppush);
free(nvpb);
}
struct nouveau_bufctx *
nouveau_pushbuf_bufctx(struct nouveau_pushbuf *push, struct nouveau_bufctx *ctx)
{
CALLED();
struct nouveau_bufctx *prev = push->bufctx;
push->bufctx = ctx;
return prev;
}
int
nouveau_pushbuf_space(struct nouveau_pushbuf *push,
uint32_t dwords, uint32_t relocs, uint32_t pushes)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct nouveau_client *client = push->client;
struct nouveau_bo *bo = NULL;
bool flushed = false;
int ret = 0;
return 0;
}
void
nouveau_pushbuf_data(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
uint64_t offset, uint64_t length)
{
CALLED();
struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(push);
struct nouveau_pushbuf_krec *krec = nvpb->krec;
struct drm_nouveau_gem_pushbuf_push *kpsh;
struct drm_nouveau_gem_pushbuf_bo *kref;
if (bo != nvpb->bo && nvpb->bgn != push->cur) {
nouveau_pushbuf_data(push, nvpb->bo,
(nvpb->bgn - nvpb->ptr) * 4,
(push->cur - nvpb->bgn) * 4);
nvpb->bgn = push->cur;
}
if (bo) {
kref = cli_kref_get(push->client, bo);
assert(kref);
kpsh = &krec->push[krec->nr_push++];
kpsh->bo_index = kref - krec->buffer;
kpsh->offset = offset;
kpsh->length = length;
}
}
int
nouveau_pushbuf_refn(struct nouveau_pushbuf *push,
struct nouveau_pushbuf_refn *refs, int nr)
{
CALLED();
return pushbuf_refn(push, true, refs, nr);
}
void
nouveau_pushbuf_reloc(struct nouveau_pushbuf *push, struct nouveau_bo *bo,
uint32_t data, uint32_t flags, uint32_t vor, uint32_t tor)
{
CALLED();
// Unimplemented
}
int
nouveau_pushbuf_validate(struct nouveau_pushbuf *push)
{
CALLED();
return 0;
}
uint32_t
nouveau_pushbuf_refd(struct nouveau_pushbuf *push, struct nouveau_bo *bo)
{
CALLED();
struct drm_nouveau_gem_pushbuf_bo *kref;
uint32_t flags = 0;
if (cli_push_get(push->client, bo) == push) {
kref = cli_kref_get(push->client, bo);
assert(kref);
if (kref->read_domains)
flags |= NOUVEAU_BO_RD;
if (kref->write_domains)
flags |= NOUVEAU_BO_WR;
}
return flags;
}
int
nouveau_pushbuf_kick(struct nouveau_pushbuf *push, struct nouveau_object *chan)
{
CALLED();
if (!push->channel)
return pushbuf_submit(push, chan);
pushbuf_flush(push);
return pushbuf_validate(push, false);
}

@ -0,0 +1 @@
Subproject commit 21c4a49c4ea64ffe97d37728dec29145746960e0

@ -0,0 +1 @@
Subproject commit 809f335799c049c9e4b84dbda212fc27f95d4e16


@ -0,0 +1,671 @@
#include "boo/graphicsdev/nx_compiler.hpp"
/*
* Copyright © 2008, 2009 Intel Corporation
* Boo Modifications © 2018 Jack Andersen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/** @file nx_compiler.cpp
*
* Based on standalone.cpp in compiler/glsl. This file provides a means to
* compile and link GLSL sources directly into driver programs for the
* Nouveau GM107 chipset configuration.
*/
#include "ast.h"
#include "glsl_parser_extras.h"
#include "ir_optimization.h"
#include "program.h"
#include "loop_analysis.h"
#include "string_to_uint_map.h"
#include "util/set.h"
#include "linker.h"
#include "ir_builder_print_visitor.h"
#include "builtin_functions.h"
#include "opt_add_neg_to_sub.h"
#include "main/shaderobj.h"
#include "st_program.h"
extern "C" {
#include "nouveau_winsys.h"
#include "nouveau_screen.h"
#include "nvc0/nvc0_program.h"
}
_GLAPI_EXPORT __thread void * _glapi_tls_Context;
_GLAPI_EXPORT __thread struct _glapi_table * _glapi_tls_Dispatch;
int
_glapi_add_dispatch( const char * const * function_names,
const char * parameter_signature )
{
return 0;
}
void
_glapi_destroy_multithread(void)
{
}
void
_glapi_check_multithread(void)
{
}
void
_glapi_set_context(void *context)
{
_glapi_tls_Context = context;
}
void *
_glapi_get_context()
{
return _glapi_tls_Context;
}
void
_glapi_set_dispatch(struct _glapi_table *dispatch)
{
_glapi_tls_Dispatch = dispatch;
}
struct _glapi_table *
_glapi_get_dispatch()
{
return _glapi_tls_Dispatch;
}
GLuint
_glapi_get_dispatch_table_size(void)
{
/*
* The dispatch table size (number of entries) is the size of the
* _glapi_table struct plus the number of dynamic entries we can add.
* The extra slots can be filled in by DRI drivers that register new
* extension functions.
*/
return 0;
}
class dead_variable_visitor : public ir_hierarchical_visitor {
public:
dead_variable_visitor()
{
variables = _mesa_set_create(NULL,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
}
virtual ~dead_variable_visitor()
{
_mesa_set_destroy(variables, NULL);
}
virtual ir_visitor_status visit(ir_variable *ir)
{
/* If the variable is auto or temp, add it to the set of variables that
* are candidates for removal.
*/
if (ir->data.mode != ir_var_auto && ir->data.mode != ir_var_temporary)
return visit_continue;
_mesa_set_add(variables, ir);
return visit_continue;
}
virtual ir_visitor_status visit(ir_dereference_variable *ir)
{
struct set_entry *entry = _mesa_set_search(variables, ir->var);
/* If a variable is dereferenced at all, remove it from the set of
* variables that are candidates for removal.
*/
if (entry != NULL)
_mesa_set_remove(variables, entry);
return visit_continue;
}
void remove_dead_variables()
{
struct set_entry *entry;
set_foreach(variables, entry) {
ir_variable *ir = (ir_variable *) entry->key;
assert(ir->ir_type == ir_type_variable);
ir->remove();
}
}
private:
set *variables;
};
void nx_compiler::compile_shader(struct gl_context *ctx, struct gl_shader *shader)
{
struct _mesa_glsl_parse_state *state =
new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader);
_mesa_glsl_compile_shader(ctx, shader, m_options.dump_ast,
m_options.dump_hir, true);
/* Print out the resulting IR */
if (!state->error && m_options.dump_lir) {
_mesa_print_ir(stdout, shader->ir, state);
}
}
nx_compiler::nx_compiler()
{
m_options.glsl_version = 330;
m_options.do_link = true;
}
nx_compiler::~nx_compiler()
{
if (m_ownsCtx)
{
_mesa_glsl_release_types();
_mesa_glsl_release_builtin_functions();
if (m_st)
st_destroy_context(m_st);
if (m_screen)
m_screen->destroy(m_screen);
}
}
bool nx_compiler::initialize(struct pipe_screen *screen, struct st_context *st,
const struct standalone_options *o)
{
m_screen = screen;
m_st = st;
if (o)
memcpy(&m_options, o, sizeof(*o));
return true;
}
bool nx_compiler::initialize(const struct standalone_options* o)
{
m_ownsCtx = true;
bool glsl_es;
if (o)
memcpy(&m_options, o, sizeof(*o));
switch (m_options.glsl_version) {
case 100:
case 300:
glsl_es = true;
break;
case 110:
case 120:
case 130:
case 140:
case 150:
case 330:
case 400:
case 410:
case 420:
case 430:
case 440:
case 450:
case 460:
glsl_es = false;
break;
default:
fprintf(stderr, "Unrecognized GLSL version `%d'\n", m_options.glsl_version);
return false;
}
gl_api use_api;
if (glsl_es) {
use_api = API_OPENGLES2;
} else {
use_api = m_options.glsl_version > 130 ? API_OPENGL_CORE : API_OPENGL_COMPAT;
}
struct nouveau_screen *(*init)(struct nouveau_device *);
struct nouveau_drm *fakedrm = (struct nouveau_drm *)malloc(sizeof(struct nouveau_drm));
if (!fakedrm)
return false;
memset(fakedrm, 0, sizeof(*fakedrm));
nouveau_device *ndev;
if (nouveau_device_new(&fakedrm->client, 0, nullptr, 0, &ndev))
return false;
switch (ndev->chipset & ~0xf) {
#if 0
case 0x30:
case 0x40:
case 0x60:
init = nv30_screen_create;
break;
case 0x50:
case 0x80:
case 0x90:
case 0xa0:
init = nv50_screen_create;
break;
#endif
default:
case 0xc0:
case 0xd0:
case 0xe0:
case 0xf0:
case 0x100:
case 0x110:
case 0x120:
case 0x130:
init = nvc0_screen_create;
break;
}
struct nouveau_screen *screen = init(ndev);
if (!screen)
return false;
screen->refcount = 1;
struct pipe_context *p_ctx = screen->base.context_create(&screen->base, nullptr, 0);
if (!p_ctx)
{
screen->base.destroy(&screen->base);
return false;
}
st_config_options opts = {};
struct st_context *st = st_create_context(use_api, p_ctx, nullptr, nullptr, &opts, false);
if (!st)
{
screen->base.destroy(&screen->base);
return false;
}
return initialize(&screen->base, st);
}
nx_shader_stage_object::nx_shader_stage_object(const nx_shader_stage_object& other)
: m_parent(other.m_parent)
{
if (!other.m_shader || !m_parent)
return;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader(ctx, &m_shader, other.m_shader);
}
nx_shader_stage_object& nx_shader_stage_object::operator=(const nx_shader_stage_object& other)
{
m_parent = other.m_parent;
if (!other.m_shader || !m_parent)
return *this;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader(ctx, &m_shader, other.m_shader);
return *this;
}
void nx_shader_stage_object::reset()
{
if (!m_shader || !m_parent)
return;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader(ctx, &m_shader, nullptr);
}
nx_shader_stage_object::operator bool() const
{
if (!m_shader)
return false;
return m_shader->CompileStatus;
}
nx_shader_stage nx_shader_stage_object::stage() const
{
return nx_shader_stage(m_shader->Stage);
}
const char* nx_shader_stage_object::info_log() const
{
if (!m_shader)
return nullptr;
return m_shader->InfoLog;
}
nx_linked_shader::nx_linked_shader(const nx_linked_shader& other)
: m_parent(other.m_parent)
{
if (!other.m_program || !m_parent)
return;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader_program(ctx, &m_program, other.m_program);
}
nx_linked_shader& nx_linked_shader::operator=(const nx_linked_shader& other)
{
m_parent = other.m_parent;
if (!other.m_program || !m_parent)
return *this;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader_program(ctx, &m_program, other.m_program);
return *this;
}
void nx_linked_shader::reset()
{
if (!m_program || !m_parent)
return;
struct gl_context *ctx = m_parent->m_st->ctx;
_mesa_reference_shader_program(ctx, &m_program, nullptr);
}
nx_shader_stage_object nx_compiler::compile(nx_shader_stage type, const char *source)
{
struct gl_context *ctx = m_st->ctx;
nx_shader_stage_object ret(*this);
ret.m_shader = rzalloc(nullptr, gl_shader);
assert(ret.m_shader != NULL);
ret.m_shader->RefCount = 1;
ret.m_shader->Stage = gl_shader_stage(type);
ret.m_shader->Source = source;
compile_shader(ctx, ret.m_shader);
/* Mesa doesn't actually own the source, so take it away here */
ret.m_shader->Source = nullptr;
return ret;
}
nx_linked_shader nx_compiler::link(unsigned num_stages, const nx_shader_stage_object **stages, std::string* infoLog)
{
nx_linked_shader ret(*this);
int status = EXIT_SUCCESS;
struct gl_context *ctx = m_st->ctx;
struct gl_shader_program *whole_program;
whole_program = rzalloc (NULL, struct gl_shader_program);
assert(whole_program != NULL);
whole_program->Type = GL_SHADER_PROGRAM_MESA;
whole_program->data = rzalloc(whole_program, struct gl_shader_program_data);
assert(whole_program->data != NULL);
whole_program->data->RefCount = 1;
whole_program->data->InfoLog = ralloc_strdup(whole_program->data, "");
ret.m_program = whole_program;
whole_program->Shaders = (struct gl_shader **)calloc(num_stages, sizeof(struct gl_shader *));
assert(whole_program->Shaders != NULL);
for (unsigned i = 0; i < num_stages; i++) {
whole_program->Shaders[whole_program->NumShaders] = stages[i]->m_shader;
stages[i]->m_shader->RefCount++;
whole_program->NumShaders++;
if (!stages[i]->m_shader->CompileStatus) {
status = EXIT_FAILURE;
break;
}
}
if (status == EXIT_SUCCESS) {
_mesa_clear_shader_program_data(ctx, whole_program);
if (m_options.do_link) {
link_shaders(ctx, whole_program);
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (whole_program->_LinkedShaders[i])
whole_program->_LinkedShaders[i]->Program->Target = _mesa_shader_stage_to_program(i);
}
} else {
const gl_shader_stage stage = whole_program->Shaders[0]->Stage;
whole_program->data->LinkStatus = LINKING_SUCCESS;
whole_program->_LinkedShaders[stage] =
link_intrastage_shaders(whole_program /* mem_ctx */,
ctx,
whole_program,
whole_program->Shaders,
1,
true);
whole_program->_LinkedShaders[stage]->Program->Target = _mesa_shader_stage_to_program(stage);
/* Par-linking can fail, for example, if there are undefined external
* references.
*/
if (whole_program->_LinkedShaders[stage] != NULL) {
assert(whole_program->data->LinkStatus);
struct gl_shader_compiler_options *const compiler_options =
&ctx->Const.ShaderCompilerOptions[stage];
exec_list *const ir =
whole_program->_LinkedShaders[stage]->ir;
bool progress;
do {
progress = do_function_inlining(ir);
progress = do_common_optimization(ir,
false,
false,
compiler_options,
true)
&& progress;
} while(progress);
}
}
status = (whole_program->data->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE;
if (infoLog)
*infoLog = whole_program->data->InfoLog;
if (status == EXIT_SUCCESS) {
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = whole_program->_LinkedShaders[i];
if (!shader)
continue;
add_neg_to_sub_visitor v;
visit_list_elements(&v, shader->ir);
dead_variable_visitor dv;
visit_list_elements(&dv, shader->ir);
dv.remove_dead_variables();
}
if (m_options.dump_builder) {
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = whole_program->_LinkedShaders[i];
if (!shader)
continue;
_mesa_print_builder_for_ir(stdout, shader->ir);
}
}
ctx->_Shader = &ctx->Shader;
st_link_shader(ctx, whole_program);
ctx->_Shader = nullptr;
return ret;
}
}
return nx_linked_shader(*this);
}
static void
SizeProgramBuffer(const nvc0_program *prog, size_t &sz)
{
/* 140 is assumed to be the combined size of the fixed fields emitted by BuildProgramBuffer below; keep the two in sync */
sz += 140;
sz += prog->code_size;
}
template<class T> static void
OutputField(T f, uint8_t *&ptr)
{
memcpy(ptr, &f, sizeof(f));
ptr += sizeof(f);
}
static void
BuildProgramBuffer(const nvc0_program *prog, uint8_t *&ptr)
{
OutputField(prog->type, ptr);
OutputField(prog->translated, ptr);
OutputField(prog->need_tls, ptr);
OutputField(prog->num_gprs, ptr);
OutputField<uint32_t>(prog->code_base, ptr);
OutputField<uint32_t>(prog->code_size, ptr);
OutputField<uint32_t>(prog->parm_size, ptr);
for (const auto& h : prog->hdr)
OutputField(h, ptr);
for (const auto& h : prog->flags)
OutputField(h, ptr);
OutputField(prog->vp.clip_mode, ptr);
OutputField(prog->vp.clip_enable, ptr);
OutputField(prog->vp.cull_enable, ptr);
OutputField(prog->vp.num_ucps, ptr);
OutputField(prog->vp.edgeflag, ptr);
OutputField(prog->vp.need_vertex_id, ptr);
OutputField(prog->vp.need_draw_parameters, ptr);
OutputField(prog->fp.early_z, ptr);
OutputField(prog->fp.colors, ptr);
OutputField(prog->fp.color_interp[0], ptr);
OutputField(prog->fp.color_interp[1], ptr);
OutputField(prog->fp.sample_mask_in, ptr);
OutputField(prog->fp.force_persample_interp, ptr);
OutputField(prog->fp.flatshade, ptr);
OutputField(prog->fp.reads_framebuffer, ptr);
OutputField(prog->fp.post_depth_coverage, ptr);
OutputField(prog->tp.tess_mode, ptr);
OutputField(prog->tp.input_patch_size, ptr);
OutputField(prog->cp.lmem_size, ptr);
OutputField(prog->cp.smem_size, ptr);
OutputField(prog->num_barriers, ptr);
memcpy(ptr, prog->code, prog->code_size);
ptr += prog->code_size;
}
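/* Illustrative sketch (not part of this file): a loader on the target side
 * would mirror OutputField and read the fields back in exactly the order
 * BuildProgramBuffer writes them, e.g.
 *
 *     template<class T> static void
 *     ReadField(T &f, const uint8_t *&ptr)
 *     {
 *         memcpy(&f, ptr, sizeof(f));
 *         ptr += sizeof(f);
 *     }
 *
 * followed by copying code_size bytes of shader code; the field order and the
 * fixed-size prefix assumed by SizeProgramBuffer must stay in sync with this
 * writer. */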
std::pair<std::shared_ptr<uint8_t[]>, size_t>
nx_compiler::offline_link(unsigned num_stages, const nx_shader_stage_object **stages, std::string *infoLog)
{
std::pair<std::shared_ptr<uint8_t[]>, size_t> ret = {};
auto whole_program = link(num_stages, stages, infoLog);
if (!whole_program)
return ret;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = whole_program.m_program->_LinkedShaders[i];
if (!shader)
continue;
struct gl_program *prog = shader->Program;
switch (prog->Target) {
case GL_VERTEX_PROGRAM_ARB: {
struct st_vertex_program *p = (struct st_vertex_program *)prog;
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
SizeProgramBuffer(dp, ret.second);
break;
}
case GL_TESS_CONTROL_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
SizeProgramBuffer(dp, ret.second);
break;
}
case GL_TESS_EVALUATION_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
SizeProgramBuffer(dp, ret.second);
break;
}
case GL_GEOMETRY_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
SizeProgramBuffer(dp, ret.second);
break;
}
case GL_FRAGMENT_PROGRAM_ARB: {
struct st_fragment_program *p = (struct st_fragment_program *)prog;
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
SizeProgramBuffer(dp, ret.second);
break;
}
default:
assert(0);
}
}
ret.first.reset(new uint8_t[ret.second]);
uint8_t *pbuf = ret.first.get();
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = whole_program.m_program->_LinkedShaders[i];
if (!shader)
continue;
struct gl_program *prog = shader->Program;
switch (prog->Target) {
case GL_VERTEX_PROGRAM_ARB: {
struct st_vertex_program *p = (struct st_vertex_program *)prog;
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
BuildProgramBuffer(dp, pbuf);
break;
}
case GL_TESS_CONTROL_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
BuildProgramBuffer(dp, pbuf);
break;
}
case GL_TESS_EVALUATION_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
BuildProgramBuffer(dp, pbuf);
break;
}
case GL_GEOMETRY_PROGRAM_NV: {
struct st_common_program *p = st_common_program(prog);
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
BuildProgramBuffer(dp, pbuf);
break;
}
case GL_FRAGMENT_PROGRAM_ARB: {
struct st_fragment_program *p = (struct st_fragment_program *)prog;
nvc0_program *dp = (nvc0_program *)p->variants->driver_shader;
BuildProgramBuffer(dp, pbuf);
break;
}
default:
assert(0);
}
}
return ret;
}
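/* Illustrative usage (not part of this file); the object names and the output
 * stream are hypothetical:
 *
 *     std::string log;
 *     const nx_shader_stage_object *stages[] = { &vertObj, &fragObj };
 *     auto blob = compiler.offline_link(2, stages, &log);
 *     if (blob.first)
 *         fwrite(blob.first.get(), 1, blob.second, outFile);
 *
 * The returned buffer concatenates one serialized nvc0_program image per
 * linked stage, in the order produced by BuildProgramBuffer. */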


@ -0,0 +1,40 @@
#include "boo/graphicsdev/nx_compiler.hpp"
int main(int argc, char** argv)
{
nx_compiler c;
c.initialize();
nx_shader_stage_object objs[] =
{
c.compile(nx_shader_stage::VERTEX,
"#version 330\n"
"#extension GL_ARB_separate_shader_objects: enable\n"
"#extension GL_ARB_shading_language_420pack: enable\n"
"layout(location=0) in vec3 in_pos;\n"
"layout(location=1) in vec3 in_norm;\n"
"layout(location=2) in vec2 in_uv;\n"
"layout(location=0) out vec2 out_uv;\n"
"void main()\n"
"{\n"
" gl_Position = vec4(in_pos, 1.0).zyxx;\n"
" out_uv = in_uv;\n"
"}"),
c.compile(nx_shader_stage::FRAGMENT,
"#version 330\n"
"#extension GL_ARB_separate_shader_objects: enable\n"
"#extension GL_ARB_shading_language_420pack: enable\n"
"layout(binding=8) uniform sampler2D texs[2];\n"
"layout(location=0) out vec4 out_frag;\n"
"layout(location=0) in vec2 out_uv;\n"
"void main()\n"
"{\n"
" out_frag = texture(texs[0], out_uv) + texture(texs[1], out_uv);\n"
"}")
};
std::string log;
auto linkData = c.link(2, objs, &log);
return 0;
}


@ -0,0 +1,25 @@
[binaries]
c = '@CMAKE_C_COMPILER@'
cpp = '@CMAKE_CXX_COMPILER@'
ar = '@CMAKE_AR@'
strip = '@CMAKE_STRIP@'
[properties]
sizeof_int = 4
sizeof_wchar_t = 4
sizeof_void* = 8
alignment_char = 1
alignment_void* = 8
alignment_double = 8
c_args = ['-march=armv8-a', '-mtune=cortex-a57', '-mtp=soft', '-fPIC', '-D__SWITCH__', '-I@DEVKITPRO@/libnx/include',
'-I@LIBDRM_DIR@/include', '-DDEBUG=1', '-DHAVE_LIBDRM', '-D_GNU_SOURCE']
cpp_args = ['-march=armv8-a', '-mtune=cortex-a57', '-mtp=soft', '-fPIC', '-D__SWITCH__', '-I@DEVKITPRO@/libnx/include',
'-I@LIBDRM_DIR@/include', '-DDEBUG=1', '-DHAVE_LIBDRM', '-D_GNU_SOURCE']
[host_machine]
system = 'switch'
cpu_family = 'armv8-a'
cpu = 'cortex-a57'
endian = 'little'

File diff suppressed because it is too large


@ -6,8 +6,8 @@
namespace boo
{
DeviceBase::DeviceBase(DeviceToken* token)
: m_token(token)
DeviceBase::DeviceBase(uint64_t typeHash, DeviceToken* token)
: m_typeHash(typeHash), m_token(token)
{
}


@ -13,13 +13,14 @@ bool DeviceSignature::DeviceMatchToken(const DeviceToken& token, const TDeviceSi
{
if (token.getDeviceType() == DeviceType::HID)
{
uint64_t genPadHash = dev_typeid(GenericPad);
bool hasGenericPad = false;
for (const DeviceSignature* sig : sigSet)
{
if (sig->m_vid == token.getVendorId() && sig->m_pid == token.getProductId() &&
sig->m_type != DeviceType::HID)
return false;
if (sig->m_typeIdx == typeid(GenericPad))
if (sig->m_typeHash == genPadHash)
hasGenericPad = true;
}
return hasGenericPad;


@ -1,4 +1,5 @@
#include "boo/inputdev/DolphinSmashAdapter.hpp"
#include "boo/inputdev/DeviceSignature.hpp"
#include <cstdio>
#include <cstring>
@ -8,7 +9,8 @@ namespace boo
* Reference: https://github.com/ToadKing/wii-u-gc-adapter/blob/master/wii-u-gc-adapter.c
*/
DolphinSmashAdapter::DolphinSmashAdapter(DeviceToken* token) : TDeviceBase<IDolphinSmashAdapterCallback>(token) {}
DolphinSmashAdapter::DolphinSmashAdapter(DeviceToken* token)
: TDeviceBase<IDolphinSmashAdapterCallback>(dev_typeid(DolphinSmashAdapter), token) {}
DolphinSmashAdapter::~DolphinSmashAdapter() {}


@ -1,4 +1,5 @@
#include "boo/inputdev/DualshockPad.hpp"
#include "boo/inputdev/DeviceSignature.hpp"
#define _USE_MATH_DEFINES
#include <cmath>
#include <iostream>
@ -33,7 +34,7 @@ static const uint8_t defaultReport[49] = {
};
DualshockPad::DualshockPad(DeviceToken* token)
: TDeviceBase<IDualshockPadCallback>(token),
: TDeviceBase<IDualshockPadCallback>(dev_typeid(DualshockPad), token),
m_rumbleRequest(EDualshockMotor::None),
m_rumbleState(EDualshockMotor::None)
{


@ -5,7 +5,7 @@ namespace boo
{
GenericPad::GenericPad(DeviceToken* token)
: TDeviceBase<IGenericPadCallback>(token)
: TDeviceBase<IGenericPadCallback>(dev_typeid(GenericPad), token)
{
}


@ -0,0 +1,34 @@
#include "IHIDDevice.hpp"
namespace boo
{
class HIDDeviceNX : public IHIDDevice
{
DeviceToken& m_token;
std::shared_ptr<DeviceBase> m_devImp;
std::string_view m_devPath;
public:
HIDDeviceNX(DeviceToken& token, const std::shared_ptr<DeviceBase>& devImp)
: m_token(token),
m_devImp(devImp),
m_devPath(token.getDevicePath())
{
}
void _deviceDisconnected() {}
bool _sendUSBInterruptTransfer(const uint8_t* data, size_t length) { return false; }
size_t _receiveUSBInterruptTransfer(uint8_t* data, size_t length) { return 0; }
std::vector<uint8_t> _getReportDescriptor() { return {}; }
bool _sendHIDReport(const uint8_t* data, size_t length, HIDReportType tp, uint32_t message) { return false; }
size_t _receiveHIDReport(uint8_t* data, size_t length, HIDReportType tp, uint32_t message) { return 0; }
void _startThread() {}
};
std::shared_ptr<IHIDDevice> IHIDDeviceNew(DeviceToken& token, const std::shared_ptr<DeviceBase>& devImp)
{
return std::make_shared<HIDDeviceNX>(token, devImp);
}
}


@ -0,0 +1,25 @@
#include "boo/inputdev/IHIDListener.hpp"
namespace boo
{
class HIDListenerNX : public IHIDListener
{
DeviceFinder& m_finder;
public:
HIDListenerNX(DeviceFinder& finder)
: m_finder(finder)
{}
bool startScanning() { return false; }
bool stopScanning() { return false; }
bool scanNow() { return false; }
};
std::unique_ptr<IHIDListener> IHIDListenerNew(DeviceFinder& finder)
{
return std::make_unique<HIDListenerNX>(finder);
}
}


@ -1,9 +1,10 @@
#include "boo/inputdev/NintendoPowerA.hpp"
#include "boo/inputdev/DeviceSignature.hpp"
#include <memory.h>
namespace boo
{
NintendoPowerA::NintendoPowerA(DeviceToken* token)
: TDeviceBase<INintendoPowerACallback>(token)
: TDeviceBase<INintendoPowerACallback>(dev_typeid(NintendoPowerA), token)
{
}

lib/nx/ApplicationNX.cpp Normal file

@ -0,0 +1,139 @@
#include "boo/IApplication.hpp"
#include "logvisor/logvisor.hpp"
#include "nxstl/thread"
#include "nxstl/condition_variable"
#include "boo/graphicsdev/NX.hpp"
#include <limits.h>
#include <switch.h>
namespace boo
{
static logvisor::Module Log("boo::NXApplication");
std::shared_ptr<IWindow> _WindowNXNew(std::string_view title, NXContext* nxCtx);
class ApplicationNX : public IApplication
{
IApplicationCallback& m_callback;
const std::string m_uniqueName;
const std::string m_friendlyName;
const std::string m_pname;
const std::vector<std::string> m_args;
NXContext m_nxCtx;
void _deletedWindow(IWindow* window) {}
public:
ApplicationNX(IApplicationCallback& callback,
std::string_view uniqueName,
std::string_view friendlyName,
std::string_view pname,
const std::vector<std::string>& args,
std::string_view gfxApi,
uint32_t samples,
uint32_t anisotropy,
bool deepColor,
bool singleInstance)
: m_callback(callback),
m_uniqueName(uniqueName),
m_friendlyName(friendlyName),
m_pname(pname),
m_args(args)
{}
EPlatformType getPlatformType() const { return EPlatformType::NX; }
int run()
{
/* Spawn client thread */
int clientReturn = INT_MIN;
std::mutex initmt;
std::condition_variable initcv;
std::unique_lock<std::mutex> outerLk(initmt);
std::thread clientThread([&]()
{
/* Taking initmt here blocks until run() is waiting on initcv, so the notify below cannot be missed */
std::unique_lock<std::mutex> innerLk(initmt);
innerLk.unlock();
initcv.notify_one();
std::string thrName = std::string(getFriendlyName()) + " Client";
logvisor::RegisterThreadName(thrName.c_str());
clientReturn = m_callback.appMain(this);
});
initcv.wait(outerLk);
// Main graphics loop
while (clientReturn == INT_MIN && appletMainLoop())
{
// Get and process input
hidScanInput();
u32 kDown = hidKeysDown(CONTROLLER_P1_AUTO);
if (kDown & KEY_PLUS)
break;
}
m_callback.appQuitting(this);
if (clientThread.joinable())
clientThread.join();
return 0;
}
std::string_view getUniqueName() const
{
return m_uniqueName;
}
std::string_view getFriendlyName() const
{
return m_friendlyName;
}
std::string_view getProcessName() const
{
return m_pname;
}
const std::vector<std::string>& getArgs() const
{
return m_args;
}
std::shared_ptr<IWindow> m_window;
std::shared_ptr<IWindow> newWindow(std::string_view title)
{
if (m_window)
Log.report(logvisor::Fatal, "Only 1 window allowed on NX");
m_window = _WindowNXNew(title, &m_nxCtx);
return m_window;
}
};
IApplication* APP = nullptr;
int ApplicationRun(IApplication::EPlatformType platform,
IApplicationCallback& cb,
SystemStringView uniqueName,
SystemStringView friendlyName,
SystemStringView pname,
const std::vector<SystemString>& args,
std::string_view gfxApi,
uint32_t samples,
uint32_t anisotropy,
bool deepColor,
bool singleInstance)
{
std::string thrName = std::string(friendlyName) + " Main Thread";
logvisor::RegisterThreadName(thrName.c_str());
if (APP)
return 1;
APP = new ApplicationNX(cb, uniqueName, friendlyName, pname, args, gfxApi,
samples, anisotropy, deepColor, singleInstance);
int ret = APP->run();
delete APP;
APP = nullptr;
return ret;
}
}

lib/nx/WindowNX.cpp Normal file

@ -0,0 +1,120 @@
#include "boo/IWindow.hpp"
#include "boo/IGraphicsContext.hpp"
#include "logvisor/logvisor.hpp"
#include "boo/graphicsdev/NX.hpp"
#include <switch.h>
namespace boo
{
std::unique_ptr<IGraphicsCommandQueue> _NewNXCommandQueue(NXContext* ctx, IGraphicsContext* parent);
std::unique_ptr<IGraphicsDataFactory> _NewNXDataFactory(IGraphicsContext* parent, NXContext* ctx);
struct GraphicsContextNX : IGraphicsContext
{
NXContext* m_nxCtx;
std::unique_ptr<IGraphicsDataFactory> m_dataFactory;
std::unique_ptr<IGraphicsCommandQueue> m_commandQueue;
public:
explicit GraphicsContextNX(NXContext* nxCtx)
: m_nxCtx(nxCtx)
{
m_dataFactory = _NewNXDataFactory(this, nxCtx);
m_commandQueue = _NewNXCommandQueue(nxCtx, this);
}
EGraphicsAPI getAPI() const { return EGraphicsAPI::NX; }
EPixelFormat getPixelFormat() const { return EPixelFormat::RGBA8; }
void setPixelFormat(EPixelFormat pf) {}
bool initializeContext(void* handle) { return m_nxCtx->initialize(); }
void makeCurrent() {}
void postInit() {}
void present() {}
IGraphicsCommandQueue* getCommandQueue() { return m_commandQueue.get(); }
IGraphicsDataFactory* getDataFactory() { return m_dataFactory.get(); }
IGraphicsDataFactory* getMainContextDataFactory() { return m_dataFactory.get(); }
IGraphicsDataFactory* getLoadContextDataFactory() { return m_dataFactory.get(); }
};
class WindowNX : public IWindow
{
std::string m_title;
std::unique_ptr<GraphicsContextNX> m_gfxCtx;
IWindowCallback* m_callback = nullptr;
public:
WindowNX(std::string_view title, NXContext* nxCtx)
: m_title(title), m_gfxCtx(new GraphicsContextNX(nxCtx))
{
m_gfxCtx->initializeContext(nullptr);
}
void setCallback(IWindowCallback* cb) { m_callback = cb; }
void closeWindow() {}
void showWindow() {}
void hideWindow() {}
SystemString getTitle() { return m_title; }
void setTitle(SystemStringView title) { m_title = title; }
void setCursor(EMouseCursor cursor) {}
void setWaitCursor(bool wait) {}
void setWindowFrameDefault() {}
void getWindowFrame(float& xOut, float& yOut, float& wOut, float& hOut) const
{
u32 width, height;
gfxGetFramebufferResolution(&width, &height);
xOut = 0;
yOut = 0;
wOut = width;
hOut = height;
}
void getWindowFrame(int& xOut, int& yOut, int& wOut, int& hOut) const
{
u32 width, height;
gfxGetFramebufferResolution(&width, &height);
xOut = 0;
yOut = 0;
wOut = width;
hOut = height;
}
void setWindowFrame(float x, float y, float w, float h) {}
void setWindowFrame(int x, int y, int w, int h) {}
float getVirtualPixelFactor() const { return 1.f; }
bool isFullscreen() const { return true; }
void setFullscreen(bool fs) {}
void claimKeyboardFocus(const int coord[2]) {}
bool clipboardCopy(EClipboardType type, const uint8_t* data, size_t sz) { return false; }
std::unique_ptr<uint8_t[]> clipboardPaste(EClipboardType type, size_t& sz) { return {}; }
void waitForRetrace() {}
uintptr_t getPlatformHandle() const { return 0; }
bool _incomingEvent(void* event) {(void)event; return false;}
void _cleanup() {}
ETouchType getTouchType() const { return ETouchType::Display; }
void setStyle(EWindowStyle style) {}
EWindowStyle getStyle() const { return EWindowStyle::None; }
void setTouchBarProvider(void*) {}
IGraphicsCommandQueue* getCommandQueue() { return m_gfxCtx->getCommandQueue(); }
IGraphicsDataFactory* getDataFactory() { return m_gfxCtx->getDataFactory(); }
IGraphicsDataFactory* getMainContextDataFactory() { return m_gfxCtx->getMainContextDataFactory(); }
IGraphicsDataFactory* getLoadContextDataFactory() { return m_gfxCtx->getLoadContextDataFactory(); }
};
std::shared_ptr<IWindow> _WindowNXNew(std::string_view title, NXContext* nxCtx)
{
std::shared_ptr<IWindow> ret = std::make_shared<WindowNX>(title, nxCtx);
return ret;
}
}

@ -1 +1 @@
Subproject commit 82f1df9c40edc99a2d68499f363161e5b0bef849
Subproject commit 79506228ad696e7bb02db5bd395dd89a74041890


@ -1,5 +1,11 @@
add_executable(booTest WIN32 main.cpp)
target_link_libraries(booTest boo logvisor xxhash ${BOO_SYS_LIBS})
if (COMMAND add_nro_target)
set_target_properties(booTest PROPERTIES SUFFIX ".elf")
add_nro_target(booTest booTest "Antidote/Jackoalan" "1.0.0")
endif()
if(COMMAND add_sanitizers)
add_sanitizers(booTest)
endif()


@ -132,35 +132,34 @@ class TestDeviceFinder : public DeviceFinder
GenericPadCallback m_genericCb;
public:
TestDeviceFinder()
: DeviceFinder({typeid(DolphinSmashAdapter), typeid(NintendoPowerA), typeid(GenericPad)})
: DeviceFinder({dev_typeid(DolphinSmashAdapter), dev_typeid(NintendoPowerA), dev_typeid(GenericPad)})
{}
void deviceConnected(DeviceToken& tok)
{
m_smashAdapter = std::dynamic_pointer_cast<DolphinSmashAdapter>(tok.openAndGetDevice());
if (m_smashAdapter)
auto dev = tok.openAndGetDevice();
if (!dev)
return;
if (dev->getTypeHash() == dev_typeid(DolphinSmashAdapter))
{
m_smashAdapter = std::static_pointer_cast<DolphinSmashAdapter>(dev);
m_smashAdapter->setCallback(&m_cb);
m_smashAdapter->startRumble(0);
return;
}
m_nintendoPowerA = std::dynamic_pointer_cast<NintendoPowerA>(tok.openAndGetDevice());
if (m_nintendoPowerA)
else if (dev->getTypeHash() == dev_typeid(NintendoPowerA))
{
m_nintendoPowerA = std::static_pointer_cast<NintendoPowerA>(dev);
m_nintendoPowerA->setCallback(&m_nintendoPowerACb);
return;
}
m_ds3 = std::dynamic_pointer_cast<DualshockPad>(tok.openAndGetDevice());
if (m_ds3)
else if (dev->getTypeHash() == dev_typeid(DualshockPad))
{
m_ds3 = std::static_pointer_cast<DualshockPad>(dev);
m_ds3->setCallback(&m_ds3CB);
m_ds3->setLED(EDualshockLED::LED_1);
return;
}
m_generic = std::dynamic_pointer_cast<GenericPad>(tok.openAndGetDevice());
if (m_generic)
else if (dev->getTypeHash() == dev_typeid(GenericPad))
{
m_generic = std::static_pointer_cast<GenericPad>(dev);
m_generic->setCallback(&m_genericCb);
return;
}
}
void deviceDisconnected(DeviceToken&, DeviceBase* device)
@ -315,10 +314,9 @@ struct TestApplicationCallback : IApplicationCallback
/* Make vertex format */
VertexElementDescriptor descs[2] =
{
{vbo.get(), nullptr, VertexSemantic::Position3},
{vbo.get(), nullptr, VertexSemantic::UV2}
{VertexSemantic::Position3},
{VertexSemantic::UV2}
};
auto vfmt = ctx.newVertexFormat(2, descs);
/* Make ramp texture */
using Pixel = uint8_t[4];
@ -337,16 +335,23 @@ struct TestApplicationCallback : IApplicationCallback
/* Make shader pipeline */
boo::ObjToken<IShaderPipeline> pipeline;
auto plat = ctx.platform();
AdditionalPipelineInfo info =
{
BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, boo::ZTest::LEqual,
true, true, false, CullMode::None
};
#if BOO_HAS_GL
if (plat == IGraphicsDataFactory::Platform::OpenGL)
{
GLDataFactory::Context& glF = dynamic_cast<GLDataFactory::Context&>(ctx);
static const char* VS =
"#version 330\n"
BOO_GLSL_BINDING_HEAD
"layout(location=0) in vec3 in_pos;\n"
"layout(location=1) in vec2 in_uv;\n"
"out vec2 out_uv;\n"
"SBINDING(0) out vec2 out_uv;\n"
"void main()\n"
"{\n"
" gl_Position = vec4(in_pos, 1.0);\n"
@ -359,26 +364,25 @@ struct TestApplicationCallback : IApplicationCallback
"precision highp float;\n"
"TBINDING0 uniform sampler2D tex;\n"
"layout(location=0) out vec4 out_frag;\n"
"in vec2 out_uv;\n"
"SBINDING(0) in vec2 out_uv;\n"
"void main()\n"
"{\n"
" //out_frag = texture(tex, out_uv);\n"
" out_frag = vec4(out_uv.xy, 0.0, 1.0);\n"
"}\n";
static const char* texName = "tex";
pipeline = glF.newShaderPipeline(VS, FS, 1, &texName, 0, nullptr,
BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, boo::ZTest::LEqual,
true, true, false, CullMode::None);
auto vertex = ctx.newShaderStage((uint8_t*)VS, 0, PipelineStage::Vertex);
auto fragment = ctx.newShaderStage((uint8_t*)FS, 0, PipelineStage::Fragment);
pipeline = ctx.newShaderPipeline(vertex, fragment,
{{VertexSemantic::Position3},
{VertexSemantic::UV2}}, info);
} else
#endif
#if BOO_HAS_VULKAN
if (plat == IGraphicsDataFactory::Platform::Vulkan)
{
VulkanDataFactory::Context& vkF = dynamic_cast<VulkanDataFactory::Context&>(ctx);
static const char* VS =
"#version 330\n"
BOO_GLSL_BINDING_HEAD
@ -403,9 +407,11 @@ struct TestApplicationCallback : IApplicationCallback
" out_frag = texture(texs[0], out_uv);\n"
"}\n";
pipeline = vkF.newShaderPipeline(VS, FS, vfmt, BlendFactor::One, BlendFactor::Zero,
Primitive::TriStrips, boo::ZTest::LEqual,
true, true, false, CullMode::None);
auto vertexSiprv = VulkanDataFactory::CompileGLSL(VS, PipelineStage::Vertex);
auto vertexShader = ctx.newShaderStage(vertexSiprv, PipelineStage::Vertex);
auto fragmentSiprv = VulkanDataFactory::CompileGLSL(FS, PipelineStage::Fragment);
auto fragmentShader = ctx.newShaderStage(fragmentSiprv, PipelineStage::Fragment);
pipeline = ctx.newShaderPipeline(vertexShader, fragmentShader, descs, info);
} else
#endif
#if _WIN32
@ -477,7 +483,7 @@ struct TestApplicationCallback : IApplicationCallback
/* Make shader data binding */
self->m_binding =
ctx.newShaderDataBinding(pipeline, vfmt, vbo.get(), nullptr, nullptr, 0, nullptr, nullptr,
ctx.newShaderDataBinding(pipeline, vbo.get(), nullptr, nullptr, 0, nullptr, nullptr,
1, &texture, nullptr, nullptr);
return true;
@ -486,7 +492,7 @@ struct TestApplicationCallback : IApplicationCallback
int appMain(IApplication* app)
{
mainWindow = app->newWindow(_S("YAY!"));
mainWindow = app->newWindow(_SYS_STR("YAY!"));
mainWindow->setCallback(&windowCallback);
mainWindow->showWindow();
windowCallback.m_lastRect = mainWindow->getWindowFrame();
@ -588,7 +594,7 @@ int main(int argc, const boo::SystemChar** argv)
logvisor::RegisterConsoleLogger();
boo::TestApplicationCallback appCb;
int ret = ApplicationRun(boo::IApplication::EPlatformType::Auto,
appCb, _S("boo"), _S("boo"), argc, argv, {}, 1, 1, true);
appCb, _SYS_STR("boo"), _SYS_STR("boo"), argc, argv, {}, 1, 1, true);
printf("IM DYING!!\n");
return ret;
}