diff options
author | Seungha Yang <seungha@centricular.com> | 2024-09-20 23:46:32 +0900 |
---|---|---|
committer | GStreamer Marge Bot <gitlab-merge-bot@gstreamer-foundation.org> | 2024-09-23 13:52:37 +0000 |
commit | cef201734cd57165a0cd68dc1e679882bebc2956 (patch) | |
tree | 88674621954a12ebc06ec149a733f17f510612f5 | |
parent | 51e1834e81cef95759a952c91a558ed965ab7de2 (diff) |
d3d12: Add d3d12mipmapping element
Add a new element that converts a single-mip-level texture into a
mipmap-enabled RGBA texture.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7555>
11 files changed, 1965 insertions, 0 deletions
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp index 73fffc48a4..ff82947f96 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp @@ -41,12 +41,14 @@ using namespace Microsoft::WRL; #include "converter_hlsl_cs.h" #include "plugin_hlsl_ps.h" #include "plugin_hlsl_vs.h" +#include "plugin_hlsl_cs.h" #else static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_ps_table; static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_vs_table; static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_cs_table; static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_ps_table; static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_vs_table; +static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_cs_table; #endif static std::vector<std::pair<std::string, ID3DBlob *>> g_compiled_blobs; @@ -81,6 +83,10 @@ static const ShaderItem g_vs_map[] = { {GST_D3D_PLUGIN_VS_POS, BUILD_SOURCE (VSMain_pos)}, }; +static const ShaderItem g_cs_map[] = { + {GST_D3D_PLUGIN_CS_MIP_GEN, BUILD_SOURCE (CSMain_mipgen)}, +}; + #undef BUILD_SOURCE static const gchar * g_sm_map[] = { @@ -196,6 +202,60 @@ gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type, } gboolean +gst_d3d_plugin_shader_get_cs_blob (GstD3DPluginCS type, + GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code) +{ + g_return_val_if_fail (type < GST_D3D_PLUGIN_CS_LAST, FALSE); + g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE); + g_return_val_if_fail (byte_code, FALSE); + + static std::mutex cache_lock; + + auto shader_name = std::string (g_cs_map[type].name) + "_" + + std::string (g_sm_map[shader_model]); + + std::lock_guard <std::mutex> 
lk (cache_lock); + auto it = g_plugin_cs_table.find (shader_name); + if (it != g_plugin_cs_table.end ()) { + byte_code->byte_code = it->second.first; + byte_code->byte_code_len = it->second.second; + + return TRUE; + } + + auto target = std::string ("cs_") + g_sm_map[shader_model]; + + ID3DBlob *blob = nullptr; + ComPtr<ID3DBlob> error_msg; + + auto hr = gst_d3d_compile (g_cs_map[type].source, g_cs_map[type].source_size, + nullptr, nullptr, nullptr, "ENTRY_POINT", target.c_str (), 0, 0, + &blob, &error_msg); + if (FAILED (hr)) { + const gchar *err = nullptr; + if (error_msg) + err = (const gchar *) error_msg->GetBufferPointer (); + + GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, " + "source code: \n%s", (guint) hr, GST_STR_NULL (err), + g_cs_map[type].source); + return FALSE; + } + + byte_code->byte_code = blob->GetBufferPointer (); + byte_code->byte_code_len = blob->GetBufferSize (); + + g_plugin_cs_table[shader_name] = { (const BYTE *) blob->GetBufferPointer (), + blob->GetBufferSize ()}; + + std::lock_guard <std::mutex> blk (g_blob_lock); + g_compiled_blobs.push_back ({ shader_name, blob }); + + return TRUE; +} + + +gboolean gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code) { diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h index e6958e41eb..1b7c474ec4 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h @@ -49,6 +49,12 @@ typedef enum GST_D3D_PLUGIN_VS_LAST, } GstD3DPluginVS; +typedef enum +{ + GST_D3D_PLUGIN_CS_MIP_GEN, + + GST_D3D_PLUGIN_CS_LAST, +} GstD3DPluginCS; typedef enum { @@ -100,6 +106,11 @@ gboolean gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type, GstD3DShaderByteCode * byte_code); GST_D3D_SHADER_API +gboolean gst_d3d_plugin_shader_get_cs_blob (GstD3DPluginCS 
type, + GstD3DShaderModel shader_model, + GstD3DShaderByteCode * byte_code); + +GST_D3D_SHADER_API gboolean gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code); diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen.hlsl new file mode 100644 index 0000000000..11734ce281 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen.hlsl @@ -0,0 +1,421 @@ +/** + * MIT License + * + * Copyright (c) 2018 Jeremiah van Oosten + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* Source: https://github.com/jpvanoosten/LearningDirectX12 */ + +#ifdef BUILDING_HLSL + +#define BLOCK_SIZE 8 + + // When reducing the size of a texture, it could be that downscaling the texture + // will result in a less than exactly 50% (1/2) of the original texture size. + // This happens if either the width, or the height (or both) dimensions of the texture + // are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which + // has a 60% reduction in the texture width and 66% reduction in the height. + // When this happens, we need to take more samples from the source texture to + // determine the pixel value in the destination texture. + +#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even. +#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even. +#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd. +#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd. + +struct ComputeShaderInput +{ + uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch. + uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group. + uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch. + uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group. +}; + +cbuffer GenerateMipsCB : register( b0 ) +{ + uint SrcMipLevel; // Texture level of source mip + uint NumMipLevels; // Number of OutMips to write: [1-4] + uint SrcDimension; // Width and height of the source texture are even or odd. + uint padding; // Unused; mirrors the padding member of the CPU-side GenerateMipsCB struct. + float2 TexelSize; // 1.0 / OutMip1.Dimensions +} + +// Source mip map. +Texture2D<float4> SrcMip : register( t0 ); + +// Write up to 4 mip map levels. 
+RWTexture2D<float4> OutMip1 : register( u0 ); +RWTexture2D<float4> OutMip2 : register( u1 ); +RWTexture2D<float4> OutMip3 : register( u2 ); +RWTexture2D<float4> OutMip4 : register( u3 ); + +// Linear clamp sampler. +SamplerState LinearClampSampler : register( s0 ); + +// The reason for separating channels is to reduce bank conflicts in the +// local data memory controller. A large stride will cause more threads +// to collide on the same memory bank. +groupshared float gs_R[64]; +groupshared float gs_G[64]; +groupshared float gs_B[64]; +groupshared float gs_A[64]; + +void StoreColor( uint Index, float4 Color ) +{ + gs_R[Index] = Color.r; + gs_G[Index] = Color.g; + gs_B[Index] = Color.b; + gs_A[Index] = Color.a; +} + +float4 LoadColor( uint Index ) +{ + return float4( gs_R[Index], gs_G[Index], gs_B[Index], gs_A[Index] ); +} + +[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )] +void ENTRY_POINT( ComputeShaderInput IN ) +{ + float4 Src1 = (float4)0; + + // One bilinear sample is insufficient when scaling down by more than 2x. + // You will slightly undersample in the case where the source dimension + // is odd. This is why it's a really good idea to only generate mips on + // power-of-two sized textures. Trying to handle the undersampling case + // will force this shader to be slower and more complicated as it will + // have to take more source texture samples. + + // Determine the path to use based on the dimension of the + // source texture. + // 0b00(0): Both width and height are even. + // 0b01(1): Width is odd, height is even. + // 0b10(2): Width is even, height is odd. + // 0b11(3): Both width and height are odd. 
+ switch ( SrcDimension ) + { + case WIDTH_HEIGHT_EVEN: + { + float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 ); + + Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel ); + } + break; + case WIDTH_ODD_HEIGHT_EVEN: + { + // > 2:1 in X dimension + // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x + // horizontally. + float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) ); + float2 Off = TexelSize * float2( 0.5, 0.0 ); + + Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) + + SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) ); + } + break; + case WIDTH_EVEN_HEIGHT_ODD: + { + // > 2:1 in Y dimension + // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x + // vertically. + float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) ); + float2 Off = TexelSize * float2( 0.0, 0.5 ); + + Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) + + SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) ); + } + break; + case WIDTH_HEIGHT_ODD: + { + // > 2:1 in in both dimensions + // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x + // in both directions. + float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) ); + float2 Off = TexelSize * 0.5; + + Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ); + Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel ); + Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel ); + Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel ); + Src1 *= 0.25; + } + break; + } + + OutMip1[IN.DispatchThreadID.xy] = Src1; + + // A scalar (constant) branch can exit all threads coherently. 
+ if ( NumMipLevels == 1 ) + return; + + // Without lane swizzle operations, the only way to share data with other + // threads is through LDS. + StoreColor( IN.GroupIndex, Src1 ); + + // This guarantees all LDS writes are complete and that all threads have + // executed all instructions so far (and therefore have issued their LDS + // write instructions.) + GroupMemoryBarrierWithGroupSync(); + + // With low three bits for X and high three bits for Y, this bit mask + // (binary: 001001) checks that X and Y are even. + if ( ( IN.GroupIndex & 0x9 ) == 0 ) + { + float4 Src2 = LoadColor( IN.GroupIndex + 0x01 ); + float4 Src3 = LoadColor( IN.GroupIndex + 0x08 ); + float4 Src4 = LoadColor( IN.GroupIndex + 0x09 ); + Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 ); + + OutMip2[IN.DispatchThreadID.xy / 2] = Src1; + StoreColor( IN.GroupIndex, Src1 ); + } + + if ( NumMipLevels == 2 ) + return; + + GroupMemoryBarrierWithGroupSync(); + + // This bit mask (binary: 011011) checks that X and Y are multiples of four. + if ( ( IN.GroupIndex & 0x1B ) == 0 ) + { + float4 Src2 = LoadColor( IN.GroupIndex + 0x02 ); + float4 Src3 = LoadColor( IN.GroupIndex + 0x10 ); + float4 Src4 = LoadColor( IN.GroupIndex + 0x12 ); + Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 ); + + OutMip3[IN.DispatchThreadID.xy / 4] = Src1; + StoreColor( IN.GroupIndex, Src1 ); + } + + if ( NumMipLevels == 3 ) + return; + + GroupMemoryBarrierWithGroupSync(); + + // This bit mask would be 111111 (X & Y multiples of 8), but only one + // thread fits that criteria. 
+ if ( IN.GroupIndex == 0 ) + { + float4 Src2 = LoadColor( IN.GroupIndex + 0x04 ); + float4 Src3 = LoadColor( IN.GroupIndex + 0x20 ); + float4 Src4 = LoadColor( IN.GroupIndex + 0x24 ); + Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 ); + + OutMip4[IN.DispatchThreadID.xy / 8] = Src1; + } +} +#else +static const char str_CSMain_mipgen[] = +"#define BLOCK_SIZE 8\n" +"\n" +" // When reducing the size of a texture, it could be that downscaling the texture\n" +" // will result in a less than exactly 50% (1/2) of the original texture size.\n" +" // This happens if either the width, or the height (or both) dimensions of the texture\n" +" // are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which\n" +" // has a 60% reduction in the texture width and 66% reduction in the height.\n" +" // When this happens, we need to take more samples from the source texture to\n" +" // determine the pixel value in the destination texture.\n" +"\n" +"#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even.\n" +"#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.\n" +"#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and teh height is odd.\n" +"#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd.\n" +"\n" +"struct ComputeShaderInput\n" +"{\n" +" uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch.\n" +" uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group.\n" +" uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch.\n" +" uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group.\n" +"};\n" +"\n" +"cbuffer GenerateMipsCB : register( b0 )\n" +"{\n" +" uint SrcMipLevel; // Texture level of source mip\n" +" uint NumMipLevels; // Number of OutMips to write: [1-4]\n" +" uint SrcDimension; // Width and height of the source texture are even or 
odd.\n" +" uint padding;\n" +" float2 TexelSize; // 1.0 / OutMip1.Dimensions\n" +"}\n" +"\n" +"// Source mip map.\n" +"Texture2D<float4> SrcMip : register( t0 );\n" +"\n" +"// Write up to 4 mip map levels.\n" +"RWTexture2D<float4> OutMip1 : register( u0 );\n" +"RWTexture2D<float4> OutMip2 : register( u1 );\n" +"RWTexture2D<float4> OutMip3 : register( u2 );\n" +"RWTexture2D<float4> OutMip4 : register( u3 );\n" +"\n" +"// Linear clamp sampler.\n" +"SamplerState LinearClampSampler : register( s0 );\n" +"\n" +"// The reason for separating channels is to reduce bank conflicts in the\n" +"// local data memory controller. A large stride will cause more threads\n" +"// to collide on the same memory bank.\n" +"groupshared float gs_R[64];\n" +"groupshared float gs_G[64];\n" +"groupshared float gs_B[64];\n" +"groupshared float gs_A[64];\n" +"\n" +"void StoreColor( uint Index, float4 Color )\n" +"{\n" +" gs_R[Index] = Color.r;\n" +" gs_G[Index] = Color.g;\n" +" gs_B[Index] = Color.b;\n" +" gs_A[Index] = Color.a;\n" +"}\n" +"\n" +"float4 LoadColor( uint Index )\n" +"{\n" +" return float4( gs_R[Index], gs_G[Index], gs_B[Index], gs_A[Index] );\n" +"}\n" +"\n" +"[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]\n" +"void ENTRY_POINT( ComputeShaderInput IN )\n" +"{\n" +" float4 Src1 = (float4)0;\n" +"\n" +" // One bilinear sample is insufficient when scaling down by more than 2x.\n" +" // You will slightly undersample in the case where the source dimension\n" +" // is odd. This is why it's a really good idea to only generate mips on\n" +" // power-of-two sized textures. 
Trying to handle the undersampling case\n" +" // will force this shader to be slower and more complicated as it will\n" +" // have to take more source texture samples.\n" +"\n" +" // Determine the path to use based on the dimension of the\n" +" // source texture.\n" +" // 0b00(0): Both width and height are even.\n" +" // 0b01(1): Width is odd, height is even.\n" +" // 0b10(2): Width is even, height is odd.\n" +" // 0b11(3): Both width and height are odd.\n" +" switch ( SrcDimension )\n" +" {\n" +" case WIDTH_HEIGHT_EVEN:\n" +" {\n" +" float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );\n" +"\n" +" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel );\n" +" }\n" +" break;\n" +" case WIDTH_ODD_HEIGHT_EVEN:\n" +" {\n" +" // > 2:1 in X dimension\n" +" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n" +" // horizontally.\n" +" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );\n" +" float2 Off = TexelSize * float2( 0.5, 0.0 );\n" +"\n" +" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n" +" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n" +" }\n" +" break;\n" +" case WIDTH_EVEN_HEIGHT_ODD:\n" +" {\n" +" // > 2:1 in Y dimension\n" +" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n" +" // vertically.\n" +" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );\n" +" float2 Off = TexelSize * float2( 0.0, 0.5 );\n" +"\n" +" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n" +" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n" +" }\n" +" break;\n" +" case WIDTH_HEIGHT_ODD:\n" +" {\n" +" // > 2:1 in in both dimensions\n" +" // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n" +" // in both directions.\n" +" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );\n" 
+" float2 Off = TexelSize * 0.5;\n" +"\n" +" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel );\n" +" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel );\n" +" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel );\n" +" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel );\n" +" Src1 *= 0.25;\n" +" }\n" +" break;\n" +" }\n" +"\n" +" OutMip1[IN.DispatchThreadID.xy] = Src1;\n" +"\n" +" // A scalar (constant) branch can exit all threads coherently.\n" +" if ( NumMipLevels == 1 )\n" +" return;\n" +"\n" +" // Without lane swizzle operations, the only way to share data with other\n" +" // threads is through LDS.\n" +" StoreColor( IN.GroupIndex, Src1 );\n" +"\n" +" // This guarantees all LDS writes are complete and that all threads have\n" +" // executed all instructions so far (and therefore have issued their LDS\n" +" // write instructions.)\n" +" GroupMemoryBarrierWithGroupSync();\n" +"\n" +" // With low three bits for X and high three bits for Y, this bit mask\n" +" // (binary: 001001) checks that X and Y are even.\n" +" if ( ( IN.GroupIndex & 0x9 ) == 0 )\n" +" {\n" +" float4 Src2 = LoadColor( IN.GroupIndex + 0x01 );\n" +" float4 Src3 = LoadColor( IN.GroupIndex + 0x08 );\n" +" float4 Src4 = LoadColor( IN.GroupIndex + 0x09 );\n" +" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n" +"\n" +" OutMip2[IN.DispatchThreadID.xy / 2] = Src1;\n" +" StoreColor( IN.GroupIndex, Src1 );\n" +" }\n" +"\n" +" if ( NumMipLevels == 2 )\n" +" return;\n" +"\n" +" GroupMemoryBarrierWithGroupSync();\n" +"\n" +" // This bit mask (binary: 011011) checks that X and Y are multiples of four.\n" +" if ( ( IN.GroupIndex & 0x1B ) == 0 )\n" +" {\n" +" float4 Src2 = LoadColor( IN.GroupIndex + 0x02 );\n" +" float4 Src3 = LoadColor( IN.GroupIndex + 0x10 );\n" +" float4 Src4 = LoadColor( IN.GroupIndex + 0x12 );\n" +" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n" +"\n" +" 
OutMip3[IN.DispatchThreadID.xy / 4] = Src1;\n" +" StoreColor( IN.GroupIndex, Src1 );\n" +" }\n" +"\n" +" if ( NumMipLevels == 3 )\n" +" return;\n" +"\n" +" GroupMemoryBarrierWithGroupSync();\n" +"\n" +" // This bit mask would be 111111 (X & Y multiples of 8), but only one\n" +" // thread fits that criteria.\n" +" if ( IN.GroupIndex == 0 )\n" +" {\n" +" float4 Src2 = LoadColor( IN.GroupIndex + 0x04 );\n" +" float4 Src3 = LoadColor( IN.GroupIndex + 0x20 );\n" +" float4 Src4 = LoadColor( IN.GroupIndex + 0x24 );\n" +" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n" +"\n" +" OutMip4[IN.DispatchThreadID.xy / 8] = Src1;\n" +" }\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h index 67a81deeaa..843759a4a4 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h @@ -30,3 +30,4 @@ #include "VSMain_color.hlsl" #include "VSMain_coord.hlsl" #include "VSMain_pos.hlsl" +#include "CSMain_mipgen.hlsl" diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build index fd23812fd9..c9b1797a1c 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build @@ -10,6 +10,7 @@ hlsl_sources = [ ['VSMain_color', 'vs'], ['VSMain_coord', 'vs'], ['VSMain_pos', 'vs'], + ['CSMain_mipgen', 'cs'], ] shader_model = '5_0' @@ -58,8 +59,19 @@ plugin_vs_collection = custom_target('plugin_hlsl_vs', '--output', '@OUTPUT@' ]) +plugin_cs_collection = custom_target('plugin_hlsl_cs', + input : plugin_hlsl_precompiled, + output : 'plugin_hlsl_cs.h', + command : [header_collector, + '--input', meson.current_build_dir(), + '--prefix', 'CSMain_', + '--name', 'g_plugin_cs_table', + 
'--output', '@OUTPUT@' + ]) + hlsl_precompiled += [ plugin_hlsl_precompiled, plugin_ps_collection, plugin_vs_collection, + plugin_cs_collection, ] diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.cpp new file mode 100644 index 0000000000..a5555f0a8c --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.cpp @@ -0,0 +1,366 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +/* + * Copyright(c) 2018 Jeremiah van Oosten + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files(the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and / or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions : + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* Reference: https://github.com/jpvanoosten/LearningDirectX12 */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gstd3d12mipgen.h" +#include <gst/d3d12/gstd3d12-private.h> +#include <gst/d3dshader/gstd3dshader.h> +#include <directx/d3dx12.h> +#include <wrl.h> +#include <algorithm> + +#define _XM_NO_INTRINSICS_ +#include <DirectXMath.h> + +GST_DEBUG_CATEGORY_STATIC (gst_d3d12_mip_gen_debug); +#define GST_CAT_DEFAULT gst_d3d12_mip_gen_debug + +/* *INDENT-OFF* */ +using namespace Microsoft::WRL; +using namespace DirectX; +/* *INDENT-ON* */ + +struct GenerateMipsCB +{ + UINT SrcMipLevel; + UINT NumMipLevels; + UINT SrcDimension; + UINT padding; + XMFLOAT2 TexelSize; +}; + +struct GstD3D12MipGenPrivate +{ + ~GstD3D12MipGenPrivate () + { + pso = nullptr; + rs = nullptr; + gst_clear_object (&desc_pool); + gst_clear_object (&device); + } + + GstD3D12Device *device = nullptr; + GstD3D12DescriptorPool *desc_pool = nullptr; + ComPtr < ID3D12PipelineState > pso; + ComPtr < ID3D12RootSignature > rs; + guint desc_inc_size; +}; + +struct _GstD3D12MipGen +{ + GstObject parent; + + GstD3D12MipGenPrivate *priv; +}; +/* *INDENT-ON* */ + +static void gst_d3d12_mip_gen_finalize (GObject * object); + +#define gst_d3d12_mip_gen_parent_class parent_class +G_DEFINE_TYPE (GstD3D12MipGen, gst_d3d12_mip_gen, GST_TYPE_OBJECT); + +static void +gst_d3d12_mip_gen_class_init (GstD3D12MipGenClass * klass) +{ + auto object_class = G_OBJECT_CLASS (klass); + + object_class->finalize = 
gst_d3d12_mip_gen_finalize; + + GST_DEBUG_CATEGORY_INIT (gst_d3d12_mip_gen_debug, + "d3d12mipgen", 0, "d3d12mipgen"); +} + +static void +gst_d3d12_mip_gen_init (GstD3D12MipGen * self) +{ + self->priv = new GstD3D12MipGenPrivate (); +} + +static void +gst_d3d12_mip_gen_finalize (GObject * object) +{ + auto self = GST_D3D12_MIP_GEN (object); + + delete self->priv; + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + +GstD3D12MipGen * +gst_d3d12_mip_gen_new (GstD3D12Device * device) +{ + g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr); + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC rs_desc = { }; + CD3DX12_ROOT_PARAMETER root_params[3]; + CD3DX12_DESCRIPTOR_RANGE range_srv; + CD3DX12_DESCRIPTOR_RANGE range_uav; + D3D12_STATIC_SAMPLER_DESC sampler_desc = { }; + + auto self = (GstD3D12MipGen *) g_object_new (GST_TYPE_D3D12_MIP_GEN, nullptr); + gst_object_ref_sink (self); + + auto priv = self->priv; + priv->device = (GstD3D12Device *) gst_object_ref (device); + + sampler_desc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler_desc.MipLODBias = 0; + sampler_desc.MaxAnisotropy = 1; + sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + sampler_desc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; + sampler_desc.MinLOD = 0; + sampler_desc.MaxLOD = D3D12_FLOAT32_MAX; + sampler_desc.ShaderRegister = 0; + sampler_desc.RegisterSpace = 0; + sampler_desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + root_params[0].InitAsConstants (6, 0, 0); + + range_srv.Init (D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + root_params[1].InitAsDescriptorTable (1, &range_srv); + + range_uav.Init (D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 4, 0); + root_params[2].InitAsDescriptorTable (1, &range_uav); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC::Init_1_0 (rs_desc, 3, root_params, + 1, &sampler_desc, 
D3D12_ROOT_SIGNATURE_FLAG_NONE); + + ComPtr < ID3DBlob > rs_blob; + ComPtr < ID3DBlob > error_blob; + auto hr = D3DX12SerializeVersionedRootSignature (&rs_desc, + D3D_ROOT_SIGNATURE_VERSION_1, &rs_blob, &error_blob); + + if (!gst_d3d12_result (hr, device)) { + const gchar *error_msg = nullptr; + if (error_blob) + error_msg = (const gchar *) error_blob->GetBufferPointer (); + + GST_ERROR_OBJECT (self, + "Couldn't serialize root signature, hr: 0x%x, error detail: %s", + (guint) hr, GST_STR_NULL (error_msg)); + gst_object_unref (self); + return nullptr; + } + + auto device_handle = gst_d3d12_device_get_device_handle (device); + hr = device_handle->CreateRootSignature (0, rs_blob->GetBufferPointer (), + rs_blob->GetBufferSize (), IID_PPV_ARGS (&priv->rs)); + if (!gst_d3d12_result (hr, device)) { + GST_ERROR_OBJECT (self, "Couldn't create root signature"); + gst_object_unref (self); + return nullptr; + } + + GstD3DShaderByteCode byte_code; + if (!gst_d3d_plugin_shader_get_cs_blob (GST_D3D_PLUGIN_CS_MIP_GEN, + GST_D3D_SM_5_0, &byte_code)) { + GST_ERROR_OBJECT (self, "Couldn't get shader byte code"); + gst_object_unref (self); + return nullptr; + } + + D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { }; + pso_desc.pRootSignature = priv->rs.Get (); + pso_desc.CS.pShaderBytecode = byte_code.byte_code; + pso_desc.CS.BytecodeLength = byte_code.byte_code_len; + hr = device_handle->CreateComputePipelineState (&pso_desc, + IID_PPV_ARGS (&priv->pso)); + if (!gst_d3d12_result (hr, device)) { + GST_ERROR_OBJECT (self, "Couldn't create PSO"); + gst_object_unref (self); + return nullptr; + } + + D3D12_DESCRIPTOR_HEAP_DESC desc_heap_desc = { }; + desc_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + desc_heap_desc.NumDescriptors = 5; + desc_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + priv->desc_pool = gst_d3d12_descriptor_pool_new (device_handle, + &desc_heap_desc); + if (!priv->desc_pool) { + GST_ERROR_OBJECT (self, "Couldn't create descriptor pool"); + 
gst_object_unref (self); + return nullptr; + } + + priv->desc_inc_size = device_handle->GetDescriptorHandleIncrementSize + (D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + return self; +} + +gboolean +gst_d3d12_mip_gen_execute (GstD3D12MipGen * gen, ID3D12Resource * resource, + GstD3D12FenceData * fence_data, ID3D12GraphicsCommandList * cl) +{ + g_return_val_if_fail (GST_IS_D3D12_MIP_GEN (gen), FALSE); + g_return_val_if_fail (resource, FALSE); + g_return_val_if_fail (fence_data, FALSE); + g_return_val_if_fail (cl, FALSE); + + auto desc = GetDesc (resource); + + if (desc.MipLevels == 1) { + GST_LOG_OBJECT (gen, "Single mip level texture"); + return TRUE; + } + + if ((desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) != + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS || + (desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) == + D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) { + GST_WARNING_OBJECT (gen, "Resource flag is incompatible"); + return FALSE; + } + + auto priv = gen->priv; + auto device = gst_d3d12_device_get_device_handle (priv->device); + + cl->SetComputeRootSignature (priv->rs.Get ()); + cl->SetPipelineState (priv->pso.Get ()); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { }; + srv_desc.Format = desc.Format; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv_desc.Texture2D.MipLevels = desc.MipLevels; + + for (guint srcMip = 0; srcMip < desc.MipLevels - 1;) { + guint64 srcWidth = desc.Width >> srcMip; + guint srcHeight = desc.Height >> srcMip; + guint dstWidth = static_cast < guint > (srcWidth >> 1); + guint dstHeight = srcHeight >> 1; + GenerateMipsCB cbuf; + + // 0b00(0): Both width and height are even. + // 0b01(1): Width is odd, height is even. + // 0b10(2): Width is even, height is odd. + // 0b11(3): Both width and height are odd. 
+ cbuf.SrcDimension = (srcHeight & 1) << 1 | (srcWidth & 1); + + // How many mipmap levels to compute this pass (max 4 mips per pass) + DWORD mipCount; + + // The number of times we can half the size of the texture and get + // exactly a 50% reduction in size. + // A 1 bit in the width or height indicates an odd dimension. + // The case where either the width or the height is exactly 1 is handled + // as a special case (as the dimension does not require reduction). + _BitScanForward (&mipCount, (dstWidth == 1 ? dstHeight : dstWidth) | + (dstHeight == 1 ? dstWidth : dstHeight)); + // Maximum number of mips to generate is 4. + mipCount = std::min < DWORD > (4, mipCount + 1); + // Clamp to total number of mips left over. + mipCount = (srcMip + mipCount) >= desc.MipLevels ? + desc.MipLevels - srcMip - 1 : mipCount; + + // Dimensions should not reduce to 0. + // This can happen if the width and height are not the same. + dstWidth = std::max < DWORD > (1, dstWidth); + dstHeight = std::max < DWORD > (1, dstHeight); + + cbuf.SrcMipLevel = srcMip; + cbuf.NumMipLevels = mipCount; + cbuf.TexelSize.x = 1.0f / (float) dstWidth; + cbuf.TexelSize.y = 1.0f / (float) dstHeight; + + if (srcMip != 0) { + D3D12_RESOURCE_BARRIER barrier = + CD3DX12_RESOURCE_BARRIER::Transition (resource, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, srcMip); + cl->ResourceBarrier (1, &barrier); + } + + GstD3D12Descriptor *desc_heap; + if (!gst_d3d12_descriptor_pool_acquire (priv->desc_pool, &desc_heap)) { + GST_ERROR_OBJECT (gen, "Couldn't acquire descriptor heap"); + return FALSE; + } + + gst_d3d12_fence_data_push (fence_data, + FENCE_NOTIFY_MINI_OBJECT (desc_heap)); + auto desc_handle = gst_d3d12_descriptor_get_handle (desc_heap); + auto cpu_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE + (GetCPUDescriptorHandleForHeapStart (desc_handle)); + + device->CreateShaderResourceView (resource, &srv_desc, cpu_handle); + + for (guint mip = 0; mip < mipCount; mip++) { + 
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = { }; + uavDesc.Format = desc.Format; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = srcMip + mip + 1; + + cpu_handle.Offset (priv->desc_inc_size); + device->CreateUnorderedAccessView (resource, + nullptr, &uavDesc, cpu_handle); + } + + auto gpu_handle = CD3DX12_GPU_DESCRIPTOR_HANDLE + (GetGPUDescriptorHandleForHeapStart (desc_handle)); + + ID3D12DescriptorHeap *heaps[] = { desc_handle }; + cl->SetDescriptorHeaps (1, heaps); + cl->SetComputeRoot32BitConstants (0, 6, &cbuf, 0); + cl->SetComputeRootDescriptorTable (1, gpu_handle); + gpu_handle.Offset (priv->desc_inc_size); + cl->SetComputeRootDescriptorTable (2, gpu_handle); + + cl->Dispatch ((dstWidth + 7) / 8, (dstHeight + 7) / 8, 1); + + D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::UAV (resource); + cl->ResourceBarrier (1, &barrier); + + srcMip += mipCount; + } + + return TRUE; +} diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.h b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.h new file mode 100644 index 0000000000..9206457761 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.h @@ -0,0 +1,38 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include <gst/gst.h> +#include <gst/d3d12/gstd3d12.h> + +G_BEGIN_DECLS + +#define GST_TYPE_D3D12_MIP_GEN (gst_d3d12_mip_gen_get_type()) +G_DECLARE_FINAL_TYPE (GstD3D12MipGen, gst_d3d12_mip_gen, GST, D3D12_MIP_GEN, GstObject); + +GstD3D12MipGen * gst_d3d12_mip_gen_new (GstD3D12Device * device); + +gboolean gst_d3d12_mip_gen_execute (GstD3D12MipGen * gen, + ID3D12Resource * resource, + GstD3D12FenceData * fence_data, + ID3D12GraphicsCommandList * cl); + +G_END_DECLS + diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp new file mode 100644 index 0000000000..843ef562bd --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp @@ -0,0 +1,1019 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gstd3d12mipmapping.h" +#include "gstd3d12mipgen.h" +#include "gstd3d12pluginutils.h" +#include <directx/d3dx12.h> +#include <mutex> +#include <memory> +#include <queue> +#include <wrl.h> +#include <atomic> + +/* *INDENT-OFF* */ +using namespace Microsoft::WRL; +/* *INDENT-ON* */ + +GST_DEBUG_CATEGORY_STATIC (gst_d3d12_mip_mapping_debug); +#define GST_CAT_DEFAULT gst_d3d12_mip_mapping_debug + +static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY, GST_D3D12_ALL_FORMATS) "; " + GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY "," + GST_CAPS_FEATURE_META_GST_VIDEO_OVERLAY_COMPOSITION, + GST_D3D12_ALL_FORMATS))); + +static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src", + GST_PAD_SRC, + GST_PAD_ALWAYS, + GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY, "RGBA") "; " + GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY "," + GST_CAPS_FEATURE_META_GST_VIDEO_OVERLAY_COMPOSITION, + "RGBA"))); + +enum +{ + PROP_0, + PROP_ASYNC_DEPTH, +}; + +#define DEFAULT_ASYNC_DEPTH 0 + +/* *INDENT-OFF* */ +struct MipMappingContext +{ + MipMappingContext (GstD3D12Device * dev) + { + device = (GstD3D12Device *) gst_object_ref (dev); + auto device_handle = gst_d3d12_device_get_device_handle (device); + ca_pool = gst_d3d12_command_allocator_pool_new (device_handle, + D3D12_COMMAND_LIST_TYPE_DIRECT); + } + + ~MipMappingContext () + { + gst_d3d12_device_fence_wait (device, D3D12_COMMAND_LIST_TYPE_DIRECT, + fence_val); + + gst_clear_object (&ca_pool); + gst_clear_object (&conv); + gst_clear_object (&gen); + gst_clear_object (&device); + } + + GstD3D12Device *device = nullptr; + GstD3D12Converter *conv = nullptr; + GstD3D12MipGen *gen = nullptr; + 
ComPtr<ID3D12GraphicsCommandList> cl; + std::queue<guint64> scheduled; + GstD3D12CommandAllocatorPool *ca_pool; + guint64 fence_val = 0; +}; + +struct GstD3D12MipMappingPrivate +{ + GstD3D12MipMappingPrivate () + { + fence_data_pool = gst_d3d12_fence_data_pool_new (); + } + + ~GstD3D12MipMappingPrivate () + { + gst_clear_object (&fence_data_pool); + } + + std::unique_ptr < MipMappingContext > ctx; + GstD3D12FenceDataPool *fence_data_pool; + D3D12_BOX in_rect = { }; + D3D12_BOX prev_in_rect = { }; + + std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH }; + + std::mutex lock; +}; +/* *INDENT-ON* */ + +struct _GstD3D12MipMapping +{ + GstD3D12BaseFilter parent; + + GstD3D12MipMappingPrivate *priv; +}; + +static void gst_d3d12_mip_mapping_finalize (GObject * object); +static void gst_d3d12_mip_mapping_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); +static void gst_d3d12_mip_mapping_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec); +static gboolean gst_d3d12_mip_mapping_stop (GstBaseTransform * trans); +static GstCaps *gst_d3d12_mip_mapping_transform_caps (GstBaseTransform * + trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter); +static GstCaps *gst_d3d12_mip_mapping_fixate_caps (GstBaseTransform * + base, GstPadDirection direction, GstCaps * caps, GstCaps * othercaps); +static gboolean gst_d3d12_mip_mapping_propose_allocation (GstBaseTransform * + trans, GstQuery * decide_query, GstQuery * query); +static gboolean gst_d3d12_mip_mapping_decide_allocation (GstBaseTransform * + trans, GstQuery * query); +static gboolean gst_d3d12_mip_mapping_transform_meta (GstBaseTransform * trans, + GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf); +static GstFlowReturn gst_d3d12_mip_mapping_transform (GstBaseTransform * trans, + GstBuffer * inbuf, GstBuffer * outbuf); +static gboolean gst_d3d12_mip_mapping_set_info (GstD3D12BaseFilter * filter, + GstCaps * incaps, GstVideoInfo * 
in_info, GstCaps * outcaps, + GstVideoInfo * out_info); + +#define gst_d3d12_mip_mapping_parent_class parent_class +G_DEFINE_TYPE (GstD3D12MipMapping, gst_d3d12_mip_mapping, + GST_TYPE_D3D12_BASE_FILTER); + +static void +gst_d3d12_mip_mapping_class_init (GstD3D12MipMappingClass * klass) +{ + auto object_class = G_OBJECT_CLASS (klass); + auto element_class = GST_ELEMENT_CLASS (klass); + auto trans_class = GST_BASE_TRANSFORM_CLASS (klass); + auto filter_class = GST_D3D12_BASE_FILTER_CLASS (klass); + + object_class->set_property = gst_d3d12_mip_mapping_set_property; + object_class->get_property = gst_d3d12_mip_mapping_get_property; + object_class->finalize = gst_d3d12_mip_mapping_finalize; + + g_object_class_install_property (object_class, PROP_ASYNC_DEPTH, + g_param_spec_uint ("async-depth", "Async Depth", + "Number of in-flight GPU commands which can be scheduled without " + "synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH, + (GParamFlags) (GST_PARAM_MUTABLE_PLAYING | + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + gst_element_class_add_static_pad_template (element_class, &sink_template); + gst_element_class_add_static_pad_template (element_class, &src_template); + + gst_element_class_set_static_metadata (element_class, + "Direct3D12 MipMapping", + "Filter/Converter/Video/Hardware", + "Generates RGBA MipMap texture from input", + "Seungha Yang <seungha@centricular.com>"); + + trans_class->passthrough_on_same_caps = FALSE; + + trans_class->stop = GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_stop); + trans_class->transform_caps = + GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_transform_caps); + trans_class->fixate_caps = + GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_fixate_caps); + trans_class->propose_allocation = + GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_propose_allocation); + trans_class->decide_allocation = + GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_decide_allocation); + trans_class->transform_meta = + GST_DEBUG_FUNCPTR 
(gst_d3d12_mip_mapping_transform_meta); + trans_class->transform = GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_transform); + + filter_class->set_info = GST_DEBUG_FUNCPTR (gst_d3d12_mip_mapping_set_info); + + gst_type_mark_as_plugin_api (GST_TYPE_D3D12_SAMPLING_METHOD, + (GstPluginAPIFlags) 0); + + GST_DEBUG_CATEGORY_INIT (gst_d3d12_mip_mapping_debug, "d3d12convert", 0, + "d3d12convert"); +} + +static void +gst_d3d12_mip_mapping_init (GstD3D12MipMapping * self) +{ + self->priv = new GstD3D12MipMappingPrivate (); +} + +static void +gst_d3d12_mip_mapping_finalize (GObject * object) +{ + auto self = GST_D3D12_MIP_MAPPING (object); + + delete self->priv; + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + +static void +gst_d3d12_mip_mapping_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + auto self = GST_D3D12_MIP_MAPPING (object); + auto priv = self->priv; + + switch (prop_id) { + case PROP_ASYNC_DEPTH: + priv->async_depth = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_d3d12_mip_mapping_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + auto self = GST_D3D12_MIP_MAPPING (object); + auto priv = self->priv; + + switch (prop_id) { + case PROP_ASYNC_DEPTH: + g_value_set_uint (value, priv->async_depth); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static gboolean +gst_d3d12_mip_mapping_stop (GstBaseTransform * trans) +{ + auto self = GST_D3D12_MIP_MAPPING (trans); + auto priv = self->priv; + + priv->ctx = nullptr; + + return GST_BASE_TRANSFORM_CLASS (parent_class)->stop (trans); +} + +static GstCaps * +gst_d3d12_mip_mapping_caps_remove_format_info (GstCaps * caps) +{ + GstStructure *st; + GstCapsFeatures *f; + gint i, n; + GstCaps *res; + GstCapsFeatures *feature = + gst_caps_features_from_string 
(GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY); + + res = gst_caps_new_empty (); + + n = gst_caps_get_size (caps); + for (i = 0; i < n; i++) { + st = gst_caps_get_structure (caps, i); + f = gst_caps_get_features (caps, i); + + /* If this is already expressed by the existing caps + * skip this structure */ + if (i > 0 && gst_caps_is_subset_structure_full (res, st, f)) + continue; + + st = gst_structure_copy (st); + /* Only remove format info for the cases when we can actually convert */ + if (!gst_caps_features_is_any (f) + && gst_caps_features_is_equal (f, feature)) { + gst_structure_remove_fields (st, "format", "colorimetry", "chroma-site", + NULL); + } + + gst_caps_append_structure_full (res, st, gst_caps_features_copy (f)); + } + gst_caps_features_free (feature); + + return res; +} + +static GstCaps * +gst_d3d12_mip_mapping_transform_caps (GstBaseTransform * + trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter) +{ + GstCaps *tmp, *tmp2; + GstCaps *result; + + /* Get all possible caps that we can transform to */ + tmp = gst_d3d12_mip_mapping_caps_remove_format_info (caps); + + if (filter) { + tmp2 = gst_caps_intersect_full (filter, tmp, GST_CAPS_INTERSECT_FIRST); + gst_caps_unref (tmp); + tmp = tmp2; + } + + result = tmp; + + GST_DEBUG_OBJECT (trans, "transformed %" GST_PTR_FORMAT " into %" + GST_PTR_FORMAT, caps, result); + + return result; +} + +/* + * This is an incomplete matrix of in formats and a score for the prefered output + * format. 
+ * + * out: RGB24 RGB16 ARGB AYUV YUV444 YUV422 YUV420 YUV411 YUV410 PAL GRAY + * in + * RGB24 0 2 1 2 2 3 4 5 6 7 8 + * RGB16 1 0 1 2 2 3 4 5 6 7 8 + * ARGB 2 3 0 1 4 5 6 7 8 9 10 + * AYUV 3 4 1 0 2 5 6 7 8 9 10 + * YUV444 2 4 3 1 0 5 6 7 8 9 10 + * YUV422 3 5 4 2 1 0 6 7 8 9 10 + * YUV420 4 6 5 3 2 1 0 7 8 9 10 + * YUV411 4 6 5 3 2 1 7 0 8 9 10 + * YUV410 6 8 7 5 4 3 2 1 0 9 10 + * PAL 1 3 2 6 4 6 7 8 9 0 10 + * GRAY 1 4 3 2 1 5 6 7 8 9 0 + * + * PAL or GRAY are never prefered, if we can we would convert to PAL instead + * of GRAY, though + * less subsampling is prefered and if any, preferably horizontal + * We would like to keep the alpha, even if we would need to to colorspace conversion + * or lose depth. + */ +#define SCORE_FORMAT_CHANGE 1 +#define SCORE_DEPTH_CHANGE 1 +#define SCORE_ALPHA_CHANGE 1 +#define SCORE_CHROMA_W_CHANGE 1 +#define SCORE_CHROMA_H_CHANGE 1 +#define SCORE_PALETTE_CHANGE 1 + +#define SCORE_COLORSPACE_LOSS 2 /* RGB <-> YUV */ +#define SCORE_DEPTH_LOSS 4 /* change bit depth */ +#define SCORE_ALPHA_LOSS 8 /* lose the alpha channel */ +#define SCORE_CHROMA_W_LOSS 16 /* vertical subsample */ +#define SCORE_CHROMA_H_LOSS 32 /* horizontal subsample */ +#define SCORE_PALETTE_LOSS 64 /* convert to palette format */ +#define SCORE_COLOR_LOSS 128 /* convert to GRAY */ + +#define COLORSPACE_MASK (GST_VIDEO_FORMAT_FLAG_YUV | \ + GST_VIDEO_FORMAT_FLAG_RGB | GST_VIDEO_FORMAT_FLAG_GRAY) +#define ALPHA_MASK (GST_VIDEO_FORMAT_FLAG_ALPHA) +#define PALETTE_MASK (GST_VIDEO_FORMAT_FLAG_PALETTE) + +/* calculate how much loss a conversion would be */ +static void +score_value (GstBaseTransform * base, const GstVideoFormatInfo * in_info, + const GValue * val, gint * min_loss, const GstVideoFormatInfo ** out_info) +{ + const gchar *fname; + const GstVideoFormatInfo *t_info; + guint in_flags, t_flags; + gint loss; + + fname = g_value_get_string (val); + t_info = gst_video_format_get_info (gst_video_format_from_string (fname)); + if (!t_info || t_info->format == 
GST_VIDEO_FORMAT_UNKNOWN) + return; + + /* accept input format immediately without loss */ + if (in_info == t_info) { + *min_loss = 0; + *out_info = t_info; + return; + } + + loss = SCORE_FORMAT_CHANGE; + + in_flags = GST_VIDEO_FORMAT_INFO_FLAGS (in_info); + in_flags &= ~GST_VIDEO_FORMAT_FLAG_LE; + in_flags &= ~GST_VIDEO_FORMAT_FLAG_COMPLEX; + in_flags &= ~GST_VIDEO_FORMAT_FLAG_UNPACK; + + t_flags = GST_VIDEO_FORMAT_INFO_FLAGS (t_info); + t_flags &= ~GST_VIDEO_FORMAT_FLAG_LE; + t_flags &= ~GST_VIDEO_FORMAT_FLAG_COMPLEX; + t_flags &= ~GST_VIDEO_FORMAT_FLAG_UNPACK; + + if ((t_flags & PALETTE_MASK) != (in_flags & PALETTE_MASK)) { + loss += SCORE_PALETTE_CHANGE; + if (t_flags & PALETTE_MASK) + loss += SCORE_PALETTE_LOSS; + } + + if ((t_flags & COLORSPACE_MASK) != (in_flags & COLORSPACE_MASK)) { + loss += SCORE_COLORSPACE_LOSS; + if (t_flags & GST_VIDEO_FORMAT_FLAG_GRAY) + loss += SCORE_COLOR_LOSS; + } + + if ((t_flags & ALPHA_MASK) != (in_flags & ALPHA_MASK)) { + loss += SCORE_ALPHA_CHANGE; + if (in_flags & ALPHA_MASK) + loss += SCORE_ALPHA_LOSS; + } + + if ((in_info->h_sub[1]) != (t_info->h_sub[1])) { + loss += SCORE_CHROMA_H_CHANGE; + if ((in_info->h_sub[1]) < (t_info->h_sub[1])) + loss += SCORE_CHROMA_H_LOSS; + } + if ((in_info->w_sub[1]) != (t_info->w_sub[1])) { + loss += SCORE_CHROMA_W_CHANGE; + if ((in_info->w_sub[1]) < (t_info->w_sub[1])) + loss += SCORE_CHROMA_W_LOSS; + } + + if ((in_info->bits) != (t_info->bits)) { + loss += SCORE_DEPTH_CHANGE; + if ((in_info->bits) > (t_info->bits)) + loss += SCORE_DEPTH_LOSS + (in_info->bits - t_info->bits); + } + + GST_DEBUG_OBJECT (base, "score %s -> %s = %d", + GST_VIDEO_FORMAT_INFO_NAME (in_info), + GST_VIDEO_FORMAT_INFO_NAME (t_info), loss); + + if (loss < *min_loss) { + GST_DEBUG_OBJECT (base, "found new best %d", loss); + *out_info = t_info; + *min_loss = loss; + } +} + +static void +gst_d3d12_mip_mapping_fixate_format (GstBaseTransform * trans, GstCaps * caps, + GstCaps * result) +{ + GstStructure *ins, *outs; + 
const gchar *in_format; + const GstVideoFormatInfo *in_info, *out_info = nullptr; + gint min_loss = G_MAXINT; + guint i, capslen; + + ins = gst_caps_get_structure (caps, 0); + in_format = gst_structure_get_string (ins, "format"); + if (!in_format) { + return; + } + + GST_DEBUG_OBJECT (trans, "source format %s", in_format); + + in_info = + gst_video_format_get_info (gst_video_format_from_string (in_format)); + if (!in_info) + return; + + outs = gst_caps_get_structure (result, 0); + + capslen = gst_caps_get_size (result); + GST_DEBUG ("iterate %d structures", capslen); + for (i = 0; i < capslen; i++) { + GstStructure *tests; + const GValue *format; + + tests = gst_caps_get_structure (result, i); + format = gst_structure_get_value (tests, "format"); + + /* should not happen */ + if (format == nullptr) + continue; + + if (GST_VALUE_HOLDS_LIST (format)) { + gint j, len; + + len = gst_value_list_get_size (format); + GST_DEBUG_OBJECT (trans, "have %d formats", len); + for (j = 0; j < len; j++) { + const GValue *val; + + val = gst_value_list_get_value (format, j); + if (G_VALUE_HOLDS_STRING (val)) { + score_value (trans, in_info, val, &min_loss, &out_info); + if (min_loss == 0) + break; + } + } + } else if (G_VALUE_HOLDS_STRING (format)) { + score_value (trans, in_info, format, &min_loss, &out_info); + } + } + if (out_info) + gst_structure_set (outs, "format", G_TYPE_STRING, + GST_VIDEO_FORMAT_INFO_NAME (out_info), nullptr); +} + +static gboolean +subsampling_unchanged (GstVideoInfo * in_info, GstVideoInfo * out_info) +{ + guint i; + const GstVideoFormatInfo *in_format, *out_format; + + if (GST_VIDEO_INFO_N_COMPONENTS (in_info) != + GST_VIDEO_INFO_N_COMPONENTS (out_info)) + return FALSE; + + in_format = in_info->finfo; + out_format = out_info->finfo; + + for (i = 0; i < GST_VIDEO_INFO_N_COMPONENTS (in_info); i++) { + if (GST_VIDEO_FORMAT_INFO_W_SUB (in_format, + i) != GST_VIDEO_FORMAT_INFO_W_SUB (out_format, i)) + return FALSE; + if (GST_VIDEO_FORMAT_INFO_H_SUB (in_format, 
+ i) != GST_VIDEO_FORMAT_INFO_H_SUB (out_format, i)) + return FALSE; + } + + return TRUE; +} + +static void +transfer_colorimetry_from_input (GstBaseTransform * trans, GstCaps * in_caps, + GstCaps * out_caps) +{ + GstStructure *out_caps_s = gst_caps_get_structure (out_caps, 0); + GstStructure *in_caps_s = gst_caps_get_structure (in_caps, 0); + gboolean have_colorimetry = + gst_structure_has_field (out_caps_s, "colorimetry"); + gboolean have_chroma_site = + gst_structure_has_field (out_caps_s, "chroma-site"); + + /* If the output already has colorimetry and chroma-site, stop, + * otherwise try and transfer what we can from the input caps */ + if (have_colorimetry && have_chroma_site) + return; + + { + GstVideoInfo in_info, out_info; + const GValue *in_colorimetry = + gst_structure_get_value (in_caps_s, "colorimetry"); + + if (!gst_video_info_from_caps (&in_info, in_caps)) { + GST_WARNING_OBJECT (trans, + "Failed to convert sink pad caps to video info"); + return; + } + if (!gst_video_info_from_caps (&out_info, out_caps)) { + GST_WARNING_OBJECT (trans, + "Failed to convert src pad caps to video info"); + return; + } + + if (!have_colorimetry && in_colorimetry != nullptr) { + if ((GST_VIDEO_INFO_IS_YUV (&out_info) + && GST_VIDEO_INFO_IS_YUV (&in_info)) + || (GST_VIDEO_INFO_IS_RGB (&out_info) + && GST_VIDEO_INFO_IS_RGB (&in_info)) + || (GST_VIDEO_INFO_IS_GRAY (&out_info) + && GST_VIDEO_INFO_IS_GRAY (&in_info))) { + /* Can transfer the colorimetry intact from the input if it has it */ + gst_structure_set_value (out_caps_s, "colorimetry", in_colorimetry); + } else { + gchar *colorimetry_str; + + /* Changing between YUV/RGB - forward primaries and transfer function, but use + * default range and matrix. + * the primaries is used for conversion between RGB and XYZ (CIE 1931 coordinate). 
+ * the transfer function could be another reference (e.g., HDR) + */ + out_info.colorimetry.primaries = in_info.colorimetry.primaries; + out_info.colorimetry.transfer = in_info.colorimetry.transfer; + + colorimetry_str = + gst_video_colorimetry_to_string (&out_info.colorimetry); + gst_caps_set_simple (out_caps, "colorimetry", G_TYPE_STRING, + colorimetry_str, nullptr); + g_free (colorimetry_str); + } + } + + /* Only YUV output needs chroma-site. If the input was also YUV and had the same chroma + * subsampling, transfer the siting. If the sub-sampling is changing, then the planes get + * scaled anyway so there's no real reason to prefer the input siting. */ + if (!have_chroma_site && GST_VIDEO_INFO_IS_YUV (&out_info)) { + if (GST_VIDEO_INFO_IS_YUV (&in_info)) { + const GValue *in_chroma_site = + gst_structure_get_value (in_caps_s, "chroma-site"); + if (in_chroma_site != nullptr + && subsampling_unchanged (&in_info, &out_info)) + gst_structure_set_value (out_caps_s, "chroma-site", in_chroma_site); + } + } + } +} + +static GstCaps * +gst_d3d12_mip_mapping_get_fixed_format (GstBaseTransform * trans, + GstPadDirection direction, GstCaps * caps, GstCaps * othercaps) +{ + GstCaps *result; + + result = gst_caps_intersect (othercaps, caps); + if (gst_caps_is_empty (result)) { + gst_caps_unref (result); + result = gst_caps_copy (othercaps); + } + + gst_d3d12_mip_mapping_fixate_format (trans, caps, result); + + /* fixate remaining fields */ + result = gst_caps_fixate (result); + + if (direction == GST_PAD_SINK) { + if (gst_caps_is_subset (caps, result)) { + gst_caps_replace (&result, caps); + } else { + /* Try and preserve input colorimetry / chroma information */ + transfer_colorimetry_from_input (trans, caps, result); + } + } + + return result; +} + +static GstCaps * +gst_d3d12_mip_mapping_fixate_caps (GstBaseTransform * trans, + GstPadDirection direction, GstCaps * caps, GstCaps * othercaps) +{ + GST_DEBUG_OBJECT (trans, + "trying to fixate othercaps %" GST_PTR_FORMAT " 
based on caps %" + GST_PTR_FORMAT, othercaps, caps); + + auto format = gst_d3d12_mip_mapping_get_fixed_format (trans, direction, caps, + othercaps); + gst_caps_unref (othercaps); + + if (gst_caps_is_empty (format)) { + GST_ERROR_OBJECT (trans, "Could not convert formats"); + } else { + GST_DEBUG_OBJECT (trans, "fixated othercaps to %" GST_PTR_FORMAT, format); + } + + return format; +} + +static gboolean +gst_d3d12_mip_mapping_propose_allocation (GstBaseTransform * trans, + GstQuery * decide_query, GstQuery * query) +{ + auto filter = GST_D3D12_BASE_FILTER (trans); + GstVideoInfo info; + GstBufferPool *pool = nullptr; + GstCaps *caps; + guint n_pools, i; + guint size; + + if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans, + decide_query, query)) { + return FALSE; + } + + gst_query_parse_allocation (query, &caps, nullptr); + + if (!caps) + return FALSE; + + if (!gst_video_info_from_caps (&info, caps)) { + GST_ERROR_OBJECT (filter, "Invalid caps %" GST_PTR_FORMAT, caps); + return FALSE; + } + + n_pools = gst_query_get_n_allocation_pools (query); + for (i = 0; i < n_pools; i++) { + gst_query_parse_nth_allocation_pool (query, i, &pool, nullptr, nullptr, + nullptr); + if (pool) { + if (!GST_IS_D3D12_BUFFER_POOL (pool)) { + gst_clear_object (&pool); + } else { + auto dpool = GST_D3D12_BUFFER_POOL (pool); + if (!gst_d3d12_device_is_equal (dpool->device, filter->device)) + gst_clear_object (&pool); + } + } + } + + if (!pool) + pool = gst_d3d12_buffer_pool_new (filter->device); + + auto config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); + + auto d3d12_params = + gst_buffer_pool_config_get_d3d12_allocation_params (config); + if (!d3d12_params) { + d3d12_params = gst_d3d12_allocation_params_new (filter->device, &info, + GST_D3D12_ALLOCATION_FLAG_DEFAULT, + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS, D3D12_HEAP_FLAG_NONE); + } else { + 
gst_d3d12_allocation_params_set_resource_flags (d3d12_params, + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS); + gst_d3d12_allocation_params_unset_resource_flags (d3d12_params, + D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE); + } + + gst_buffer_pool_config_set_d3d12_allocation_params (config, d3d12_params); + gst_d3d12_allocation_params_free (d3d12_params); + + /* size will be updated by d3d12 buffer pool */ + gst_buffer_pool_config_set_params (config, caps, 0, 0, 0); + + if (!gst_buffer_pool_set_config (pool, config)) { + GST_ERROR_OBJECT (filter, "failed to set config"); + gst_object_unref (pool); + return FALSE; + } + + gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, nullptr); + gst_query_add_allocation_meta (query, + GST_VIDEO_OVERLAY_COMPOSITION_META_API_TYPE, nullptr); + gst_query_add_allocation_meta (query, GST_VIDEO_CROP_META_API_TYPE, nullptr); + + /* d3d12 buffer pool will update buffer size based on allocated texture, + * get size from config again */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, nullptr, &size, nullptr, nullptr); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, 0, 0); + + gst_object_unref (pool); + + return TRUE; +} + +static gboolean +gst_d3d12_mip_mapping_decide_allocation (GstBaseTransform * trans, + GstQuery * query) +{ + auto filter = GST_D3D12_BASE_FILTER (trans); + GstCaps *outcaps = nullptr; + GstBufferPool *pool = nullptr; + guint size, min = 0, max = 0; + GstStructure *config; + gboolean update_pool = FALSE; + GstVideoInfo info; + + gst_query_parse_allocation (query, &outcaps, nullptr); + + if (!outcaps) + return FALSE; + + if (!gst_video_info_from_caps (&info, outcaps)) { + GST_ERROR_OBJECT (filter, "Invalid caps %" GST_PTR_FORMAT, outcaps); + return FALSE; + } + + size = GST_VIDEO_INFO_SIZE (&info); + if (gst_query_get_n_allocation_pools (query) > 0) { + gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); + if 
(pool) { + if (!GST_IS_D3D12_BUFFER_POOL (pool)) { + gst_clear_object (&pool); + } else { + auto dpool = GST_D3D12_BUFFER_POOL (pool); + if (!gst_d3d12_device_is_equal (dpool->device, filter->device)) + gst_clear_object (&pool); + } + } + + update_pool = TRUE; + } + + if (!pool) + pool = gst_d3d12_buffer_pool_new (filter->device); + + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); + + D3D12_RESOURCE_FLAGS resource_flags = + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS | + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS | + D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + auto d3d12_params = gst_d3d12_allocation_params_new (filter->device, &info, + GST_D3D12_ALLOCATION_FLAG_DEFAULT, resource_flags, + D3D12_HEAP_FLAG_SHARED); + + /* Auto generate mip maps */ + gst_d3d12_allocation_params_set_mip_levels (d3d12_params, 0); + + gst_buffer_pool_config_set_d3d12_allocation_params (config, d3d12_params); + gst_d3d12_allocation_params_free (d3d12_params); + + gst_buffer_pool_config_set_params (config, outcaps, size, min, max); + gst_buffer_pool_set_config (pool, config); + + /* d3d12 buffer pool will update buffer size based on allocated texture, + * get size from config again */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, nullptr, &size, nullptr, nullptr); + gst_structure_free (config); + + if (update_pool) + gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); + else + gst_query_add_allocation_pool (query, pool, size, min, max); + + gst_object_unref (pool); + + return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans, + query); +} + +static gboolean +gst_d3d12_mip_mapping_set_info (GstD3D12BaseFilter * filter, + GstCaps * incaps, GstVideoInfo * in_info, GstCaps * outcaps, + GstVideoInfo * out_info) +{ + auto self = GST_D3D12_MIP_MAPPING (filter); + auto priv = self->priv; + + priv->ctx = nullptr; + + GST_DEBUG_OBJECT (self, 
"Setup convert with format %s -> %s", + gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (in_info)), + gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (out_info))); + + /* if present, these must match */ + if (in_info->interlace_mode != out_info->interlace_mode) { + GST_ERROR_OBJECT (self, "input and output formats do not match"); + return FALSE; + } + + auto ctx = std::make_unique < MipMappingContext > (filter->device); + + ctx->conv = gst_d3d12_converter_new (filter->device, nullptr, in_info, + out_info, nullptr, nullptr, nullptr); + if (!ctx->conv) { + GST_ERROR_OBJECT (self, "Couldn't create converter"); + return FALSE; + } + + ctx->gen = gst_d3d12_mip_gen_new (filter->device); + if (!ctx->gen) { + GST_ERROR_OBJECT (self, "Couldn't create mip generator"); + return FALSE; + } + + priv->in_rect = CD3DX12_BOX (0, 0, + GST_VIDEO_INFO_WIDTH (in_info), GST_VIDEO_INFO_HEIGHT (in_info)); + priv->prev_in_rect = priv->in_rect; + + priv->ctx = std::move (ctx); + + return TRUE; +} + +static gboolean +gst_d3d12_mip_mapping_transform_meta (GstBaseTransform * trans, + GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf) +{ + if (meta->info->api == GST_VIDEO_CROP_META_API_TYPE) + return FALSE; + + return GST_BASE_TRANSFORM_CLASS (parent_class)->transform_meta (trans, + outbuf, meta, inbuf); +} + +static GstFlowReturn +gst_d3d12_mip_mapping_transform (GstBaseTransform * trans, GstBuffer * inbuf, + GstBuffer * outbuf) +{ + auto self = GST_D3D12_MIP_MAPPING (trans); + auto priv = self->priv; + D3D12_BOX in_rect; + + auto crop_meta = gst_buffer_get_video_crop_meta (inbuf); + if (crop_meta) { + GST_LOG_OBJECT (self, "Have crop rect, x:y:w:h = %d:%d:%d:%d", + crop_meta->x, crop_meta->y, crop_meta->width, crop_meta->height); + + in_rect = CD3DX12_BOX (crop_meta->x, crop_meta->y, + crop_meta->x + crop_meta->width, crop_meta->y + crop_meta->height); + } else { + in_rect = priv->in_rect; + } + + if (in_rect != priv->in_rect) { + priv->prev_in_rect = in_rect; + g_object_set 
(priv->ctx->conv, "src-x", (gint) in_rect.left, + "src-y", (gint) in_rect.top, + "src-width", (gint) in_rect.right - in_rect.left, + "src-height", (gint) in_rect.bottom - in_rect.top, nullptr); + } + + GstD3D12CommandAllocator *gst_ca; + if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) { + GST_ERROR_OBJECT (self, "Couldn't acquire command allocator"); + return GST_FLOW_ERROR; + } + + auto ca = gst_d3d12_command_allocator_get_handle (gst_ca); + auto hr = ca->Reset (); + if (!gst_d3d12_result (hr, priv->ctx->device)) { + GST_ERROR_OBJECT (self, "Couldn't reset command allocator"); + gst_d3d12_command_allocator_unref (gst_ca); + return GST_FLOW_ERROR; + } + + if (!priv->ctx->cl) { + auto device = gst_d3d12_device_get_device_handle (priv->ctx->device); + hr = device->CreateCommandList (0, D3D12_COMMAND_LIST_TYPE_DIRECT, + ca, nullptr, IID_PPV_ARGS (&priv->ctx->cl)); + if (!gst_d3d12_result (hr, priv->ctx->device)) { + GST_ERROR_OBJECT (self, "Couldn't create command list"); + gst_d3d12_command_allocator_unref (gst_ca); + return GST_FLOW_ERROR; + } + } else { + hr = priv->ctx->cl->Reset (ca, nullptr); + if (!gst_d3d12_result (hr, priv->ctx->device)) { + GST_ERROR_OBJECT (self, "Couldn't reset command list"); + gst_d3d12_command_allocator_unref (gst_ca); + return GST_FLOW_ERROR; + } + } + + GstD3D12FenceData *fence_data; + gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data); + gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_MINI_OBJECT (gst_ca)); + + auto cq = gst_d3d12_device_get_command_queue (priv->ctx->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + auto fence = gst_d3d12_command_queue_get_fence_handle (cq); + if (!gst_d3d12_converter_convert_buffer (priv->ctx->conv, + inbuf, outbuf, fence_data, priv->ctx->cl.Get (), TRUE)) { + GST_ERROR_OBJECT (self, "Couldn't build command list"); + gst_d3d12_fence_data_unref (fence_data); + return GST_FLOW_ERROR; + } + + auto dmem = (GstD3D12Memory *) gst_buffer_peek_memory (outbuf, 
0); + auto tex = gst_d3d12_memory_get_resource_handle (dmem); + + D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition + (tex, D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 0); + priv->ctx->cl->ResourceBarrier (1, &barrier); + + if (!gst_d3d12_mip_gen_execute (priv->ctx->gen, tex, fence_data, + priv->ctx->cl.Get ())) { + GST_ERROR_OBJECT (self, "Couldn't build mip gen command"); + gst_d3d12_fence_data_unref (fence_data); + return GST_FLOW_ERROR; + } + + hr = priv->ctx->cl->Close (); + if (!gst_d3d12_result (hr, priv->ctx->device)) { + GST_ERROR_OBJECT (self, "Couldn't close command list"); + gst_d3d12_fence_data_unref (fence_data); + return GST_FLOW_ERROR; + } + + ID3D12CommandList *cmd_list[] = { priv->ctx->cl.Get () }; + + hr = gst_d3d12_command_queue_execute_command_lists (cq, + 1, cmd_list, &priv->ctx->fence_val); + if (!gst_d3d12_result (hr, priv->ctx->device)) { + GST_ERROR_OBJECT (self, "Couldn't execute command list"); + gst_d3d12_fence_data_unref (fence_data); + return GST_FLOW_ERROR; + } + + gst_d3d12_buffer_set_fence (outbuf, fence, priv->ctx->fence_val, FALSE); + gst_d3d12_command_queue_set_notify (cq, priv->ctx->fence_val, + FENCE_NOTIFY_MINI_OBJECT (fence_data)); + + priv->ctx->scheduled.push (priv->ctx->fence_val); + + auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + while (!priv->ctx->scheduled.empty ()) { + if (priv->ctx->scheduled.front () > completed) + break; + + priv->ctx->scheduled.pop (); + } + + auto async_depth = priv->async_depth.load (); + if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) { + auto fence_to_wait = priv->ctx->scheduled.front (); + priv->ctx->scheduled.pop (); + gst_d3d12_device_fence_wait (priv->ctx->device, + D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait); + } + + return GST_FLOW_OK; +} diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.h 
b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.h new file mode 100644 index 0000000000..668711d88e --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.h @@ -0,0 +1,32 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#pragma once + +#include <gst/gst.h> +#include "gstd3d12basefilter.h" + +G_BEGIN_DECLS + +#define GST_TYPE_D3D12_MIP_MAPPING (gst_d3d12_mip_mapping_get_type()) +G_DECLARE_FINAL_TYPE (GstD3D12MipMapping, gst_d3d12_mip_mapping, + GST, D3D12_MIP_MAPPING, GstD3D12BaseFilter) + +G_END_DECLS + diff --git a/subprojects/gst-plugins-bad/sys/d3d12/meson.build b/subprojects/gst-plugins-bad/sys/d3d12/meson.build index bf0e09e46e..594c41bf72 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/meson.build +++ b/subprojects/gst-plugins-bad/sys/d3d12/meson.build @@ -18,6 +18,8 @@ d3d12_sources = [ 'gstd3d12ipcsink.cpp', 'gstd3d12ipcsrc.cpp', 'gstd3d12mpeg2dec.cpp', + 'gstd3d12mipgen.cpp', + 'gstd3d12mipmapping.cpp', 'gstd3d12overlaycompositor.cpp', 'gstd3d12pluginutils.cpp', 'gstd3d12screencapture.cpp', diff --git a/subprojects/gst-plugins-bad/sys/d3d12/plugin.cpp b/subprojects/gst-plugins-bad/sys/d3d12/plugin.cpp index 00299adf87..e597399fcd 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/plugin.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/plugin.cpp @@ -49,6 +49,7 @@ #include "gstd3d12ipcsrc.h" #include "gstd3d12ipcsink.h" #include "gstd3d12swapchainsink.h" +#include "gstd3d12mipmapping.h" #include <windows.h> #include <versionhelpers.h> #include <wrl.h> @@ -181,6 +182,8 @@ plugin_init (GstPlugin * plugin) "d3d12ipcsink", GST_RANK_NONE, GST_TYPE_D3D12_IPC_SINK); gst_element_register (plugin, "d3d12swapchainsink", GST_RANK_NONE, GST_TYPE_D3D12_SWAPCHAIN_SINK); + gst_element_register (plugin, + "d3d12mipmapping", GST_RANK_NONE, GST_TYPE_D3D12_MIP_MAPPING); g_object_set_data_full (G_OBJECT (plugin), "plugin-d3d12-shutdown", (gpointer) "shutdown-data", |