はじめに
カスタムレンダーテクスチャの頂点シェーダーはUnityの標準ライブラリで CustomRenderTextureVertexShader
として提供されており、これを用いることになっている。
しかし、実装を読み、なおかつDirect3D11の出力アセンブリを読んだところ、あまりよくない実装になっていると感じた個所がある.
この記事では当該箇所を修正することにより,より良い出力アセンブリが得られたことを示す. また修正案をカスタムレンダーテクスチャ用のシェーダーに取り入れる方法についても示す.
CustomRenderTextureVertexShader
の問題点
Unity 2022.2.2の UnityCustomRenderTexture.cginc
は下記のようになっている.
(古いバージョンでも同じ)
// Unity built-in shader source. Copyright (c) 2016 Unity Technologies. MIT license (see license.txt)

#ifndef UNITY_CUSTOM_TEXTURE_INCLUDED
#define UNITY_CUSTOM_TEXTURE_INCLUDED

#include "UnityCG.cginc"
#include "UnityStandardConfig.cginc"

// Keep in sync with CustomRenderTexture.h
#define kCustomTextureBatchSize 16

struct appdata_customrendertexture
{
    uint vertexID : SV_VertexID;
};

// User facing vertex to fragment shader structure
struct v2f_customrendertexture
{
    float4 vertex : SV_POSITION;
    float3 localTexcoord : TEXCOORD0; // Texcoord local to the update zone (== globalTexcoord if no partial update zone is specified)
    float3 globalTexcoord : TEXCOORD1; // Texcoord relative to the complete custom texture
    uint primitiveID : TEXCOORD2; // Index of the update zone (correspond to the index in the updateZones of the Custom Texture)
    float3 direction : TEXCOORD3; // For cube textures, direction of the pixel being rendered in the cubemap
};

// Rotates the 2D vector pos by angle (the angle is fed directly to sin()/cos()).
float2 CustomRenderTextureRotate2D(float2 pos, float angle)
{
    float sn = sin(angle);
    float cs = cos(angle);

    return float2( pos.x * cs - pos.y * sn, pos.x * sn + pos.y * cs);
}

// Internal
float4 CustomRenderTextureCenters[kCustomTextureBatchSize];
float4 CustomRenderTextureSizesAndRotations[kCustomTextureBatchSize];
float CustomRenderTexturePrimitiveIDs[kCustomTextureBatchSize];

float4 CustomRenderTextureParameters;
#define CustomRenderTextureUpdateSpace CustomRenderTextureParameters.x // Normalized(0)/PixelSpace(1)
#define CustomRenderTexture3DTexcoordW CustomRenderTextureParameters.y
#define CustomRenderTextureIs3D CustomRenderTextureParameters.z

// User facing uniform variables
float4 _CustomRenderTextureInfo; // x = width, y = height, z = depth, w = face/3DSlice

// Helpers
#define _CustomRenderTextureWidth _CustomRenderTextureInfo.x
#define _CustomRenderTextureHeight _CustomRenderTextureInfo.y
#define _CustomRenderTextureDepth _CustomRenderTextureInfo.z

// Those two are mutually exclusive so we can use the same slot
#define _CustomRenderTextureCubeFace _CustomRenderTextureInfo.w
#define _CustomRenderTexture3DSlice _CustomRenderTextureInfo.w

sampler2D _SelfTexture2D;
samplerCUBE _SelfTextureCube;
sampler3D _SelfTexture3D;

// Builds a direction vector from globalTexcoord, selected by _CustomRenderTextureCubeFace (0..5).
// Each branch calls normalize() on its own candidate vector.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = normalize(float3(1.0, -xy.y, -xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = normalize(float3(-1.0, -xy.y, xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = normalize(float3(xy.x, 1.0, xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = normalize(float3(xy.x, -1.0, -xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = normalize(float3(xy.x, -xy.y, 1.0));
    }
    else if(_CustomRenderTextureCubeFace == 5.0)
    {
        direction = normalize(float3(-xy.x, -xy.y, -1.0));
    }

    return direction;
}

// standard custom texture vertex shader that should always be used
v2f_customrendertexture CustomRenderTextureVertexShader(appdata_customrendertexture IN)
{
    v2f_customrendertexture OUT;

#if UNITY_UV_STARTS_AT_TOP
    const float2 vertexPositions[6] =
    {
        { -1.0f, 1.0f },
        { -1.0f, -1.0f },
        { 1.0f, -1.0f },
        { 1.0f, 1.0f },
        { -1.0f, 1.0f },
        { 1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f }
    };
#else
    const float2 vertexPositions[6] =
    {
        { 1.0f, 1.0f },
        { -1.0f, -1.0f },
        { -1.0f, 1.0f },
        { -1.0f, -1.0f },
        { 1.0f, 1.0f },
        { 1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 1.0f, 1.0f },
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f }
    };
#endif

    // Six vertices (two triangles) are issued per update zone.
    uint primitiveID = IN.vertexID / 6;
    uint vertexID = IN.vertexID % 6;
    float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
    float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
    float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;

#if !UNITY_UV_STARTS_AT_TOP
    rotation = -rotation;
#endif

    // Normalize rect if needed
    if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
    {
        // Normalize xy because we need it in clip space.
        updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
        updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
    }
    else // normalized space
    {
        // Un-normalize depth because we need actual slice index for culling
        updateZoneCenter.z *= _CustomRenderTextureInfo.z;
        updateZoneSize.z *= _CustomRenderTextureInfo.z;
    }

    // Compute rotation

    // Compute quad vertex position
    float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
    float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
    pos = CustomRenderTextureRotate2D(pos, rotation);
    pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
    pos.y += clipSpaceCenter.y;
#else
    pos.y -= clipSpaceCenter.y;
#endif

    // For 3D texture, cull quads outside of the update zone
    // This is neeeded in additional to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
    // ie: slices [1..5] and [10..15] for two differents zones so we need to cull out slices 0 and [6..9]
    if (CustomRenderTextureIs3D > 0.0)
    {
        int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
        int maxSlice = minSlice + (int)updateZoneSize.z;
        if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
        {
            pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
        }
    }

    OUT.vertex = float4(pos, 0.0, 1.0);
    OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
    OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
    OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
    OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
    OUT.direction = CustomRenderTextureComputeCubeDirection(OUT.globalTexcoord.xy);

    return OUT;
}

struct appdata_init_customrendertexture
{
    float4 vertex : POSITION;
    float2 texcoord : TEXCOORD0;
};

// User facing vertex to fragment structure for initialization materials
struct v2f_init_customrendertexture
{
    float4 vertex : SV_POSITION;
    float3 texcoord : TEXCOORD0;
    float3 direction : TEXCOORD1;
};

// standard custom texture vertex shader that should always be used for initialization shaders
v2f_init_customrendertexture InitCustomRenderTextureVertexShader (appdata_init_customrendertexture v)
{
    v2f_init_customrendertexture o;
    o.vertex = UnityObjectToClipPos(v.vertex);
    o.texcoord = float3(v.texcoord.xy, CustomRenderTexture3DTexcoordW);
    o.direction = CustomRenderTextureComputeCubeDirection(v.texcoord.xy);
    return o;
}

#endif // UNITY_CUSTOM_TEXTURE_INCLUDED
問題に感じたのが CustomRenderTextureComputeCubeDirection()
である.
ここだけ抜粋して再掲する.
// Builds the direction vector for the cube face given by _CustomRenderTextureCubeFace (0..5).
// globalTexcoord is remapped with "* 2.0 - 1.0" before use.
// Note that normalize() appears separately inside every branch.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = normalize(float3(1.0, -xy.y, -xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = normalize(float3(-1.0, -xy.y, xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = normalize(float3(xy.x, 1.0, xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = normalize(float3(xy.x, -1.0, -xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = normalize(float3(xy.x, -xy.y, 1.0));
    }
    else if(_CustomRenderTextureCubeFace == 5.0)
    {
        direction = normalize(float3(-xy.x, -xy.y, -1.0));
    }
    // No final else: direction is not assigned when the face value is none of 0..5.
    return direction;
}
if文を用いているが,uniform変数に対してであるのと,この程度であれば movc
命令を生成するため問題とは思わない.
しかし,各if文中で normalize()
を用いていることが問題であると思う.
normalize()
を用いるなら,if文の外(分岐を抜けた後)で1回だけ呼び出すべきではないだろうか.
実際に生成されたコードからも何度も normalize()
の呼び出しが行われていることがわかる(dp3
, rsq
, mul
が1回の normalize()
に対応).
Global Keywords: <none> Local Keywords: <none> -- Hardware tier variant: Tier 1 -- Vertex shader for "d3d11": // Stats: 46 math, 8 temp registers, 2 branches Constant Buffer "$Globals" (848 bytes) on slot 0 { Vector4 CustomRenderTextureCenters[16] at 32 Vector4 CustomRenderTextureSizesAndRotations[16] at 288 Vector1 CustomRenderTexturePrimitiveIDs[16] at 544 Vector4 CustomRenderTextureParameters at 800 Vector4 _CustomRenderTextureInfo at 816 } Shader Disassembly: // // Generated by Microsoft (R) D3D Shader Disassembler // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_VertexID 0 x 0 VERTID uint x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float xyzw // TEXCOORD 0 xyz 1 NONE float xyz // TEXCOORD 1 xyz 2 NONE float xyz // TEXCOORD 2 x 3 NONE uint x // TEXCOORD 3 xyz 4 NONE float xyz // vs_4_0 dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0}, { -1.000000, -1.000000, 0, 1.000000}, { 1.000000, -1.000000, 1.000000, 1.000000}, { 1.000000, 1.000000, 1.000000, 0}, { -1.000000, 1.000000, 0, 0}, { 1.000000, -1.000000, 1.000000, 1.000000} } dcl_constantbuffer CB0[52], dynamicIndexed dcl_input_sgv v0.x, vertex_id dcl_output_siv o0.xyzw, position dcl_output o1.xyz dcl_output o2.xyz dcl_output o3.x dcl_output o4.xyz dcl_temps 8 0: udiv r0.x, r1.x, v0.x, l(6) 1: mul r0.y, l(0.017453), cb0[r0.x + 18].w 2: lt r0.zw, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xxxz 3: div r1.yz, cb0[r0.x + 2].xxyx, cb0[51].xxyx 4: div r2.yz, cb0[r0.x + 18].xxyx, cb0[51].xxyx 5: mul r3.x, cb0[51].z, cb0[r0.x + 2].z 6: mul r3.w, cb0[51].z, cb0[r0.x + 18].z 7: movc r1.yz, r0.zzzz, r1.yyzy, cb0[r0.x + 2].xxyx 8: mov r2.x, cb0[r0.x + 2].z 9: mov r2.w, cb0[r0.x + 18].z 10: mov r3.yz, cb0[r0.x + 18].xxyx 11: movc r2.xyzw, r0.zzzz, r2.xyzw, r3.xyzw 12: mad 
r1.yz, r1.yyzy, l(0.000000, 2.000000, 2.000000, 0.000000), l(0.000000, -1.000000, -1.000000, 0.000000) 13: mul r2.yz, r2.zzyz, icb[r1.x + 0].yyxy 14: sincos r3.x, r4.x, r0.y 15: mul r0.yz, r2.yyzy, r3.xxxx 16: mad r0.y, r2.z, r4.x, -r0.y 17: mad r0.z, r2.y, r4.x, r0.z 18: add r3.xy, r1.yzyy, r0.yzyy 19: mad r0.y, -r2.w, l(0.500000), r2.x 20: ftoi r0.z, r0.y 21: ftoi r1.y, r2.w 22: iadd r0.z, r0.z, r1.y 23: round_z r0.y, r0.y 24: lt r0.y, cb0[51].w, r0.y 25: itof r0.z, r0.z 26: ge r0.z, cb0[51].w, r0.z 27: or r0.y, r0.z, r0.y 28: movc r0.yz, r0.yyyy, l(0,1000.000000,1000.000000,0), r3.xxyx 29: movc r0.yz, r0.wwww, r0.yyzy, r3.xxyx 30: mad r2.xy, r0.yzyy, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000) 31: add r2.z, -r2.y, l(1.000000) 32: mad r3.xy, r2.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000) 33: eq r0.w, cb0[51].w, l(0.000000) 34: if_nz r0.w 35: mov r4.yz, -r3.yyxy 36: mov r4.x, l(1.000000) 37: dp3 r0.w, r4.xyzx, r4.xyzx 38: rsq r0.w, r0.w 39: mul o4.xyz, r0.wwww, r4.xyzx 40: else 41: mov r3.z, l(-1.000000) 42: mov r3.w, -r3.y 43: mul r1.yzw, r3.xxzy, l(0.000000, 1.000000, 1.000000, -1.000000) 44: dp3 r0.w, r3.xzwx, r1.yzwy 45: rsq r0.w, r0.w 46: mul r4.xyz, r0.wwww, r1.zwyz 47: mov r5.xz, r3.xxyx 48: mov r5.y, l(1.000000) 49: dp3 r0.w, r5.xyzx, r5.xyzx 50: rsq r0.w, r0.w 51: mul r6.xyz, r0.wwww, r5.xyzx 52: dp3 r0.w, r1.yzwy, r1.yzwy 53: rsq r0.w, r0.w 54: mul r1.yzw, r0.wwww, r1.yyzw 55: eq r7.xyzw, cb0[51].wwww, l(1.000000, 2.000000, 3.000000, 4.000000) 56: mov r5.w, -r3.y 57: dp3 r0.w, r5.xywx, r5.xywx 58: rsq r0.w, r0.w 59: mul r5.xyz, r0.wwww, r5.xwyx 60: mul r3.xyz, r3.xwzx, l(-1.000000, 1.000000, 1.000000, 0.000000) 61: dp3 r0.w, r3.xyzx, r3.xyzx 62: rsq r0.w, r0.w 63: mul r3.xyz, r0.wwww, r3.xyzx 64: movc r3.xyz, r7.wwww, r5.xyzx, r3.xyzx 65: movc r1.yzw, r7.zzzz, r1.yyzw, r3.xxyz 66: movc r1.yzw, r7.yyyy, r6.xxyz, r1.yyzw 67: movc o4.xyz, r7.xxxx, r4.xyzx, r1.yzwy 68: 
endif 69: mov o0.xy, r0.yzyy 70: mov o0.zw, l(0,0,0,1.000000) 71: mov o1.xy, icb[r1.x + 0].zwzz 72: mov o1.z, cb0[50].y 73: mov r2.w, cb0[50].y 74: mov o2.xyz, r2.xzwx 75: mov o3.x, cb0[r0.x + 34].x 76: ret // Approximately 0 instruction slots used
改善案
normalize()
をifを抜けた後で呼び出すようにする.
// Builds the direction vector for the cube face given by _CustomRenderTextureCubeFace.
//
// globalTexcoord: texcoord that is remapped with "* 2.0 - 1.0" before use.
// Returns the selected per-face vector, normalized once after the selection.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if (_CustomRenderTextureCubeFace == 0.0)
    {
        direction = float3(1.0, -xy.y, -xy.x);
    }
    else if (_CustomRenderTextureCubeFace == 1.0)
    {
        direction = float3(-1.0, -xy.y, xy.x);
    }
    else if (_CustomRenderTextureCubeFace == 2.0)
    {
        direction = float3(xy.x, 1.0, xy.y);
    }
    else if (_CustomRenderTextureCubeFace == 3.0)
    {
        direction = float3(xy.x, -1.0, -xy.y);
    }
    else if (_CustomRenderTextureCubeFace == 4.0)
    {
        direction = float3(xy.x, -xy.y, 1.0);
    }
    else
    {
        // Face 5. Using a plain else also guarantees that direction is always
        // assigned before it is normalized.
        direction = float3(-xy.x, -xy.y, -1.0);
    }

    // Normalize exactly once, after the branch has picked the vector.
    return normalize(direction);
}
movc
命令っぽさを出したいのであれば,下記のように条件演算子を用いてもよいだろう.
// Conditional-operator variant: picks the per-face vector with chained ?: and
// normalizes the chosen vector once. Any face value other than 0..4 selects
// the last candidate.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    const float2 uv = globalTexcoord * 2.0 - 1.0;
    const float face = _CustomRenderTextureCubeFace;

    const float3 unnormalized =
        face == 0.0 ? float3(1.0, -uv.y, -uv.x)
        : face == 1.0 ? float3(-1.0, -uv.y, uv.x)
        : face == 2.0 ? float3(uv.x, 1.0, uv.y)
        : face == 3.0 ? float3(uv.x, -1.0, -uv.y)
        : face == 4.0 ? float3(uv.x, -uv.y, 1.0)
        : float3(-uv.x, -uv.y, -1.0);

    return normalize(unnormalized);
}
このように改善することで下記のコードが得られた.
movc
の後にnormalizeが行われていることがわかる.
(if文を用いたものと条件演算子を用いたものとで生成コードは同じ)
////////////////////////////////////////////////////// Global Keywords: <none> Local Keywords: <none> -- Hardware tier variant: Tier 1 -- Vertex shader for "d3d11": // Stats: 30 math, 5 temp registers Constant Buffer "$Globals" (848 bytes) on slot 0 { Vector4 CustomRenderTextureCenters[16] at 32 Vector4 CustomRenderTextureSizesAndRotations[16] at 288 Vector1 CustomRenderTexturePrimitiveIDs[16] at 544 Vector4 CustomRenderTextureParameters at 800 Vector4 _CustomRenderTextureInfo at 816 } Shader Disassembly: // // Generated by Microsoft (R) D3D Shader Disassembler // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_VertexID 0 x 0 VERTID uint x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_POSITION 0 xyzw 0 POS float xyzw // TEXCOORD 0 xyz 1 NONE float xyz // TEXCOORD 1 xyz 2 NONE float xyz // TEXCOORD 2 x 3 NONE uint x // TEXCOORD 3 xyz 4 NONE float xyz // vs_4_0 dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0}, { -1.000000, -1.000000, 0, 1.000000}, { 1.000000, -1.000000, 1.000000, 1.000000}, { 1.000000, 1.000000, 1.000000, 0}, { -1.000000, 1.000000, 0, 0}, { 1.000000, -1.000000, 1.000000, 1.000000} } dcl_constantbuffer CB0[52], dynamicIndexed dcl_input_sgv v0.x, vertex_id dcl_output_siv o0.xyzw, position dcl_output o1.xyz dcl_output o2.xyz dcl_output o3.x dcl_output o4.xyz dcl_temps 5 0: lt r0.xy, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xzxx 1: udiv r1.x, r2.x, v0.x, l(6) 2: div r3.yz, cb0[r1.x + 18].xxyx, cb0[51].xxyx 3: mul r4.x, cb0[51].z, cb0[r1.x + 2].z 4: mul r4.w, cb0[51].z, cb0[r1.x + 18].z 5: mov r3.x, cb0[r1.x + 2].z 6: mov r3.w, cb0[r1.x + 18].z 7: mov r4.yz, cb0[r1.x + 18].xxyx 8: movc r3.xyzw, r0.xxxx, r3.xyzw, r4.xyzw 9: mad r0.z, -r3.w, l(0.500000), r3.x 10: ftoi r0.w, r0.z 11: round_z r0.z, r0.z 12: lt r0.z, cb0[51].w, r0.z 
13: ftoi r1.y, r3.w 14: mul r1.zw, r3.zzzy, icb[r2.x + 0].yyyx 15: iadd r0.w, r0.w, r1.y 16: itof r0.w, r0.w 17: ge r0.w, cb0[51].w, r0.w 18: or r0.z, r0.w, r0.z 19: mul r0.w, l(0.017453), cb0[r1.x + 18].w 20: sincos r3.x, r4.x, r0.w 21: mul r2.yz, r1.zzwz, r3.xxxx 22: mad r0.w, r1.w, r4.x, -r2.y 23: mad r1.y, r1.z, r4.x, r2.z 24: div r1.zw, cb0[r1.x + 2].xxxy, cb0[51].xxxy 25: movc r1.zw, r0.xxxx, r1.zzzw, cb0[r1.x + 2].xxxy 26: mad r1.zw, r1.zzzw, l(0.000000, 0.000000, 2.000000, 2.000000), l(0.000000, 0.000000, -1.000000, -1.000000) 27: add r3.x, r0.w, r1.z 28: add r3.y, r1.w, r1.y 29: movc r0.xz, r0.zzzz, l(1000.000000,0,1000.000000,0), r3.xxyx 30: movc r0.xy, r0.yyyy, r0.xzxx, r3.xyxx 31: mov o0.xy, r0.xyxx 32: mad r0.xy, r0.xyxx, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000) 33: mov o0.zw, l(0,0,0,1.000000) 34: mov o1.xy, icb[r2.x + 0].zwzz 35: mov o3.x, cb0[r1.x + 34].x 36: mov o1.z, cb0[50].y 37: add r0.z, -r0.y, l(1.000000) 38: mov r0.w, cb0[50].y 39: mov o2.xyz, r0.xzwx 40: mad r0.xy, r0.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000) 41: mov r0.z, -r0.y 42: mul r1.xy, r0.xzxx, l(-1.000000, 1.000000, 0.000000, 0.000000) 43: eq r1.w, cb0[51].w, l(4.000000) 44: mov r1.z, l(-1.000000) 45: mov r0.w, l(1.000000) 46: movc r1.xyz, r1.wwww, r0.xzwx, r1.xyzx 47: mov r2.yz, r0.zzxz 48: mul r3.xyz, r0.wzxw, l(1.000000, 1.000000, -1.000000, 0.000000) 49: mov r2.x, l(-1.000000) 50: eq r4.xyzw, cb0[51].wwww, l(0.000000, 1.000000, 2.000000, 3.000000) 51: movc r1.xyz, r4.wwww, r2.zxyz, r1.xyzx 52: movc r0.xyz, r4.zzzz, r0.xwyx, r1.xyzx 53: movc r0.xyz, r4.yyyy, r2.xyzx, r0.xyzx 54: movc r0.xyz, r4.xxxx, r3.xyzx, r0.xyzx 55: dp3 r0.w, r0.xyzx, r0.xyzx 56: rsq r0.w, r0.w 57: mul o4.xyz, r0.wwww, r0.xyzx 58: ret // Approximately 0 instruction slots used
カスタムレンダーテクスチャ用シェーダーへの適用
改善したからには自分のカスタムレンダーテクスチャ用のシェーダーに取り入れたいものである. 影響が少なく,使い回しができ,すぐに元に戻せる方法は以下の通りである.
まず,下記のインクルード用ファイル CustomRenderTextureEx.cginc
を用意する.
#ifndef CUSTOM_TEXTURE_EX_INCLUDED
#define CUSTOM_TEXTURE_EX_INCLUDED

#include "UnityCustomRenderTexture.cginc"

// Builds the direction vector for the cube face given by _CustomRenderTextureCubeFace.
// The per-face vector is selected with chained conditional operators and
// normalized once; any face value other than 0..4 selects the last candidate.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    return normalize(
        _CustomRenderTextureCubeFace == 0.0 ? float3(1.0, -xy.y, -xy.x)
        : _CustomRenderTextureCubeFace == 1.0 ? float3(-1.0, -xy.y, xy.x)
        : _CustomRenderTextureCubeFace == 2.0 ? float3(xy.x, 1.0, xy.y)
        : _CustomRenderTextureCubeFace == 3.0 ? float3(xy.x, -1.0, -xy.y)
        : _CustomRenderTextureCubeFace == 4.0 ? float3(xy.x, -xy.y, 1.0)
        : float3(-xy.x, -xy.y, -1.0));
}

// Drop-in variant of CustomRenderTextureVertexShader. The body is the same
// except that OUT.direction is computed with
// CustomRenderTextureComputeCubeDirectionEx().
v2f_customrendertexture CustomRenderTextureVertexShaderEx(appdata_customrendertexture IN)
{
    v2f_customrendertexture OUT;

#if UNITY_UV_STARTS_AT_TOP
    const float2 vertexPositions[6] =
    {
        { -1.0f, 1.0f },
        { -1.0f, -1.0f },
        { 1.0f, -1.0f },
        { 1.0f, 1.0f },
        { -1.0f, 1.0f },
        { 1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f }
    };
#else
    const float2 vertexPositions[6] =
    {
        { 1.0f, 1.0f },
        { -1.0f, -1.0f },
        { -1.0f, 1.0f },
        { -1.0f, -1.0f },
        { 1.0f, 1.0f },
        { 1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 1.0f, 1.0f },
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f }
    };
#endif

    // Six vertices (two triangles) are issued per update zone.
    uint primitiveID = IN.vertexID / 6;
    uint vertexID = IN.vertexID % 6;
    float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
    float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
    // The stored rotation is multiplied by UNITY_PI / 180 before being passed to sin()/cos().
    float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;

#if !UNITY_UV_STARTS_AT_TOP
    rotation = -rotation;
#endif

    // Normalize rect if needed
    if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
    {
        // Normalize xy because we need it in clip space.
        updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
        updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
    }
    else // normalized space
    {
        // Un-normalize depth because we need actual slice index for culling
        updateZoneCenter.z *= _CustomRenderTextureInfo.z;
        updateZoneSize.z *= _CustomRenderTextureInfo.z;
    }

    // Compute quad vertex position
    float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
    float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
    pos = CustomRenderTextureRotate2D(pos, rotation);
    pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
    pos.y += clipSpaceCenter.y;
#else
    pos.y -= clipSpaceCenter.y;
#endif

    // For 3D texture, cull quads outside of the update zone.
    // This is needed in addition to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
    // ie: slices [1..5] and [10..15] for two different zones so we need to cull out slices 0 and [6..9]
    if (CustomRenderTextureIs3D > 0.0)
    {
        int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
        int maxSlice = minSlice + (int)updateZoneSize.z;
        if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
        {
            pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
        }
    }

    OUT.vertex = float4(pos, 0.0, 1.0);
    OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
    OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
    OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
    OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
    OUT.direction = CustomRenderTextureComputeCubeDirectionEx(OUT.globalTexcoord.xy);

    return OUT;
}

#endif // CUSTOM_TEXTURE_EX_INCLUDED
そして,カスタムレンダーテクスチャ用のシェーダーにおいて,
#include "UnityCustomRenderTexture.cginc"
#pragma vertex CustomRenderTextureVertexShader
としている2行それぞれを
#include "CustomRenderTextureEx.cginc"
#pragma vertex CustomRenderTextureVertexShaderEx
に置き換えるとよい.
まとめ
標準ライブラリであってもその実装を過信せずに改善を試みてもよいかもしれない.