はじめに
カスタムレンダーテクスチャの頂点シェーダーは、Unityの標準ライブラリ UnityCustomRenderTexture.cginc で CustomRenderTextureVertexShader として提供されており、これを用いることになっている。
しかし、実装を読み、なおかつDirect3D11の出力アセンブリを読んだところ、あまりよくない実装になっていると感じた個所がある.
この記事では当該箇所を修正することにより,より良い出力アセンブリが得られたことを示す. また修正案をカスタムレンダーテクスチャ用のシェーダーに取り入れる方法についても示す.
CustomRenderTextureVertexShader の問題点
Unity 2022.2.2の UnityCustomRenderTexture.cginc は下記のようになっている.
(古いバージョンでも同じ)
// Unity built-in shader source. Copyright (c) 2016 Unity Technologies. MIT license (see license.txt)
#ifndef UNITY_CUSTOM_TEXTURE_INCLUDED
#define UNITY_CUSTOM_TEXTURE_INCLUDED
#include "UnityCG.cginc"
#include "UnityStandardConfig.cginc"
// Keep in sync with CustomRenderTexture.h
#define kCustomTextureBatchSize 16
// Input of CustomRenderTextureVertexShader.
// The only member is the vertex index bound to the SV_VertexID semantic;
// no vertex position or texcoord is read from a vertex buffer.
struct appdata_customrendertexture
{
uint vertexID : SV_VertexID;
};
// User facing vertex to fragment shader structure.
// Every member is written by CustomRenderTextureVertexShader.
struct v2f_customrendertexture
{
float4 vertex : SV_POSITION; // Quad vertex position, written as float4(pos, 0.0, 1.0)
float3 localTexcoord : TEXCOORD0; // Texcoord local to the update zone (== globalTexcoord if no partial update zone is specified)
float3 globalTexcoord : TEXCOORD1; // Texcoord relative to the complete custom texture
uint primitiveID : TEXCOORD2; // Index of the update zone (corresponds to the index in the updateZones of the Custom Texture)
float3 direction : TEXCOORD3; // For cube textures, direction of the pixel being rendered in the cubemap
};
// Rotates the 2D vector `pos` by `angle`.
// `angle` is passed directly to sin()/cos(), i.e. it is expressed in radians.
// Returns (pos.x * cos - pos.y * sin, pos.x * sin + pos.y * cos).
float2 CustomRenderTextureRotate2D(float2 pos, float angle)
{
float sn = sin(angle);
float cs = cos(angle);
return float2( pos.x * cs - pos.y * sn, pos.x * sn + pos.y * cs);
}
// Internal
// Per-update-zone arrays. The vertex shader indexes them with (vertexID / 6).
// xyz = update zone center
float4 CustomRenderTextureCenters[kCustomTextureBatchSize];
// xyz = update zone size, w = rotation (multiplied by UNITY_PI / 180 in the vertex shader, i.e. stored in degrees)
float4 CustomRenderTextureSizesAndRotations[kCustomTextureBatchSize];
// Declared as float, but the vertex shader reinterprets the bits with asuint() before output.
float CustomRenderTexturePrimitiveIDs[kCustomTextureBatchSize];
// Packed parameters; the individual components are exposed through the macros below.
float4 CustomRenderTextureParameters;
#define CustomRenderTextureUpdateSpace CustomRenderTextureParameters.x // Normalized(0)/PixelSpace(1)
// Written to the z component of the texcoords output by the vertex shaders.
#define CustomRenderTexture3DTexcoordW CustomRenderTextureParameters.y
// Tested with "> 0.0" to enable the 3D slice culling in the vertex shader.
#define CustomRenderTextureIs3D CustomRenderTextureParameters.z
// User facing uniform variables
float4 _CustomRenderTextureInfo; // x = width, y = height, z = depth, w = face/3DSlice
// Helpers
#define _CustomRenderTextureWidth _CustomRenderTextureInfo.x
#define _CustomRenderTextureHeight _CustomRenderTextureInfo.y
#define _CustomRenderTextureDepth _CustomRenderTextureInfo.z
// Those two are mutually exclusive so we can use the same slot
#define _CustomRenderTextureCubeFace _CustomRenderTextureInfo.w
#define _CustomRenderTexture3DSlice _CustomRenderTextureInfo.w
// Samplers for the 2D / cube / 3D variants of the texture.
// NOTE(review): not referenced anywhere in this file; presumably bound to the custom
// render texture's own contents for use by fragment shaders - confirm against Unity docs.
sampler2D _SelfTexture2D;
samplerCUBE _SelfTextureCube;
sampler3D _SelfTexture3D;
// Computes the normalized cubemap direction for a texcoord on the cube face
// selected by _CustomRenderTextureCubeFace.
//   globalTexcoord: texcoord in [0, 1]; remapped to [-1, 1] before use.
//   Face values: 0 -> x = +1, 1 -> x = -1, 2 -> y = +1, 3 -> y = -1, 4 -> z = +1, 5 -> z = -1.
// NOTE(review): normalize() is written out separately inside every branch.
// NOTE(review): `direction` is never assigned when the face value is not one of 0..5.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
// Remap [0, 1] to [-1, 1].
float2 xy = globalTexcoord * 2.0 - 1.0;
float3 direction;
if(_CustomRenderTextureCubeFace == 0.0)
{
direction = normalize(float3(1.0, -xy.y, -xy.x));
}
else if(_CustomRenderTextureCubeFace == 1.0)
{
direction = normalize(float3(-1.0, -xy.y, xy.x));
}
else if(_CustomRenderTextureCubeFace == 2.0)
{
direction = normalize(float3(xy.x, 1.0, xy.y));
}
else if(_CustomRenderTextureCubeFace == 3.0)
{
direction = normalize(float3(xy.x, -1.0, -xy.y));
}
else if(_CustomRenderTextureCubeFace == 4.0)
{
direction = normalize(float3(xy.x, -xy.y, 1.0));
}
else if(_CustomRenderTextureCubeFace == 5.0)
{
direction = normalize(float3(-xy.x, -xy.y, -1.0));
}
return direction;
}
// Standard custom texture vertex shader that should always be used.
// Generates the quad of each update zone purely from SV_VertexID:
// (vertexID / 6) selects the update zone and (vertexID % 6) selects one of the
// six quad vertices. Fills every member of v2f_customrendertexture.
v2f_customrendertexture CustomRenderTextureVertexShader(appdata_customrendertexture IN)
{
v2f_customrendertexture OUT;
// Per-vertex corner positions and the matching local texcoords.
// The tables differ depending on UNITY_UV_STARTS_AT_TOP.
#if UNITY_UV_STARTS_AT_TOP
const float2 vertexPositions[6] =
{
{ -1.0f, 1.0f },
{ -1.0f, -1.0f },
{ 1.0f, -1.0f },
{ 1.0f, 1.0f },
{ -1.0f, 1.0f },
{ 1.0f, -1.0f }
};
const float2 texCoords[6] =
{
{ 0.0f, 0.0f },
{ 0.0f, 1.0f },
{ 1.0f, 1.0f },
{ 1.0f, 0.0f },
{ 0.0f, 0.0f },
{ 1.0f, 1.0f }
};
#else
const float2 vertexPositions[6] =
{
{ 1.0f, 1.0f },
{ -1.0f, -1.0f },
{ -1.0f, 1.0f },
{ -1.0f, -1.0f },
{ 1.0f, 1.0f },
{ 1.0f, -1.0f }
};
const float2 texCoords[6] =
{
{ 1.0f, 1.0f },
{ 0.0f, 0.0f },
{ 0.0f, 1.0f },
{ 0.0f, 0.0f },
{ 1.0f, 1.0f },
{ 1.0f, 0.0f }
};
#endif
// Six consecutive vertex IDs belong to the same update zone.
uint primitiveID = IN.vertexID / 6;
uint vertexID = IN.vertexID % 6;
float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
// Convert the stored rotation (w component) from degrees to radians.
float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;
#if !UNITY_UV_STARTS_AT_TOP
// Reverse the rotation direction when UVs do not start at the top.
rotation = -rotation;
#endif
// Normalize rect if needed
if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
{
// Normalize xy because we need it in clip space.
updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
}
else // normalized space
{
// Un-normalize depth because we need actual slice index for culling
updateZoneCenter.z *= _CustomRenderTextureInfo.z;
updateZoneSize.z *= _CustomRenderTextureInfo.z;
}
// Compute quad vertex position:
// scale the corner by the zone size, rotate it, then translate it to the zone center.
float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
pos = CustomRenderTextureRotate2D(pos, rotation);
pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
pos.y += clipSpaceCenter.y;
#else
pos.y -= clipSpaceCenter.y;
#endif
// For 3D texture, cull quads outside of the update zone
// This is needed in addition to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
// ie: slices [1..5] and [10..15] for two different zones so we need to cull out slices 0 and [6..9]
if (CustomRenderTextureIs3D > 0.0)
{
// Slice range covered by this zone: [minSlice, maxSlice).
int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
int maxSlice = minSlice + (int)updateZoneSize.z;
if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
{
// Move the vertex far outside the [-1, 1] range so the quad is not rendered.
pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
}
}
OUT.vertex = float4(pos, 0.0, 1.0);
// asuint() reinterprets the float's bit pattern; it is not a numeric conversion.
OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
// Map the final position from [-1, 1] back to [0, 1].
OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
OUT.direction = CustomRenderTextureComputeCubeDirection(OUT.globalTexcoord.xy);
return OUT;
}
// Input of InitCustomRenderTextureVertexShader: a vertex position and a 2D texcoord.
struct appdata_init_customrendertexture
{
float4 vertex : POSITION;
float2 texcoord : TEXCOORD0;
};
// User facing vertex to fragment structure for initialization materials.
// Every member is written by InitCustomRenderTextureVertexShader.
struct v2f_init_customrendertexture
{
float4 vertex : SV_POSITION; // Result of UnityObjectToClipPos() on the input vertex
float3 texcoord : TEXCOORD0; // xy = input texcoord, z = CustomRenderTexture3DTexcoordW
float3 direction : TEXCOORD1; // Cube direction computed from the input texcoord
};
// Standard custom texture vertex shader that should always be used for initialization shaders.
// Unlike CustomRenderTextureVertexShader, it reads position and texcoord from the input vertex.
v2f_init_customrendertexture InitCustomRenderTextureVertexShader (appdata_init_customrendertexture v)
{
v2f_init_customrendertexture o;
// UnityObjectToClipPos is declared outside this file (presumably in UnityCG.cginc).
o.vertex = UnityObjectToClipPos(v.vertex);
o.texcoord = float3(v.texcoord.xy, CustomRenderTexture3DTexcoordW);
o.direction = CustomRenderTextureComputeCubeDirection(v.texcoord.xy);
return o;
}
#endif // UNITY_CUSTOM_TEXTURE_INCLUDED
問題に感じたのが CustomRenderTextureComputeCubeDirection() である.
ここだけ抜粋して再掲する.
// Computes the normalized cubemap direction for a texcoord on the cube face
// selected by _CustomRenderTextureCubeFace (0..5).
// NOTE(review): normalize() is written out separately inside every branch, and
// `direction` is never assigned when the face value is not one of 0..5.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
// Remap [0, 1] to [-1, 1].
float2 xy = globalTexcoord * 2.0 - 1.0;
float3 direction;
if(_CustomRenderTextureCubeFace == 0.0)
{
direction = normalize(float3(1.0, -xy.y, -xy.x));
}
else if(_CustomRenderTextureCubeFace == 1.0)
{
direction = normalize(float3(-1.0, -xy.y, xy.x));
}
else if(_CustomRenderTextureCubeFace == 2.0)
{
direction = normalize(float3(xy.x, 1.0, xy.y));
}
else if(_CustomRenderTextureCubeFace == 3.0)
{
direction = normalize(float3(xy.x, -1.0, -xy.y));
}
else if(_CustomRenderTextureCubeFace == 4.0)
{
direction = normalize(float3(xy.x, -xy.y, 1.0));
}
else if(_CustomRenderTextureCubeFace == 5.0)
{
direction = normalize(float3(-xy.x, -xy.y, -1.0));
}
return direction;
}
if文を用いているが,uniform変数に対してであるのと,この程度であれば movc 命令を生成するため問題とは思わない.
しかし,各if文中で normalize() を用いていることが問題であると思う.
normalize() を用いるなら,if文の外で一度だけ呼び出すべきではないだろうか.
実際に生成されたコードからも何度も normalize() の呼び出しが行われていることがわかる(dp3, rsq, mul が1回の normalize() に対応).
Global Keywords: <none>
Local Keywords: <none>
-- Hardware tier variant: Tier 1
-- Vertex shader for "d3d11":
// Stats: 46 math, 8 temp registers, 2 branches
Constant Buffer "$Globals" (848 bytes) on slot 0 {
Vector4 CustomRenderTextureCenters[16] at 32
Vector4 CustomRenderTextureSizesAndRotations[16] at 288
Vector1 CustomRenderTexturePrimitiveIDs[16] at 544
Vector4 CustomRenderTextureParameters at 800
Vector4 _CustomRenderTextureInfo at 816
}
Shader Disassembly:
//
// Generated by Microsoft (R) D3D Shader Disassembler
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_VertexID 0 x 0 VERTID uint x
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float xyzw
// TEXCOORD 0 xyz 1 NONE float xyz
// TEXCOORD 1 xyz 2 NONE float xyz
// TEXCOORD 2 x 3 NONE uint x
// TEXCOORD 3 xyz 4 NONE float xyz
//
vs_4_0
dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0},
{ -1.000000, -1.000000, 0, 1.000000},
{ 1.000000, -1.000000, 1.000000, 1.000000},
{ 1.000000, 1.000000, 1.000000, 0},
{ -1.000000, 1.000000, 0, 0},
{ 1.000000, -1.000000, 1.000000, 1.000000} }
dcl_constantbuffer CB0[52], dynamicIndexed
dcl_input_sgv v0.x, vertex_id
dcl_output_siv o0.xyzw, position
dcl_output o1.xyz
dcl_output o2.xyz
dcl_output o3.x
dcl_output o4.xyz
dcl_temps 8
0: udiv r0.x, r1.x, v0.x, l(6)
1: mul r0.y, l(0.017453), cb0[r0.x + 18].w
2: lt r0.zw, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xxxz
3: div r1.yz, cb0[r0.x + 2].xxyx, cb0[51].xxyx
4: div r2.yz, cb0[r0.x + 18].xxyx, cb0[51].xxyx
5: mul r3.x, cb0[51].z, cb0[r0.x + 2].z
6: mul r3.w, cb0[51].z, cb0[r0.x + 18].z
7: movc r1.yz, r0.zzzz, r1.yyzy, cb0[r0.x + 2].xxyx
8: mov r2.x, cb0[r0.x + 2].z
9: mov r2.w, cb0[r0.x + 18].z
10: mov r3.yz, cb0[r0.x + 18].xxyx
11: movc r2.xyzw, r0.zzzz, r2.xyzw, r3.xyzw
12: mad r1.yz, r1.yyzy, l(0.000000, 2.000000, 2.000000, 0.000000), l(0.000000, -1.000000, -1.000000, 0.000000)
13: mul r2.yz, r2.zzyz, icb[r1.x + 0].yyxy
14: sincos r3.x, r4.x, r0.y
15: mul r0.yz, r2.yyzy, r3.xxxx
16: mad r0.y, r2.z, r4.x, -r0.y
17: mad r0.z, r2.y, r4.x, r0.z
18: add r3.xy, r1.yzyy, r0.yzyy
19: mad r0.y, -r2.w, l(0.500000), r2.x
20: ftoi r0.z, r0.y
21: ftoi r1.y, r2.w
22: iadd r0.z, r0.z, r1.y
23: round_z r0.y, r0.y
24: lt r0.y, cb0[51].w, r0.y
25: itof r0.z, r0.z
26: ge r0.z, cb0[51].w, r0.z
27: or r0.y, r0.z, r0.y
28: movc r0.yz, r0.yyyy, l(0,1000.000000,1000.000000,0), r3.xxyx
29: movc r0.yz, r0.wwww, r0.yyzy, r3.xxyx
30: mad r2.xy, r0.yzyy, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000)
31: add r2.z, -r2.y, l(1.000000)
32: mad r3.xy, r2.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000)
33: eq r0.w, cb0[51].w, l(0.000000)
34: if_nz r0.w
35: mov r4.yz, -r3.yyxy
36: mov r4.x, l(1.000000)
37: dp3 r0.w, r4.xyzx, r4.xyzx
38: rsq r0.w, r0.w
39: mul o4.xyz, r0.wwww, r4.xyzx
40: else
41: mov r3.z, l(-1.000000)
42: mov r3.w, -r3.y
43: mul r1.yzw, r3.xxzy, l(0.000000, 1.000000, 1.000000, -1.000000)
44: dp3 r0.w, r3.xzwx, r1.yzwy
45: rsq r0.w, r0.w
46: mul r4.xyz, r0.wwww, r1.zwyz
47: mov r5.xz, r3.xxyx
48: mov r5.y, l(1.000000)
49: dp3 r0.w, r5.xyzx, r5.xyzx
50: rsq r0.w, r0.w
51: mul r6.xyz, r0.wwww, r5.xyzx
52: dp3 r0.w, r1.yzwy, r1.yzwy
53: rsq r0.w, r0.w
54: mul r1.yzw, r0.wwww, r1.yyzw
55: eq r7.xyzw, cb0[51].wwww, l(1.000000, 2.000000, 3.000000, 4.000000)
56: mov r5.w, -r3.y
57: dp3 r0.w, r5.xywx, r5.xywx
58: rsq r0.w, r0.w
59: mul r5.xyz, r0.wwww, r5.xwyx
60: mul r3.xyz, r3.xwzx, l(-1.000000, 1.000000, 1.000000, 0.000000)
61: dp3 r0.w, r3.xyzx, r3.xyzx
62: rsq r0.w, r0.w
63: mul r3.xyz, r0.wwww, r3.xyzx
64: movc r3.xyz, r7.wwww, r5.xyzx, r3.xyzx
65: movc r1.yzw, r7.zzzz, r1.yyzw, r3.xxyz
66: movc r1.yzw, r7.yyyy, r6.xxyz, r1.yyzw
67: movc o4.xyz, r7.xxxx, r4.xyzx, r1.yzwy
68: endif
69: mov o0.xy, r0.yzyy
70: mov o0.zw, l(0,0,0,1.000000)
71: mov o1.xy, icb[r1.x + 0].zwzz
72: mov o1.z, cb0[50].y
73: mov r2.w, cb0[50].y
74: mov o2.xyz, r2.xzwx
75: mov o3.x, cb0[r0.x + 34].x
76: ret
// Approximately 0 instruction slots used
改善案
normalize() をifを抜けた後で呼び出すようにする.
// Computes the normalized cubemap direction for a texcoord on the cube face
// selected by _CustomRenderTextureCubeFace.
//   globalTexcoord: texcoord in [0, 1]; remapped to [-1, 1] before use.
//   Face values: 0 -> x = +1, 1 -> x = -1, 2 -> y = +1, 3 -> y = -1, 4 -> z = +1,
//   anything else (normally 5) -> z = -1.
// The un-normalized direction is selected first and normalize() is applied exactly once.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    // Remap [0, 1] to [-1, 1].
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = float3(1.0, -xy.y, -xy.x);
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = float3(-1.0, -xy.y, xy.x);
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = float3(xy.x, 1.0, xy.y);
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = float3(xy.x, -1.0, -xy.y);
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = float3(xy.x, -xy.y, 1.0);
    }
    else
    {
        // Face 5. A plain `else` guarantees `direction` is assigned on every path,
        // so an uninitialized value is never normalized and returned.
        direction = float3(-xy.x, -xy.y, -1.0);
    }
    return normalize(direction);
}
movc 命令っぽさを出したいのであれば,下記のように条件演算子を用いてもよいだろう.
// Conditional-operator variant of the cube direction lookup.
// Selects the un-normalized direction for the face given by _CustomRenderTextureCubeFace
// (0..4 are matched explicitly, every other value takes the last entry) and normalizes once.
//   globalTexcoord: texcoord in [0, 1]; remapped to [-1, 1] before use.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    const float face = _CustomRenderTextureCubeFace;
    const float2 p = globalTexcoord * 2.0 - 1.0;

    const float3 unnormalized =
          face == 0.0 ? float3(1.0, -p.y, -p.x)
        : face == 1.0 ? float3(-1.0, -p.y, p.x)
        : face == 2.0 ? float3(p.x, 1.0, p.y)
        : face == 3.0 ? float3(p.x, -1.0, -p.y)
        : face == 4.0 ? float3(p.x, -p.y, 1.0)
        : float3(-p.x, -p.y, -1.0);

    return normalize(unnormalized);
}
このように改善することで下記のコードが得られた.
movc の後にnormalizeが行われていることがわかる.
(if文を用いたものと条件演算子を用いたものとで生成コードは同じ)
//////////////////////////////////////////////////////
Global Keywords: <none>
Local Keywords: <none>
-- Hardware tier variant: Tier 1
-- Vertex shader for "d3d11":
// Stats: 30 math, 5 temp registers
Constant Buffer "$Globals" (848 bytes) on slot 0 {
Vector4 CustomRenderTextureCenters[16] at 32
Vector4 CustomRenderTextureSizesAndRotations[16] at 288
Vector1 CustomRenderTexturePrimitiveIDs[16] at 544
Vector4 CustomRenderTextureParameters at 800
Vector4 _CustomRenderTextureInfo at 816
}
Shader Disassembly:
//
// Generated by Microsoft (R) D3D Shader Disassembler
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_VertexID 0 x 0 VERTID uint x
//
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION 0 xyzw 0 POS float xyzw
// TEXCOORD 0 xyz 1 NONE float xyz
// TEXCOORD 1 xyz 2 NONE float xyz
// TEXCOORD 2 x 3 NONE uint x
// TEXCOORD 3 xyz 4 NONE float xyz
//
vs_4_0
dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0},
{ -1.000000, -1.000000, 0, 1.000000},
{ 1.000000, -1.000000, 1.000000, 1.000000},
{ 1.000000, 1.000000, 1.000000, 0},
{ -1.000000, 1.000000, 0, 0},
{ 1.000000, -1.000000, 1.000000, 1.000000} }
dcl_constantbuffer CB0[52], dynamicIndexed
dcl_input_sgv v0.x, vertex_id
dcl_output_siv o0.xyzw, position
dcl_output o1.xyz
dcl_output o2.xyz
dcl_output o3.x
dcl_output o4.xyz
dcl_temps 5
0: lt r0.xy, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xzxx
1: udiv r1.x, r2.x, v0.x, l(6)
2: div r3.yz, cb0[r1.x + 18].xxyx, cb0[51].xxyx
3: mul r4.x, cb0[51].z, cb0[r1.x + 2].z
4: mul r4.w, cb0[51].z, cb0[r1.x + 18].z
5: mov r3.x, cb0[r1.x + 2].z
6: mov r3.w, cb0[r1.x + 18].z
7: mov r4.yz, cb0[r1.x + 18].xxyx
8: movc r3.xyzw, r0.xxxx, r3.xyzw, r4.xyzw
9: mad r0.z, -r3.w, l(0.500000), r3.x
10: ftoi r0.w, r0.z
11: round_z r0.z, r0.z
12: lt r0.z, cb0[51].w, r0.z
13: ftoi r1.y, r3.w
14: mul r1.zw, r3.zzzy, icb[r2.x + 0].yyyx
15: iadd r0.w, r0.w, r1.y
16: itof r0.w, r0.w
17: ge r0.w, cb0[51].w, r0.w
18: or r0.z, r0.w, r0.z
19: mul r0.w, l(0.017453), cb0[r1.x + 18].w
20: sincos r3.x, r4.x, r0.w
21: mul r2.yz, r1.zzwz, r3.xxxx
22: mad r0.w, r1.w, r4.x, -r2.y
23: mad r1.y, r1.z, r4.x, r2.z
24: div r1.zw, cb0[r1.x + 2].xxxy, cb0[51].xxxy
25: movc r1.zw, r0.xxxx, r1.zzzw, cb0[r1.x + 2].xxxy
26: mad r1.zw, r1.zzzw, l(0.000000, 0.000000, 2.000000, 2.000000), l(0.000000, 0.000000, -1.000000, -1.000000)
27: add r3.x, r0.w, r1.z
28: add r3.y, r1.w, r1.y
29: movc r0.xz, r0.zzzz, l(1000.000000,0,1000.000000,0), r3.xxyx
30: movc r0.xy, r0.yyyy, r0.xzxx, r3.xyxx
31: mov o0.xy, r0.xyxx
32: mad r0.xy, r0.xyxx, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000)
33: mov o0.zw, l(0,0,0,1.000000)
34: mov o1.xy, icb[r2.x + 0].zwzz
35: mov o3.x, cb0[r1.x + 34].x
36: mov o1.z, cb0[50].y
37: add r0.z, -r0.y, l(1.000000)
38: mov r0.w, cb0[50].y
39: mov o2.xyz, r0.xzwx
40: mad r0.xy, r0.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000)
41: mov r0.z, -r0.y
42: mul r1.xy, r0.xzxx, l(-1.000000, 1.000000, 0.000000, 0.000000)
43: eq r1.w, cb0[51].w, l(4.000000)
44: mov r1.z, l(-1.000000)
45: mov r0.w, l(1.000000)
46: movc r1.xyz, r1.wwww, r0.xzwx, r1.xyzx
47: mov r2.yz, r0.zzxz
48: mul r3.xyz, r0.wzxw, l(1.000000, 1.000000, -1.000000, 0.000000)
49: mov r2.x, l(-1.000000)
50: eq r4.xyzw, cb0[51].wwww, l(0.000000, 1.000000, 2.000000, 3.000000)
51: movc r1.xyz, r4.wwww, r2.zxyz, r1.xyzx
52: movc r0.xyz, r4.zzzz, r0.xwyx, r1.xyzx
53: movc r0.xyz, r4.yyyy, r2.xyzx, r0.xyzx
54: movc r0.xyz, r4.xxxx, r3.xyzx, r0.xyzx
55: dp3 r0.w, r0.xyzx, r0.xyzx
56: rsq r0.w, r0.w
57: mul o4.xyz, r0.wwww, r0.xyzx
58: ret
// Approximately 0 instruction slots used
カスタムレンダーテクスチャ用シェーダーへの適用
改善したからには自分のカスタムレンダーテクスチャ用のシェーダーに取り入れたいものである. 影響が少なく,使い回しができ,すぐに元に戻せる方法としては以下の通りである.
まず,下記のインクルード用ファイル CustomRenderTextureEx.cginc を用意する.
#ifndef CUSTOM_TEXTURE_EX_INCLUDED
#define CUSTOM_TEXTURE_EX_INCLUDED
#include "UnityCustomRenderTexture.cginc"
// Conditional-operator variant of the cube direction lookup.
// Selects the un-normalized direction for the face given by _CustomRenderTextureCubeFace
// (0..4 are matched explicitly, every other value takes the last entry) and normalizes once.
//   globalTexcoord: texcoord in [0, 1]; remapped to [-1, 1] before use.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    const float face = _CustomRenderTextureCubeFace;
    const float2 p = globalTexcoord * 2.0 - 1.0;

    const float3 unnormalized =
          face == 0.0 ? float3(1.0, -p.y, -p.x)
        : face == 1.0 ? float3(-1.0, -p.y, p.x)
        : face == 2.0 ? float3(p.x, 1.0, p.y)
        : face == 3.0 ? float3(p.x, -1.0, -p.y)
        : face == 4.0 ? float3(p.x, -p.y, 1.0)
        : float3(-p.x, -p.y, -1.0);

    return normalize(unnormalized);
}
// Variant of CustomRenderTextureVertexShader that computes OUT.direction with
// CustomRenderTextureComputeCubeDirectionEx instead of CustomRenderTextureComputeCubeDirection.
// Generates the quad of each update zone purely from SV_VertexID:
// (vertexID / 6) selects the update zone and (vertexID % 6) selects one of the
// six quad vertices. Fills every member of v2f_customrendertexture.
v2f_customrendertexture CustomRenderTextureVertexShaderEx(appdata_customrendertexture IN)
{
v2f_customrendertexture OUT;
// Per-vertex corner positions and the matching local texcoords.
// The tables differ depending on UNITY_UV_STARTS_AT_TOP.
#if UNITY_UV_STARTS_AT_TOP
const float2 vertexPositions[6] =
{
{ -1.0f, 1.0f },
{ -1.0f, -1.0f },
{ 1.0f, -1.0f },
{ 1.0f, 1.0f },
{ -1.0f, 1.0f },
{ 1.0f, -1.0f }
};
const float2 texCoords[6] =
{
{ 0.0f, 0.0f },
{ 0.0f, 1.0f },
{ 1.0f, 1.0f },
{ 1.0f, 0.0f },
{ 0.0f, 0.0f },
{ 1.0f, 1.0f }
};
#else
const float2 vertexPositions[6] =
{
{ 1.0f, 1.0f },
{ -1.0f, -1.0f },
{ -1.0f, 1.0f },
{ -1.0f, -1.0f },
{ 1.0f, 1.0f },
{ 1.0f, -1.0f }
};
const float2 texCoords[6] =
{
{ 1.0f, 1.0f },
{ 0.0f, 0.0f },
{ 0.0f, 1.0f },
{ 0.0f, 0.0f },
{ 1.0f, 1.0f },
{ 1.0f, 0.0f }
};
#endif
// Six consecutive vertex IDs belong to the same update zone.
uint primitiveID = IN.vertexID / 6;
uint vertexID = IN.vertexID % 6;
float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
// Convert the stored rotation (w component) from degrees to radians.
float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;
#if !UNITY_UV_STARTS_AT_TOP
// Reverse the rotation direction when UVs do not start at the top.
rotation = -rotation;
#endif
// Normalize rect if needed
if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
{
// Normalize xy because we need it in clip space.
updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
}
else // normalized space
{
// Un-normalize depth because we need actual slice index for culling
updateZoneCenter.z *= _CustomRenderTextureInfo.z;
updateZoneSize.z *= _CustomRenderTextureInfo.z;
}
// Compute quad vertex position:
// scale the corner by the zone size, rotate it, then translate it to the zone center.
float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
pos = CustomRenderTextureRotate2D(pos, rotation);
pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
pos.y += clipSpaceCenter.y;
#else
pos.y -= clipSpaceCenter.y;
#endif
// For 3D texture, cull quads outside of the update zone
// This is needed in addition to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
// ie: slices [1..5] and [10..15] for two different zones so we need to cull out slices 0 and [6..9]
if (CustomRenderTextureIs3D > 0.0)
{
// Slice range covered by this zone: [minSlice, maxSlice).
int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
int maxSlice = minSlice + (int)updateZoneSize.z;
if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
{
// Move the vertex far outside the [-1, 1] range so the quad is not rendered.
pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
}
}
OUT.vertex = float4(pos, 0.0, 1.0);
// asuint() reinterprets the float's bit pattern; it is not a numeric conversion.
OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
// Map the final position from [-1, 1] back to [0, 1].
OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
// Only difference from the stock shader: use the Ex direction function.
OUT.direction = CustomRenderTextureComputeCubeDirectionEx(OUT.globalTexcoord.xy);
return OUT;
}
#endif // CUSTOM_TEXTURE_EX_INCLUDED
そして,カスタムレンダーテクスチャ用のシェーダーにおいて,
#include "UnityCustomRenderTexture.cginc"
#pragma vertex CustomRenderTextureVertexShader
としている2行それぞれを
#include "CustomRenderTextureEx.cginc"
#pragma vertex CustomRenderTextureVertexShaderEx
に置き換えるとよい.
まとめ
標準ライブラリであってもその実装を過信せずに改善を試みてもよいかもしれない.