koturnの日記

普通の人です.ブログ上のコードはコピペ自由です.

CustomRenderTextureVertexShader()を改善する

はじめに

カスタムレンダーテクスチャの頂点シェーダーは,Unityの標準ライブラリから CustomRenderTextureVertexShader として提供されており,これを用いることになっている. しかし,実装を読み,なおかつDirect3D11の出力アセンブリを読んだところ,あまりよくない実装になっていると感じた箇所がある.

この記事では当該箇所を修正することにより,より良い出力アセンブリが得られたことを示す. また修正案をカスタムレンダーテクスチャ用のシェーダーに取り入れる方法についても示す.

CustomRenderTextureVertexShader の問題点

Unity 2022.2.2の UnityCustomRenderTexture.cginc は下記のようになっている. (古いバージョンでも同じ)

// Unity built-in shader source. Copyright (c) 2016 Unity Technologies. MIT license (see license.txt)

#ifndef UNITY_CUSTOM_TEXTURE_INCLUDED
#define UNITY_CUSTOM_TEXTURE_INCLUDED

#include "UnityCG.cginc"
#include "UnityStandardConfig.cginc"

// Keep in sync with CustomRenderTexture.h
#define kCustomTextureBatchSize 16

// Vertex input of CustomRenderTextureVertexShader: the only attribute read is
// the system-generated vertex index.
struct appdata_customrendertexture
{
    uint    vertexID    : SV_VertexID;  // Split into update-zone index (/ 6) and corner index (% 6) by the vertex shader
};

// User facing vertex to fragment shader structure.
// Filled in by CustomRenderTextureVertexShader.
struct v2f_customrendertexture
{
    float4 vertex           : SV_POSITION;
    float3 localTexcoord    : TEXCOORD0;    // Texcoord local to the update zone (== globalTexcoord if no partial update zone is specified)
    float3 globalTexcoord   : TEXCOORD1;    // Texcoord relative to the complete custom texture
    uint primitiveID        : TEXCOORD2;    // Index of the update zone (correspond to the index in the updateZones of the Custom Texture)
    float3 direction        : TEXCOORD3;    // For cube textures, direction of the pixel being rendered in the cubemap
};

// Rotates the 2D point `pos` about the origin by `angle` (radians; the caller
// converts from degrees) using the rotation matrix [cs -sn; sn cs].
float2 CustomRenderTextureRotate2D(float2 pos, float angle)
{
    float sn = sin(angle);
    float cs = cos(angle);

    return float2( pos.x * cs - pos.y * sn, pos.x * sn + pos.y * cs);
}

// Internal
// Per-update-zone data; CustomRenderTextureVertexShader indexes these arrays
// with the update-zone index (vertexID / 6).
float4      CustomRenderTextureCenters[kCustomTextureBatchSize];            // xyz = update zone center
float4      CustomRenderTextureSizesAndRotations[kCustomTextureBatchSize];  // xyz = update zone size, w = rotation in degrees
float       CustomRenderTexturePrimitiveIDs[kCustomTextureBatchSize];       // Bit-reinterpreted with asuint() by the vertex shader

float4      CustomRenderTextureParameters;
#define     CustomRenderTextureUpdateSpace  CustomRenderTextureParameters.x // Normalized(0)/PixelSpace(1)
#define     CustomRenderTexture3DTexcoordW  CustomRenderTextureParameters.y // Written to the z component of the output texcoords
#define     CustomRenderTextureIs3D         CustomRenderTextureParameters.z // > 0 enables the 3D slice culling in the vertex shader

// User facing uniform variables
float4      _CustomRenderTextureInfo; // x = width, y = height, z = depth, w = face/3DSlice

// Helpers
#define _CustomRenderTextureWidth   _CustomRenderTextureInfo.x
#define _CustomRenderTextureHeight  _CustomRenderTextureInfo.y
#define _CustomRenderTextureDepth   _CustomRenderTextureInfo.z

// Those two are mutually exclusive so we can use the same slot
#define _CustomRenderTextureCubeFace    _CustomRenderTextureInfo.w
#define _CustomRenderTexture3DSlice     _CustomRenderTextureInfo.w

// Samplers declared for user shaders; not referenced elsewhere in this file.
sampler2D   _SelfTexture2D;
samplerCUBE _SelfTextureCube;
sampler3D   _SelfTexture3D;

// Computes the normalized cube-map direction for `globalTexcoord` on the face
// selected by _CustomRenderTextureCubeFace (compared against 0.0 .. 5.0).
// The texcoord is first remapped from [0, 1] to [-1, 1].
// Note: normalize() is called separately inside every branch, and `direction`
// is left unassigned when the face value matches none of 0..5.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = normalize(float3(1.0, -xy.y, -xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = normalize(float3(-1.0, -xy.y, xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = normalize(float3(xy.x, 1.0, xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = normalize(float3(xy.x, -1.0, -xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = normalize(float3(xy.x, -xy.y, 1.0));
    }
    else if(_CustomRenderTextureCubeFace == 5.0)
    {
        direction = normalize(float3(-xy.x, -xy.y, -1.0));
    }

    return direction;
}

// standard custom texture vertex shader that should always be used
//
// Generates one quad (six vertices) per update zone purely from SV_VertexID:
//   - vertexID / 6 selects the update zone, vertexID % 6 selects the quad corner.
//   - The corner is scaled by the zone size, rotated, and translated to the zone center.
//   - For 3D textures, quads whose zone does not contain the current slice are
//     moved far outside clip space.
// Outputs the clip-space position, local/global texcoords, the zone's primitive ID
// and the cube-map direction computed from the global texcoord.
v2f_customrendertexture CustomRenderTextureVertexShader(appdata_customrendertexture IN)
{
    v2f_customrendertexture OUT;

    // Quad corner positions and matching local texcoords; the vertex order and
    // the v coordinate differ depending on UNITY_UV_STARTS_AT_TOP.
#if UNITY_UV_STARTS_AT_TOP
    const float2 vertexPositions[6] =
    {
        { -1.0f,  1.0f },
        { -1.0f, -1.0f },
        {  1.0f, -1.0f },
        {  1.0f,  1.0f },
        { -1.0f,  1.0f },
        {  1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f }
    };
#else
    const float2 vertexPositions[6] =
    {
        {  1.0f,  1.0f },
        { -1.0f, -1.0f },
        { -1.0f,  1.0f },
        { -1.0f, -1.0f },
        {  1.0f,  1.0f },
        {  1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 1.0f, 1.0f },
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f }
    };
#endif

    // Six consecutive vertex IDs form one update zone quad.
    uint primitiveID = IN.vertexID / 6;
    uint vertexID = IN.vertexID % 6;
    float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
    float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
    // Rotation is stored in degrees; convert to radians.
    float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;

#if !UNITY_UV_STARTS_AT_TOP
    rotation = -rotation;
#endif

    // Normalize rect if needed
    if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
    {
        // Normalize xy because we need it in clip space.
        updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
        updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
    }
    else // normalized space
    {
        // Un-normalize depth because we need actual slice index for culling
        updateZoneCenter.z *= _CustomRenderTextureInfo.z;
        updateZoneSize.z *= _CustomRenderTextureInfo.z;
    }

    // Compute rotation

    // Compute quad vertex position: scale the corner by the zone size, rotate it,
    // then translate it to the zone center remapped from [0, 1] to [-1, 1].
    float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
    float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
    pos = CustomRenderTextureRotate2D(pos, rotation);
    pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
    pos.y += clipSpaceCenter.y;
#else
    pos.y -= clipSpaceCenter.y;
#endif

    // For 3D texture, cull quads outside of the update zone
    // This is needed in addition to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
    // ie: slices [1..5] and [10..15] for two different zones so we need to cull out slices 0 and [6..9]
    if (CustomRenderTextureIs3D > 0.0)
    {
        int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
        int maxSlice = minSlice + (int)updateZoneSize.z;
        if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
        {
            pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
        }
    }

    OUT.vertex = float4(pos, 0.0, 1.0);
    // The ID is stored in a float uniform; asuint() reinterprets its bits as uint.
    OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
    OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
    // Global texcoord is the final clip-space position remapped from [-1, 1] to [0, 1].
    OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
    OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
    OUT.direction = CustomRenderTextureComputeCubeDirection(OUT.globalTexcoord.xy);

    return OUT;
}

// Vertex input of InitCustomRenderTextureVertexShader.
struct appdata_init_customrendertexture
{
    float4 vertex : POSITION;       // Passed to UnityObjectToClipPos()
    float2 texcoord : TEXCOORD0;    // Forwarded to the output and used to compute the cube direction
};

// User facing vertex to fragment structure for initialization materials
struct v2f_init_customrendertexture
{
    float4 vertex : SV_POSITION;
    float3 texcoord : TEXCOORD0;    // xy = input texcoord, z = CustomRenderTexture3DTexcoordW
    float3 direction : TEXCOORD1;   // Result of CustomRenderTextureComputeCubeDirection() for the input texcoord
};

// standard custom texture vertex shader that should always be used for initialization shaders
// Transforms the vertex with UnityObjectToClipPos(), forwards the texcoord with
// CustomRenderTexture3DTexcoordW appended as z, and computes the cube-map
// direction from the texcoord.
v2f_init_customrendertexture InitCustomRenderTextureVertexShader (appdata_init_customrendertexture v)
{
    v2f_init_customrendertexture o;
    o.vertex = UnityObjectToClipPos(v.vertex);
    o.texcoord = float3(v.texcoord.xy, CustomRenderTexture3DTexcoordW);
    o.direction = CustomRenderTextureComputeCubeDirection(v.texcoord.xy);
    return o;
}

#endif // UNITY_CUSTOM_TEXTURE_INCLUDED

問題に感じたのが CustomRenderTextureComputeCubeDirection() である. ここだけ抜粋して再掲する.

// Computes the normalized cube-map direction for `globalTexcoord` on the face
// selected by _CustomRenderTextureCubeFace (compared against 0.0 .. 5.0).
// The texcoord is first remapped from [0, 1] to [-1, 1].
// Note: normalize() is called separately inside every branch, and `direction`
// is left unassigned when the face value matches none of 0..5.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = normalize(float3(1.0, -xy.y, -xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = normalize(float3(-1.0, -xy.y, xy.x));
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = normalize(float3(xy.x, 1.0, xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = normalize(float3(xy.x, -1.0, -xy.y));
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = normalize(float3(xy.x, -xy.y, 1.0));
    }
    else if(_CustomRenderTextureCubeFace == 5.0)
    {
        direction = normalize(float3(-xy.x, -xy.y, -1.0));
    }

    return direction;
}

if文を用いているが,uniform変数に対する分岐であるのと,この程度であれば movc 命令が生成されるため問題とは思わない. しかし,各if文中で normalize() を用いていることが問題であると思う. normalize() を用いるならif文の外にすべきではないだろうか.

実際に生成されたコードからも何度も normalize() の呼び出しが行われていることがわかる(dp3, rsq, mul が1回の normalize() に対応).

Global Keywords: <none>
Local Keywords: <none>
-- Hardware tier variant: Tier 1
-- Vertex shader for "d3d11":
// Stats: 46 math, 8 temp registers, 2 branches
Constant Buffer "$Globals" (848 bytes) on slot 0 {
  Vector4 CustomRenderTextureCenters[16] at 32
  Vector4 CustomRenderTextureSizesAndRotations[16] at 288
  Vector1 CustomRenderTexturePrimitiveIDs[16] at 544
  Vector4 CustomRenderTextureParameters at 800
  Vector4 _CustomRenderTextureInfo at 816
}

Shader Disassembly:
//
// Generated by Microsoft (R) D3D Shader Disassembler
//
//
// Input signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_VertexID              0   x           0   VERTID    uint   x
//
//
// Output signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION              0   xyzw        0      POS   float   xyzw
// TEXCOORD                 0   xyz         1     NONE   float   xyz
// TEXCOORD                 1   xyz         2     NONE   float   xyz
// TEXCOORD                 2   x           3     NONE    uint   x
// TEXCOORD                 3   xyz         4     NONE   float   xyz
//
      vs_4_0
      dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0},
                              { -1.000000, -1.000000, 0, 1.000000},
                              { 1.000000, -1.000000, 1.000000, 1.000000},
                              { 1.000000, 1.000000, 1.000000, 0},
                              { -1.000000, 1.000000, 0, 0},
                              { 1.000000, -1.000000, 1.000000, 1.000000} }
      dcl_constantbuffer CB0[52], dynamicIndexed
      dcl_input_sgv v0.x, vertex_id
      dcl_output_siv o0.xyzw, position
      dcl_output o1.xyz
      dcl_output o2.xyz
      dcl_output o3.x
      dcl_output o4.xyz
      dcl_temps 8
   0: udiv r0.x, r1.x, v0.x, l(6)
   1: mul r0.y, l(0.017453), cb0[r0.x + 18].w
   2: lt r0.zw, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xxxz
   3: div r1.yz, cb0[r0.x + 2].xxyx, cb0[51].xxyx
   4: div r2.yz, cb0[r0.x + 18].xxyx, cb0[51].xxyx
   5: mul r3.x, cb0[51].z, cb0[r0.x + 2].z
   6: mul r3.w, cb0[51].z, cb0[r0.x + 18].z
   7: movc r1.yz, r0.zzzz, r1.yyzy, cb0[r0.x + 2].xxyx
   8: mov r2.x, cb0[r0.x + 2].z
   9: mov r2.w, cb0[r0.x + 18].z
  10: mov r3.yz, cb0[r0.x + 18].xxyx
  11: movc r2.xyzw, r0.zzzz, r2.xyzw, r3.xyzw
  12: mad r1.yz, r1.yyzy, l(0.000000, 2.000000, 2.000000, 0.000000), l(0.000000, -1.000000, -1.000000, 0.000000)
  13: mul r2.yz, r2.zzyz, icb[r1.x + 0].yyxy
  14: sincos r3.x, r4.x, r0.y
  15: mul r0.yz, r2.yyzy, r3.xxxx
  16: mad r0.y, r2.z, r4.x, -r0.y
  17: mad r0.z, r2.y, r4.x, r0.z
  18: add r3.xy, r1.yzyy, r0.yzyy
  19: mad r0.y, -r2.w, l(0.500000), r2.x
  20: ftoi r0.z, r0.y
  21: ftoi r1.y, r2.w
  22: iadd r0.z, r0.z, r1.y
  23: round_z r0.y, r0.y
  24: lt r0.y, cb0[51].w, r0.y
  25: itof r0.z, r0.z
  26: ge r0.z, cb0[51].w, r0.z
  27: or r0.y, r0.z, r0.y
  28: movc r0.yz, r0.yyyy, l(0,1000.000000,1000.000000,0), r3.xxyx
  29: movc r0.yz, r0.wwww, r0.yyzy, r3.xxyx
  30: mad r2.xy, r0.yzyy, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000)
  31: add r2.z, -r2.y, l(1.000000)
  32: mad r3.xy, r2.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000)
  33: eq r0.w, cb0[51].w, l(0.000000)
  34: if_nz r0.w
  35:   mov r4.yz, -r3.yyxy
  36:   mov r4.x, l(1.000000)
  37:   dp3 r0.w, r4.xyzx, r4.xyzx
  38:   rsq r0.w, r0.w
  39:   mul o4.xyz, r0.wwww, r4.xyzx
  40: else
  41:   mov r3.z, l(-1.000000)
  42:   mov r3.w, -r3.y
  43:   mul r1.yzw, r3.xxzy, l(0.000000, 1.000000, 1.000000, -1.000000)
  44:   dp3 r0.w, r3.xzwx, r1.yzwy
  45:   rsq r0.w, r0.w
  46:   mul r4.xyz, r0.wwww, r1.zwyz
  47:   mov r5.xz, r3.xxyx
  48:   mov r5.y, l(1.000000)
  49:   dp3 r0.w, r5.xyzx, r5.xyzx
  50:   rsq r0.w, r0.w
  51:   mul r6.xyz, r0.wwww, r5.xyzx
  52:   dp3 r0.w, r1.yzwy, r1.yzwy
  53:   rsq r0.w, r0.w
  54:   mul r1.yzw, r0.wwww, r1.yyzw
  55:   eq r7.xyzw, cb0[51].wwww, l(1.000000, 2.000000, 3.000000, 4.000000)
  56:   mov r5.w, -r3.y
  57:   dp3 r0.w, r5.xywx, r5.xywx
  58:   rsq r0.w, r0.w
  59:   mul r5.xyz, r0.wwww, r5.xwyx
  60:   mul r3.xyz, r3.xwzx, l(-1.000000, 1.000000, 1.000000, 0.000000)
  61:   dp3 r0.w, r3.xyzx, r3.xyzx
  62:   rsq r0.w, r0.w
  63:   mul r3.xyz, r0.wwww, r3.xyzx
  64:   movc r3.xyz, r7.wwww, r5.xyzx, r3.xyzx
  65:   movc r1.yzw, r7.zzzz, r1.yyzw, r3.xxyz
  66:   movc r1.yzw, r7.yyyy, r6.xxyz, r1.yyzw
  67:   movc o4.xyz, r7.xxxx, r4.xyzx, r1.yzwy
  68: endif
  69: mov o0.xy, r0.yzyy
  70: mov o0.zw, l(0,0,0,1.000000)
  71: mov o1.xy, icb[r1.x + 0].zwzz
  72: mov o1.z, cb0[50].y
  73: mov r2.w, cb0[50].y
  74: mov o2.xyz, r2.xzwx
  75: mov o3.x, cb0[r0.x + 34].x
  76: ret
// Approximately 0 instruction slots used

改善案

normalize() をifを抜けた後で呼び出すようにする.

// Computes the normalized cube-map direction for `globalTexcoord` on the face
// selected by _CustomRenderTextureCubeFace.
//
// globalTexcoord: texcoord in [0, 1], remapped to [-1, 1] before use.
// Returns: unit-length direction for the selected face.
//
// The branches only select the un-normalized vector; normalize() is executed
// once after the selection instead of once per branch.
float3 CustomRenderTextureComputeCubeDirection(float2 globalTexcoord)
{
    float2 xy = globalTexcoord * 2.0 - 1.0;
    float3 direction;
    if(_CustomRenderTextureCubeFace == 0.0)
    {
        direction = float3(1.0, -xy.y, -xy.x);
    }
    else if(_CustomRenderTextureCubeFace == 1.0)
    {
        direction = float3(-1.0, -xy.y, xy.x);
    }
    else if(_CustomRenderTextureCubeFace == 2.0)
    {
        direction = float3(xy.x, 1.0, xy.y);
    }
    else if(_CustomRenderTextureCubeFace == 3.0)
    {
        direction = float3(xy.x, -1.0, -xy.y);
    }
    else if(_CustomRenderTextureCubeFace == 4.0)
    {
        direction = float3(xy.x, -xy.y, 1.0);
    }
    else
    {
        // Face 5. A plain `else` also covers any unexpected face value, so
        // `direction` is always assigned before normalize() and the result
        // matches the conditional-operator variant.
        direction = float3(-xy.x, -xy.y, -1.0);
    }

    return normalize(direction);
}

movc 命令っぽさを出したいのであれば,下記のように条件演算子を用いてもよいだろう.

// Computes the normalized cube-map direction for `globalTexcoord` on the face
// selected by _CustomRenderTextureCubeFace. The un-normalized vector is picked
// with a chain of conditional operators (faces 0..4, anything else falls
// through to the face-5 vector) and normalized exactly once.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    // Remap the texcoord from [0, 1] to [-1, 1].
    const float2 p = globalTexcoord * 2.0 - 1.0;
    const float face = _CustomRenderTextureCubeFace;

    const float3 unnormalized =
        face == 0.0 ? float3(1.0, -p.y, -p.x) :
        face == 1.0 ? float3(-1.0, -p.y, p.x) :
        face == 2.0 ? float3(p.x, 1.0, p.y) :
        face == 3.0 ? float3(p.x, -1.0, -p.y) :
        face == 4.0 ? float3(p.x, -p.y, 1.0) :
                      float3(-p.x, -p.y, -1.0);

    return normalize(unnormalized);
}

このように改善することで下記のコードが得られた. movc の後にnormalizeが行われていることがわかる. (if文を用いたものと条件演算子を用いたものとで生成コードは同じ)

//////////////////////////////////////////////////////
Global Keywords: <none>
Local Keywords: <none>
-- Hardware tier variant: Tier 1
-- Vertex shader for "d3d11":
// Stats: 30 math, 5 temp registers
Constant Buffer "$Globals" (848 bytes) on slot 0 {
  Vector4 CustomRenderTextureCenters[16] at 32
  Vector4 CustomRenderTextureSizesAndRotations[16] at 288
  Vector1 CustomRenderTexturePrimitiveIDs[16] at 544
  Vector4 CustomRenderTextureParameters at 800
  Vector4 _CustomRenderTextureInfo at 816
}

Shader Disassembly:
//
// Generated by Microsoft (R) D3D Shader Disassembler
//
//
// Input signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_VertexID              0   x           0   VERTID    uint   x
//
//
// Output signature:
//
// Name                 Index   Mask Register SysValue  Format   Used
// -------------------- ----- ------ -------- -------- ------- ------
// SV_POSITION              0   xyzw        0      POS   float   xyzw
// TEXCOORD                 0   xyz         1     NONE   float   xyz
// TEXCOORD                 1   xyz         2     NONE   float   xyz
// TEXCOORD                 2   x           3     NONE    uint   x
// TEXCOORD                 3   xyz         4     NONE   float   xyz
//
      vs_4_0
      dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0, 0},
                              { -1.000000, -1.000000, 0, 1.000000},
                              { 1.000000, -1.000000, 1.000000, 1.000000},
                              { 1.000000, 1.000000, 1.000000, 0},
                              { -1.000000, 1.000000, 0, 0},
                              { 1.000000, -1.000000, 1.000000, 1.000000} }
      dcl_constantbuffer CB0[52], dynamicIndexed
      dcl_input_sgv v0.x, vertex_id
      dcl_output_siv o0.xyzw, position
      dcl_output o1.xyz
      dcl_output o2.xyz
      dcl_output o3.x
      dcl_output o4.xyz
      dcl_temps 5
   0: lt r0.xy, l(0.000000, 0.000000, 0.000000, 0.000000), cb0[50].xzxx
   1: udiv r1.x, r2.x, v0.x, l(6)
   2: div r3.yz, cb0[r1.x + 18].xxyx, cb0[51].xxyx
   3: mul r4.x, cb0[51].z, cb0[r1.x + 2].z
   4: mul r4.w, cb0[51].z, cb0[r1.x + 18].z
   5: mov r3.x, cb0[r1.x + 2].z
   6: mov r3.w, cb0[r1.x + 18].z
   7: mov r4.yz, cb0[r1.x + 18].xxyx
   8: movc r3.xyzw, r0.xxxx, r3.xyzw, r4.xyzw
   9: mad r0.z, -r3.w, l(0.500000), r3.x
  10: ftoi r0.w, r0.z
  11: round_z r0.z, r0.z
  12: lt r0.z, cb0[51].w, r0.z
  13: ftoi r1.y, r3.w
  14: mul r1.zw, r3.zzzy, icb[r2.x + 0].yyyx
  15: iadd r0.w, r0.w, r1.y
  16: itof r0.w, r0.w
  17: ge r0.w, cb0[51].w, r0.w
  18: or r0.z, r0.w, r0.z
  19: mul r0.w, l(0.017453), cb0[r1.x + 18].w
  20: sincos r3.x, r4.x, r0.w
  21: mul r2.yz, r1.zzwz, r3.xxxx
  22: mad r0.w, r1.w, r4.x, -r2.y
  23: mad r1.y, r1.z, r4.x, r2.z
  24: div r1.zw, cb0[r1.x + 2].xxxy, cb0[51].xxxy
  25: movc r1.zw, r0.xxxx, r1.zzzw, cb0[r1.x + 2].xxxy
  26: mad r1.zw, r1.zzzw, l(0.000000, 0.000000, 2.000000, 2.000000), l(0.000000, 0.000000, -1.000000, -1.000000)
  27: add r3.x, r0.w, r1.z
  28: add r3.y, r1.w, r1.y
  29: movc r0.xz, r0.zzzz, l(1000.000000,0,1000.000000,0), r3.xxyx
  30: movc r0.xy, r0.yyyy, r0.xzxx, r3.xyxx
  31: mov o0.xy, r0.xyxx
  32: mad r0.xy, r0.xyxx, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.500000, 0.000000, 0.000000)
  33: mov o0.zw, l(0,0,0,1.000000)
  34: mov o1.xy, icb[r2.x + 0].zwzz
  35: mov o3.x, cb0[r1.x + 34].x
  36: mov o1.z, cb0[50].y
  37: add r0.z, -r0.y, l(1.000000)
  38: mov r0.w, cb0[50].y
  39: mov o2.xyz, r0.xzwx
  40: mad r0.xy, r0.xzxx, l(2.000000, 2.000000, 0.000000, 0.000000), l(-1.000000, -1.000000, 0.000000, 0.000000)
  41: mov r0.z, -r0.y
  42: mul r1.xy, r0.xzxx, l(-1.000000, 1.000000, 0.000000, 0.000000)
  43: eq r1.w, cb0[51].w, l(4.000000)
  44: mov r1.z, l(-1.000000)
  45: mov r0.w, l(1.000000)
  46: movc r1.xyz, r1.wwww, r0.xzwx, r1.xyzx
  47: mov r2.yz, r0.zzxz
  48: mul r3.xyz, r0.wzxw, l(1.000000, 1.000000, -1.000000, 0.000000)
  49: mov r2.x, l(-1.000000)
  50: eq r4.xyzw, cb0[51].wwww, l(0.000000, 1.000000, 2.000000, 3.000000)
  51: movc r1.xyz, r4.wwww, r2.zxyz, r1.xyzx
  52: movc r0.xyz, r4.zzzz, r0.xwyx, r1.xyzx
  53: movc r0.xyz, r4.yyyy, r2.xyzx, r0.xyzx
  54: movc r0.xyz, r4.xxxx, r3.xyzx, r0.xyzx
  55: dp3 r0.w, r0.xyzx, r0.xyzx
  56: rsq r0.w, r0.w
  57: mul o4.xyz, r0.wwww, r0.xyzx
  58: ret
// Approximately 0 instruction slots used

カスタムレンダーテクスチャ用シェーダーへの適用

改善したからには自分のカスタムレンダーテクスチャ用のシェーダーに取り入れたいものである. 影響が少なく,使い回しができ,すぐに元に戻せる方法としては以下の通りである.

まず,下記のインクルード用ファイル CustomRenderTextureEx.cginc を用意する.

#ifndef CUSTOM_TEXTURE_EX_INCLUDED
#define CUSTOM_TEXTURE_EX_INCLUDED

#include "UnityCustomRenderTexture.cginc"


// Computes the normalized cube-map direction for `globalTexcoord` on the face
// selected by _CustomRenderTextureCubeFace. The un-normalized vector is picked
// with a chain of conditional operators (faces 0..4, anything else falls
// through to the face-5 vector) and normalized exactly once.
float3 CustomRenderTextureComputeCubeDirectionEx(float2 globalTexcoord)
{
    // Remap the texcoord from [0, 1] to [-1, 1].
    const float2 p = globalTexcoord * 2.0 - 1.0;
    const float face = _CustomRenderTextureCubeFace;

    const float3 unnormalized =
        face == 0.0 ? float3(1.0, -p.y, -p.x) :
        face == 1.0 ? float3(-1.0, -p.y, p.x) :
        face == 2.0 ? float3(p.x, 1.0, p.y) :
        face == 3.0 ? float3(p.x, -1.0, -p.y) :
        face == 4.0 ? float3(p.x, -p.y, 1.0) :
                      float3(-p.x, -p.y, -1.0);

    return normalize(unnormalized);
}


// Drop-in variant of the standard custom texture vertex shader; the cube-map
// direction is computed with CustomRenderTextureComputeCubeDirectionEx().
//
// Generates one quad (six vertices) per update zone purely from SV_VertexID:
//   - vertexID / 6 selects the update zone, vertexID % 6 selects the quad corner.
//   - The corner is scaled by the zone size, rotated, and translated to the zone center.
//   - For 3D textures, quads whose zone does not contain the current slice are
//     moved far outside clip space.
// Outputs the clip-space position, local/global texcoords, the zone's primitive ID
// and the cube-map direction computed from the global texcoord.
v2f_customrendertexture CustomRenderTextureVertexShaderEx(appdata_customrendertexture IN)
{
    v2f_customrendertexture OUT;

    // Quad corner positions and matching local texcoords; the vertex order and
    // the v coordinate differ depending on UNITY_UV_STARTS_AT_TOP.
#if UNITY_UV_STARTS_AT_TOP
    const float2 vertexPositions[6] =
    {
        { -1.0f,  1.0f },
        { -1.0f, -1.0f },
        {  1.0f, -1.0f },
        {  1.0f,  1.0f },
        { -1.0f,  1.0f },
        {  1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f }
    };
#else
    const float2 vertexPositions[6] =
    {
        {  1.0f,  1.0f },
        { -1.0f, -1.0f },
        { -1.0f,  1.0f },
        { -1.0f, -1.0f },
        {  1.0f,  1.0f },
        {  1.0f, -1.0f }
    };

    const float2 texCoords[6] =
    {
        { 1.0f, 1.0f },
        { 0.0f, 0.0f },
        { 0.0f, 1.0f },
        { 0.0f, 0.0f },
        { 1.0f, 1.0f },
        { 1.0f, 0.0f }
    };
#endif

    // Six consecutive vertex IDs form one update zone quad.
    uint primitiveID = IN.vertexID / 6;
    uint vertexID = IN.vertexID % 6;
    float3 updateZoneCenter = CustomRenderTextureCenters[primitiveID].xyz;
    float3 updateZoneSize = CustomRenderTextureSizesAndRotations[primitiveID].xyz;
    // Rotation is stored in degrees; convert to radians.
    float rotation = CustomRenderTextureSizesAndRotations[primitiveID].w * UNITY_PI / 180.0f;

#if !UNITY_UV_STARTS_AT_TOP
    rotation = -rotation;
#endif

    // Normalize rect if needed
    if (CustomRenderTextureUpdateSpace > 0.0) // Pixel space
    {
        // Normalize xy because we need it in clip space.
        updateZoneCenter.xy /= _CustomRenderTextureInfo.xy;
        updateZoneSize.xy /= _CustomRenderTextureInfo.xy;
    }
    else // normalized space
    {
        // Un-normalize depth because we need actual slice index for culling
        updateZoneCenter.z *= _CustomRenderTextureInfo.z;
        updateZoneSize.z *= _CustomRenderTextureInfo.z;
    }

    // Compute rotation

    // Compute quad vertex position: scale the corner by the zone size, rotate it,
    // then translate it to the zone center remapped from [0, 1] to [-1, 1].
    float2 clipSpaceCenter = updateZoneCenter.xy * 2.0 - 1.0;
    float2 pos = vertexPositions[vertexID] * updateZoneSize.xy;
    pos = CustomRenderTextureRotate2D(pos, rotation);
    pos.x += clipSpaceCenter.x;
#if UNITY_UV_STARTS_AT_TOP
    pos.y += clipSpaceCenter.y;
#else
    pos.y -= clipSpaceCenter.y;
#endif

    // For 3D texture, cull quads outside of the update zone
    // This is needed in addition to the preliminary minSlice/maxSlice done on the CPU because update zones can be disjointed.
    // ie: slices [1..5] and [10..15] for two different zones so we need to cull out slices 0 and [6..9]
    if (CustomRenderTextureIs3D > 0.0)
    {
        int minSlice = (int)(updateZoneCenter.z - updateZoneSize.z * 0.5);
        int maxSlice = minSlice + (int)updateZoneSize.z;
        if (_CustomRenderTexture3DSlice < minSlice || _CustomRenderTexture3DSlice >= maxSlice)
        {
            pos.xy = float2(1000.0, 1000.0); // Vertex outside of ncs
        }
    }

    OUT.vertex = float4(pos, 0.0, 1.0);
    // The ID is stored in a float uniform; asuint() reinterprets its bits as uint.
    OUT.primitiveID = asuint(CustomRenderTexturePrimitiveIDs[primitiveID]);
    OUT.localTexcoord = float3(texCoords[vertexID], CustomRenderTexture3DTexcoordW);
    // Global texcoord is the final clip-space position remapped from [-1, 1] to [0, 1].
    OUT.globalTexcoord = float3(pos.xy * 0.5 + 0.5, CustomRenderTexture3DTexcoordW);
#if UNITY_UV_STARTS_AT_TOP
    OUT.globalTexcoord.y = 1.0 - OUT.globalTexcoord.y;
#endif
    OUT.direction = CustomRenderTextureComputeCubeDirectionEx(OUT.globalTexcoord.xy);

    return OUT;
}


#endif  // CUSTOM_TEXTURE_EX_INCLUDED

そして,カスタムレンダーテクスチャ用のシェーダーにおいて,

#include "UnityCustomRenderTexture.cginc"
#pragma vertex CustomRenderTextureVertexShader

としている2行それぞれを

#include "CustomRenderTextureEx.cginc"
#pragma vertex CustomRenderTextureVertexShaderEx

に置き換えるとよい.

まとめ

標準ライブラリであってもその実装を過信せずに改善を試みてもよいかもしれない.

参考文献