#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMSArray;
precision highp image2DArray;
#define API_OPENGL 1
// HLSL-style aliases for the GLSL vector types and built-ins used below.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Vertex UberShader for 0 texgens

// Parameters of a single light, as stored in VSBlock.clights.
struct Light {
  int4 color;
  float4 cosatt;
  float4 distatt;
  float4 pos;
  float4 dir;
};

// Uniform block read by the vertex stage (std140 layout, binding 2).
UBO_BINDING(std140, 2) uniform VSBlock {
  uint components;
  uint xfmem_dualTexInfo;
  uint xfmem_numColorChans;
  uint missing_color_hex;
  float4 missing_color_value;
  float4 cpnmtx[6];
  float4 cproj[4];
  int4 cmtrl[4];
  Light clights[8];
  float4 ctexmtx[24];
  float4 ctrmtx[64];
  float4 cnmtx[32];
  float4 cpostmtx[64];
  float4 cpixelcenter;
  float2 cviewport;
  uint4 xfmem_pack1[8];
  float4 cnormal;
  float4 ctangent;
  float4 cbinormal;
  uint vertex_stride;
  uint vertex_offset_rawnormal;
  uint vertex_offset_rawtangent;
  uint vertex_offset_rawbinormal;
  uint vertex_offset_rawpos;
  uint vertex_offset_posmtx;
  uint vertex_offset_rawcolor0;
  uint vertex_offset_rawcolor1;
  uint4 vertex_offset_rawtex[2];
  // Accessors for the four values packed into each xfmem_pack1 entry.
  #define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)
  #define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)
  #define xfmem_color(i) (xfmem_pack1[(i)].z)
  #define xfmem_alpha(i) (xfmem_pack1[(i)].w)
};

// Per-vertex results; mirrors the members of the VertexData interface block.
struct VS_OUTPUT {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
};

#define dolphin_isnan(f) isnan(f)

// Computes the contribution of light `index` for one lighting channel.
//
//   index       - element of clights[] to evaluate (0..7)
//   attnfunc    - attenuation mode selector (see the cases below)
//   diffusefunc - diffuse mode selector (0 = none, 1 = signed, 2 = clamped)
//   pos         - vertex position the light direction is measured from
//   normal      - vertex normal used for the diffuse and attenuation terms
//
// Returns the light colour scaled by attenuation and the diffuse term, rounded
// to integers. An unrecognised diffusefunc yields int4(0, 0, 0, 0).
int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, float3 normal) {
  float3 ldir, cosAttn, distAttn;
  float dist, dist2, attn;

  switch (attnfunc) {
  case 0x0u /* No attenuation */:
  case 0x2u /* Directional light attenuation */:
    ldir = normalize(clights[index].pos.xyz - pos.xyz);
    attn = 1.0;
    if (length(ldir) == 0.0)
      ldir = normal;
    break;

  case 0x1u /* Point light attenuation */:
    ldir = normalize(clights[index].pos.xyz - pos.xyz);
    attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, clights[index].dir.xyz)) : 0.0;
    cosAttn = clights[index].cosatt.xyz;
    if (diffusefunc == 0x0u /* None */)
      distAttn = clights[index].distatt.xyz;
    else
      distAttn = normalize(clights[index].distatt.xyz);
    // Both polynomials are evaluated in attn itself: k0 + k1*attn + k2*attn^2.
    attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, float3(1.0, attn, attn*attn));
    break;

  case 0x3u /* Spot light attenuation */:
    ldir = clights[index].pos.xyz - pos.xyz;
    dist2 = dot(ldir, ldir);
    dist = sqrt(dist2);
    ldir = ldir / dist;
    attn = max(0.0, dot(ldir, clights[index].dir.xyz));
    // Angular polynomial in attn divided by a distance polynomial in dist.
    attn = max(0.0, clights[index].cosatt.x + clights[index].cosatt.y * attn + clights[index].cosatt.z * attn * attn) / dot(clights[index].distatt.xyz, float3(1.0, dist, dist2));
    break;

  default:
    attn = 1.0;
    ldir = normal;
    break;
  }

  switch (diffusefunc) {
  case 0x0u /* None */:
    return int4(round(attn * float4(clights[index].color)));
  case 0x1u /* Sign */:
    return int4(round(attn * dot(ldir, normal) * float4(clights[index].color)));
  case 0x2u /* Clamp */:
    return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(clights[index].color)));
  default:
    return int4(0, 0, 0, 0);
  }
}

// Vertex inputs. ATTRIBUTE_LOCATION expands to nothing in this configuration,
// so the location numbers below are informational only.
ATTRIBUTE_LOCATION(0x0u /* Position */) in float4 rawpos;
ATTRIBUTE_LOCATION(0x1u /* Position Matrix */) in uint4 posmtx;
ATTRIBUTE_LOCATION(0x2u /* Normal */) in float3 rawnormal;
ATTRIBUTE_LOCATION(0x3u /* Tangent */) in float3 rawtangent;
ATTRIBUTE_LOCATION(0x4u /* Binormal */) in float3 rawbinormal;
ATTRIBUTE_LOCATION(0x5u /* Color 0 */) in float4 rawcolor0;
ATTRIBUTE_LOCATION(0x6u /* Color 1 */) in float4 rawcolor1;
ATTRIBUTE_LOCATION(0x8u /* Tex Coord 0 */) in float3 rawtex0;
ATTRIBUTE_LOCATION(0x9u /* Tex Coord 1 */) in float3 rawtex1;
ATTRIBUTE_LOCATION(0xau /* Tex Coord 2 */) in float3 rawtex2;
ATTRIBUTE_LOCATION(0xbu /* Tex Coord 3 */) in float3 rawtex3;
ATTRIBUTE_LOCATION(0xcu /* Tex Coord 4 */) in float3 rawtex4;
ATTRIBUTE_LOCATION(0xdu /* Tex Coord 5 */) in float3 rawtex5;
ATTRIBUTE_LOCATION(0xeu /* Tex Coord 6 */) in float3 rawtex6;
ATTRIBUTE_LOCATION(0xfu /* Tex Coord 7 */) in float3 rawtex7;

// Interface block handed to the next stage.
VARYING_LOCATION(0) out VertexData {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
} vs;

// Vertex stage entry point: transforms the position, evaluates both lighting
// channels, and writes the results to `vs` and gl_Position.
void main() {
  VS_OUTPUT o;

  // This shader variant never computes clip distances, but both fields are
  // copied to the `vs` outputs at the end. Give them a defined value so the
  // varyings do not carry undefined data.
  o.clipDist0 = 0.0;
  o.clipDist1 = 0.0;

  // Position matrix
  float4 P0;
  float4 P1;
  float4 P2;

  // Normal matrix
  float3 N0;
  float3 N1;
  float3 N2;

  if ((components & 2u) != 0u) { // VB_HAS_POSMTXIDX
    // Vertex format has a per-vertex matrix
    int posidx = int(posmtx.r);
    P0 = ctrmtx[posidx];
    P1 = ctrmtx[posidx+1];
    P2 = ctrmtx[posidx+2];

    // Indices of 32 and above are folded back into the 32-entry normal matrix array.
    int normidx = posidx >= 32 ? (posidx - 32) : posidx;
    N0 = cnmtx[normidx].xyz;
    N1 = cnmtx[normidx+1].xyz;
    N2 = cnmtx[normidx+2].xyz;
  } else {
    // One shared matrix
    P0 = cpnmtx[0];
    P1 = cpnmtx[1];
    P2 = cpnmtx[2];
    N0 = cpnmtx[3].xyz;
    N1 = cpnmtx[4].xyz;
    N2 = cpnmtx[5].xyz;
  }

  // Multiply the position vector by the position matrix
  float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);
  o.pos = float4(dot(cproj[0], pos), dot(cproj[1], pos), dot(cproj[2], pos), dot(cproj[3], pos));

  // Use the per-vertex normal/tangent/binormal when the vertex format carries
  // them, otherwise fall back to the values supplied in the uniform block.
  float3 _rawnormal;
  float3 _rawtangent;
  float3 _rawbinormal;
  if ((components & 1024u) != 0u) // VB_HAS_NORMAL
  {
    _rawnormal = rawnormal;
  }
  else
  {
    _rawnormal = cnormal.xyz;
  }

  if ((components & 2048u) != 0u) // VB_HAS_TANGENT
  {
    _rawtangent = rawtangent;
  }
  else
  {
    _rawtangent = ctangent.xyz;
  }

  if ((components & 4096u) != 0u) // VB_HAS_BINORMAL
  {
    _rawbinormal = rawbinormal;
  }
  else
  {
    _rawbinormal = cbinormal.xyz;
  }

  // The scale of the transform matrix is used to control the size of the emboss map
  // effect by changing the scale of the transformed binormals (which only get used by
  // emboss map texgens). By normalising the first transformed normal (which is used
  // by lighting calculations and needs to be unit length), the same transform matrix
  // can do double duty, scaling for emboss mapping, and not scaling for lighting.
  float3 _normal = normalize(float3(dot(N0, _rawnormal), dot(N1, _rawnormal), dot(N2, _rawnormal)));
  float3 _tangent = float3(dot(N0, _rawtangent), dot(N1, _rawtangent), dot(N2, _rawtangent));
  float3 _binormal = float3(dot(N0, _rawbinormal), dot(N1, _rawbinormal), dot(N2, _rawbinormal));

  // xfmem.numColorChans controls the number of color channels available to TEV,
  // but we still need to generate all channels here, as it can be used in texgen.
  // Cel-damage is an example of this.
  float4 vertex_color_0, vertex_color_1;

  // To use color 1, the vertex descriptor must have color 0 and 1.
  // If color 1 is present but not color 0, it is used for lighting channel 0.
  if ((components & 24576u) == 24576u) // VB_HAS_COL0 | VB_HAS_COL1
  {
    vertex_color_0 = rawcolor0;
    vertex_color_1 = rawcolor1;
  }
  else if ((components & 8192u) != 0u) // VB_HAS_COL0
  {
    vertex_color_0 = rawcolor0;
    vertex_color_1 = rawcolor0;
  }
  else if ((components & 16384u) != 0u) // VB_HAS_COL1
  {
    vertex_color_0 = rawcolor1;
    vertex_color_1 = rawcolor1;
  }
  else
  {
    vertex_color_0 = missing_color_value;
    vertex_color_1 = missing_color_value;
  }

  // Lighting
  for (uint chan = 0u; chan < 2u; chan++) {
    uint colorreg = xfmem_color(chan);
    uint alphareg = xfmem_alpha(chan);
    int4 mat = cmtrl[chan + 2u];
    int4 lacc = int4(255, 255, 255, 255);

    // Bit 0 of each register: take the material from the vertex colour
    // instead of cmtrl[chan + 2].
    if (bitfieldExtract(uint(colorreg), 0, 1) != 0u)
      mat.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));

    if (bitfieldExtract(uint(alphareg), 0, 1) != 0u)
      mat.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
    else
      mat.w = cmtrl [chan + 2u].w;

    // Bit 1: lighting enabled for the colour components.
    if (bitfieldExtract(uint(colorreg), 1, 1) != 0u) {
      // Bit 6: start the accumulator from the vertex colour instead of cmtrl[chan].
      if (bitfieldExtract(uint(colorreg), 6, 1) != 0u)
        lacc.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));
      else
        lacc.xyz = cmtrl [chan].xyz;

      // The 8-bit light mask is split across bits 2..5 (lights 0-3) and bits 11..14 (lights 4-7).
      uint light_mask = bitfieldExtract(uint(colorreg), 2, 4) | (bitfieldExtract(uint(colorreg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(colorreg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(colorreg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz, _normal).xyz;
      }
    }

    // Same again for the alpha component, driven by the alpha register.
    if (bitfieldExtract(uint(alphareg), 1, 1) != 0u) {
      if (bitfieldExtract(uint(alphareg), 6, 1) != 0u) {
        if ((components & (8192u << chan)) != 0u) // VB_HAS_COL0 << chan
          lacc.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
        else if ((components & 8192u) != 0u) // VB_HAS_COL0
          lacc.w = int(round(vertex_color_0.w * 255.0));
        else
          lacc.w = 255;
      } else {
        lacc.w = cmtrl [chan].w;
      }

      uint light_mask = bitfieldExtract(uint(alphareg), 2, 4) | (bitfieldExtract(uint(alphareg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(alphareg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(alphareg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz, _normal).w;
      }
    }

    lacc = clamp(lacc, 0, 255);

    // Hopefully GPUs that can support dynamic indexing will optimize this.
    // lacc + (lacc >> 7) widens 0..255 to 0..256 so that ">> 8" is an exact scale.
    float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;
    switch (chan) {
    case 0u: o.colors_0 = lit_color; break;
    case 1u: o.colors_1 = lit_color; break;
    }
  }

  // The number of colors available to TEV is determined by numColorChans.
  // We have to provide the fields to match the interface, so set to zero
  // if it's not enabled.
  if (xfmem_numColorChans == 0u)
    o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);
  if (xfmem_numColorChans <= 1u)
    o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);

  // Final clip-space adjustments driven by cpixelcenter.
  // NOTE(review): presumably a depth-range remap followed by a pixel-centre
  // offset with a sign flip of Y; confirm against the code that fills cpixelcenter.
  o.pos.z = o.pos.w * cpixelcenter.w - o.pos.z * cpixelcenter.z;
  o.pos.z = o.pos.z * 2.0 - o.pos.w;
  o.pos.xy *= sign(cpixelcenter.xy * float2(1.0, -1.0));
  o.pos.xy = o.pos.xy - o.pos.w * cpixelcenter.xy;

  vs.pos = o.pos;
  vs.colors_0 = o.colors_0;
  vs.colors_1 = o.colors_1;
  vs.clipDist0 = o.clipDist0;
  vs.clipDist1 = o.clipDist1;
  gl_Position = o.pos;
}

#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMSArray;
precision highp image2DArray;
#define API_OPENGL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Pixel UberShader for 0 texgens, no dual-source blending

// Integer dot products.
int idot(int3 x, int3 y) {
  int3 tmp = x * y;
  return tmp.x + tmp.y + tmp.z;
}
int idot(int4 x, int4 y) {
  int4 tmp = x * y;
  return tmp.x + tmp.y + tmp.z + tmp.w;
}

// Round to nearest and convert to integer, for scalars and vectors.
int iround(float x) { return int (round(x)); }
int2 iround(float2 x) { return int2(round(x)); }
int3 iround(float3 x) { return int3(round(x)); }
int4 iround(float4 x) { return int4(round(x)); }

// Layout qualifier for the sampler array declaration that follows.
SAMPLER_BINDING(0)
uniform sampler2DArray samp[8];

// Uniform block read by the pixel stage (std140 layout, binding 1).
UBO_BINDING(std140, 1) uniform PSBlock {
  int4 color[4];
  int4 k[4];
  int4 alphaRef;
  int4 texdim[8];
  int4 czbias[2];
  int4 cindscale[2];
  int4 cindmtx[6];
  int4 cfogcolor;
  int4 cfogi;
  float4 cfogf;
  float4 cfogrange[3];
  float4 czslope;
  float2 cefbscale;
  uint bpmem_genmode;
  uint bpmem_alphaTest;
  uint bpmem_fogParam3;
  uint bpmem_fogRangeBase;
  uint bpmem_dstalpha;
  uint bpmem_ztex_op;
  bool bpmem_late_ztest;
  bool bpmem_rgba6_format;
  bool bpmem_dither;
  bool bpmem_bounding_box;
  uint4 bpmem_pack1[16];
  uint4 bpmem_pack2[8];
  int4 konstLookup[32];
  bool blend_enable;
  uint blend_src_factor;
  uint blend_src_factor_alpha;
  uint blend_dst_factor;
  uint blend_dst_factor_alpha;
  bool blend_subtract;
  bool blend_subtract_alpha;
  bool logic_op_enable;
  uint logic_op_mode;
  uint time_ms;
};

// Accessors for the values packed into bpmem_pack1 / bpmem_pack2.
#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)
#define bpmem_tevind(i) (bpmem_pack1[(i)].z)
#define bpmem_iref(i) (bpmem_pack1[(i)].w)
#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)
#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)
#define samp_texmode0(i) (bpmem_pack2[(i)].z)
#define samp_texmode1(i) (bpmem_pack2[(i)].w)

// Samples `tex` at integer coordinate `uv` on array layer `layer` and returns
// the texel scaled to 0..255 integers. The coordinate is normalised by
// texdim[texmap] * 128, and the LOD bias is a signed 16-bit field taken from
// bits 8..23 of samp_texmode0(texmap), in 1/256 steps.
int4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {
  float size_s = float(texdim[texmap].x * 128);
  float size_t = float(texdim[texmap].y * 128);
  float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);
  uint texmode0 = samp_texmode0(texmap);
  // The int() cast makes bitfieldExtract sign-extend the bias field.
  float lod_bias = float(bitfieldExtract(int(texmode0), 8, 16)) / 256.0f;
  return iround(255.0 * texture(tex, coords, lod_bias));
}

// Colour output. Declared inout (FRAGMENT_INOUT) so main() can also read the
// existing framebuffer value through it.
FRAGMENT_OUTPUT_LOCATION(0) FRAGMENT_INOUT vec4 real_ocol0;

// Inputs from the previous stage; no instance name, so members are referenced directly.
VARYING_LOCATION(0) in VertexData {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
};

// Convenience wrapper: samples the sampler that belongs to `texmap`.
int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {
  return sampleTexture(texmap, samp[texmap], uv, layer);
}

// Reorders the channels of `color` according to swap table entry `s`.
int4 Swizzle(uint s, int4 color) {
  // AKA: Color Channel Swapping
  int4 ret;
  ret.r = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 0, 2)];
  ret.g = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 2, 2)];
  ret.b = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 0, 2)];
  ret.a = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 2, 2)];
  return ret;
}

// Applies a wrap mode to `coord`: 0 passes it through, 1..5 mask it with
// 0xfffe >> mode, anything higher yields 0.
int Wrap(int coord, uint mode) {
  if (mode == 0u) // ITW_OFF
    return coord;
  else if (mode < 6u) // ITW_256 to ITW_16
    return coord & (0xfffe >> mode);
  else // ITW_0
    return 0;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale
int tevLerp(int A, int B, int C, int D, uint bias, bool op, uint scale) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  int lerp = (A << 8) + (B - A)*C;
  if (scale != 3u) {
    lerp = lerp << scale;
    D = D << scale;
  }

  // TODO: Is this rounding bias still added when the scale is divide by 2?  Currently we do not apply it.
  if (scale != 3u)
    lerp = lerp + (op ? 127 : 128);

  int result = lerp >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Scale was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (scale == 3u)
    result = result >> 1;
  return result;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale
// (three-component version of tevLerp).
int3 tevLerp3(int3 A, int3 B, int3 C, int3 D, uint bias, bool op, uint scale) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  int3 lerp = (A << 8) + (B - A)*C;
  if (scale != 3u) {
    lerp = lerp << scale;
    D = D << scale;
  }

  // TODO: Is this rounding bias still added when the scale is divide by 2?  Currently we do not apply it.
  if (scale != 3u)
    lerp = lerp + (op ? 127 : 128);

  int3 result = lerp >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Scale was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (scale == 3u)
    result = result >> 1;
  return result;
}

// Implements operations 0-5 of TEV's compare mode,
// which are common to both color and alpha channels
bool tevCompare(uint op, int3 color_A, int3 color_B) {
  switch (op) {
  case 0u: // TevCompareMode::R8, TevComparison::GT
    return (color_A.r > color_B.r);
  case 1u: // TevCompareMode::R8, TevComparison::EQ
    return (color_A.r == color_B.r);
  case 2u: // TevCompareMode::GR16, TevComparison::GT
    int A_16 = (color_A.r | (color_A.g << 8));
    int B_16 = (color_B.r | (color_B.g << 8));
    return A_16 > B_16;
  case 3u: // TevCompareMode::GR16, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g);
  case 4u: // TevCompareMode::BGR24, TevComparison::GT
    int A_24 = (color_A.r | (color_A.g << 8) | (color_A.b << 16));
    int B_24 = (color_B.r | (color_B.g << 8) | (color_B.b << 16));
    return A_24 > B_24;
  case 5u: // TevCompareMode::BGR24, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g && color_A.b == color_B.b);
  default:
    return false;
  }
}

// Register state carried across TEV stages.
struct State {
  int4 Reg[4];
  int4 RawTexColor;
  int4 TexColor;
  int AlphaBump;
};

// Configuration words of the stage currently being evaluated.
struct StageState {
  uint stage;
  uint order;
  uint cc;
  uint ac;
};

// Defined after main().
int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);
int4 getKonstColor(State s, StageState ss);

// Helper function for Alpha Test
// Evaluates "a <compare> b" for compare codes 0..7 through a binary if-tree.
bool alphaCompare(int a, int b, uint compare) {
  if (compare < 4u) {
    if (compare < 2u) {
      if (compare < 1u) {
        return false; // Never (0)
      } else {
        return a < b; // Less (1)
      }
    } else {
      if (compare < 3u) {
        return a == b; // Equal (2)
      } else {
        return a <= b; // LEqual (3)
      }
    }
  } else {
    if (compare < 6u) {
      if (compare < 5u) {
        return a > b; // Greater (4)
      } else {
        return a != b; // NEqual (5)
      }
    } else {
      if (compare < 7u) {
        return a >= b; // GEqual (6)
      } else {
        return true; // Always (7)
      }
    }
  }
}

// Returns the colour-combiner input selected by `index` (0..15).
int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint index) {
  if (index < 8u) {
    if (index < 4u) {
      if (index < 2u) {
        if (index < 1u) {
          return s.Reg[0].rgb; // prev.rgb (0)
        } else {
          return s.Reg[0].aaa; // prev.aaa (1)
        }
      } else {
        if (index < 3u) {
          return s.Reg[1].rgb; // c0.rgb (2)
        } else {
          return s.Reg[1].aaa; // c0.aaa (3)
        }
      }
    } else {
      if (index < 6u) {
        if (index < 5u) {
          return s.Reg[2].rgb; // c1.rgb (4)
        } else {
          return s.Reg[2].aaa; // c1.aaa (5)
        }
      } else {
        if (index < 7u) {
          return s.Reg[3].rgb; // c2.rgb (6)
        } else {
          return s.Reg[3].aaa; // c2.aaa (7)
        }
      }
    }
  } else {
    if (index < 12u) {
      if (index < 10u) {
        if (index < 9u) {
          return s.TexColor.rgb; // tex.rgb (8)
        } else {
          return s.TexColor.aaa; // tex.aaa (9)
        }
      } else {
        if (index < 11u) {
          return getRasColor(s, ss, colors_0, colors_1).rgb; // ras.rgb (10)
        } else {
          return getRasColor(s, ss, colors_0, colors_1).aaa; // ras.aaa (11)
        }
      }
    } else {
      if (index < 14u) {
        if (index < 13u) {
          return int3(255, 255, 255); // ONE (12)
        } else {
          return int3(128, 128, 128); // HALF (13)
        }
      } else {
        if (index < 15u) {
          return getKonstColor(s, ss).rgb; // konst.rgb (14)
        } else {
          return int3(0, 0, 0); // ZERO (15)
        }
      }
    }
  }
}

// Returns the alpha-combiner input selected by `index` (0..7).
int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint index) {
  if (index < 4u) {
    if (index < 2u) {
      if (index < 1u) {
        return s.Reg[0].a; // prev (0)
      } else {
        return s.Reg[1].a; // c0 (1)
      }
    } else {
      if (index < 3u) {
        return s.Reg[2].a; // c1 (2)
      } else {
        return s.Reg[3].a; // c2 (3)
      }
    }
  } else {
    if (index < 6u) {
      if (index < 5u) {
        return s.TexColor.a; // tex (4)
      } else {
        return getRasColor(s, ss, colors_0, colors_1).a; // ras (5)
      }
    } else {
      if (index < 7u) {
        return getKonstColor(s, ss).a; // konst (6)
      } else {
        return 0; // ZERO (7)
      }
    }
  }
}

// Returns TEV register `index` (0 = prev, 1..3 = c0..c2).
int4 getTevReg(in State s, uint index) {
  if (index < 2u) {
    if (index < 1u) {
      return s.Reg[0]; // prev (0)
    } else {
      return s.Reg[1]; // c0 (1)
    }
  } else {
    if (index < 3u) {
      return s.Reg[2]; // c1 (2)
    } else {
      return s.Reg[3]; // c2 (3)
    }
  }
}

// Pixel stage entry point: runs the TEV stages, then depth texture, alpha
// test, dither, fog, logic op and blending, and writes real_ocol0.
void main() {
  float4 rawpos = gl_FragCoord;
  uint num_stages = bitfieldExtract(uint(bpmem_genmode), 10, 4);

  // Framebuffer value from before this fragment; used by logic ops and blending.
#ifdef FB_FETCH_VALUE
  float4 initial_ocol0 = FB_FETCH_VALUE;
#else
  float4 initial_ocol0 = real_ocol0;
#endif
  float4 ocol0;
  float4 ocol1;

  State s;
  s.TexColor = int4(0, 0, 0, 0);
  s.RawTexColor = int4(0, 0, 0, 0);
  s.AlphaBump = 0;

  s.Reg[0] = color[0];
  s.Reg[1] = color[1];
  s.Reg[2] = color[2];
  s.Reg[3] = color[3];

  // Main tev loop
  for(uint stage = 0u; stage <= num_stages; stage++) {
    StageState ss;
    ss.stage = stage;
    ss.cc = bpmem_combiners(stage).x;
    ss.ac = bpmem_combiners(stage).y;
    // Each tevorder word serves two stages; odd stages use the bits from 12 upward.
    ss.order = bpmem_tevorder(stage>>1);
    if ((stage & 1u) == 1u)
      ss.order = ss.order >> 12;

    // This is the Meat of TEV
    {
      // Color Combiner
      uint color_a = bitfieldExtract(uint(ss.cc), 12, 4);
      uint color_b = bitfieldExtract(uint(ss.cc), 8, 4);
      uint color_c = bitfieldExtract(uint(ss.cc), 4, 4);
      uint color_d = bitfieldExtract(uint(ss.cc), 0, 4);
      uint color_bias = bitfieldExtract(uint(ss.cc), 16, 2);
      bool color_op = bool(bitfieldExtract(uint(ss.cc), 18, 1));
      bool color_clamp = bool(bitfieldExtract(uint(ss.cc), 19, 1));
      uint color_scale = bitfieldExtract(uint(ss.cc), 20, 2);
      uint color_dest = bitfieldExtract(uint(ss.cc), 22, 2);
      uint color_compare_op = color_scale << 1 | uint(color_op);

      int3 color_A = selectColorInput(s, ss, colors_0, colors_1, color_a) & int3(255, 255, 255);
      int3 color_B = selectColorInput(s, ss, colors_0, colors_1, color_b) & int3(255, 255, 255);
      int3 color_C = selectColorInput(s, ss, colors_0, colors_1, color_c) & int3(255, 255, 255);
      int3 color_D = selectColorInput(s, ss, colors_0, colors_1, color_d);  // 10 bits + sign

      // Note: this local hides the PSBlock member `color` within this scope.
      int3 color;
      if (color_bias != 3u) { // Normal mode
        color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, color_scale);
      } else { // Compare mode
        // op 6 and 7 do a select per color channel
        if (color_compare_op == 6u) {
          // TevCompareMode::RGB8, TevComparison::GT
          color.r = (color_A.r > color_B.r) ? color_C.r : 0;
          color.g = (color_A.g > color_B.g) ? color_C.g : 0;
          color.b = (color_A.b > color_B.b) ? color_C.b : 0;
        } else if (color_compare_op == 7u) {
          // TevCompareMode::RGB8, TevComparison::EQ
          color.r = (color_A.r == color_B.r) ? color_C.r : 0;
          color.g = (color_A.g == color_B.g) ? color_C.g : 0;
          color.b = (color_A.b == color_B.b) ? color_C.b : 0;
        } else {
          // The remaining ops do one compare which selects all 3 channels
          color = tevCompare(color_compare_op, color_A, color_B) ? color_C : int3(0, 0, 0);
        }
        color = color_D + color;
      }

      // Clamp result
      if (color_clamp)
        color = clamp(color, 0, 255);
      else
        color = clamp(color, -1024, 1023);

      // Write result to the correct input register of the next stage
      if (color_dest < 2u) {
        if (color_dest < 1u) {
          s.Reg[0].rgb = color; // prev (0)
        } else {
          s.Reg[1].rgb = color; // c0 (1)
        }
      } else {
        if (color_dest < 3u) {
          s.Reg[2].rgb = color; // c1 (2)
        } else {
          s.Reg[3].rgb = color; // c2 (3)
        }
      }

      // Alpha Combiner
      uint alpha_a = bitfieldExtract(uint(ss.ac), 13, 3);
      uint alpha_b = bitfieldExtract(uint(ss.ac), 10, 3);
      uint alpha_c = bitfieldExtract(uint(ss.ac), 7, 3);
      uint alpha_d = bitfieldExtract(uint(ss.ac), 4, 3);
      uint alpha_bias = bitfieldExtract(uint(ss.ac), 16, 2);
      bool alpha_op = bool(bitfieldExtract(uint(ss.ac), 18, 1));
      bool alpha_clamp = bool(bitfieldExtract(uint(ss.ac), 19, 1));
      uint alpha_scale = bitfieldExtract(uint(ss.ac), 20, 2);
      uint alpha_dest = bitfieldExtract(uint(ss.ac), 22, 2);
      uint alpha_compare_op = alpha_scale << 1 | uint(alpha_op);

      int alpha_A = 0;
      int alpha_B = 0;
      if (alpha_bias != 3u || alpha_compare_op > 5u) {
        // Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5
        alpha_A = selectAlphaInput(s, ss, colors_0, colors_1, alpha_a) & 255;
        alpha_B = selectAlphaInput(s, ss, colors_0, colors_1, alpha_b) & 255;
      }
      int alpha_C = selectAlphaInput(s, ss, colors_0, colors_1, alpha_c) & 255;
      int alpha_D = selectAlphaInput(s, ss, colors_0, colors_1, alpha_d); // 10 bits + sign

      int alpha;
      if (alpha_bias != 3u) { // Normal mode
        alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, alpha_scale);
      } else { // Compare mode
        if (alpha_compare_op == 6u) {
          // TevCompareMode::A8, TevComparison::GT
          alpha = (alpha_A > alpha_B) ? alpha_C : 0;
        } else if (alpha_compare_op == 7u) {
          // TevCompareMode::A8, TevComparison::EQ
          alpha = (alpha_A == alpha_B) ? alpha_C : 0;
        } else {
          // All remaining alpha compare ops actually compare the color channels
          alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;
        }
        alpha = alpha_D + alpha;
      }

      // Clamp result
      if (alpha_clamp)
        alpha = clamp(alpha, 0, 255);
      else
        alpha = clamp(alpha, -1024, 1023);

      // Write result to the correct input register of the next stage
      if (alpha_dest < 2u) {
        if (alpha_dest < 1u) {
          s.Reg[0].a = alpha; // prev (0)
        } else {
          s.Reg[1].a = alpha; // c0 (1)
        }
      } else {
        if (alpha_dest < 3u) {
          s.Reg[2].a = alpha; // c1 (2)
        } else {
          s.Reg[3].a = alpha; // c2 (3)
        }
      }
    }
  } // Main TEV loop

  // The result is read from the destination registers of the last stage.
  int4 TevResult;
  TevResult.xyz = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).x), 22, 2)).xyz;
  TevResult.w = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).y), 22, 2)).w;
  TevResult &= 255;

  // 24-bit integer depth of this fragment.
  int zCoord = int(rawpos.z * 16777216.0);
  zCoord = clamp(zCoord, 0, 0xFFFFFF);

  // Depth Texture
  if (bpmem_ztex_op != 0u) {
    int ztex = int(czbias[1].w); // fixed bias

    // Whatever texture was in our last stage, it's now our depth texture
    ztex += idot(s.RawTexColor.xyzw, czbias[0].xyzw);
    ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;
    zCoord = ztex & 0xFFFFFF;
  }

  // Alpha Test
#define discard_fragment discard
  if (bpmem_alphaTest != 0u) {
    bool comp0 = alphaCompare(TevResult.a, alphaRef.r, bitfieldExtract(uint(bpmem_alphaTest), 16, 3));
    bool comp1 = alphaCompare(TevResult.a, alphaRef.g, bitfieldExtract(uint(bpmem_alphaTest), 19, 3));

    // These if statements are written weirdly to work around intel and Qualcomm bugs with handling booleans.
    switch (bitfieldExtract(uint(bpmem_alphaTest), 22, 2)) {
    case 0u: // AND
      if (comp0 && comp1) break; else discard_fragment; break;
    case 1u: // OR
      if (comp0 || comp1) break; else discard_fragment; break;
    case 2u: // XOR
      if (comp0 != comp1) break; else discard_fragment; break;
    case 3u: // XNOR
      if (comp0 == comp1) break; else discard_fragment; break;
    }
  }

  // Hardware testing indicates that an alpha of 1 can pass an alpha test,
  // but doesn't do anything in blending
  if (TevResult.a == 1) TevResult.a = 0;

  if (bpmem_dither) {
    // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
    // Here the matrix is encoded into the two factor constants
    int2 dither = int2(rawpos.xy) & 1;
    TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + (dither.x ^ dither.y) * 2 + dither.y;
  }

  // Fog
  uint fog_function = bitfieldExtract(uint(bpmem_fogParam3), 21, 3);
  if (fog_function != 0x0u /* Off (no fog) */) {
    // TODO: This all needs to be converted from float to fixed point
    float ze;
    if (bitfieldExtract(uint(bpmem_fogParam3), 20, 1) == 0u) {
      // perspective
      // ze = A/(B - (Zs >> B_SHF))
      ze = (cfogf.x * 16777216.0) / float(cfogi.y - (zCoord >> cfogi.w));
    } else {
      // orthographic
      // ze = a*Zs    (here, no B_SHF)
      ze = cfogf.x * float(zCoord) / 16777216.0;
    }

    if (bool(bitfieldExtract(uint(bpmem_fogRangeBase), 10, 1))) {
      // x_adjust = sqrt((x-center)^2 + k^2)/k
      // ze *= x_adjust
      float offset = (2.0 * (rawpos.x / cfogf.w)) - 1.0 - cfogf.z;
      float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);
      uint indexlower = uint(floatindex);
      uint indexupper = indexlower + 1u;
      // cfogrange is read as a flat table: element i is cfogrange[i >> 2][i & 3].
      float klower = cfogrange[indexlower >> 2u][indexlower & 3u];
      float kupper = cfogrange[indexupper >> 2u][indexupper & 3u];
      float k = lerp(klower, kupper, frac(floatindex));
      float x_adjust = sqrt(offset * offset + k * k) / k;
      ze *= x_adjust;
    }

    float fog = clamp(ze - cfogf.y, 0.0, 1.0);

    if (fog_function >= 0x4u /* Exponential fog */) {
      switch (fog_function) {
      case 0x4u /* Exponential fog */:
        fog = 1.0 - exp2(-8.0 * fog);
        break;
      case 0x5u /* Exponential-squared fog */:
        fog = 1.0 - exp2(-8.0 * fog * fog);
        break;
      case 0x6u /* Backwards exponential fog */:
        fog = exp2(-8.0 * (1.0 - fog));
        break;
      case 0x7u /* Backwards exponential-squared fog */:
        fog = 1.0 - fog;
        fog = exp2(-8.0 * fog * fog);
        break;
      }
    }

    int ifog = iround(fog * 256.0);
    TevResult.rgb = (TevResult.rgb * (256 - ifog) + cfogcolor.rgb * ifog) >> 8;
  }

  // Logic Ops
  if (logic_op_enable) {
    int4 fb_value = iround(initial_ocol0 * 255.0);
    switch (logic_op_mode) {
    case 0u: TevResult = int4(0, 0, 0, 0); break;
    case 1u: TevResult = TevResult & fb_value; break;
    case 2u: TevResult = TevResult & ~fb_value; break;
    case 3u: break; // source value is kept unchanged
    case 4u: TevResult = ~TevResult & fb_value; break;
    case 5u: TevResult = fb_value; break;
    case 6u: TevResult = TevResult ^ fb_value; break;
    case 7u: TevResult = TevResult | fb_value; break;
    case 8u: TevResult = ~(TevResult | fb_value); break;
    case 9u: TevResult = ~(TevResult ^ fb_value); break;
    case 10u: TevResult = ~fb_value; break;
    case 11u: TevResult = TevResult | ~fb_value; break;
    case 12u: TevResult = ~TevResult; break;
    case 13u: TevResult = ~TevResult | fb_value; break;
    case 14u: TevResult = ~(TevResult & fb_value); break;
    case 15u: TevResult = int4(255, 255, 255, 255); break;
    }
    TevResult &= 0xff;
  }

  // Convert the integer result to normalised floats; colour is truncated to
  // 6 bits when the rgba6 format flag is set, alpha is always truncated to 6 bits.
  if (bpmem_rgba6_format)
    ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;
  else
    ocol0.rgb = float3(TevResult.rgb) / 255.0;

  if (bpmem_dstalpha != 0u)
    ocol0.a = float(bitfieldExtract(uint(bpmem_dstalpha), 0, 8) >> 2) / 63.0;
  else
    ocol0.a = float(TevResult.a >> 2) / 63.0;

  // Dest alpha override (dual source blending)
  // Colors will be blended against the alpha from ocol1 and
  // the alpha from ocol0 will be written to the framebuffer.
  ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);

  if (blend_enable) {
    // Blending is done here in the shader against initial_ocol0.
    float4 src_color;
    if (bpmem_dstalpha != 0u) {
      src_color = ocol1;
    } else {
      src_color = ocol0;
    }

    float4 blend_src;
    if (blend_src_factor < 4u) {
      if (blend_src_factor < 2u) {
        if (blend_src_factor < 1u) {
          blend_src.rgb = float3(0,0,0); // 0 (0)
        } else {
          blend_src.rgb = float3(1,1,1); // 1 (1)
        }
      } else {
        if (blend_src_factor < 3u) {
          blend_src.rgb = initial_ocol0.rgb; // dst_color (2)
        } else {
          blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb; // 1-dst_color (3)
        }
      }
    } else {
      if (blend_src_factor < 6u) {
        if (blend_src_factor < 5u) {
          blend_src.rgb = src_color.aaa; // src_alpha (4)
        } else {
          blend_src.rgb = float3(1,1,1) - src_color.aaa; // 1-src_alpha (5)
        }
      } else {
        if (blend_src_factor < 7u) {
          blend_src.rgb = initial_ocol0.aaa; // dst_alpha (6)
        } else {
          blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa; // 1-dst_alpha (7)
        }
      }
    }

    if (blend_src_factor_alpha < 4u) {
      if (blend_src_factor_alpha < 2u) {
        if (blend_src_factor_alpha < 1u) {
          blend_src.a = 0.0; // 0 (0)
        } else {
          blend_src.a = 1.0; // 1 (1)
        }
      } else {
        if (blend_src_factor_alpha < 3u) {
          blend_src.a = initial_ocol0.a; // dst_color (2)
        } else {
          blend_src.a = 1.0 - initial_ocol0.a; // 1-dst_color (3)
        }
      }
    } else {
      if (blend_src_factor_alpha < 6u) {
        if (blend_src_factor_alpha < 5u) {
          blend_src.a = src_color.a; // src_alpha (4)
        } else {
          blend_src.a = 1.0 - src_color.a; // 1-src_alpha (5)
        }
      } else {
        if (blend_src_factor_alpha < 7u) {
          blend_src.a = initial_ocol0.a; // dst_alpha (6)
        } else {
          blend_src.a = 1.0 - initial_ocol0.a; // 1-dst_alpha (7)
        }
      }
    }

    float4 blend_dst;
    if (blend_dst_factor < 4u) {
      if (blend_dst_factor < 2u) {
        if (blend_dst_factor < 1u) {
          blend_dst.rgb = float3(0,0,0); // 0 (0)
        } else {
          blend_dst.rgb = float3(1,1,1); // 1 (1)
        }
      } else {
        if (blend_dst_factor < 3u) {
          blend_dst.rgb = ocol0.rgb; // src_color (2)
        } else {
          blend_dst.rgb = float3(1,1,1) - ocol0.rgb; // 1-src_color (3)
        }
      }
    } else {
      if (blend_dst_factor < 6u) {
        if (blend_dst_factor < 5u) {
          blend_dst.rgb = src_color.aaa; // src_alpha (4)
        } else {
          blend_dst.rgb = float3(1,1,1) - src_color.aaa; // 1-src_alpha (5)
        }
      } else {
        if (blend_dst_factor < 7u) {
          blend_dst.rgb = initial_ocol0.aaa; // dst_alpha (6)
        } else {
          blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa; // 1-dst_alpha (7)
        }
      }
    }

    if (blend_dst_factor_alpha < 4u) {
      if (blend_dst_factor_alpha < 2u) {
        if (blend_dst_factor_alpha < 1u) {
          blend_dst.a = 0.0; // 0 (0)
        } else {
          blend_dst.a = 1.0; // 1 (1)
        }
      } else {
        if (blend_dst_factor_alpha < 3u) {
          blend_dst.a = ocol0.a; // src_color (2)
        } else {
          blend_dst.a = 1.0 - ocol0.a; // 1-src_color (3)
        }
      }
    } else {
      if (blend_dst_factor_alpha < 6u) {
        if (blend_dst_factor_alpha < 5u) {
          blend_dst.a = src_color.a; // src_alpha (4)
        } else {
          blend_dst.a = 1.0 - src_color.a; // 1-src_alpha (5)
        }
      } else {
        if (blend_dst_factor_alpha < 7u) {
          blend_dst.a = initial_ocol0.a; // dst_alpha (6)
        } else {
          blend_dst.a = 1.0 - initial_ocol0.a; // 1-dst_alpha (7)
        }
      }
    }

    float4 blend_result;
    if (blend_subtract)
      blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb * blend_src.rgb;
    else
      blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb * blend_src.rgb;

    if (blend_subtract_alpha)
      blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;
    else
      blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;

    real_ocol0 = blend_result;
  } else {
    real_ocol0 = ocol0;
  }
}

// Returns the rasterised colour input for the current stage, chosen by
// bits 7..9 of ss.order.
int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {
  // Select Ras for stage
  uint ras = bitfieldExtract(uint(ss.order), 7, 3);
  if (ras < 2u) { // Lighting Channel 0 or 1
    int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);
    uint swap = bitfieldExtract(uint(ss.ac), 0, 2);
    return Swizzle(swap, color);
  } else if (ras == 5u) { // Alpha Bump
    return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);
  } else if (ras == 6u) { // Normalized Alpha Bump
    int normalized = s.AlphaBump | (s.AlphaBump >> 5);
    return int4(normalized, normalized, normalized, normalized);
  } else {
    return int4(0, 0, 0, 0);
  }
}

// Returns the konst colour for the current stage: rgb and alpha are looked up
// separately in konstLookup using two 5-bit selectors from bpmem_tevksel.
int4 getKonstColor(State s, StageState ss) {
  // Select Konst for stage
  // TODO: a switch case might be better here than an dynamically
  // indexed uniform lookup
  uint tevksel = bpmem_tevksel(ss.stage>>1);
  if ((ss.stage & 1u) == 0u)
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 4, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 9, 5)].a);
  else
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 14, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 19, 5)].a);
}

#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMSArray;
precision highp image2DArray;
#define API_OPENGL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Geometry stage input primitive type.
layout(points) in;
// One quad (4-vertex triangle strip) is produced per input point.
layout(triangle_strip, max_vertices = 4) out;

// Light record layout; not referenced by this stage.
struct Light {
  int4 color;
  float4 cosatt;
  float4 distatt;
  float4 pos;
  float4 dir;
};

// Geometry-stage uniforms. Only clinept is read by main() below.
UBO_BINDING(std140, 4) uniform GSBlock {
  float4 cstereo;
  float4 clinept;
  int4 ctexoffset;
  uint vs_expand;
};

// Local working copy of one vertex's attributes.
struct VS_OUTPUT {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
};

// Input from the previous stage; a single element because the input
// primitive is a point.
VARYING_LOCATION(0) in VertexData {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
} vs[1];

// Output to the next stage; same fields as the input block.
VARYING_LOCATION(0) out VertexData {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
} ps;

// Emits one quad corner: shifts v.pos.xy by `delta`, then writes gl_Position
// and every field of the output block before calling EmitVertex().
void emitQuadCorner(VS_OUTPUT v, float2 delta) {
  v.pos.xy += delta;

  gl_Position = v.pos;
  ps.pos = v.pos;
  ps.colors_0 = v.colors_0;
  ps.colors_1 = v.colors_1;
  ps.clipDist0 = v.clipDist0;
  ps.clipDist1 = v.clipDist1;
  EmitVertex();
}

// Expands the incoming point into a quad centred on it.
//
// The half-extent is (clinept.w / clinept.x, -clinept.w / clinept.y) scaled by
// the point's clip-space w. Corners are emitted in the order lower-left,
// lower-right, upper-left, upper-right; colours and clip distances are copied
// unchanged to every corner.
void main() {
  VS_OUTPUT center;
  center.pos = vs[0].pos;
  center.colors_0 = vs[0].colors_0;
  center.colors_1 = vs[0].colors_1;
  center.clipDist0 = vs[0].clipDist0;
  center.clipDist1 = vs[0].clipDist1;

  // NOTE(review): the multiply by w presumably keeps the extent constant after
  // the perspective divide - confirm against the generator.
  float2 offset = float2(clinept.w / clinept.x, -clinept.w / clinept.y) * center.pos.w;

  emitQuadCorner(center, float2(-1, -1) * offset);  // lower-left
  emitQuadCorner(center, float2(1, -1) * offset);   // lower-right
  emitQuadCorner(center, float2(-1, 1) * offset);   // upper-left
  emitQuadCorner(center, offset);                   // upper-right

  EndPrimitive();
}