20110401

NVIDIA FXAA II for Console

I've been too busy to finish off FXAA II, so I'm releasing the source here for the FXAA II preset designed for consoles, which is a great complement to the higher-quality FXAA I designed for the PC.

NOTE: This is only the lowest-quality FXAA preset, designed for consoles. If you want something high quality, download FXAA I.


Performance

On Xbox 360 this is 1.3 ms/frame for 1280x720 and is texture bound.


Image Comparison

The awesome DLAA presentation from GDC (2.2 ms at 720p on Xbox 360) has been released with source and stuff. The NoAA and DLAA images below were blatantly ripped from the presentation.

FXAA II for Consoles (with #define FXAA_SUBPIX_SHIFT 0.25)


FXAA II for Consoles (with #define FXAA_SUBPIX_SHIFT 0.0)


DLAA


No AA


Source

This source is designed to work with the DX10/DX11/GL pixel position instead of the half-offset DX9 position. The "FXAA_SUBPIX_SHIFT" define can be set to 0.0 to make the algorithm more symmetrical, but then it loses the ability to attempt to remove sub-pixel aliasing such as single-pixel features.

/*============================================================================

FXAA v2 CONSOLE by TIMOTHY LOTTES @ NVIDIA

============================================================================*/

/*============================================================================
API PORTING
============================================================================*/
// Target-API selection switches. Each switch defaults to 0 (disabled) unless
// the including code has already defined it; the sections below are compiled
// only for the switches that evaluate to non-zero.
#if !defined(FXAA_HLSL_4)
#define FXAA_HLSL_4 0
#endif
#if !defined(FXAA_HLSL_3)
#define FXAA_HLSL_3 0
#endif
#if !defined(FXAA_GLSL_130)
#define FXAA_GLSL_130 0
#endif
#if !defined(FXAA_GLSL_120)
#define FXAA_GLSL_120 0
#endif
/*--------------------------------------------------------------------------*/
// GLSL 1.20 port: active when FXAA_GLSL_120 is non-zero.
#if FXAA_GLSL_120
// Requires,
// #version 120
// #extension GL_EXT_gpu_shader4 : enable
// HLSL-style vector type names used by the shader body, mapped to GLSL types.
#define int2 ivec2
#define float2 vec2
#define float3 vec3
#define float4 vec4
// Portable constructor names for integer and float 2-vectors.
#define FxaaInt2 ivec2
#define FxaaFloat2 vec2
// Clamp a value to the [0.0, 1.0] range.
#define FxaaSat(a) clamp((a), 0.0, 1.0)
// Texture handle type taken by the pixel shader.
#define FxaaTex sampler2D
// Sample texture t at coordinate p with an explicit LOD of 0.0.
#define FxaaTexLod0(t, p) texture2DLod(t, p, 0.0)
// Sample texture t at coordinate p, LOD 0.0, with offset o handed straight to
// texture2DLodOffset. The r argument is accepted but not used in this port.
#define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
#endif
/*--------------------------------------------------------------------------*/
// GLSL 1.30 port: active when FXAA_GLSL_130 is non-zero.
#if FXAA_GLSL_130
// Requires "#version 130" or better
// HLSL-style vector type names used by the shader body, mapped to GLSL types.
#define int2 ivec2
#define float2 vec2
#define float3 vec3
#define float4 vec4
// Portable constructor names for integer and float 2-vectors.
#define FxaaInt2 ivec2
#define FxaaFloat2 vec2
// Clamp a value to the [0.0, 1.0] range.
#define FxaaSat(a) clamp((a), 0.0, 1.0)
// Texture handle type taken by the pixel shader.
#define FxaaTex sampler2D
// Sample texture t at coordinate p with an explicit LOD of 0.0.
#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
// Sample texture t at coordinate p, LOD 0.0, with offset o handed straight to
// textureLodOffset. The r argument is accepted but not used in this port.
#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
#endif
/*--------------------------------------------------------------------------*/
// HLSL shader-model-3 port: active when FXAA_HLSL_3 is non-zero.
#if FXAA_HLSL_3
// Integer 2-vectors are represented as float2 in this port, so that the
// offset passed to FxaaTexOff can be multiplied by r and added to p.
#define int2 float2
#define FxaaInt2 float2
#define FxaaFloat2 float2
// Clamp a value to the [0.0, 1.0] range.
#define FxaaSat(a) saturate((a))
// Texture handle type taken by the pixel shader.
#define FxaaTex sampler2D
// Sample texture t at coordinate p; the LOD is supplied in the .w component
// of the float4 coordinate and is fixed to 0.0 here.
#define FxaaTexLod0(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
// Sample texture t at p displaced by offset o scaled by r, LOD 0.0.
// Every argument used inside the arithmetic is parenthesized so that an
// expression argument (e.g. `a + b` passed as r) keeps its intended
// precedence after macro expansion.
#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4((p) + ((o) * (r)), 0.0, 0.0))
#endif
/*--------------------------------------------------------------------------*/
// HLSL shader-model-4 port: active when FXAA_HLSL_4 is non-zero.
#if FXAA_HLSL_4
// Portable constructor names for integer and float 2-vectors.
#define FxaaInt2 int2
#define FxaaFloat2 float2
// Clamp a value to the [0.0, 1.0] range.
#define FxaaSat(a) saturate((a))
// Texture handle type taken by the pixel shader: bundles the sampler state
// and the texture object that the sampling macros below use together.
struct FxaaTex { SamplerState smpl; Texture2D tex; };
// Sample t.tex through t.smpl at coordinate p with an explicit LOD of 0.0.
#define FxaaTexLod0(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
// Same as FxaaTexLod0 but with offset o passed to SampleLevel. The r argument
// is accepted but not used in this port.
#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
#endif


/*============================================================================

VERTEX SHADER

============================================================================*/
// Builds the packed coordinate set consumed by FxaaPixelShader.
//
//   pos      - vertex position; both x and y range {-1.0 to 1.0 across screen}.
//   rcpFrame - {1.0/frameWidth, 1.0/frameHeight}.
//
// Returns a float4 whose .xy is pos remapped by (pos * 0.5) + 0.5 and whose
// .zw is that same coordinate moved back by (0.5 + FXAA_SUBPIX_SHIFT) frame
// texels.
float4 FxaaVertexShader(
    float2 pos,
    float2 rcpFrame)
{
/*--------------------------------------------------------------------------*/
#define FXAA_SUBPIX_SHIFT (1.0/4.0)
/*--------------------------------------------------------------------------*/
    // Remapped coordinate of the pixel itself.
    float2 centerUv = (pos.xy * 0.5) + 0.5;

    // Coordinate shifted up-left by half a texel plus the sub-pixel shift;
    // the pixel shader uses it as the base of its four corner taps.
    float2 cornerUv = centerUv - (rcpFrame * (0.5 + FXAA_SUBPIX_SHIFT));

    return float4(centerUv, cornerUv);
}

/*============================================================================

PIXEL SHADER

============================================================================*/
// FXAA v2 console pixel shader.
//
//   posPos   - output of FxaaVertexShader interpolated across screen
//              (.xy = pixel coordinate, .zw = shifted corner coordinate).
//   tex      - input texture.
//   rcpFrame - constant {1.0/frameWidth, 1.0/frameHeight}.
//
// Returns the filtered RGB colour for the pixel: a four-tap blend along the
// computed direction, or the two-tap blend when the four-tap result's luma
// falls outside the luma range of the five neighbourhood samples.
float3 FxaaPixelShader(
    float4 posPos,
    FxaaTex tex,
    float2 rcpFrame)
{
/*--------------------------------------------------------------------------*/
#define FXAA_REDUCE_MIN (1.0/128.0)
#define FXAA_REDUCE_MUL (1.0/8.0)
#define FXAA_SPAN_MAX 8.0
/*--------------------------------------------------------------------------*/
    // Neighbourhood fetches: the pixel itself plus four taps based at the
    // shifted corner coordinate (offsets 0,0 / 1,0 / 0,1 / 1,1).
    float3 colCenter = FxaaTexLod0(tex, posPos.xy).xyz;
    float3 colNW = FxaaTexLod0(tex, posPos.zw).xyz;
    float3 colNE = FxaaTexOff(tex, posPos.zw, FxaaInt2(1,0), rcpFrame.xy).xyz;
    float3 colSW = FxaaTexOff(tex, posPos.zw, FxaaInt2(0,1), rcpFrame.xy).xyz;
    float3 colSE = FxaaTexOff(tex, posPos.zw, FxaaInt2(1,1), rcpFrame.xy).xyz;
/*--------------------------------------------------------------------------*/
    // Weighted RGB sum used as the luma of each sample.
    float3 lumaWeights = float3(0.299, 0.587, 0.114);
    float yCenter = dot(colCenter, lumaWeights);
    float yNW = dot(colNW, lumaWeights);
    float yNE = dot(colNE, lumaWeights);
    float ySW = dot(colSW, lumaWeights);
    float ySE = dot(colSE, lumaWeights);
/*--------------------------------------------------------------------------*/
    // Luma range spanned by the five samples; used as the acceptance window
    // for the wider blend at the end.
    float yLow = min(yCenter, min(min(yNW, yNE), min(ySW, ySE)));
    float yHigh = max(yCenter, max(max(yNW, yNE), max(ySW, ySE)));
/*--------------------------------------------------------------------------*/
    // Blend direction: x from the north-vs-south luma difference (negated),
    // y from the west-vs-east luma difference.
    float2 blurDir;
    blurDir.x = -((yNW + yNE) - (ySW + ySE));
    blurDir.y = ((yNW + ySW) - (yNE + ySE));
/*--------------------------------------------------------------------------*/
    // Bias added to the divisor below: scales with the average corner luma
    // and is never smaller than FXAA_REDUCE_MIN, so the divisor stays > 0.
    float divisorBias = max(
        (yNW + yNE + ySW + ySE) * (0.25 * FXAA_REDUCE_MUL),
        FXAA_REDUCE_MIN);
    float rcpSmallerAxis = 1.0/(min(abs(blurDir.x), abs(blurDir.y)) + divisorBias);

    // Normalise by the smaller axis, limit to +/-FXAA_SPAN_MAX, then convert
    // to texture-coordinate units via rcpFrame.
    float2 spanLimit = FxaaFloat2(FXAA_SPAN_MAX, FXAA_SPAN_MAX);
    blurDir = min(spanLimit, max(-spanLimit, blurDir * rcpSmallerAxis)) * rcpFrame.xy;
/*--------------------------------------------------------------------------*/
    // Four taps along the direction at -1/2, -1/6, +1/6 and +1/2 of its length.
    float3 tapInnerA = FxaaTexLod0(tex, posPos.xy + blurDir * (1.0/3.0 - 0.5)).xyz;
    float3 tapInnerB = FxaaTexLod0(tex, posPos.xy + blurDir * (2.0/3.0 - 0.5)).xyz;
    float3 tapOuterA = FxaaTexLod0(tex, posPos.xy + blurDir * (0.0/3.0 - 0.5)).xyz;
    float3 tapOuterB = FxaaTexLod0(tex, posPos.xy + blurDir * (3.0/3.0 - 0.5)).xyz;

    // Narrow result: average of the two inner taps.
    float3 rgbNarrow = (1.0/2.0) * (tapInnerA + tapInnerB);
    // Wide result: half narrow result plus a quarter of each outer tap.
    float3 rgbWide = rgbNarrow * (1.0/2.0) + (1.0/4.0) * (tapOuterA + tapOuterB);

    // Keep the wide result only while its luma stays inside the local range.
    float yWide = dot(rgbWide, lumaWeights);
    bool wideOutOfRange = (yWide < yLow) || (yWide > yHigh);
    if(wideOutOfRange) return rgbNarrow;
    return rgbWide;
}

6 comments:

  1. Awesome!

    (here we dubbed "FXAA II for consoles" as "text mode antialiasing now supporting LPT, COM, xterm, and dumb terminals" :))

    ReplyDelete
  2. Just discovered your blog, great stuff! Keep it coming.

    As always with these attempts at removing aliasing artifacts as a post process it would be great to see a video comparison with MSAA and perhaps DLAA/MLAA. Screenies are nice and all but a slow pan across some problem edges at various orientations shows up behaviour hard to get from a screen shot.

    Perhaps the industry needs a SSAA (screen space AA) sequence of frames (with colour, depth + normal) from which a video can be constructed. Of course somebody will want objectID's sooner or later ;p

    ReplyDelete
  3. Works perfectly for me under GLSL_130&120! I really appreciate the fact that it looks very good even without depth/normal.

    I even did not notice any frame rate drop in our engine (consistent 60fps). I should go measure the timing though...

    Thanks for this very usable code release.

    ReplyDelete
  4. Many thanks for this ! Great results !

    ReplyDelete
  5. Thanks for the portable implementation! This is awesome.

    ReplyDelete
  6. Where can the FXAA II be found?
    I'd like to try the preset above this.

    ReplyDelete