forked from excess-demogroup/vlee
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfxaa.fx
737 lines (669 loc) · 29 KB
/
fxaa.fx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
// Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
//
// TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
// *AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
// OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, NONINFRINGEMENT,IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
// OR ITS SUPPLIERS BE LIABLE FOR ANY DIRECT, SPECIAL, INCIDENTAL, INDIRECT, OR
// CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS
// OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY
// OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
// EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
//
// Please direct any bugs or questions to SDKFeedback@nvidia.com
/*============================================================================
FXAA
============================================================================*/
/*============================================================================
API PORTING
============================================================================*/
// Porting layer: maps the portable Fxaa* names used by the algorithm below
// onto HLSL types and intrinsics.
// NOTE: int2 is aliased to float2 here, so every "int2" that appears later in
// this file (for example the texel offset parameter of FxaaTexOff) is a float2.
#define int2 float2
#define FxaaInt2 float2
#define FxaaFloat2 float2
#define FxaaFloat3 float3
#define FxaaFloat4 float4
#define FxaaBool2Float(a) (a)
#define FxaaPow3(x, y) pow(x, y)
// Arithmetic select: yields f where b is false and t where b is true.
// The selector is parenthesized before it is negated so that a comparison
// passed as b (such as "v > k") is negated as a whole instead of being
// expanded to "(!v) > k".
#define FxaaSel3(f, t, b) ((f)*(!(b)) + (t)*(b))
#define FxaaTex sampler2D
// Broadcast a scalar into all three components of a float3.
#define FxaaToFloat3(a) FxaaFloat3((a), (a), (a))
// Sample tex at pos with an explicit level of detail of 0.
float4 FxaaTexLod0(FxaaTex tex, float2 pos)
{
    // tex2Dlod takes the level of detail in the w component.
    float4 coordLod0 = float4(pos.xy, 0.0, 0.0);
    return tex2Dlod(tex, coordLod0);
}
// Sample tex at pos, supplying grad as both the x and the y derivative of the
// texture coordinate.
float4 FxaaTexGrad(FxaaTex tex, float2 pos, float2 grad)
{
    float2 derivX = grad;
    float2 derivY = grad;
    return tex2Dgrad(tex, pos.xy, derivX, derivY);
}
// Sample tex (level of detail 0) at pos displaced by off scaled by rcpFrame.
// Callers in this file pass whole-texel offsets in off and the reciprocal
// frame size in rcpFrame.
float4 FxaaTexOff(FxaaTex tex, float2 pos, int2 off, float2 rcpFrame)
{
    float2 shifted = pos.xy + (off * rcpFrame);
    return tex2Dlod(tex, float4(shifted, 0, 0));
}
/*============================================================================
SRGB KNOBS
------------------------------------------------------------------------------
FXAA_SRGB_ROP - Set to 1 when applying FXAA to an sRGB back buffer (DX10/11).
This will do the sRGB to linear transform,
as ROP will expect linear color from this shader,
and this shader works in non-linear color.
============================================================================*/
// Defaults to 0 (no sRGB-to-linear conversion in FxaaFilterReturn).
// Guarded like the other knobs so a value defined before this point is kept
// instead of being overridden.
#ifndef FXAA_SRGB_ROP
    #define FXAA_SRGB_ROP 0
#endif
/*============================================================================
DEBUG KNOBS
------------------------------------------------------------------------------
All debug knobs draw FXAA-untouched pixels in FXAA computed luma (monochrome).
FXAA_DEBUG_PASSTHROUGH - Red for pixels which are filtered by FXAA with a
yellow tint on sub-pixel aliasing filtered by FXAA.
FXAA_DEBUG_HORZVERT - Blue for horizontal edges, gold for vertical edges.
FXAA_DEBUG_PAIR - Blue/green for the 2 pixel pair choice.
FXAA_DEBUG_NEGPOS - Red/blue for which side of center of span.
FXAA_DEBUG_OFFSET - Red/blue for -/+ x, gold/skyblue for -/+ y.
============================================================================*/
// Every individual debug knob defaults to 0 (off) unless it was already
// defined before this point.
#if !defined(FXAA_DEBUG_PASSTHROUGH)
    #define FXAA_DEBUG_PASSTHROUGH 0
#endif
#if !defined(FXAA_DEBUG_HORZVERT)
    #define FXAA_DEBUG_HORZVERT 0
#endif
#if !defined(FXAA_DEBUG_PAIR)
    #define FXAA_DEBUG_PAIR 0
#endif
#if !defined(FXAA_DEBUG_NEGPOS)
    #define FXAA_DEBUG_NEGPOS 0
#endif
#if !defined(FXAA_DEBUG_OFFSET)
    #define FXAA_DEBUG_OFFSET 0
#endif
/*--------------------------------------------------------------------------*/
// FXAA_DEBUG is the master switch: 1 as soon as any individual knob is
// non-zero, otherwise 0 (unless it was defined before this point).
#if FXAA_DEBUG_PASSTHROUGH || FXAA_DEBUG_HORZVERT || FXAA_DEBUG_PAIR || FXAA_DEBUG_NEGPOS || FXAA_DEBUG_OFFSET
    #define FXAA_DEBUG 1
#endif
#if !defined(FXAA_DEBUG)
    #define FXAA_DEBUG 0
#endif
/*============================================================================
COMPILE-IN KNOBS
------------------------------------------------------------------------------
FXAA_PRESET - Choose compile-in knob preset 0-5.
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required
to apply algorithm.
1.0/3.0 - too little
1.0/4.0 - good start
1.0/8.0 - applies to more edges
1.0/16.0 - overkill
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
Perf optimization.
1.0/32.0 - visible limit (smaller isn't visible)
1.0/16.0 - good compromise
1.0/12.0 - upper limit (seeing artifacts)
------------------------------------------------------------------------------
FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
------------------------------------------------------------------------------
FXAA_SEARCH_ACCELERATION - How much to accelerate search,
1 - no acceleration
2 - skip by 2 pixels
3 - skip by 3 pixels
4 - skip by 4 pixels
------------------------------------------------------------------------------
FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
1.0/4.0 - seems to be the best quality wise
------------------------------------------------------------------------------
FXAA_SUBPIX_FASTER - Turn on lower quality but faster subpix path.
Not recommended, but used in preset 0.
------------------------------------------------------------------------------
FXAA_SUBPIX - Toggle subpix filtering.
0 - turn off
1 - turn on
2 - turn on full (ignores FXAA_SUBPIX_TRIM and CAP)
------------------------------------------------------------------------------
FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
1.0/2.0 - low removal
1.0/3.0 - medium removal
1.0/4.0 - default removal
1.0/8.0 - high removal
0.0 - complete removal
------------------------------------------------------------------------------
FXAA_SUBPIX_CAP - Ensures fine detail is not completely removed.
This is important for the transition of sub-pixel detail,
like fences and wires.
3.0/4.0 - default (medium amount of filtering)
7.0/8.0 - high amount of filtering
1.0 - no capping of sub-pixel aliasing removal
============================================================================*/
// Preset used when none was selected before this point.
#ifndef FXAA_PRESET
    #define FXAA_PRESET 4
#endif
/*--------------------------------------------------------------------------*/
// Exactly one preset branch is taken. Presets 2 to 5 share every knob except
// FXAA_SEARCH_STEPS and FXAA_SEARCH_ACCELERATION.
#if (FXAA_PRESET == 0)
    // The only preset that enables the faster, lower quality subpix path.
    #define FXAA_EDGE_THRESHOLD (1.0/4.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/12.0)
    #define FXAA_SEARCH_STEPS 2
    #define FXAA_SEARCH_ACCELERATION 4
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 1
    #define FXAA_SUBPIX_CAP (2.0/3.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
/*--------------------------------------------------------------------------*/
#elif (FXAA_PRESET == 1)
    #define FXAA_EDGE_THRESHOLD (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/16.0)
    #define FXAA_SEARCH_STEPS 4
    #define FXAA_SEARCH_ACCELERATION 3
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 0
    #define FXAA_SUBPIX_CAP (3.0/4.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
/*--------------------------------------------------------------------------*/
#elif (FXAA_PRESET == 2)
    #define FXAA_EDGE_THRESHOLD (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
    #define FXAA_SEARCH_STEPS 8
    #define FXAA_SEARCH_ACCELERATION 2
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 0
    #define FXAA_SUBPIX_CAP (3.0/4.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
/*--------------------------------------------------------------------------*/
#elif (FXAA_PRESET == 3)
    #define FXAA_EDGE_THRESHOLD (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
    #define FXAA_SEARCH_STEPS 16
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 0
    #define FXAA_SUBPIX_CAP (3.0/4.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
/*--------------------------------------------------------------------------*/
#elif (FXAA_PRESET == 4)
    #define FXAA_EDGE_THRESHOLD (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
    #define FXAA_SEARCH_STEPS 24
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 0
    #define FXAA_SUBPIX_CAP (3.0/4.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
/*--------------------------------------------------------------------------*/
#elif (FXAA_PRESET == 5)
    #define FXAA_EDGE_THRESHOLD (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN (1.0/24.0)
    #define FXAA_SEARCH_STEPS 32
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD (1.0/4.0)
    #define FXAA_SUBPIX 1
    #define FXAA_SUBPIX_FASTER 0
    #define FXAA_SUBPIX_CAP (3.0/4.0)
    #define FXAA_SUBPIX_TRIM (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
// An unknown preset defines nothing above. Stop here with a readable message
// rather than failing later on an undefined knob. The check only fires when a
// knob is really missing, so knobs defined before this point are still accepted.
#if !defined(FXAA_EDGE_THRESHOLD) || !defined(FXAA_EDGE_THRESHOLD_MIN) || !defined(FXAA_SEARCH_STEPS) || !defined(FXAA_SEARCH_ACCELERATION) || !defined(FXAA_SEARCH_THRESHOLD) || !defined(FXAA_SUBPIX) || !defined(FXAA_SUBPIX_FASTER) || !defined(FXAA_SUBPIX_CAP) || !defined(FXAA_SUBPIX_TRIM)
    #error FXAA_PRESET must be in the range 0-5 unless every FXAA compile-in knob is predefined
#endif
/*--------------------------------------------------------------------------*/
// Rescale factor applied after FXAA_SUBPIX_TRIM has been subtracted from the
// sub-pixel ratio, so that a ratio of 1.0 still maps to a blend of 1.0.
#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))
/*============================================================================
HELPERS
============================================================================*/
// Return the luma, the estimation of luminance from rgb inputs.
// This approximates luma using one FMA instruction,
// skipping normalization and tossing out blue.
// FxaaLuma() will range 0.0 to 2.963210702.
float FxaaLuma(float3 rgb) {
    // Green is weighted relative to red (0.587/0.299); blue is not read.
    float greenGain = (0.587/0.299);
    return rgb.y * greenGain + rgb.x;
}
/*--------------------------------------------------------------------------*/
// Blend a and b: amountOfA = 1 gives a, amountOfA = 0 gives b.
// Evaluated as (-amountOfA * b) + ((a * amountOfA) + b).
float3 FxaaLerp3(float3 a, float3 b, float amountOfA) {
    float3 negScaledB = FxaaToFloat3(-amountOfA) * b;
    float3 scaledA = a * FxaaToFloat3(amountOfA);
    return negScaledB + (scaledA + b);
}
/*--------------------------------------------------------------------------*/
// Support any extra filtering before returning color.
// Final color hook used by every return path of FxaaPixelShader.
// With FXAA_SRGB_ROP enabled the sRGB-encoded input is converted to linear,
// otherwise rgb is returned unchanged.
float3 FxaaFilterReturn(float3 rgb) {
#if FXAA_SRGB_ROP
    // sRGB encoded value to linear conversion, per component:
    //   rgb <= 0.04045 : rgb / 12.92
    //   rgb >  0.04045 : ((rgb + 0.055) / 1.055) ^ 2.4
    float3 linearSegment = rgb * FxaaToFloat3(1.0/12.92);
    float3 powerSegment = FxaaPow3(
        rgb * FxaaToFloat3(1.0/1.055) + FxaaToFloat3(0.055/1.055),
        FxaaToFloat3(2.4));
    // The comparison carries its own parentheses so that it is negated as a
    // whole inside FxaaSel3, however the macro parenthesizes its selector.
    // Unwrapped, "!rgb > k" would negate rgb first and zero out every
    // component in (0, 0.04045].
    return FxaaSel3(
        linearSegment,
        powerSegment,
        (rgb > FxaaToFloat3(0.04045)));
#else
    return rgb;
#endif
}
/*============================================================================
VERTEX SHADER
============================================================================*/
float2 FxaaVertexShader(
// Both x and y range {-1.0 to 1.0 across screen}.
float2 inPos) {
    // Remap -1..1 to 0..1: scale by one half, then bias by one half.
    float2 halfScale = FxaaFloat2(0.5, 0.5);
    float2 halfBias = FxaaFloat2(0.5, 0.5);
    return (inPos.xy * halfScale) + halfBias;
}
/*============================================================================
PIXEL SHADER
============================================================================*/
// FXAA pixel shader: returns the anti-aliased color for the pixel at pos.
// Stages, in order: early exit on low local contrast, sub-pixel lowpass blend
// amount, horizontal/vertical edge choice, pixel pair choice, end-of-span
// search in both directions, and a final offset texture fetch that is
// optionally blended with the lowpass color.
// Every return goes through FxaaFilterReturn. When a FXAA_DEBUG_* knob is set
// the function returns a debug visualization color instead.
float3 FxaaPixelShader(
// Output of FxaaVertexShader interpolated across screen.
// xy -> actual texture position {0.0 to 1.0}
float2 pos,
// Input texture.
FxaaTex tex,
// RCPFRAME SHOULD BE PIXEL SHADER CONSTANTS!
// {1.0/frameWidth, 1.0/frameHeight}
float2 rcpFrame) {
/*----------------------------------------------------------------------------
EARLY EXIT IF LOCAL CONTRAST BELOW EDGE DETECT LIMIT
------------------------------------------------------------------------------
Majority of pixels of a typical image do not require filtering,
often pixels are grouped into blocks which could benefit from early exit
right at the beginning of the algorithm.
Given the following neighborhood,
N
W M E
S
If the difference in local maximum and minimum luma (contrast "range")
is lower than a threshold proportional to the maximum local luma ("rangeMax"),
then the shader early exits (no visible aliasing).
This threshold is clamped at a minimum value ("FXAA_EDGE_THRESHOLD_MIN")
to avoid processing in really dark areas.
----------------------------------------------------------------------------*/
float3 rgbN = FxaaTexOff(tex, pos.xy, FxaaInt2( 0,-1), rcpFrame).xyz;
float3 rgbW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1, 0), rcpFrame).xyz;
float3 rgbM = FxaaTexOff(tex, pos.xy, FxaaInt2( 0, 0), rcpFrame).xyz;
float3 rgbE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1, 0), rcpFrame).xyz;
float3 rgbS = FxaaTexOff(tex, pos.xy, FxaaInt2( 0, 1), rcpFrame).xyz;
float lumaN = FxaaLuma(rgbN);
float lumaW = FxaaLuma(rgbW);
float lumaM = FxaaLuma(rgbM);
float lumaE = FxaaLuma(rgbE);
float lumaS = FxaaLuma(rgbS);
float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
float range = rangeMax - rangeMin;
#if FXAA_DEBUG
// Center luma divided by (1.0 + 0.587/0.299), the FxaaLuma value of white,
// used as the monochrome base of the debug output.
float lumaO = lumaM / (1.0 + (0.587/0.299));
#endif
if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD)) {
#if FXAA_DEBUG
return FxaaFilterReturn(FxaaToFloat3(lumaO));
#endif
return FxaaFilterReturn(rgbM); }
// rgbL is the lowpass color. The faster path averages the 5-tap cross here;
// the default path only accumulates the cross here and is completed once the
// four corner taps have been fetched further down.
#if FXAA_SUBPIX > 0
#if FXAA_SUBPIX_FASTER
float3 rgbL = (rgbN + rgbW + rgbE + rgbS + rgbM) *
FxaaToFloat3(1.0/5.0);
#else
float3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
#endif
#endif
/*----------------------------------------------------------------------------
COMPUTE LOWPASS
------------------------------------------------------------------------------
FXAA computes a local neighborhood lowpass value as follows,
(N + W + E + S)/4
Then uses the ratio of the contrast range of the lowpass
and the range found in the early exit check,
as a sub-pixel aliasing detection filter.
When FXAA detects sub-pixel aliasing (such as single pixel dots),
it later blends in "blendL" amount
of a lowpass value (computed in the next section) to the final result.
----------------------------------------------------------------------------*/
#if FXAA_SUBPIX != 0
float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
float rangeL = abs(lumaL - lumaM);
#endif
#if FXAA_SUBPIX == 1
float blendL = max(0.0,
(rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE;
blendL = min(FXAA_SUBPIX_CAP, blendL);
#endif
#if FXAA_SUBPIX == 2
float blendL = rangeL / range;
#endif
#if FXAA_DEBUG_PASSTHROUGH
#if FXAA_SUBPIX == 0
float blendL = 0.0;
#endif
return FxaaFilterReturn(
FxaaFloat3(1.0, blendL/FXAA_SUBPIX_CAP, 0.0));
#endif
/*----------------------------------------------------------------------------
CHOOSE VERTICAL OR HORIZONTAL SEARCH
------------------------------------------------------------------------------
FXAA uses the following local neighborhood,
NW N NE
W M E
SW S SE
To compute an edge amount for both vertical and horizontal directions.
Note edge detect filters like Sobel fail on single pixel lines through M.
FXAA takes the weighted average magnitude of the high-pass values
for rows and columns as an indication of local edge amount.
A lowpass value for anti-sub-pixel-aliasing is computed as
(N+W+E+S+M+NW+NE+SW+SE)/9.
This full box pattern has higher quality than other options.
Note following this block, both vertical and horizontal cases
flow in parallel (reusing the horizontal variables).
----------------------------------------------------------------------------*/
float3 rgbNW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1,-1), rcpFrame).xyz;
float3 rgbNE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1,-1), rcpFrame).xyz;
float3 rgbSW = FxaaTexOff(tex, pos.xy, FxaaInt2(-1, 1), rcpFrame).xyz;
float3 rgbSE = FxaaTexOff(tex, pos.xy, FxaaInt2( 1, 1), rcpFrame).xyz;
#if (FXAA_SUBPIX_FASTER == 0) && (FXAA_SUBPIX > 0)
// Complete the 3x3 box average started above.
rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
rgbL *= FxaaToFloat3(1.0/9.0);
#endif
float lumaNW = FxaaLuma(rgbNW);
float lumaNE = FxaaLuma(rgbNE);
float lumaSW = FxaaLuma(rgbSW);
float lumaSE = FxaaLuma(rgbSE);
float edgeVert =
abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
float edgeHorz =
abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
bool horzSpan = edgeHorz >= edgeVert;
#if FXAA_DEBUG_HORZVERT
if(horzSpan) return FxaaFilterReturn(FxaaFloat3(1.0, 0.75, 0.0));
else return FxaaFilterReturn(FxaaFloat3(0.0, 0.50, 1.0));
#endif
// lengthSign is one texel perpendicular to the span; it starts negative,
// i.e. pointing at the N (or W) neighbor, and is flipped below if the S (or E)
// pair is chosen.
float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
// For a vertical span the W/E lumas take over the N/S variables.
if(!horzSpan) lumaN = lumaW;
if(!horzSpan) lumaS = lumaE;
float gradientN = abs(lumaN - lumaM);
float gradientS = abs(lumaS - lumaM);
// From here on lumaN/lumaS hold the pair averages with the center pixel.
lumaN = (lumaN + lumaM) * 0.5;
lumaS = (lumaS + lumaM) * 0.5;
/*----------------------------------------------------------------------------
CHOOSE SIDE OF PIXEL WHERE GRADIENT IS HIGHEST
------------------------------------------------------------------------------
This chooses a pixel pair.
For "horzSpan == true" this will be a vertical pair,
[N] N
[M] or [M]
S [S]
Note following this block, both {N,M} and {S,M} cases
flow in parallel (reusing the {N,M} variables).
This pair of image rows or columns is searched below
in the positive and negative direction
until edge status changes
(or the maximum number of search steps is reached).
----------------------------------------------------------------------------*/
bool pairN = gradientN >= gradientS;
#if FXAA_DEBUG_PAIR
if(pairN) return FxaaFilterReturn(FxaaFloat3(0.0, 0.0, 1.0));
else return FxaaFilterReturn(FxaaFloat3(0.0, 1.0, 0.0));
#endif
if(!pairN) lumaN = lumaS;
if(!pairN) gradientN = gradientS;
if(!pairN) lengthSign *= -1.0;
// Search start: half a texel from the center towards the chosen pair pixel.
float2 posN;
posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);
/*----------------------------------------------------------------------------
CHOOSE SEARCH LIMITING VALUES
------------------------------------------------------------------------------
Search limit (+/- gradientN) is a function of local gradient.
----------------------------------------------------------------------------*/
gradientN *= FXAA_SEARCH_THRESHOLD;
/*----------------------------------------------------------------------------
SEARCH IN BOTH DIRECTIONS UNTIL FIND LUMA PAIR AVERAGE IS OUT OF RANGE
------------------------------------------------------------------------------
This loop searches either in vertical or horizontal directions,
and in both the negative and positive direction in parallel.
This loop fusion is faster than searching separately.
The search is accelerated using FXAA_SEARCH_ACCELERATION length box filter
via anisotropic filtering with specified texture gradients.
----------------------------------------------------------------------------*/
float2 posP = posN;
float2 offNP = horzSpan ?
FxaaFloat2(rcpFrame.x, 0.0) :
FxaaFloat2(0.0f, rcpFrame.y);
float lumaEndN = lumaN;
float lumaEndP = lumaN;
bool doneN = false;
bool doneP = false;
// Initial step away from the center and per-iteration stride, both scaled by
// the acceleration factor.
#if FXAA_SEARCH_ACCELERATION == 1
posN += offNP * FxaaFloat2(-1.0, -1.0);
posP += offNP * FxaaFloat2( 1.0, 1.0);
#endif
#if FXAA_SEARCH_ACCELERATION == 2
posN += offNP * FxaaFloat2(-1.5, -1.5);
posP += offNP * FxaaFloat2( 1.5, 1.5);
offNP *= FxaaFloat2(2.0, 2.0);
#endif
#if FXAA_SEARCH_ACCELERATION == 3
posN += offNP * FxaaFloat2(-2.0, -2.0);
posP += offNP * FxaaFloat2( 2.0, 2.0);
offNP *= FxaaFloat2(3.0, 3.0);
#endif
#if FXAA_SEARCH_ACCELERATION == 4
posN += offNP * FxaaFloat2(-2.5, -2.5);
posP += offNP * FxaaFloat2( 2.5, 2.5);
offNP *= FxaaFloat2(4.0, 4.0);
#endif
for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
#if FXAA_SEARCH_ACCELERATION == 1
if(!doneN) lumaEndN =
FxaaLuma(FxaaTexLod0(tex, posN.xy).xyz);
if(!doneP) lumaEndP =
FxaaLuma(FxaaTexLod0(tex, posP.xy).xyz);
#else
if(!doneN) lumaEndN =
FxaaLuma(FxaaTexGrad(tex, posN.xy, offNP).xyz);
if(!doneP) lumaEndP =
FxaaLuma(FxaaTexGrad(tex, posP.xy, offNP).xyz);
#endif
// A direction is finished once its sampled luma leaves the +/- gradientN band
// around the pair average; finished directions stop advancing.
doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
if(doneN && doneP) break;
if(!doneN) posN -= offNP;
if(!doneP) posP += offNP; }
/*----------------------------------------------------------------------------
HANDLE IF CENTER IS ON POSITIVE OR NEGATIVE SIDE
------------------------------------------------------------------------------
FXAA uses the pixel's position in the span
in combination with the values (lumaEnd*) at the ends of the span,
to determine filtering.
This step computes which side of the span the pixel is on.
On negative side if dstN < dstP,
posN pos posP
|-----------|------|------------------|
| | | |
|<--dstN--->|<---------dstP---------->|
|
span center
----------------------------------------------------------------------------*/
float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
bool directionN = dstN < dstP;
#if FXAA_DEBUG_NEGPOS
if(directionN) return FxaaFilterReturn(FxaaFloat3(1.0, 0.0, 0.0));
else return FxaaFilterReturn(FxaaFloat3(0.0, 0.0, 1.0));
#endif
// Keep only the end-of-span luma on the nearer side.
lumaEndN = directionN ? lumaEndN : lumaEndP;
/*----------------------------------------------------------------------------
CHECK IF PIXEL IS IN SECTION OF SPAN WHICH GETS NO FILTERING
------------------------------------------------------------------------------
If both the pair luma at the end of the span (lumaEndN)
and middle pixel luma (lumaM)
are on the same side of the middle pair average luma (lumaN),
then don't filter.
Cases,
(1.) "L",
lumaM
|
V XXXXXXXX <- other line averaged
XXXXXXX[X]XXXXXXXXXXX <- source pixel line
| . |
--------------------------
[ ]xxxxxx[x]xx[X]XXXXXX <- pair average
--------------------------
^ ^ ^ ^
| | | |
. |<---->|<---------- no filter region
. | | |
. center | |
. | lumaEndN
. | .
. lumaN .
. .
|<--- span -->|
(2.) "^" and "-",
<- other line averaged
XXXXX[X]XXX <- source pixel line
| | |
--------------------------
[ ]xxxx[x]xx[ ] <- pair average
--------------------------
| | |
|<--->|<--->|<---------- filter both sides
(3.) "v" and inverse of "-",
XXXXXX XXXXXXXXX <- other line averaged
XXXXXXXXXXX[X]XXXXXXXXXXXX <- source pixel line
| | |
--------------------------
XXXX[X]xxxx[x]xx[X]XXXXXXX <- pair average
--------------------------
| | |
|<--->|<--->|<---------- don't filter both!
Note the "v" case for FXAA requires no filtering.
This is because the inverse of the "-" case is the "v".
Filtering "v" case turns open spans like this,
XXXXXXXXX
Into this (which is not desired),
x+. .+x
XXXXXXXXX
----------------------------------------------------------------------------*/
// Zeroing lengthSign makes subPixelOffset 0 below, so the final fetch lands
// exactly on the center pixel.
if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0))
lengthSign = 0.0;
/*----------------------------------------------------------------------------
COMPUTE SUB-PIXEL OFFSET AND FILTER SPAN
------------------------------------------------------------------------------
FXAA filters using a bilinear texture fetch offset
from the middle pixel M towards the center of the pair (NM below).
Maximum filtering will be half way between pair.
Reminder, at this point in the code,
the {N,M} pair is also reused for all cases: {S,M}, {W,M}, and {E,M}.
+-------+
| | 0.5 offset
| N | |
| | V
+-------+....---
| |
| M...|....---
| | ^
+-------+ |
. . 0.0 offset
. S .
. .
.........
Position on span is used to compute sub-pixel filter offset using simple ramp,
posN posP
|\ |<------- 0.5 pixel offset into pair pixel
| \ |
| \ |
---.......|...\..........|<------- 0.25 pixel offset into pair pixel
^ | ^\ |
| | | \ |
V | | \ |
---.......|===|==========|<------- 0.0 pixel offset (ie M pixel)
^ . | ^ .
| . pos | .
| . . | .
| . . center .
| . . .
| |<->|<---------.-------- dstN
| . . .
| . |<-------->|<------- dstP
| . .
| |<------------>|<------- spanLength
|
subPixelOffset
----------------------------------------------------------------------------*/
float spanLength = (dstP + dstN);
dstN = directionN ? dstN : dstP;
float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
#if FXAA_DEBUG_OFFSET
float ox = horzSpan ? 0.0 : subPixelOffset*2.0/rcpFrame.x;
float oy = horzSpan ? subPixelOffset*2.0/rcpFrame.y : 0.0;
if(ox < 0.0) return FxaaFilterReturn(
FxaaLerp3(FxaaToFloat3(lumaO),
FxaaFloat3(1.0, 0.0, 0.0), -ox));
if(ox > 0.0) return FxaaFilterReturn(
FxaaLerp3(FxaaToFloat3(lumaO),
FxaaFloat3(0.0, 0.0, 1.0), ox));
if(oy < 0.0) return FxaaFilterReturn(
FxaaLerp3(FxaaToFloat3(lumaO),
FxaaFloat3(1.0, 0.6, 0.2), -oy));
if(oy > 0.0) return FxaaFilterReturn(
FxaaLerp3(FxaaToFloat3(lumaO),
FxaaFloat3(0.2, 0.6, 1.0), oy));
return FxaaFilterReturn(FxaaFloat3(lumaO, lumaO, lumaO));
#endif
// Final fetch: the center position shifted by subPixelOffset perpendicular to
// the span.
float3 rgbF = FxaaTexLod0(tex, FxaaFloat2(
pos.x + (horzSpan ? 0.0 : subPixelOffset),
pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
#if FXAA_SUBPIX == 0
return FxaaFilterReturn(rgbF);
#else
// Blend blendL of the lowpass color rgbL into the span-filtered color rgbF.
return FxaaFilterReturn(FxaaLerp3(rgbL, rgbF, blendL));
#endif
}
// Passed to FxaaPixelShader as rcpFrame, which that function documents as
// {1.0/frameWidth, 1.0/frameHeight}.
const float2 viewportInv;
// Luminance amount subtracted in pixel() when deriving the COLOR1 ("bright")
// output; colors at or below it produce black there.
const float bloom_cutoff;
// Image that pixel() runs FXAA on.
texture color_tex;
// Sampler over color_tex: linear min/mag filtering, no mip filtering,
// clamped addressing on both axes, sRGBTexture disabled.
sampler color_samp = sampler_state {
Texture = (color_tex);
MipFilter = NONE;
MinFilter = LINEAR;
MagFilter = LINEAR;
AddressU = CLAMP;
AddressV = CLAMP;
sRGBTexture = FALSE;
};
// Output of vertex() and input of pixel().
struct VS_OUTPUT {
// Vertex position, copied unchanged from the input vertex by vertex().
float4 pos : POSITION;
// Texture coordinate handed to FxaaPixelShader as the sample position.
float2 uv : TEXCOORD0;
};
// Pass-through vertex shader: forwards the incoming position and texture
// coordinate without modification.
VS_OUTPUT vertex(float4 ipos : POSITION, float2 uv : TEXCOORD0)
{
    VS_OUTPUT result;
    result.uv = uv;
    result.pos = ipos;
    return result;
}
// Two-target output of pixel().
struct PS_OUT {
// FXAA-filtered color with alpha 1.
float4 color : COLOR0;
// The same color with its rgb scaled by max(0, lum - bloom_cutoff) / lum.
float4 bright : COLOR1;
};
// Pixel shader entry point.
// COLOR0 receives the FXAA-filtered color (alpha 1). COLOR1 receives the same
// color with its rgb scaled by max(0, lum - bloom_cutoff) / lum, or black when
// the luminance is not positive.
PS_OUT pixel(VS_OUTPUT In)
{
    float3 filtered = FxaaPixelShader(In.uv, color_samp, viewportInv);

    float3 lumWeights = float3(0.299, 0.587, 0.114);
    float lum = dot(filtered, lumWeights);

    // Stays black unless the luminance is positive, which also keeps the
    // division below away from zero.
    float3 brightRgb = float3(0, 0, 0);
    if (lum > 0)
        brightRgb = filtered * (max(0, lum - bloom_cutoff) / lum);

    PS_OUT result;
    result.color = float4(filtered, 1);
    result.bright = float4(brightRgb, 1);
    return result;
}
// Single-pass technique: vertex() compiled as vs_3_0 and pixel() compiled as
// ps_3_0.
technique postprocess {
pass P0 {
VertexShader = compile vs_3_0 vertex();
PixelShader = compile ps_3_0 pixel();
}
}