| | |
| | | // Set the following define to 0, or comment it out, to disable vignette weighting |
| | | #define USE_VIGNETTE_WEIGHTING 1 |
| | | |
| | | #include "Common.cginc" |
| | | #include "EyeAdaptation.cginc" |
| | | |
| | | RWStructuredBuffer<uint> _Histogram; | // Histogram buffer; the kernel atomically adds each thread group's bin totals into it
| | | Texture2D<float4> _Source; | // Input texture; the kernel reads .xyz from it at integer pixel coordinates
| | | | // Kernel parameters
| | | CBUFFER_START(Params) |
| | | float4 _ScaleOffsetRes; // x: scale, y: offset, z: width, w: height (xy are passed to GetHistogramBinFromLuminance, zw bound the pixels that get processed) |
| | | CBUFFER_END |
| | | | // Per-thread-group histogram, one counter per bin; merged into _Histogram at the end of the kernel
| | | groupshared uint gs_histogram[HISTOGRAM_BINS]; | // HISTOGRAM_BINS is not defined in this file (presumably in one of the included .cginc files)
| | | |
| | | #pragma kernel KEyeHistogram |
| | | [numthreads(HISTOGRAM_THREAD_X,HISTOGRAM_THREAD_Y,1)] |
| | | void KEyeHistogram(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID) |
| | | { |
| | | // Builds a weighted histogram of _Source: each thread bins one pixel into the groupshared gs_histogram, then the group's totals are merged into _Histogram. |
| | | // Pretty straightforward implementation of histogram gathering using atomic ops. I tried a few methods (no atomic ops / heavy LDS leveraging) but this one turned out to be |
| | | // the fastest on desktop (Nvidia - Kepler/Maxwell) and PS4. Still need to try it on GCN/desktop |
| | | // but considering it runs very fast on PS4 we can expect it to run well (?). |
| | | | // Flattened (row-major) index of this thread within its thread group
| | | const uint localThreadId = groupThreadId.y * HISTOGRAM_THREAD_X + groupThreadId.x; |
| | | |
| | | // Clears the shared memory: the first HISTOGRAM_BINS threads of the group each zero one bin. NOTE(review): this assumes the group has at least HISTOGRAM_BINS threads (the constants are defined outside this file) - confirm |
| | | if (localThreadId < HISTOGRAM_BINS) |
| | | gs_histogram[localThreadId] = 0u; |
| | | | // Every bin must be zeroed before any thread of the group starts accumulating
| | | GroupMemoryBarrierWithGroupSync(); |
| | | |
| | | // Gather local group histogram (threads outside the z: width / w: height bounds skip this block but still reach the barrier below) |
| | | if (dispatchThreadId.x < (uint)_ScaleOffsetRes.z && dispatchThreadId.y < (uint)_ScaleOffsetRes.w) |
| | | { |
| | | #if USE_VIGNETTE_WEIGHTING |
| | | // Vignette weighting to put more focus on what's in the center of the screen |
| | | float2 uv01 = float2(dispatchThreadId) / float2(_ScaleOffsetRes.z, _ScaleOffsetRes.w); | // pixel coordinates divided by width/height
| | | float2 d = abs(uv01 - (0.5).xx); | // per-axis distance from the centre of the image
| | | float vfactor = Pow2(saturate(1.0 - dot(d, d))); | // dot(d, d) is the squared distance from the centre
| | | uint weight = (uint)(64.0 * vfactor); | // truncated to an integer weight; NOTE(review): range is [16, 64] if Pow2 squares its argument - Pow2 is defined elsewhere, confirm
| | | #else |
| | | uint weight = 1u; |
| | | #endif |
| | | | // Read this thread's pixel and convert it to a bin index
| | | float3 color = _Source[dispatchThreadId].xyz; |
| | | float luminance = Max3(color); // Looks more natural than using a Rec.709 luminance for some reason |
| | | float logLuminance = GetHistogramBinFromLuminance(luminance, _ScaleOffsetRes.xy); | // NOTE(review): helper is defined elsewhere; the index math below assumes it returns a value in [0, 1] - confirm
| | | uint idx = (uint)(logLuminance * (HISTOGRAM_BINS - 1u)); | // scales onto bin indices 0 .. HISTOGRAM_BINS - 1 (truncating)
| | | InterlockedAdd(gs_histogram[idx], weight); | // atomic because several threads of the group may hit the same bin
| | | } |
| | | | // Wait until every thread of the group has finished accumulating
| | | GroupMemoryBarrierWithGroupSync(); |
| | | |
| | | // Merge everything: the first HISTOGRAM_BINS threads each atomically add one group bin into _Histogram. NOTE(review): _Histogram is only ever added to here, so it presumably has to be cleared before the dispatch - confirm |
| | | if (localThreadId < HISTOGRAM_BINS) |
| | | InterlockedAdd(_Histogram[localThreadId], gs_histogram[localThreadId]); |
| | | } |
| | | // Set the following define to 0, or comment it out, to disable vignette weighting
|
| | | #define USE_VIGNETTE_WEIGHTING 1
|
| | |
|
| | | #include "Common.cginc"
|
| | | #include "EyeAdaptation.cginc"
|
| | |
|
| | | RWStructuredBuffer<uint> _Histogram; // Histogram buffer; the kernel atomically adds each thread group's bin totals into it
|
| | | Texture2D<float4> _Source; // Input texture; the kernel reads .xyz from it at integer pixel coordinates
|
| | | // Kernel parameters
|
| | | CBUFFER_START(Params)
|
| | | float4 _ScaleOffsetRes; // x: scale, y: offset, z: width, w: height (xy are passed to GetHistogramBinFromLuminance, zw bound the pixels that get processed)
|
| | | CBUFFER_END
|
| | | // Per-thread-group histogram, one counter per bin; merged into _Histogram at the end of the kernel
|
| | | groupshared uint gs_histogram[HISTOGRAM_BINS]; // HISTOGRAM_BINS is not defined in this file (presumably in one of the included .cginc files)
|
| | |
|
| | | #pragma kernel KEyeHistogram
|
| | | [numthreads(HISTOGRAM_THREAD_X,HISTOGRAM_THREAD_Y,1)]
|
| | | void KEyeHistogram(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
|
| | | {
|
| | | // Builds a weighted histogram of _Source: each thread bins one pixel into the groupshared gs_histogram, then the group's totals are merged into _Histogram.
|
| | | // Pretty straightforward implementation of histogram gathering using atomic ops. I tried a few methods (no atomic ops / heavy LDS leveraging) but this one turned out to be
|
| | | // the fastest on desktop (Nvidia - Kepler/Maxwell) and PS4. Still need to try it on GCN/desktop
|
| | | // but considering it runs very fast on PS4 we can expect it to run well (?).
|
| | | // Flattened (row-major) index of this thread within its thread group
|
| | | const uint localThreadId = groupThreadId.y * HISTOGRAM_THREAD_X + groupThreadId.x;
|
| | |
|
| | | // Clears the shared memory: the first HISTOGRAM_BINS threads of the group each zero one bin. NOTE(review): this assumes the group has at least HISTOGRAM_BINS threads (the constants are defined outside this file) - confirm
|
| | | if (localThreadId < HISTOGRAM_BINS)
|
| | | gs_histogram[localThreadId] = 0u;
|
| | | // Every bin must be zeroed before any thread of the group starts accumulating
|
| | | GroupMemoryBarrierWithGroupSync();
|
| | |
|
| | | // Gather local group histogram (threads outside the z: width / w: height bounds skip this block but still reach the barrier below)
|
| | | if (dispatchThreadId.x < (uint)_ScaleOffsetRes.z && dispatchThreadId.y < (uint)_ScaleOffsetRes.w)
|
| | | {
|
| | | #if USE_VIGNETTE_WEIGHTING
|
| | | // Vignette weighting to put more focus on what's in the center of the screen
|
| | | float2 uv01 = float2(dispatchThreadId) / float2(_ScaleOffsetRes.z, _ScaleOffsetRes.w); // pixel coordinates divided by width/height
|
| | | float2 d = abs(uv01 - (0.5).xx); // per-axis distance from the centre of the image
|
| | | float vfactor = Pow2(saturate(1.0 - dot(d, d))); // dot(d, d) is the squared distance from the centre
|
| | | uint weight = (uint)(64.0 * vfactor); // truncated to an integer weight; NOTE(review): range is [16, 64] if Pow2 squares its argument - Pow2 is defined elsewhere, confirm
|
| | | #else
|
| | | uint weight = 1u;
|
| | | #endif
|
| | | // Read this thread's pixel and convert it to a bin index
|
| | | float3 color = _Source[dispatchThreadId].xyz;
|
| | | float luminance = Max3(color); // Looks more natural than using a Rec.709 luminance for some reason
|
| | | float logLuminance = GetHistogramBinFromLuminance(luminance, _ScaleOffsetRes.xy); // NOTE(review): helper is defined elsewhere; the index math below assumes it returns a value in [0, 1] - confirm
|
| | | uint idx = (uint)(logLuminance * (HISTOGRAM_BINS - 1u)); // scales onto bin indices 0 .. HISTOGRAM_BINS - 1 (truncating)
|
| | | InterlockedAdd(gs_histogram[idx], weight); // atomic because several threads of the group may hit the same bin
|
| | | }
|
| | | // Wait until every thread of the group has finished accumulating
|
| | | GroupMemoryBarrierWithGroupSync();
|
| | |
|
| | | // Merge everything: the first HISTOGRAM_BINS threads each atomically add one group bin into _Histogram. NOTE(review): _Histogram is only ever added to here, so it presumably has to be cleared before the dispatch - confirm
|
| | | if (localThreadId < HISTOGRAM_BINS)
|
| | | InterlockedAdd(_Histogram[localThreadId], gs_histogram[localThreadId]);
|
| | | }
|