| | |
| | | #include "UnityCG.cginc" |
| | | |
// Edge length of the square thread groups used by KHistogramGather and KHistogramClear
#define GROUP_SIZE 16

// Histogram bins; each uint4 holds one count per channel
RWStructuredBuffer<uint4> _Histogram;

// Image the histogram is gathered from
Texture2D<float4> _Source;

CBUFFER_START(Params)
    uint   _IsLinear;   // Non-zero: colors go through LinearToGammaSpace before binning
    float4 _Res;        // xy bound the processed region; y is also the scaling target height
    uint4  _Channels;   // Non-zero component enables a channel (x: red, y: green, z: blue, w: luminance)
CBUFFER_END

// Per-thread-group histogram, merged into _Histogram once the group is done
groupshared uint4 gs_histogram[256];
| | | |
// Gather pass: every thread group accumulates a local histogram of its part of
// _Source in group-shared memory, then atomically adds it to _Histogram.
// Channel layout of a bin: x = red, y = green, z = blue, w = luminance.
#pragma kernel KHistogramGather
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KHistogramGather(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened index of this thread inside its group; doubles as the bin it owns
    const uint bin = groupThreadId.x + GROUP_SIZE * groupThreadId.y;
    const bool ownsBin = bin < 256;

    // Reset the group-local histogram, one bin per thread
    if (ownsBin)
    {
        gs_histogram[bin] = uint4(0, 0, 0, 0);
    }

    GroupMemoryBarrierWithGroupSync();

    // Threads outside the _Res.x by _Res.y region do not sample anything
    const bool insideSource = dispatchThreadId.x < (uint)_Res.x && dispatchThreadId.y < (uint)_Res.y;

    if (insideSource)
    {
        // The histogram is built from gamma-space values (like Photoshop & all)
        float3 pixel = saturate(_Source[dispatchThreadId].rgb);

        if (_IsLinear != 0u)
        {
            pixel = LinearToGammaSpace(pixel);
        }

        // Map each color channel and the luminance to a bin index
        const float3 lumaWeights = float3(0.2125, 0.7154, 0.0721);
        const uint3 colorBin = (uint3)(round(pixel * 255.0));
        const uint lumaBin = (uint)(round(dot(pixel, lumaWeights) * 255.0));

        // Count the pixel in every enabled channel of the group-local histogram
        if (_Channels.x != 0u) { InterlockedAdd(gs_histogram[colorBin.x].x, 1); } // Red
        if (_Channels.y != 0u) { InterlockedAdd(gs_histogram[colorBin.y].y, 1); } // Green
        if (_Channels.z != 0u) { InterlockedAdd(gs_histogram[colorBin.z].z, 1); } // Blue
        if (_Channels.w != 0u) { InterlockedAdd(gs_histogram[lumaBin].w, 1); }    // Luminance
    }

    GroupMemoryBarrierWithGroupSync();

    // Merge: each thread adds its bin to the output buffer, skipping disabled
    // channels and empty counts
    if (ownsBin)
    {
        const uint4 counts = gs_histogram[bin];

        if (_Channels.x != 0u && counts.x != 0u) { InterlockedAdd(_Histogram[bin].x, counts.x); } // Red
        if (_Channels.y != 0u && counts.y != 0u) { InterlockedAdd(_Histogram[bin].y, counts.y); } // Green
        if (_Channels.z != 0u && counts.z != 0u) { InterlockedAdd(_Histogram[bin].z, counts.z); } // Blue
        if (_Channels.w != 0u && counts.w != 0u) { InterlockedAdd(_Histogram[bin].w, counts.w); } // Luminance
    }
}
| | | |
// Scaling pass
// Scratch copy of the histogram used to find the per-channel maximum
groupshared uint4 gs_pyramid[256];

// Rescales every channel of _Histogram so that its largest bin becomes _Res.y.
// Bins are addressed by the in-group thread index only, so each 16x16 group
// covers the 256 bins once.
#pragma kernel KHistogramScale
[numthreads(16,16,1)]
void KHistogramScale(uint2 groupThreadId : SV_GroupThreadID)
{
    const uint localThreadId = groupThreadId.y * 16 + groupThreadId.x;

    // Keep the raw count around: it seeds the reduction and is scaled at the end
    const uint4 count = _Histogram[localThreadId];
    gs_pyramid[localThreadId] = count;

    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction to find the max value; the live range is halved on
    // every step until the per-channel maximum sits in gs_pyramid[0]
    UNITY_UNROLL
    for (uint i = 256 >> 1; i > 0; i >>= 1)
    {
        if (localThreadId < i)
            gs_pyramid[localThreadId] = max(gs_pyramid[localThreadId], gs_pyramid[localThreadId + i]);

        GroupMemoryBarrierWithGroupSync();
    }

    // Actual scaling.
    // A channel with no samples (for instance one left disabled by _Channels in
    // the gather pass) has a peak of 0. Dividing by it would give an infinite
    // factor and 0 * inf = NaN, whose conversion to uint is not portable. Clamp
    // the peak to 1 so such a channel simply stays at 0.
    const uint4 peak = max(gs_pyramid[0], uint4(1u, 1u, 1u, 1u));
    const float4 factor = _Res.y / (float4)peak;
    _Histogram[localThreadId] = (uint4)round(count * factor);
}
| | | |
// Clear pass: zeroes one _Histogram entry per thread that lies inside the
// _Res.x by _Res.y region.
#pragma kernel KHistogramClear
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KHistogramClear(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const uint column = dispatchThreadId.x;
    const uint row = dispatchThreadId.y;

    // Nothing to do for threads outside the region
    if (column >= (uint)_Res.x || row >= (uint)_Res.y)
        return;

    // NOTE(review): the slot is addressed row-major with a stride of _Res.x and
    // the index is evaluated in float before being converted; which entries get
    // cleared therefore depends on the _Res value bound by the caller - confirm
    // it matches the buffer length.
    _Histogram[row * _Res.x + column] = uint4(0u, 0u, 0u, 0u);
}
| | | #include "UnityCG.cginc"
|
| | |
|
// Edge length of the square thread groups used by KHistogramGather and KHistogramClear
#define GROUP_SIZE 16

// Histogram bins; each uint4 holds one count per channel
RWStructuredBuffer<uint4> _Histogram;

// Image the histogram is gathered from
Texture2D<float4> _Source;

CBUFFER_START(Params)
    uint   _IsLinear;   // Non-zero: colors go through LinearToGammaSpace before binning
    float4 _Res;        // xy bound the processed region; y is also the scaling target height
    uint4  _Channels;   // Non-zero component enables a channel (x: red, y: green, z: blue, w: luminance)
CBUFFER_END

// Per-thread-group histogram, merged into _Histogram once the group is done
groupshared uint4 gs_histogram[256];
|
| | |
|
// Gather pass: every thread group accumulates a local histogram of its part of
// _Source in group-shared memory, then atomically adds it to _Histogram.
// Channel layout of a bin: x = red, y = green, z = blue, w = luminance.
#pragma kernel KHistogramGather
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KHistogramGather(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened index of this thread inside its group; doubles as the bin it owns
    const uint bin = groupThreadId.x + GROUP_SIZE * groupThreadId.y;
    const bool ownsBin = bin < 256;

    // Reset the group-local histogram, one bin per thread
    if (ownsBin)
    {
        gs_histogram[bin] = uint4(0, 0, 0, 0);
    }

    GroupMemoryBarrierWithGroupSync();

    // Threads outside the _Res.x by _Res.y region do not sample anything
    const bool insideSource = dispatchThreadId.x < (uint)_Res.x && dispatchThreadId.y < (uint)_Res.y;

    if (insideSource)
    {
        // The histogram is built from gamma-space values (like Photoshop & all)
        float3 pixel = saturate(_Source[dispatchThreadId].rgb);

        if (_IsLinear != 0u)
        {
            pixel = LinearToGammaSpace(pixel);
        }

        // Map each color channel and the luminance to a bin index
        const float3 lumaWeights = float3(0.2125, 0.7154, 0.0721);
        const uint3 colorBin = (uint3)(round(pixel * 255.0));
        const uint lumaBin = (uint)(round(dot(pixel, lumaWeights) * 255.0));

        // Count the pixel in every enabled channel of the group-local histogram
        if (_Channels.x != 0u) { InterlockedAdd(gs_histogram[colorBin.x].x, 1); } // Red
        if (_Channels.y != 0u) { InterlockedAdd(gs_histogram[colorBin.y].y, 1); } // Green
        if (_Channels.z != 0u) { InterlockedAdd(gs_histogram[colorBin.z].z, 1); } // Blue
        if (_Channels.w != 0u) { InterlockedAdd(gs_histogram[lumaBin].w, 1); }    // Luminance
    }

    GroupMemoryBarrierWithGroupSync();

    // Merge: each thread adds its bin to the output buffer, skipping disabled
    // channels and empty counts
    if (ownsBin)
    {
        const uint4 counts = gs_histogram[bin];

        if (_Channels.x != 0u && counts.x != 0u) { InterlockedAdd(_Histogram[bin].x, counts.x); } // Red
        if (_Channels.y != 0u && counts.y != 0u) { InterlockedAdd(_Histogram[bin].y, counts.y); } // Green
        if (_Channels.z != 0u && counts.z != 0u) { InterlockedAdd(_Histogram[bin].z, counts.z); } // Blue
        if (_Channels.w != 0u && counts.w != 0u) { InterlockedAdd(_Histogram[bin].w, counts.w); } // Luminance
    }
}
|
| | |
|
// Scaling pass
// Scratch copy of the histogram used to find the per-channel maximum
groupshared uint4 gs_pyramid[256];

// Rescales every channel of _Histogram so that its largest bin becomes _Res.y.
// Bins are addressed by the in-group thread index only, so each 16x16 group
// covers the 256 bins once.
#pragma kernel KHistogramScale
[numthreads(16,16,1)]
void KHistogramScale(uint2 groupThreadId : SV_GroupThreadID)
{
    const uint localThreadId = groupThreadId.y * 16 + groupThreadId.x;

    // Keep the raw count around: it seeds the reduction and is scaled at the end
    const uint4 count = _Histogram[localThreadId];
    gs_pyramid[localThreadId] = count;

    GroupMemoryBarrierWithGroupSync();

    // Parallel reduction to find the max value; the live range is halved on
    // every step until the per-channel maximum sits in gs_pyramid[0]
    UNITY_UNROLL
    for (uint i = 256 >> 1; i > 0; i >>= 1)
    {
        if (localThreadId < i)
            gs_pyramid[localThreadId] = max(gs_pyramid[localThreadId], gs_pyramid[localThreadId + i]);

        GroupMemoryBarrierWithGroupSync();
    }

    // Actual scaling.
    // A channel with no samples (for instance one left disabled by _Channels in
    // the gather pass) has a peak of 0. Dividing by it would give an infinite
    // factor and 0 * inf = NaN, whose conversion to uint is not portable. Clamp
    // the peak to 1 so such a channel simply stays at 0.
    const uint4 peak = max(gs_pyramid[0], uint4(1u, 1u, 1u, 1u));
    const float4 factor = _Res.y / (float4)peak;
    _Histogram[localThreadId] = (uint4)round(count * factor);
}
|
| | |
|
// Clear pass: zeroes one _Histogram entry per thread that lies inside the
// _Res.x by _Res.y region.
#pragma kernel KHistogramClear
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KHistogramClear(uint2 dispatchThreadId : SV_DispatchThreadID)
{
    const uint column = dispatchThreadId.x;
    const uint row = dispatchThreadId.y;

    // Nothing to do for threads outside the region
    if (column >= (uint)_Res.x || row >= (uint)_Res.y)
        return;

    // NOTE(review): the slot is addressed row-major with a stride of _Res.x and
    // the index is evaluated in float before being converted; which entries get
    // cleared therefore depends on the _Res value bound by the caller - confirm
    // it matches the buffer length.
    _Histogram[row * _Res.x + column] = uint4(0u, 0u, 0u, 0u);
}
|