当相机接近模型时,SSAO样本内核会导致性能下降?

SSAO sample kernel causes a performance drop when the camera is close to the model?

本文关键字:性能 内核 样本 接近 相机 模型 SSAO      更新时间:2023-10-16

我有一个问题,当相机靠近模型时,性能会下降。

我发现这与 SSAO 采样内核有关,但我始终弄不清楚为什么它在相机靠近网格时会导致性能问题。

当我注释掉 SSAO 渲染代码中遍历采样的 for 循环时,性能会恢复到正常水平,因此问题显然出在这个 for 循环上。我最初以为可能是着色器的问题,但在着色器里也没有找到任何问题。

有什么想法吗?这是您需要的所有代码...

SSAO 设置代码

// Create two frame buffers, one for ssao colour and another for ssao blur
_fbos.push_back(new Fbo(width, height, { new FboAttachment(width, height, GL_RED, GL_RGB, GL_FLOAT, GL_COLOR_ATTACHMENT0) }, false));
_fbos.push_back(new Fbo(width, height, { new FboAttachment(width, height, GL_RED, GL_RGB, GL_FLOAT, GL_COLOR_ATTACHMENT0) }, false));
//////////////////////////////////////////////////////////////////////////////////////////////////////////
std::uniform_real_distribution<GLfloat> rand_floats(0.0f, 1.0f);    // Generate random floats between 0.0 and 1.0
std::default_random_engine rand_generator;  // A generator for randomising floats
// Create temp iterator var
for (unsigned int i = 0; i < 64; ++i)   // Iterate through each sample...
{
glm::vec3 sample(rand_floats(rand_generator) * 2.0f - 1.0f, rand_floats(rand_generator) * 2.0f - 1.0f, rand_floats(rand_generator)); // the third parameter was wrong on this line
sample = glm::normalize(sample);    // Normalise the sample
sample *= rand_floats(rand_generator);  // Seed the randomisation
float scale = static_cast<float>(i) / 64.0f;    // Get pixel position in NDC about the resolution size
scale = Math::lerpf(0.1f, 1.0f, scale * scale);     // Interpolate the scale
sample *= scale;    // Scale the s and t values
_ssao_kernals.push_back(sample);    // Assign sample to the kernal array
_u_samples.push_back(glGetUniformLocation(shader_programs[0], ("samples[" + std::to_string(i) + "]").c_str()));     // Get each sample uniform location
}
for (unsigned int i = 0; i < 16; i++)       // For each sample / 4...
{
glm::vec3 noise(rand_floats(rand_generator) * 2.0f - 1.0f, rand_floats(rand_generator) * 2.0f - 1.0f, 0.0f);    // Randomly generate a noise pixel
_ssao_noise.push_back(noise);   // Assign noise pixel to noise array
}
/*
* Create a noise texture to remove any banding from the ssao
*/
glGenTextures(1, &_noise_texture); // generate the texture
glBindTexture(GL_TEXTURE_2D, _noise_texture); // bind data
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB32F, 4, 4, 0, GL_RGB, GL_FLOAT, &_ssao_noise[0]); // set texture data
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); // texture filtering
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // texture filtering
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); // texture filtering 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); // texture filtering

SSAO 渲染函数

_fbos[0]->Bind(); // bind ssao texture
glClear(GL_COLOR_BUFFER_BIT); // clear colour data on the screen
glUseProgram(_shader_programs[0]); // Use the first shader pass
for (unsigned int i = 0; i < SSAO_SAMPLE_RESOLUTION; ++i)   // For each ssao sample...
glUniform3fv(_u_samples[i], 1, glm::value_ptr(_ssao_kernals[i]));   // Assign kernal uniform data
glUniformMatrix4fv(_u_projection, 1, GL_FALSE, glm::value_ptr(Content::_map->GetCamera()->GetProjectionMatrix()));  // Assign camera projection uniform data
glActiveTexture(GL_TEXTURE0);   // Set active texture to index 0
glBindTexture(GL_TEXTURE_2D, _g_buffer_data->GetAttachments()[0]->_texture);    // Bind positions
glActiveTexture(GL_TEXTURE1);   // Set active texture to index 1
glBindTexture(GL_TEXTURE_2D, _g_buffer_data->GetAttachments()[1]->_texture);    // Bind normals
glActiveTexture(GL_TEXTURE2);   // Set active texture to index 2
glBindTexture(GL_TEXTURE_2D, _noise_texture);   // Bind the noise texture
_screen_rect->Render(1);        // Render to screen rectangle
// Blur ssao texture
_fbos[1]->Bind();
glClear(GL_COLOR_BUFFER_BIT);
glUseProgram(_shader_programs[1]);  // Use the second shader pass
glActiveTexture(GL_TEXTURE0);   // Bind active texture to index 0
glBindTexture(GL_TEXTURE_2D, _fbos[0]->GetAttachments()[0]->_texture);  // Bind the final colour
_screen_rect->Render(1);        // Render to screen rectangle

SSAO 片段着色器

#version 330 core
// SSAO fragment shader: for every fragment, tests `kernelSize` sample points in a hemisphere
// around the surface normal against the position buffer and writes the resulting
// occlusion factor (1.0 = unoccluded) to a single-channel output.
out float FragColor;
in vec2 _texcoord;
uniform sampler2D gPosition;    // Position buffer (view space, per the comments in main)
uniform sampler2D gNormal;      // Normal buffer
uniform sampler2D texNoise;     // Small tiling texture of random vectors
uniform vec3 samples[64];       // Hemisphere sample kernel
// Tunables are compile-time constants; they are never written, so `const` lets the
// compiler fold them and unroll the loop.
const int kernelSize = 64;      // Must not exceed the size of `samples`
// Sampling radius, in the units of gPosition. The radius is fixed in view space, so the
// closer the camera is to the surface, the further apart the taps land in screen space,
// and the less likely those texels are to be in the GPU texture cache.
const float radius = 0.3;
const float bias = 0.025;       // Depth offset used in the occlusion comparison
// Tiles the noise texture across the screen.
// NOTE(review): hard-coded for a 1920x1080 target and a 4x4 noise texture; other sizes need updating.
const vec2 noiseScale = vec2(1920.0 / 4.0, 1080.0 / 4.0);
uniform mat4 proj;              // Camera projection matrix

void main()
{
    vec3 fragPos = texture(gPosition, _texcoord).xyz;
    vec3 normal = normalize(texture(gNormal, _texcoord).rgb);
    vec3 randomVec = normalize(texture(texNoise, _texcoord * noiseScale).xyz);

    // Gram-Schmidt: build an orthonormal basis around the normal, rotated by the noise vector.
    vec3 tangent = normalize(randomVec - normal * dot(randomVec, normal));
    vec3 bitangent = cross(normal, tangent);
    mat3 TBN = mat3(tangent, bitangent, normal);

    float occlusion = 0.0;
    for (int i = 0; i < kernelSize; ++i)
    {
        // Sample position. Named `samplePos` because `sample` is a keyword from GLSL 4.00 on
        // and may be rejected by some compilers.
        vec3 samplePos = TBN * samples[i];          // from tangent to view-space
        samplePos = fragPos + samplePos * radius;

        // Project the sample position to find where it lands on screen / in the texture.
        vec4 offset = vec4(samplePos, 1.0);
        offset = proj * offset;                     // from view to clip-space
        offset.xyz /= offset.w;                     // perspective divide
        offset.xyz = offset.xyz * 0.5 + 0.5;        // transform to range 0.0 - 1.0

        // Depth stored in the position buffer at the projected location.
        float sampleDepth = texture(gPosition, offset.xy).z;

        // Range check: fade out occluders whose depth is far from the fragment's depth.
        // The denominator is clamped to avoid a division by zero; the ratio is already far
        // above 1.0 there, so smoothstep still yields 1.0.
        float depthDelta = max(abs(fragPos.z - sampleDepth), 1e-5);
        float rangeCheck = smoothstep(0.0, 1.0, radius / depthDelta);
        occlusion += (sampleDepth >= samplePos.z + bias ? 1.0 : 0.0) * rangeCheck;
    }

    // Average over the kernel, invert, then raise to the third power to strengthen the effect.
    occlusion = 1.0 - (occlusion / float(kernelSize));
    FragColor = pow(occlusion, 3.0);
}

这是 SSAO 的预期性能特征。

计算 AO 的纹素离相机越近,其周围的采样点在屏幕空间中就相距越远,这些被采样的纹素同时位于 GPU 纹理缓存中的可能性也就越小——这会造成巨大的性能损失。