为什么保守光栅化无法为某些三角形调用片段着色器?

Why does conservative rasterization fail to call the fragment shader for some triangles?

本文关键字:调用 三角形 片段 为什么      更新时间:2023-10-16

我在 g3.4xlarge AWS EC2 实例上使用保守光栅化。以下代码应当在片段着色器中递增原子计数器(其值在 RenderFunction() 中读回),并把片段坐标存入着色器存储缓冲区对象(SSBO)。下面这个三角形不会使原子计数器递增,而其他三角形确实会使原子计数器递增。

`nvcc --version` 的输出:Cuda compilation tools, release 9.0, V9.0.176。程序打印的信息:INFO: OpenGL Version: 4.0.0 NVIDIA 384.111

以下代码修改自 https://github.com/daw42/glslcookbook

// g++ -std=gnu++0x  -g -I ./glslcookbook/ingredients/glad/include -I ./glslcookbook/ingredients  dbg1.cpp ./glslcookbook/ingredients/glad/src/glad.c ./glslcookbook/ingredients/glslprogram.cpp -ldl -lglut -lGLU
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "glslprogram.h"
#include <GL/freeglut.h>
#include <iostream>
#include <glm/glm.hpp>
#include <glm/ext.hpp>
#define WINDOW_TITLE_PREFIX "Chapter 2"
using namespace glm;
// Window state shared by the GLUT callbacks.
int CurrentWidth = 576;
int CurrentHeight = 576;
int WindowHandle = 0;

// Frames rendered since TimerFunction last reset the counter.
unsigned FrameCount = 0;

// OpenGL object names. In the code shown here only VaoId, VboId and
// ColorBufferId are actually assigned and used.
GLuint VertexShaderId;
GLuint FragmentShaderId;
GLuint ProgramId;
GLuint VaoId;
GLuint VboId;
GLuint ColorBufferId;

// Forward declarations: set-up and tear-down.
void Initialize(int, char*[]);
void InitWindow(int, char*[]);
void Cleanup(void);
void CreateVBO(void);
void DestroyVBO(void);
void CreateShaders(void);
void DestroyShaders(void);

// Forward declarations: GLUT callbacks.
void ResizeFunction(int, int);
void RenderFunction(void);
void TimerFunction(int);
void IdleFunction(void);

// Indices into the global `buffers` array.
enum BufferNames {
    COUNTER_BUFFER = 0,   // atomic counter buffer
    LINKED_LIST_BUFFER    // shader storage buffer written by the fragment shader
};
// Program entry point: builds the window and GL state, then hands control to
// GLUT. InitWindow configures GLUT so that glutMainLoop() returns once the
// window is closed.
int main(int argc, char* argv[])
{
    Initialize(argc, argv);
    glutMainLoop();
    return EXIT_SUCCESS;
}
// GL buffer names, indexed by BufferNames (atomic counter, node storage).
GLuint buffers[2];

// Viewport size in pixels; ResizeFunction and CreateShaders pin the viewport
// to these values.
int width = 576;
int height = width;

// log2 of the world-space side length of one pixel (CreateVBO uses 1 << gLog2SL).
int gLog2SL = 20;

// World-space extent of the orthographic volume: 603979776 == 576 * 2^20,
// i.e. width pixels of 2^gLog2SL units each.
int maxV = 603979776;

// Node capacity: sizes the shader storage buffer and is passed to the
// fragment shader as the MaxNodes uniform.
GLuint maxNodes = 66000;
void Initialize(int argc, char* argv[])
{
if(gladLoadGL()) {
// you need an OpenGL context before loading glad
printf("I did load GL with no context!n");
exit(-1);
}
InitWindow(argc, argv);
if(!gladLoadGL()) {
printf("Something went wrong!n");
exit(-1);
}
fprintf(
stdout,
"INFO: OpenGL Version: %sn",
glGetString(GL_VERSION)
);
glEnable(GL_CONSERVATIVE_RASTERIZATION_NV);
GLenum ErrorCheckValue = glGetError();
if (ErrorCheckValue != GL_NO_ERROR)
{
fprintf(
stderr,
"ERROR: after calling glEnable(GL_CONSERVATIVE_RASTERIZATION_NV) : %s n",
gluErrorString(ErrorCheckValue)
);
exit(-1);
}
CreateShaders();
CreateVBO();
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
}
// Initialises freeglut, requests a core-profile context, creates the window
// and registers every GLUT callback. Exits the process if no window could be
// created.
//
// argc/argv are passed to glutInit, which may consume GLUT-specific options.
void InitWindow(int argc, char* argv[])
{
    glutInit(&argc, argv);

    // The program uses atomic counter buffers and shader storage buffers and
    // its shaders declare "#version 430", so request an OpenGL 4.3 context.
    glutInitContextVersion(4, 3);
    glutInitContextFlags(GLUT_FORWARD_COMPATIBLE);
    glutInitContextProfile(GLUT_CORE_PROFILE);

    // Let glutMainLoop() return to main() when the window is closed.
    glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_GLUTMAINLOOP_RETURNS);

    glutInitWindowSize(CurrentWidth, CurrentHeight);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_DOUBLE | GLUT_RGBA);

    WindowHandle = glutCreateWindow(WINDOW_TITLE_PREFIX);
    if (WindowHandle < 1) {
        fprintf(stderr, "ERROR: Could not create a new rendering window.\n");
        exit(EXIT_FAILURE);
    }

    glutReshapeFunc(ResizeFunction);
    glutDisplayFunc(RenderFunction);
    glutIdleFunc(IdleFunction);
    glutTimerFunc(0, TimerFunction, 0);
    glutCloseFunc(Cleanup);
}
// GLUT reshape callback. Records the global width/height as the current
// window size and sets the viewport to that same fixed size.
//
// NOTE(review): the Width/Height arguments supplied by GLUT are ignored; the
// lower-case globals are used instead. Confirm that pinning the viewport to
// the fixed size is intentional and not a typo for Width/Height.
void ResizeFunction(int Width, int Height)
{
    (void)Width;
    (void)Height;

    CurrentHeight = height;
    CurrentWidth = width;
    glViewport(0, 0, width, height);
}
void RenderFunction(void)
{
++FrameCount;
GLuint atomicVal=4352;
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, buffers[COUNTER_BUFFER]);
glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &atomicVal);
GLuint zero = 0;
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, buffers[COUNTER_BUFFER] );
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &zero);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDrawArrays(GL_TRIANGLES, 0, 3);
glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, buffers[COUNTER_BUFFER]);
glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &atomicVal);
struct NodeType {
vec4 color;
};
//  NodeType nodeRA[maxNodes];
NodeType nodeRA[66000];
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffers[LINKED_LIST_BUFFER]);
//  glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, maxNodes * sizeof(NodeType), &nodeRA[0]);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, 66000 * sizeof(NodeType), &nodeRA[0]);

for (int i=0; i<atomicVal; i++) {
printf("index= %d, %f, %f, %f, %fn",i, nodeRA[i].color[0], nodeRA[i].color[1], nodeRA[i].color[2], nodeRA[i].color[3]);
}
glutSwapBuffers();
}
// GLUT idle callback: requests another redraw whenever GLUT is idle, so the
// scene is rendered continuously.
void IdleFunction()
{
    glutPostRedisplay();
}
void TimerFunction(int Value)
{
if (0 != Value) {
char* TempString = (char*)
malloc(512 + strlen(WINDOW_TITLE_PREFIX));
sprintf(
TempString,
"%s: %d Frames Per Second @ %d x %d",
WINDOW_TITLE_PREFIX,
FrameCount * 4,
CurrentWidth,
CurrentHeight
);
glutSetWindowTitle(TempString);
free(TempString);
}
FrameCount = 0;
glutTimerFunc(250, TimerFunction, 1);
}
// GLUT close callback (registered in InitWindow): unbinds the shader program
// first, then releases the vertex buffers and vertex array.
void Cleanup()
{
    DestroyShaders();
    DestroyVBO();
}
void CreateVBO(void)
{
glGenBuffers(2, buffers);
GLint nodeSize = 5 * sizeof(GLfloat) + sizeof(GLuint); // The size of a linked list node
// Our atomic counter
glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, buffers[COUNTER_BUFFER]);
glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_DYNAMIC_DRAW);
// The buffer of linked lists
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffers[LINKED_LIST_BUFFER]);
glBufferData(GL_SHADER_STORAGE_BUFFER, maxNodes * nodeSize, NULL, GL_DYNAMIC_DRAW);
double pixSL= 1<<(gLog2SL);
int m=11, n=13, p=15;
GLfloat z= p*pixSL;
GLfloat vertX1= 435275968, vertY1= 328189312, vertX2= 435275712, vertY2= 328189312,vertX3= 435275712, vertY3= 328189056;
GLfloat Vertices[] = {
vertX1, vertY1, z, 1.0f,
vertX2, vertY2, z, 1.0f,
vertX3, vertY3, z, 1.0f
};
GLfloat Colors[] = {
1.0f, 0.0f, 0.0f, 1.0f,
0.0f, 1.0f, 0.0f, 1.0f,
0.0f, 0.0f, 1.0f, 1.0f
};
GLenum ErrorCheckValue = glGetError();
glGenVertexArrays(1, &VaoId);
glBindVertexArray(VaoId);
glGenBuffers(1, &VboId);
glBindBuffer(GL_ARRAY_BUFFER, VboId);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertices), Vertices, GL_STATIC_DRAW);
glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
glGenBuffers(1, &ColorBufferId);
glBindBuffer(GL_ARRAY_BUFFER, ColorBufferId);
glBufferData(GL_ARRAY_BUFFER, sizeof(Colors), Colors, GL_STATIC_DRAW);
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(1);
ErrorCheckValue = glGetError();
if (ErrorCheckValue != GL_NO_ERROR)
{
fprintf(
stderr,
"ERROR: Could not create a VBO: %s n",
gluErrorString(ErrorCheckValue)
);
exit(-1);
}
}
void DestroyVBO(void)
{
GLenum ErrorCheckValue = glGetError();
glDisableVertexAttribArray(1);
glDisableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glDeleteBuffers(1, &ColorBufferId);
glDeleteBuffers(1, &VboId);
glBindVertexArray(0);
glDeleteVertexArrays(1, &VaoId);
ErrorCheckValue = glGetError();
if (ErrorCheckValue != GL_NO_ERROR)
{
fprintf(
stderr,
"ERROR: Could not destroy the VBO: %s n",
gluErrorString(ErrorCheckValue)
);
exit(-1);
}
}
void CreateShaders(void)
{
GLenum ErrorCheckValue = glGetError();
GLSLProgram prog;
try {
prog.compileShader("oit.vs");
prog.compileShader("oit.fs");
prog.link();
prog.use();
} catch(GLSLProgramException &e ) {
std::cerr << e.what() << std::endl;
exit( EXIT_FAILURE );
}
prog.setUniform("MaxNodes", maxNodes);
// modelview matrix is a scaling by 1/max(x,y,z)
double scale= 1.0/maxV, maxV1= maxV;
const mat4 mv= glm::ortho(0.0, maxV1, 0.0, maxV1, 0.0, -maxV1);
prog.setUniform("ModelViewMatrix", mv);
glViewport(0.0,0.0,width, height);
}
void DestroyShaders(void)
{
GLenum ErrorCheckValue = glGetError();
glUseProgram(0);
}

这是 oit.fs

#version 430

// Fragment shader: counts every generated fragment with an atomic counter and
// records its window coordinates in a shader storage buffer.

// Report gl_FragCoord.xy at integer pixel positions.
layout (pixel_center_integer) in vec4 gl_FragCoord;

// One recorded fragment; `color` actually carries the fragment coordinates.
struct NodeType {
    vec4 color;
};

layout (binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;

layout (binding = 0, std430) buffer linkedLists {
    NodeType nodes[];
};

// Capacity of `nodes`, supplied by the application.
uniform uint MaxNodes;

in vec4 ex_Color;
out vec3 out_Color;

void main(void)
{
    // The counter is incremented for every fragment, even when the buffer is
    // already full, so its final value is the total fragment count.
    uint slot = atomicCounterIncrement(nextNodeCounter);

    // Is our buffer full?  If so, we don't add the fragment to the list.
    if (slot < MaxNodes - 1) {
        nodes[slot].color = vec4(gl_FragCoord.xyz, 0.0);
    }

    out_Color = vec3(1.0, 1.0, 0.0);
}

这是 oit.vs

#version 430

// Vertex shader: transforms the incoming position by the matrix supplied by
// the application and writes the clip-space result.
layout (location = 0) in vec3 VertexPosition;

uniform mat4 ModelViewMatrix;

void main()
{
    // Extend the position to homogeneous coordinates with w = 1.
    vec4 position = vec4(VertexPosition, 1.0);
    gl_Position = ModelViewMatrix * position;
}

CreateShaders 中的注释写的是"modelview 矩阵是按 1/max(x, y, z) 缩放",但这并不是正交(ortho)矩阵实际所做的事情。我在 Python 中快速验算了 vertX1 的变换结果,显示它远远超出了视口。如果三个顶点全部被裁剪掉,就不存在三角形,因此也就不会有任何着色器输出。

建议仔细阅读正交投影矩阵(glm::ortho)的说明,或者改用缩放(scale)矩阵的构造函数/函数。