1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "PixelProgram.hpp"
16
17 #include "Constants.hpp"
18 #include "SamplerCore.hpp"
19 #include "Device/Primitive.hpp"
20 #include "Device/Renderer.hpp"
21 #include "Vulkan/VkDevice.hpp"
22
23 namespace sw {
24
PixelProgram(const PixelProcessor::State & state,const vk::PipelineLayout * pipelineLayout,const SpirvShader * spirvShader,const vk::Attachments & attachments,const vk::DescriptorSet::Bindings & descriptorSets)25 PixelProgram::PixelProgram(
26 const PixelProcessor::State &state,
27 const vk::PipelineLayout *pipelineLayout,
28 const SpirvShader *spirvShader,
29 const vk::Attachments &attachments,
30 const vk::DescriptorSet::Bindings &descriptorSets)
31 : PixelRoutine(state, pipelineLayout, spirvShader, attachments, descriptorSets)
32 {
33 }
34
35 // Union all cMask and return it as Booleans
maskAny(Int cMask[4],const SampleSet & samples)36 SIMD::Int PixelProgram::maskAny(Int cMask[4], const SampleSet &samples)
37 {
38 // See if at least 1 sample is used
39 Int maskUnion = 0;
40 for(unsigned int q : samples)
41 {
42 maskUnion |= cMask[q];
43 }
44
45 // Convert to Booleans
46 SIMD::Int laneBits = SIMD::Int([](int i) { return 1 << i; }); // 1, 2, 4, 8, ...
47 SIMD::Int mask(maskUnion);
48 mask = CmpNEQ(mask & laneBits, 0);
49 return mask;
50 }
51
52 // Union all cMask/sMask/zMask and return it as Booleans
maskAny(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)53 SIMD::Int PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
54 {
55 // See if at least 1 sample is used
56 Int maskUnion = 0;
57 for(unsigned int q : samples)
58 {
59 maskUnion |= (cMask[q] & sMask[q] & zMask[q]);
60 }
61
62 // Convert to Booleans
63 SIMD::Int laneBits = SIMD::Int([](int i) { return 1 << i; }); // 1, 2, 4, 8, ...
64 SIMD::Int mask(maskUnion);
65 mask = CmpNEQ(mask & laneBits, 0);
66 return mask;
67 }
68
setBuiltins(Int & x,Int & y,SIMD::Float (& z)[4],SIMD::Float & w,Int cMask[4],const SampleSet & samples)69 void PixelProgram::setBuiltins(Int &x, Int &y, SIMD::Float (&z)[4], SIMD::Float &w, Int cMask[4], const SampleSet &samples)
70 {
71 routine.setImmutableInputBuiltins(spirvShader);
72
73 // TODO(b/146486064): Consider only assigning these to the SpirvRoutine iff
74 // they are ever going to be read.
75 float x0 = 0.5f;
76 float y0 = 0.5f;
77 float x1 = 1.5f;
78 float y1 = 1.5f;
79
80 // "When Sample Shading is enabled, the x and y components of FragCoord reflect the
81 // location of one of the samples corresponding to the shader invocation. Otherwise,
82 // the x and y components of FragCoord reflect the location of the center of the fragment."
83 if(state.sampleShadingEnabled && state.multiSampleCount > 1)
84 {
85 x0 = VkSampleLocations4[samples[0]][0];
86 y0 = VkSampleLocations4[samples[0]][1];
87 x1 = 1.0f + x0;
88 y1 = 1.0f + y0;
89 }
90
91 routine.fragCoord[0] = SIMD::Float(Float(x)) + SIMD::Float(x0, x1, x0, x1);
92 routine.fragCoord[1] = SIMD::Float(Float(y)) + SIMD::Float(y0, y0, y1, y1);
93 routine.fragCoord[2] = z[0]; // sample 0
94 routine.fragCoord[3] = w;
95
96 routine.invocationsPerSubgroup = SIMD::Width;
97 routine.helperInvocation = ~maskAny(cMask, samples);
98 routine.windowSpacePosition[0] = SIMD::Int(x) + SIMD::Int(0, 1, 0, 1);
99 routine.windowSpacePosition[1] = SIMD::Int(y) + SIMD::Int(0, 0, 1, 1);
100 routine.layer = *Pointer<Int>(data + OFFSET(DrawData, layer));
101
102 // PointCoord formula reference: https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#primsrast-points-basic
103 // Note we don't add a 0.5 offset to x and y here (like for fragCoord) because pointCoordX/Y have 0.5 subtracted as part of the viewport transform.
104 SIMD::Float pointSizeInv = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, pointSizeInv)));
105 routine.pointCoord[0] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(x)) + SIMD::Float(0.0f, 1.0f, 0.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, x0)))));
106 routine.pointCoord[1] = SIMD::Float(0.5f) + pointSizeInv * (((SIMD::Float(Float(y)) + SIMD::Float(0.0f, 0.0f, 1.0f, 1.0f)) - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, y0)))));
107
108 routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
109 assert(builtin.SizeInComponents == 1);
110 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(routine.layer));
111 });
112
113 routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
114 assert(builtin.SizeInComponents == 4);
115 value[builtin.FirstComponent + 0] = routine.fragCoord[0];
116 value[builtin.FirstComponent + 1] = routine.fragCoord[1];
117 value[builtin.FirstComponent + 2] = routine.fragCoord[2];
118 value[builtin.FirstComponent + 3] = routine.fragCoord[3];
119 });
120
121 routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
122 assert(builtin.SizeInComponents == 2);
123 value[builtin.FirstComponent + 0] = routine.pointCoord[0];
124 value[builtin.FirstComponent + 1] = routine.pointCoord[1];
125 });
126
127 routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
128 assert(builtin.SizeInComponents == 1);
129 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
130 });
131
132 routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const Spirv::BuiltinMapping &builtin, Array<SIMD::Float> &value) {
133 assert(builtin.SizeInComponents == 1);
134 value[builtin.FirstComponent] = As<SIMD::Float>(routine.helperInvocation);
135 });
136 }
137
executeShader(Int cMask[4],Int sMask[4],Int zMask[4],const SampleSet & samples)138 void PixelProgram::executeShader(Int cMask[4], Int sMask[4], Int zMask[4], const SampleSet &samples)
139 {
140 routine.device = device;
141 routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
142 routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
143 routine.pushConstants = data + OFFSET(DrawData, pushConstants);
144 routine.constants = device + OFFSET(vk::Device, constants);
145
146 auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
147 if(it != spirvShader->inputBuiltins.end())
148 {
149 ASSERT(it->second.SizeInComponents == 1);
150 auto frontFacing = SIMD::Int(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
151 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(frontFacing);
152 }
153
154 it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
155 if(it != spirvShader->inputBuiltins.end())
156 {
157 ASSERT(SIMD::Width == 4);
158 SIMD::Int laneBits = SIMD::Int(1, 2, 4, 8);
159
160 SIMD::Int inputSampleMask = 0;
161 for(unsigned int q : samples)
162 {
163 inputSampleMask |= SIMD::Int(1 << q) & CmpNEQ(SIMD::Int(cMask[q]) & laneBits, 0);
164 }
165
166 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<SIMD::Float>(inputSampleMask);
167 // Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
168 // Fill any non-zero indices with 0.
169 for(auto i = 1u; i < it->second.SizeInComponents; i++)
170 {
171 routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = 0;
172 }
173 }
174
175 it = spirvShader->inputBuiltins.find(spv::BuiltInSampleId);
176 if(it != spirvShader->inputBuiltins.end())
177 {
178 ASSERT(samples.size() == 1);
179 int sampleId = samples[0];
180 routine.getVariable(it->second.Id)[it->second.FirstComponent] =
181 As<SIMD::Float>(SIMD::Int(sampleId));
182 }
183
184 it = spirvShader->inputBuiltins.find(spv::BuiltInSamplePosition);
185 if(it != spirvShader->inputBuiltins.end())
186 {
187 ASSERT(samples.size() == 1);
188 int sampleId = samples[0];
189 routine.getVariable(it->second.Id)[it->second.FirstComponent + 0] =
190 SIMD::Float((state.multiSampleCount > 1) ? VkSampleLocations4[sampleId][0] : 0.5f);
191 routine.getVariable(it->second.Id)[it->second.FirstComponent + 1] =
192 SIMD::Float((state.multiSampleCount > 1) ? VkSampleLocations4[sampleId][1] : 0.5f);
193 }
194
195 // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
196 // handled separately, through the cMask.
197 SIMD::Int activeLaneMask = 0xFFFFFFFF;
198 SIMD::Int storesAndAtomicsMask = maskAny(cMask, sMask, zMask, samples);
199 routine.discardMask = 0;
200
201 spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets, &attachments, state.multiSampleCount);
202 spirvShader->emitEpilog(&routine);
203
204 for(int i = 0; i < MAX_COLOR_BUFFERS; i++)
205 {
206 c[i].x = routine.outputs[i * 4 + 0];
207 c[i].y = routine.outputs[i * 4 + 1];
208 c[i].z = routine.outputs[i * 4 + 2];
209 c[i].w = routine.outputs[i * 4 + 3];
210 }
211
212 clampColor(c);
213
214 if(spirvShader->getAnalysis().ContainsDiscard)
215 {
216 for(unsigned int q : samples)
217 {
218 cMask[q] &= ~routine.discardMask;
219 }
220 }
221
222 it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
223 if(it != spirvShader->outputBuiltins.end())
224 {
225 auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
226
227 for(unsigned int q : samples)
228 {
229 cMask[q] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1 << q), SIMD::Int(0)));
230 }
231 }
232
233 it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
234 if(it != spirvShader->outputBuiltins.end())
235 {
236 for(unsigned int q : samples)
237 {
238 z[q] = routine.getVariable(it->second.Id)[it->second.FirstComponent];
239 }
240 }
241 }
242
alphaTest(Int cMask[4],const SampleSet & samples)243 Bool PixelProgram::alphaTest(Int cMask[4], const SampleSet &samples)
244 {
245 if(!state.alphaToCoverage)
246 {
247 return true;
248 }
249
250 alphaToCoverage(cMask, c[0].w, samples);
251
252 Int pass = 0;
253 for(unsigned int q : samples)
254 {
255 pass = pass | cMask[q];
256 }
257
258 return pass != 0x0;
259 }
260
blendColor(Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4],const SampleSet & samples)261 void PixelProgram::blendColor(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4], const SampleSet &samples)
262 {
263 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
264 {
265 if(!state.colorWriteActive(index))
266 {
267 continue;
268 }
269
270 for(unsigned int q : samples)
271 {
272 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
273
274 SIMD::Float4 C = alphaBlend(index, buffer, c[index], x);
275 ASSERT(SIMD::Width == 4);
276 Vector4f color;
277 color.x = Extract128(C.x, 0);
278 color.y = Extract128(C.y, 0);
279 color.z = Extract128(C.z, 0);
280 color.w = Extract128(C.w, 0);
281 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
282 }
283 }
284 }
285
clampColor(SIMD::Float4 color[MAX_COLOR_BUFFERS])286 void PixelProgram::clampColor(SIMD::Float4 color[MAX_COLOR_BUFFERS])
287 {
288 // "If the color attachment is fixed-point, the components of the source and destination values and blend factors
289 // are each clamped to [0,1] or [-1,1] respectively for an unsigned normalized or signed normalized color attachment
290 // prior to evaluating the blend operations. If the color attachment is floating-point, no clamping occurs."
291
292 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
293 {
294 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage))
295 {
296 continue;
297 }
298
299 switch(state.colorFormat[index])
300 {
301 case VK_FORMAT_UNDEFINED:
302 break;
303 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
304 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
305 case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
306 case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
307 case VK_FORMAT_B5G6R5_UNORM_PACK16:
308 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
309 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
310 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
311 case VK_FORMAT_R5G6B5_UNORM_PACK16:
312 case VK_FORMAT_B8G8R8A8_UNORM:
313 case VK_FORMAT_B8G8R8A8_SRGB:
314 case VK_FORMAT_R8G8B8A8_UNORM:
315 case VK_FORMAT_R8G8B8A8_SRGB:
316 case VK_FORMAT_R8G8_UNORM:
317 case VK_FORMAT_R8_UNORM:
318 case VK_FORMAT_R16_UNORM:
319 case VK_FORMAT_R16G16_UNORM:
320 case VK_FORMAT_R16G16B16A16_UNORM:
321 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
322 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
323 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
324 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
325 color[index].x = Min(Max(color[index].x, 0.0f), 1.0f);
326 color[index].y = Min(Max(color[index].y, 0.0f), 1.0f);
327 color[index].z = Min(Max(color[index].z, 0.0f), 1.0f);
328 color[index].w = Min(Max(color[index].w, 0.0f), 1.0f);
329 break;
330 case VK_FORMAT_R32_SFLOAT:
331 case VK_FORMAT_R32G32_SFLOAT:
332 case VK_FORMAT_R32G32B32A32_SFLOAT:
333 case VK_FORMAT_R32_SINT:
334 case VK_FORMAT_R32G32_SINT:
335 case VK_FORMAT_R32G32B32A32_SINT:
336 case VK_FORMAT_R32_UINT:
337 case VK_FORMAT_R32G32_UINT:
338 case VK_FORMAT_R32G32B32A32_UINT:
339 case VK_FORMAT_R16_SFLOAT:
340 case VK_FORMAT_R16G16_SFLOAT:
341 case VK_FORMAT_R16G16B16A16_SFLOAT:
342 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
343 case VK_FORMAT_R16_SINT:
344 case VK_FORMAT_R16G16_SINT:
345 case VK_FORMAT_R16G16B16A16_SINT:
346 case VK_FORMAT_R16_UINT:
347 case VK_FORMAT_R16G16_UINT:
348 case VK_FORMAT_R16G16B16A16_UINT:
349 case VK_FORMAT_R8_SINT:
350 case VK_FORMAT_R8G8_SINT:
351 case VK_FORMAT_R8G8B8A8_SINT:
352 case VK_FORMAT_R8_UINT:
353 case VK_FORMAT_R8G8_UINT:
354 case VK_FORMAT_R8G8B8A8_UINT:
355 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
356 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
357 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
358 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
359 break;
360 default:
361 UNSUPPORTED("VkFormat: %d", int(state.colorFormat[index]));
362 }
363 }
364 }
365
366 } // namespace sw
367