Skip to content

Commit 758d35a

Browse files
authored
Merge pull request #74 from zao/feat/texture-fallback
feat: add BC7 CPU decode for legacy GPUs
2 parents c2504e0 + 3552b0d commit 758d35a

File tree

6 files changed

+123
-0
lines changed

6 files changed

+123
-0
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313
[submodule "libs/luautf8"]
1414
path = libs/luautf8
1515
url = https://github.com/starwing/luautf8.git
16+
[submodule "dep/compressonator"]
17+
path = dep/compressonator
18+
url = https://github.com/GPUOpen-Tools/compressonator

CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,21 @@ find_package(Threads REQUIRED)
121121
find_package(zstd REQUIRED)
122122
find_package(ZLIB REQUIRED)
123123

124+
add_library(cmp_core STATIC
125+
dep/compressonator/cmp_core/source/cmp_core.cpp
126+
dep/compressonator/cmp_core/source/cmp_core.h
127+
dep/compressonator/cmp_core/shaders/bc3_encode_kernel.cpp
128+
dep/compressonator/cmp_core/shaders/bc7_encode_kernel.cpp
129+
)
130+
131+
target_include_directories(cmp_core PRIVATE
132+
dep/compressonator/applications/_libs/cmp_math
133+
)
134+
135+
target_include_directories(cmp_core PUBLIC
136+
dep/compressonator/cmp_core/shaders
137+
dep/compressonator/cmp_core/source
138+
)
124139

125140
add_library(imgui STATIC
126141
dep/imgui/imconfig.h
@@ -203,6 +218,7 @@ target_link_libraries(SimpleGraphic
203218
PRIVATE
204219
unofficial::angle::libEGL
205220
unofficial::angle::libGLESv2
221+
cmp_core
206222
fmt::fmt
207223
glfw
208224
gli

dep/compressonator

Submodule compressonator added at f4b53d7

engine/render/r_main.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,15 @@ void r_renderer_c::Init(r_featureFlag_e features)
920920
glCompressedTexImage2D = NULL;
921921
}
922922

923+
if (strstr(st_ext, "GL_EXT_texture_compression_bptc")) {
924+
sys->con->Printf("using GL_EXT_texture_compression_bptc\n");
925+
texBC7 = true;
926+
}
927+
else {
928+
sys->con->Printf("GL_EXT_texture_compression_bptc not supported\n");
929+
texBC7 = false;
930+
}
931+
923932
if (strstr(st_ext, "GL_EXT_debug_marker")) {
924933
sys->con->Printf("using GL_EXT_debug_marker\n");
925934
glInsertEventMarkerEXT = (PFNGLINSERTEVENTMARKEREXTPROC)openGL->GetProc("glInsertEventMarkerEXT");

engine/render/r_main.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ class r_renderer_c: public r_IRenderer, public conCmdHandler_c {
120120

121121
bool texNonPOT = false; // Non power-of-2 textures supported?
122122
dword texMaxDim = 0; // Maximum texture dimension
123+
bool texBC7 = true; // BC7 textures supported?
123124

124125
PFNGLCOMPRESSEDTEXIMAGE2DPROC glCompressedTexImage2D = nullptr;
125126
PFNGLINSERTEVENTMARKEREXTPROC glInsertEventMarkerEXT = nullptr;

engine/render/r_texture.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <atomic>
1010
#include "r_local.h"
1111

12+
#include "cmp_core.h"
1213
#include "stb_image_resize.h"
1314
#include <gli/gl.hpp>
1415
#include <gli/generate_mipmaps.hpp>
@@ -462,6 +463,93 @@ std::unique_ptr<image_c> r_tex_c::BuildMipSet(std::unique_ptr<image_c> img)
462463
return img;
463464
}
464465

466+
// Transcodes a BC7 texture array into BC3 or RGBA8, for use as a fallback on GPUs without BC7 support.
//
// src                     - texture to convert; returned unchanged unless its format is BC7
//                           (gli::FORMAT_RGBA_BP_UNORM_BLOCK16).
// dstFormat               - gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16 (BC3) or gli::FORMAT_RGBA8_UNORM_PACK8;
//                           for any other destination format src is returned unchanged.
// dropFinestMipIfPossible - when true and src has more than one mip level, source level 0 is discarded
//                           and destination levels 0..n-1 are generated from source levels 1..n.
//
// Returns the transcoded texture, or src itself when the format pair is not supported.
static gli::texture2d_array TranscodeTexture(gli::texture2d_array src, gli::format dstFormat, bool dropFinestMipIfPossible)
{
	// Very limited format support, only really sufficient as a fallback when BC7 isn't available.

	// Source formats: BC7
	const auto srcFormat = src.format();
	if (srcFormat != gli::FORMAT_RGBA_BP_UNORM_BLOCK16)
		return src;

	// Destination formats: BC3 or RGBA8
	const bool toBC3 = (dstFormat == gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16);
	const bool toRGBA8 = (dstFormat == gli::FORMAT_RGBA8_UNORM_PACK8);
	if (!toBC3 && !toRGBA8)
		return src;

	// To save VRAM and processing costs, there is the option to discard the finest mip level of the source
	// if there are coarser levels available.
	size_t firstLevel = 0;
	if (dropFinestMipIfPossible && src.levels() > 1)
		firstLevel = 1;

	const auto outExtent = src.extent(firstLevel);
	const auto outLayers = src.layers();
	const auto outLevels = src.levels() - firstLevel;

	gli::texture2d_array dst(dstFormat, outExtent, outLayers, outLevels);

	// Both block formats handled here use 4x4 texel blocks, decoded to 4 bytes (RGBA8) per texel.
	constexpr size_t kBlockDim = 4;
	constexpr size_t kTexelBytes = 4;
	constexpr size_t kBlockRowBytes = kBlockDim * kTexelBytes;

	const size_t srcBlockBytes = gli::block_size(srcFormat);
	const size_t dstBlockBytes = gli::block_size(dstFormat); // only meaningful for the BC3 path

	// Scratch storage for one decoded 4x4 RGBA8 block, rows stored consecutively.
	std::array<uint8_t, kBlockDim * kBlockDim * kTexelBytes> rgba{};

	for (size_t layer = 0; layer < outLayers; ++layer) {
		for (size_t dstLevel = 0; dstLevel < outLevels; ++dstLevel) {
			const size_t srcLevel = dstLevel + firstLevel;

			// Keep the start of each level around so the end-of-level sanity checks below compare against
			// the true level bounds rather than against the already-advanced cursors.
			const auto* const srcBegin = static_cast<const uint8_t*>(src.data(layer, 0, srcLevel));
			auto* const dstBegin = static_cast<uint8_t*>(dst.data(layer, 0, dstLevel));
			const uint8_t* srcCursor = srcBegin;
			uint8_t* dstCursor = dstBegin;

			const auto levelExtent = dst.extent(dstLevel);
			const size_t width = static_cast<size_t>(levelExtent.x);
			const size_t height = static_cast<size_t>(levelExtent.y);
			const size_t dstRowStride = width * kTexelBytes; // only meaningful for the RGBA8 path

			// Number of blocks along each axis, rounding up partial blocks.
			const size_t blockRows = (height + kBlockDim - 1) / kBlockDim;
			const size_t blockCols = (width + kBlockDim - 1) / kBlockDim;

			for (size_t blockRow = 0; blockRow < blockRows; ++blockRow) {
				const size_t rowBase = blockRow * kBlockDim;
				const size_t rowsLeft = (std::min)(kBlockDim, height - rowBase);

				for (size_t blockCol = 0; blockCol < blockCols; ++blockCol) {
					// Read source 4x4 texel block, no branching needed.
					DecompressBlockBC7(srcCursor, rgba.data());
					srcCursor += srcBlockBytes;

					// Recompress or distribute the 4x4 RGBA block.
					if (toBC3) {
						// The block order in the level data for BC3 is the same as for BC7, so blocks are
						// appended as they appear. The cursor alone tracks the write position; adding a
						// per-column offset on top of it would skip every other block slot.
						CompressBlockBC3(rgba.data(), static_cast<unsigned int>(kBlockRowBytes), dstCursor);
						dstCursor += dstBlockBytes;
					}
					else {
						// Compressed blocks unconditionally hold 4x4 texels, padded on the right and bottom
						// when the level extent isn't a multiple of the block size. The RGBA destination has
						// no such padding, so clip the copy to the destination edges.
						//
						// dstCursor points at the top-left pixel of the current block row in the destination.
						const size_t colBase = blockCol * kBlockDim;
						const size_t colsLeft = (std::min)(kBlockDim, width - colBase);
						const size_t colBytesLeft = colsLeft * kTexelBytes;
						for (size_t innerRow = 0; innerRow < rowsLeft; ++innerRow) {
							uint8_t* dstPtr = dstCursor + dstRowStride * innerRow + colBase * kTexelBytes;
							memcpy(dstPtr, rgba.data() + innerRow * kBlockRowBytes, colBytesLeft);
						}
					}
				}

				// For RGBA output the destination cursor only moves once a whole source block row is done.
				if (!toBC3)
					dstCursor += dstRowStride * rowsLeft;
			}

			// Both cursors should have consumed exactly one level's worth of data.
			// NOTE(review): assumes size(level) reports the byte size of a single layer's level - confirm against gli.
			assert(srcCursor == srcBegin + src.size(srcLevel));
			assert(dstCursor == dstBegin + dst.size(dstLevel));
		}
	}

	return dst;
}
552+
465553
void r_tex_c::LoadFile()
466554
{
467555
if (_stricmp(fileName.c_str(), "@white") == 0) {
@@ -492,6 +580,11 @@ void r_tex_c::LoadFile()
492580
};
493581
error = img->Load(path, sizeCallback);
494582
if ( !error ) {
583+
const bool useTextureFormatFallback = !renderer->texBC7;
584+
if (useTextureFormatFallback) {
585+
if (img->tex.format() == gli::FORMAT_RGBA_BP_UNORM_BLOCK16)
586+
img->tex = TranscodeTexture(img->tex, gli::FORMAT_RGBA8_UNORM_PACK8, true);
587+
}
495588
stackLayers = img->tex.layers();
496589
const bool is_async = !!(flags & TF_ASYNC);
497590
img = BuildMipSet(std::move(img));

0 commit comments

Comments
 (0)