 #include <atomic>
 #include "r_local.h"

+#include "cmp_core.h"
 #include "stb_image_resize.h"
 #include <gli/gl.hpp>
 #include <gli/generate_mipmaps.hpp>
@@ -462,6 +463,93 @@ std::unique_ptr<image_c> r_tex_c::BuildMipSet(std::unique_ptr<image_c> img)
     return img;
 }

+static gli::texture2d_array TranscodeTexture(gli::texture2d_array src, gli::format dstFormat, bool dropFinestMipIfPossible)
+{
+    // Very limited format support; only really sufficient as a fallback when BC7 isn't available.
+
+    // Source formats: BC7
+    const auto srcFormat = src.format();
+    if (srcFormat != gli::FORMAT_RGBA_BP_UNORM_BLOCK16)
+        return src;
+
+    // Destination formats: BC3 or RGBA8
+    if (dstFormat != gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16 && dstFormat != gli::FORMAT_RGBA8_UNORM_PACK8)
+        return src;
+
+    // To save VRAM and processing costs, the caller can opt to discard the finest mip level of the source when coarser levels are available.
+    // If so, the transcoding generates destination levels 0..n-1 from levels 1..n of the source.
+    size_t firstLevel = 0;
+    if (dropFinestMipIfPossible && src.levels() > 1)
+        firstLevel = 1;
+
+    const auto outExtent = src.extent(firstLevel);
+    const auto outLayers = src.layers();
+    const auto outLevels = src.levels() - firstLevel;
+
+    gli::texture2d_array dst(dstFormat, outExtent, outLayers, outLevels);
+
+    std::array<uint8_t, 64> rgba{}; // one decoded 4x4 RGBA8 block
+    for (size_t layer = 0; layer < outLayers; ++layer) {
+        for (size_t dstLevel = 0; dstLevel < outLevels; ++dstLevel) {
+            auto* dstData = (uint8_t*)dst.data(layer, 0, dstLevel);
+            const auto dstExtent = dst.extent(dstLevel);
+            const auto dstRowStride = dstExtent.x * 4;
+
+            const size_t srcLevel = dstLevel + firstLevel;
+            const auto* srcData = (const uint8_t*)src.data(layer, 0, srcLevel);
+
+            const auto srcBlockSize = gli::block_extent(srcFormat);
+            const auto srcBlocksPerRow = (dstExtent.y + srcBlockSize.y - 1) / srcBlockSize.y; // round up partial blocks
+            const auto srcBlocksPerColumn = (dstExtent.x + srcBlockSize.x - 1) / srcBlockSize.x; // -''-
+
+            for (size_t blockRow = 0; blockRow < srcBlocksPerRow; ++blockRow) {
+                const size_t rowBase = blockRow * srcBlockSize.y;
+                const size_t rowsLeft = (std::min)(size_t(4), dstExtent.y - rowBase);
+
+                for (size_t blockCol = 0; blockCol < srcBlocksPerColumn; ++blockCol) {
+                    // Decompress the source 4x4 texel block; the source is always BC7 here, so no branching is needed.
+                    DecompressBlockBC7(srcData, rgba.data());
+
+                    // Recompress or distribute the 4x4 RGBA block.
+                    if (dstFormat == gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16) {
+                        // The block order in the level data for BC3 is the same as for BC7, so we can just append blocks
+                        // as they appear, advancing the storage write pointer as we go.
+                        CompressBlockBC3(rgba.data(), 16, dstData);
+                        dstData += gli::block_size(dstFormat);
+                    }
+                    else if (dstFormat == gli::FORMAT_RGBA8_UNORM_PACK8) {
+                        // Compressed blocks always cover 4x4 texels, even when the level extent isn't evenly divisible into
+                        // blocks; the excess on the right and bottom of a block is padding. When copying into RGBA storage,
+                        // which has no such padding, we must take care not to write past the edges of the destination.
+
+                        // dstData points at the top-left texel of the current block row in the destination.
+                        const size_t colBase = blockCol * srcBlockSize.x;
+                        const size_t colsLeft = (std::min)(size_t(4), dstExtent.x - colBase);
+                        const size_t colBytesLeft = colsLeft * 4;
+                        for (size_t innerRow = 0; innerRow < rowsLeft; ++innerRow) {
+                            auto* dstPtr = dstData + dstRowStride * innerRow + colBase * 4;
+                            memcpy(dstPtr, rgba.data() + innerRow * 16, colBytesLeft);
+                        }
+                        // Note that dstData is only advanced at the end of the source block row, to keep the copy logic easier to follow.
+                    }
+                    srcData += gli::block_size(srcFormat);
+                }
+
+                // Advance the destination pointer only at the end of a source block row when writing to RGBA output.
+                if (!gli::is_compressed(dstFormat))
+                    dstData += dstRowStride * rowsLeft;
+            }
+
+            // Both pointers should have walked exactly over their level's storage.
+            assert(srcData == (const uint8_t*)src.data(layer, 0, srcLevel) + src.size(srcLevel));
+            assert(dstData == (const uint8_t*)dst.data(layer, 0, dstLevel) + dst.size(dstLevel));
+        }
+    }
+
+    return dst;
+}
+
 void r_tex_c::LoadFile()
 {
     if (_stricmp(fileName.c_str(), "@white") == 0) {
@@ -492,6 +580,11 @@ void r_tex_c::LoadFile()
     };
     error = img->Load(path, sizeCallback);
     if ( !error ) {
+        // If the renderer lacks BC7 texture support, transcode BC7 sources to plain RGBA8 on the CPU,
+        // dropping the finest mip to save memory.
+        const bool useTextureFormatFallback = !renderer->texBC7;
+        if (useTextureFormatFallback) {
+            if (img->tex.format() == gli::FORMAT_RGBA_BP_UNORM_BLOCK16)
+                img->tex = TranscodeTexture(img->tex, gli::FORMAT_RGBA8_UNORM_PACK8, true);
+        }
         stackLayers = img->tex.layers();
         const bool is_async = !!(flags & TF_ASYNC);
         img = BuildMipSet(std::move(img));
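
TranscodeTexture also accepts BC3 (gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16) as a destination format, which this commit doesn't wire up at the call site. As a hedged sketch only: a renderer that exposes a BC3/DXT capability flag (the texBC3 member below is hypothetical and not part of this diff) could prefer recompression over raw RGBA8, trading some CPU time for a quarter of the memory:

    // Hypothetical variant of the fallback above; renderer->texBC3 is an assumed capability flag.
    if (!renderer->texBC7 && img->tex.format() == gli::FORMAT_RGBA_BP_UNORM_BLOCK16) {
        const gli::format fallbackFormat = renderer->texBC3
            ? gli::FORMAT_RGBA_DXT5_UNORM_BLOCK16  // recompress on the CPU, 1 byte per texel
            : gli::FORMAT_RGBA8_UNORM_PACK8;       // decode to raw RGBA8 as a last resort, 4 bytes per texel
        img->tex = TranscodeTexture(img->tex, fallbackFormat, true);
    }

Either way, passing true as the last argument drops the finest mip when coarser levels exist, so the uploaded texture starts one level smaller, matching the VRAM-saving intent stated in TranscodeTexture.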
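
For readers puzzling over the edge handling inside TranscodeTexture: block-compressed levels are always stored as whole 4x4 blocks, so a level whose extent isn't a multiple of 4 carries padding texels on the right and bottom edges that must not be copied into the tightly packed RGBA8 destination. The following self-contained sketch (not part of the change) walks the same block/clamp arithmetic for an arbitrary 6x3 level extent:

    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const int width = 6, height = 3;   // arbitrary example extent, not a multiple of 4
        const int blockDim = 4;            // BC7/BC3 blocks always cover 4x4 texels

        // Round up so partial blocks at the right/bottom edges are still counted.
        const int blockRows = (height + blockDim - 1) / blockDim;  // 1
        const int blockCols = (width + blockDim - 1) / blockDim;   // 2

        for (int blockRow = 0; blockRow < blockRows; ++blockRow) {
            const int rowBase = blockRow * blockDim;
            const int rowsLeft = std::min(blockDim, height - rowBase);  // clamp at the bottom edge
            for (int blockCol = 0; blockCol < blockCols; ++blockCol) {
                const int colBase = blockCol * blockDim;
                const int colsLeft = std::min(blockDim, width - colBase);  // clamp at the right edge
                std::printf("block (%d,%d): copy %dx%d texels starting at (%d,%d)\n",
                            blockCol, blockRow, colsLeft, rowsLeft, colBase, rowBase);
            }
        }
        return 0;
    }

Running it shows the first block contributing 4x3 texels and the second only 2x3, which is exactly the rowsLeft/colsLeft clamping that the memcpy loop in TranscodeTexture performs.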