Skip to content

Commit 4e33434

Browse files
committed
Merge branch 'main-dev' of https://github.com/ashvardanian/StringZilla into main-dev
2 parents fb55d54 + 1891dbf commit 4e33434

File tree

3 files changed

+24
-4
lines changed

3 files changed

+24
-4
lines changed

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,16 @@ if(${STRINGZILLA_BUILD_SHARED})
327327
"SZ_USE_ARM_NEON=1"
328328
"SZ_USE_ARM_SVE=1")
329329
endif()
330+
331+
if (MSVC)
332+
# Add dependencies for necessary runtime libraries in case of static linking
333+
# This ensures that basic runtime functions are available:
334+
# msvcrt.lib: CRT startup code, used together with the DLL versions of the UCRT and vcruntime (the `/MD` runtime).
335+
# vcruntime.lib: import library for the Visual C++ runtime DLL (compiler support code, e.g. exception handling).
336+
# ucrt.lib: import library for the Universal C Runtime DLL, which provides the standard C library functions like I/O.
337+
target_link_libraries(${target} PRIVATE msvcrt.lib vcruntime.lib ucrt.lib)
338+
endif()
339+
330340
endfunction()
331341

332342
define_shared(stringzilla_shared)
@@ -344,4 +354,6 @@ if(${STRINGZILLA_BUILD_SHARED})
344354
"$<$<CXX_COMPILER_ID:MSVC>:/Oi-;/GS->")
345355
target_link_options(stringzillite PRIVATE "$<$<CXX_COMPILER_ID:GNU,Clang>:-nostdlib>")
346356
target_link_options(stringzillite PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/NODEFAULTLIB>")
357+
358+
347359
endif()

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ __Who is this for?__
171171
<span style="color:#ABABAB;">arm:</span> <b>9.4</b> MB/s
172172
</td>
173173
<td align="center">
174-
<code>uniform_int_distribution</code><br/>
174+
<code>std::uniform_int_distribution</code><br/>
175175
<span style="color:#ABABAB;">x86:</span> <b>47.2</b> &centerdot;
176176
<span style="color:#ABABAB;">arm:</span> <b>20.4</b> MB/s
177177
</td>
@@ -193,7 +193,7 @@ __Who is this for?__
193193
<tr>
194194
<td align="center">⚪</td>
195195
<td align="center">
196-
<code>transform</code><br/>
196+
<code>std::transform</code><br/>
197197
<span style="color:#ABABAB;">x86:</span> <b>3.81</b> &centerdot;
198198
<span style="color:#ABABAB;">arm:</span> <b>2.65</b> GB/s
199199
</td>

include/stringzilla/stringzilla.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5323,8 +5323,16 @@ SZ_PUBLIC void sz_look_up_transform_avx512(sz_cptr_t source, sz_size_t length, s
53235323
// operate on 4 registers, it might be cleaner to use 4x separate `_mm512_permutexvar_epi8` calls.
53245324
// The results are then combined with 2x `_mm512_test_epi8_mask` and 3x blends.
53255325
//
5326-
// - `_mm512_mask_blend_epi8` - 1 cycle latency, and generally 2x can run in parallel.
5327-
// - `_mm512_test_epi8_mask` - 3 cycles latency, same as most comparison functions in AVX-512.
5326+
// - 4x `_mm512_permutexvar_epi8` maps to "VPERMB (ZMM, ZMM, ZMM)":
5327+
// - On Ice Lake: 3 cycles latency, ports: 1*p5
5328+
// - On Genoa: 6 cycles latency, ports: 1*FP12
5329+
// - 3x `_mm512_mask_blend_epi8` maps to "VPBLENDMB_Z (ZMM, K, ZMM, ZMM)":
5330+
// - On Ice Lake: 3 cycles latency, ports: 1*p05
5331+
// - On Genoa: 1 cycle latency, ports: 1*FP0123
5332+
// - 2x `_mm512_test_epi8_mask` maps to "VPTESTMB (K, ZMM, ZMM)":
5333+
// - On Ice Lake: 3 cycles latency, ports: 1*p5
5334+
// - On Genoa: 4 cycles latency, ports: 1*FP01
5335+
//
53285336
sz_u512_vec_t lut_0_to_63_vec, lut_64_to_127_vec, lut_128_to_191_vec, lut_192_to_255_vec;
53295337
lut_0_to_63_vec.zmm = _mm512_loadu_si512((lut));
53305338
lut_64_to_127_vec.zmm = _mm512_loadu_si512((lut + 64));

0 commit comments

Comments
 (0)