Skip to content

Commit 19e6998

Browse files
committed
Merge branch 'main-dev' of https://github.com/ashvardanian/StringZilla into main-dev
2 parents fb55d54 + 1891dbf commit 19e6998

File tree

3 files changed

+17
-4
lines changed

3 files changed

+17
-4
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,11 @@ if(${STRINGZILLA_BUILD_SHARED})
338338
target_compile_definitions(stringzillite PRIVATE "SZ_AVOID_LIBC=1")
339339
target_compile_definitions(stringzillite PRIVATE "SZ_OVERRIDE_LIBC=1")
340340

341+
if (MSVC)
342+
target_link_libraries(stringzilla_shared PRIVATE msvcrt.lib)
343+
target_link_libraries(stringzillite PRIVATE msvcrt.lib)
344+
endif()
345+
341346
# Avoid built-ins on MSVC and other compilers, as that will cause compilation errors
342347
target_compile_options(stringzillite PRIVATE
343348
"$<$<CXX_COMPILER_ID:GNU,Clang>:-fno-builtin;-nostdlib>"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ __Who is this for?__
171171
<span style="color:#ABABAB;">arm:</span> <b>9.4</b> MB/s
172172
</td>
173173
<td align="center">
174-
<code>uniform_int_distribution</code><br/>
174+
<code>std::uniform_int_distribution</code><br/>
175175
<span style="color:#ABABAB;">x86:</span> <b>47.2</b> &centerdot;
176176
<span style="color:#ABABAB;">arm:</span> <b>20.4</b> MB/s
177177
</td>
@@ -193,7 +193,7 @@ __Who is this for?__
193193
<tr>
194194
<td align="center">⚪</td>
195195
<td align="center">
196-
<code>transform</code><br/>
196+
<code>std::transform</code><br/>
197197
<span style="color:#ABABAB;">x86:</span> <b>3.81</b> &centerdot;
198198
<span style="color:#ABABAB;">arm:</span> <b>2.65</b> GB/s
199199
</td>

include/stringzilla/stringzilla.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5323,8 +5323,16 @@ SZ_PUBLIC void sz_look_up_transform_avx512(sz_cptr_t source, sz_size_t length, s
53235323
// operate on 4 registers, it might be cleaner to use 2x separate `_mm512_permutexvar_epi8` calls.
53245324
// Combining the results with 2x `_mm512_test_epi8_mask` and 3x blends afterwards.
53255325
//
5326-
// - `_mm512_mask_blend_epi8` - 1 cycle latency, and generally 2x can run in parallel.
5327-
// - `_mm512_test_epi8_mask` - 3 cycles latency, same as most comparison functions in AVX-512.
5326+
// - 4x `_mm512_permutexvar_epi8` maps to "VPERMB (ZMM, ZMM, ZMM)":
5327+
// - On Ice Lake: 3 cycles latency, ports: 1*p5
5328+
// - On Genoa: 6 cycles latency, ports: 1*FP12
5329+
// - 3x `_mm512_mask_blend_epi8` maps to "VPBLENDMB_Z (ZMM, K, ZMM, ZMM)":
5330+
// - On Ice Lake: 3 cycles latency, ports: 1*p05
5331+
// - On Genoa: 1 cycle latency, ports: 1*FP0123
5332+
// - 2x `_mm512_test_epi8_mask` maps to "VPTESTMB (K, ZMM, ZMM)":
5333+
// - On Ice Lake: 3 cycles latency, ports: 1*p5
5334+
// - On Genoa: 4 cycles latency, ports: 1*FP01
5335+
//
53285336
sz_u512_vec_t lut_0_to_63_vec, lut_64_to_127_vec, lut_128_to_191_vec, lut_192_to_255_vec;
53295337
lut_0_to_63_vec.zmm = _mm512_loadu_si512((lut));
53305338
lut_64_to_127_vec.zmm = _mm512_loadu_si512((lut + 64));

0 commit comments

Comments
 (0)