Skip to content

Commit 9897f8a

Browse files
authored
Add FAST_IO_HAS_BUILTIN, fix win9x mutex, fix use of SSE2 instructions in SSE-only builds (#1149)
* FAST_IO_HAS_BUILTIN, fix win9x mutex, fix use sse2 ins in sse * rm * fix
1 parent b9c6f26 commit 9897f8a

File tree

51 files changed

+257
-467
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+257
-467
lines changed

include/fast_io_core_impl/allocation/c_malloc.h

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,8 @@ class c_malloc_allocator
3939
n = 1;
4040
}
4141
void *p =
42-
#if defined(__has_builtin)
43-
#if __has_builtin(__builtin_malloc)
42+
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
4443
__builtin_malloc(n)
45-
#else
46-
::std::malloc(n)
47-
#endif
4844
#else
4945
::std::malloc(n)
5046
#endif
@@ -66,12 +62,8 @@ class c_malloc_allocator
6662
}
6763
::std::size_t const to_allocate{n};
6864
p =
69-
#if defined(__has_builtin)
70-
#if __has_builtin(__builtin_realloc)
65+
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
7166
__builtin_realloc
72-
#else
73-
::std::realloc
74-
#endif
7567
#else
7668
::std::realloc
7769
#endif
@@ -93,12 +85,8 @@ class c_malloc_allocator
9385
n = 1;
9486
}
9587
void *p =
96-
#if defined(__has_builtin)
97-
#if __has_builtin(__builtin_calloc)
88+
#if FAST_IO_HAS_BUILTIN(__builtin_calloc)
9889
__builtin_calloc
99-
#else
100-
::std::calloc
101-
#endif
10290
#else
10391
::std::calloc
10492
#endif
@@ -142,12 +130,8 @@ class c_malloc_allocator
142130
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
143131
{
144132
p =
145-
#if defined(__has_builtin)
146-
#if __has_builtin(__builtin_malloc)
133+
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
147134
__builtin_malloc
148-
#else
149-
::std::malloc
150-
#endif
151135
#else
152136
::std::malloc
153137
#endif
@@ -175,12 +159,8 @@ class c_malloc_allocator
175159
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
176160
{
177161
p =
178-
#if defined(__has_builtin)
179-
#if __has_builtin(__builtin_realloc)
162+
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
180163
__builtin_realloc
181-
#else
182-
::std::realloc
183-
#endif
184164
#else
185165
::std::realloc
186166
#endif
@@ -205,12 +185,8 @@ class c_malloc_allocator
205185
}
206186
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
207187
{
208-
#if defined(__has_builtin)
209-
#if __has_builtin(__builtin_free)
188+
#if FAST_IO_HAS_BUILTIN(__builtin_free)
210189
__builtin_free
211-
#else
212-
::std::free
213-
#endif
214190
#else
215191
::std::free
216192
#endif
@@ -228,12 +204,8 @@ class c_malloc_allocator
228204
{
229205
return;
230206
}
231-
#if defined(__has_builtin)
232-
#if __has_builtin(__builtin_free)
233-
__builtin_free
234-
#else
235-
::std::free
236-
#endif
207+
#if FAST_IO_HAS_BUILTIN(__builtin_free)
208+
__builtin_free
237209
#else
238210
::std::free
239211
#endif

include/fast_io_core_impl/allocation/nt_preliminary_definition.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ struct gdi_teb_batch
9696
::std::uint_least32_t Buffer[310];
9797
};
9898

99+
// NOLINTBEGIN(*-optin.performance.Padding)
99100
struct teb
100101
{
101102
nt_tib NtTib;
@@ -265,6 +266,7 @@ struct teb
265266
::std::uint_least32_t SpinCallCount;
266267
::std::uint_least64_t ExtendedFeatureDisableMask;
267268
};
269+
// NOLINTEND(*-optin.performance.Padding)
268270

269271
FAST_IO_DLLIMPORT FAST_IO_GNU_MALLOC void *FAST_IO_WINSTDCALL RtlAllocateHeap(void *, ::std::uint_least32_t, ::std::size_t) noexcept FAST_IO_WINSTDCALL_RENAME(RtlAllocateHeap, 12);
270272
FAST_IO_DLLIMPORT char unsigned FAST_IO_WINSTDCALL RtlFreeHeap(void *, ::std::uint_least32_t, void *) noexcept FAST_IO_WINSTDCALL_RENAME(RtlFreeHeap, 12);

include/fast_io_core_impl/codecvt/general.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
193193
else
194194
{
195195
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || \
196-
(defined(__SSE__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
196+
(defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
197197
if constexpr (src_encoding != encoding_scheme::utf_ebcdic && encoding != encoding_scheme::utf_ebcdic &&
198198
1 == sizeof(src_char_type) && (1 == sizeof(dest_char_type) || encoding_is_utf(encoding)))
199199
{
@@ -221,7 +221,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
221221
}
222222
else
223223
{
224-
dst += get_utf_code_units<encoding>(code, dst);
224+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
225225
}
226226
}
227227
else
@@ -235,7 +235,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
235235
}
236236
else
237237
{
238-
dst += get_utf_code_units<encoding>(code, dst);
238+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
239239
}
240240
}
241241
}
@@ -265,7 +265,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
265265
}
266266
else
267267
{
268-
dst += get_utf_code_units<encoding>(code, dst);
268+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
269269
}
270270
}
271271
else
@@ -309,7 +309,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
309309
}
310310
else
311311
{
312-
dst += get_utf_code_units<encoding>(code, dst);
312+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
313313
}
314314
}
315315
else
@@ -331,7 +331,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
331331
}
332332
else
333333
{
334-
dst += get_utf_code_units<encoding>(code, dst);
334+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
335335
}
336336
}
337337
}
@@ -468,7 +468,7 @@ inline constexpr dest_char_type *general_code_cvt(state_type &__restrict state,
468468
}
469469
else
470470
{
471-
dst += get_utf_code_units<encoding>(code, dst);
471+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
472472
}
473473
src_first += static_cast<::std::size_t>(bytes_src - bytes - state_size);
474474
}
@@ -492,7 +492,7 @@ inline constexpr dest_char_type *general_code_cvt(state_type &__restrict state,
492492
}
493493
else
494494
{
495-
dst += get_utf_code_units<encoding>(code, dst);
495+
dst += get_utf_code_units<encoding>(static_cast<char32_t>(code), dst);
496496
}
497497
src_first += static_cast<::std::size_t>(static_cast<::std::size_t>(adv) - state_size);
498498
}

include/fast_io_core_impl/codecvt/utf.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ inline constexpr char32_t utf16_surrogate_to_utf32(char16_t high, char16_t low)
498498
return static_cast<char32_t>((static_cast<::std::uint_least32_t>(high) << 10u) + low - 0x35fdc00u);
499499
}
500500

501-
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__x86_64__))
501+
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__))
502502
template <::std::integral T, ::std::integral U>
503503
requires((sizeof(T) == 1) && (sizeof(U) == 1 || sizeof(U) == 2 || sizeof(U) == 4))
504504
inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U *__restrict pDst) noexcept
@@ -520,7 +520,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
520520
x86_64_v16qi chunk;
521521
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
522522
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
523-
#if __has_builtin(__builtin_shufflevector)
523+
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
524524
x86_64_v16qi half{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5,
525525
16 + 5, 6, 16 + 6, 7, 16 + 7)};
526526
__builtin_memcpy(pDst, __builtin_addressof(half), m128i_size);
@@ -539,7 +539,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
539539
x86_64_v16qi chunk;
540540
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
541541
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
542-
#if __has_builtin(__builtin_shufflevector)
542+
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
543543
x86_64_v16qi half_result{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
544544
16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)};
545545
x86_64_v8hi half;

include/fast_io_core_impl/freestanding/algorithm.h

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -243,43 +243,31 @@ namespace fast_io::freestanding
243243
{
244244

245245
inline
246-
#if defined(__has_builtin)
247-
#if __has_builtin(__builtin_memcpy)
246+
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
248247
constexpr
249-
#endif
250248
#endif
251249
void *
252250
my_memcpy(void *dest, void const *src, ::std::size_t count) noexcept
253251
{
254252
return
255-
#if defined(__has_builtin)
256-
#if __has_builtin(__builtin_memcpy)
253+
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
257254
__builtin_memcpy
258-
#else
259-
::std::memcpy
260-
#endif
261255
#else
262256
::std::memcpy
263257
#endif
264258
(dest, src, count);
265259
}
266260

267261
inline
268-
#if defined(__has_builtin)
269-
#if __has_builtin(__builtin_memmove)
262+
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
270263
constexpr
271-
#endif
272264
#endif
273265
void *
274266
my_memmove(void *dest, void const *src, ::std::size_t count) noexcept
275267
{
276268
return
277-
#if defined(__has_builtin)
278-
#if __has_builtin(__builtin_memmove)
269+
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
279270
__builtin_memmove
280-
#else
281-
::std::memmove
282-
#endif
283271
#else
284272
::std::memmove
285273
#endif
@@ -289,34 +277,24 @@ inline
289277
inline void *my_memset(void *dest, int ch, ::std::size_t count) noexcept
290278
{
291279
return
292-
#if defined(__has_builtin)
293-
#if __has_builtin(__builtin_memset)
280+
#if FAST_IO_HAS_BUILTIN(__builtin_memset)
294281
__builtin_memset
295-
#else
296-
::std::memset
297-
#endif
298282
#else
299283
::std::memset
300284
#endif
301285
(dest, ch, count);
302286
}
303287

304288
inline
305-
#if defined(__has_builtin)
306-
#if __has_builtin(__builtin_memcmp)
289+
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
307290
constexpr
308-
#endif
309291
#endif
310292
int
311293
my_memcmp(void const *dest, void const *src, ::std::size_t count) noexcept
312294
{
313295
return
314-
#if defined(__has_builtin)
315-
#if __has_builtin(__builtin_memcmp)
296+
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
316297
__builtin_memcmp
317-
#else
318-
::std::memcmp
319-
#endif
320298
#else
321299
::std::memcmp
322300
#endif

include/fast_io_core_impl/freestanding/allocator.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ struct allocator
1313
{
1414
__builtin_trap();
1515
}
16-
#if __has_builtin(__builtin_operator_new)
17-
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
16+
#if FAST_IO_HAS_BUILTIN(__builtin_operator_new)
17+
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
1818
#else
1919
__builtin_trap();
2020
return nullptr;
2121
#endif
2222
}
2323
inline constexpr void deallocate([[maybe_unused]] T *ptr, [[maybe_unused]] ::std::size_t n) noexcept
2424
{
25-
#if __has_builtin(__builtin_operator_delete)
26-
__builtin_operator_delete(ptr, sizeof(T) * n);
25+
#if FAST_IO_HAS_BUILTIN(__builtin_operator_delete)
26+
__builtin_operator_delete(ptr, sizeof(T) * n);
2727
#endif
2828
}
2929
};

0 commit comments

Comments
 (0)