@@ -815,7 +815,7 @@ template <> class transcoder<char32_t, char8> {
815
815
// / \param dest An output iterator to which the output sequence is written.
816
816
// / \returns Iterator one past the last element assigned.
817
817
template <ICUBABY_CONCEPT_OUTPUT_ITERATOR (output_type) OutputIterator>
818
- static constexpr OutputIterator write_continuation (unsigned const number, input_type const code_unit,
818
+ static constexpr OutputIterator write_continuation (std:: uint_least8_t const number, input_type const code_unit,
819
819
OutputIterator dest) {
820
820
if (number == 0U ) {
821
821
return dest;
@@ -827,6 +827,8 @@ template <> class transcoder<char32_t, char8> {
827
827
828
828
// / Writes a two CU value to the output.
829
829
// /
830
+ // / Code points in the range [U+80, U+800) are represented as two UTF-8 code units.
831
+ // /
830
832
// / \tparam OutputIterator An output iterator type to which values of type output_type can be written.
831
833
// / \param code_unit The code unit to be written.
832
834
// / \param dest An output iterator to which the output sequence is written.
@@ -835,10 +837,12 @@ template <> class transcoder<char32_t, char8> {
835
837
static OutputIterator write2 (input_type code_unit, OutputIterator dest) {
// The assertion below limits the input to 0x80..0x7FF. The first write stores the leading byte: code_unit shifted
// right by details::utf8_shift, OR-ed with byte_1_of_2. The rest of the value is handed to write_continuation(),
// which is called with number = 1.
// NOTE(review): details::utf8_shift, byte_1_of_2 and the body of write_continuation() are not visible in this
// hunk -- confirm their values/behavior against the full header.
836
838
assert (code_unit >= 0x80U && code_unit <= 0x7FFU && " Code point is out-of-range for 2 byte UTF-8" );
837
839
*(dest++) = static_cast <output_type> ((code_unit >> details::utf8_shift) | byte_1_of_2);
838
- return transcoder::write_continuation (1U , code_unit, dest);
840
+ return transcoder::write_continuation (std:: uint_least8_t { 1 } , code_unit, dest);
839
841
}
840
842
// / Writes a three CU value to the output.
841
843
// /
844
+ // / Code points in the range [U+800, U+10000) are represented as three UTF-8 code units.
845
+ // /
842
846
// / \tparam OutputIterator An output iterator type to which values of type output_type can be written.
843
847
// / \param code_unit The code unit to be written.
844
848
// / \param dest An output iterator to which the output sequence is written.
@@ -847,10 +851,12 @@ template <> class transcoder<char32_t, char8> {
847
851
static OutputIterator write3 (input_type code_unit, OutputIterator dest) {
// The assertion below limits the input to 0x800..0xFFFF. The first write stores the leading byte: code_unit
// shifted right by twice details::utf8_shift, OR-ed with byte_1_of_3. The rest of the value is handed to
// write_continuation(), which is called with number = 2.
// NOTE(review): details::utf8_shift, byte_1_of_3 and the body of write_continuation() are not visible in this
// hunk -- confirm their values/behavior against the full header.
848
852
assert (code_unit >= 0x800U && code_unit <= 0xFFFFU && " Code point is out-of-range for 3 byte UTF-8" );
849
853
*(dest++) = static_cast <output_type> ((code_unit >> (details::utf8_shift * 2U )) | byte_1_of_3);
850
- return transcoder::write_continuation (2U , code_unit, dest);
854
+ return transcoder::write_continuation (std:: uint_least8_t { 2 } , code_unit, dest);
851
855
}
852
856
// / Writes a four CU value to the output.
853
857
// /
858
+ // / Code points in the range [U+10000, U+10FFFF] are represented as four UTF-8 code units.
859
+ // /
854
860
// / \tparam OutputIterator An output iterator type to which values of type output_type can be written.
855
861
// / \param code_unit The code unit to be written.
856
862
// / \param dest An output iterator to which the output sequence is written.
@@ -859,7 +865,7 @@ template <> class transcoder<char32_t, char8> {
859
865
static OutputIterator write4 (input_type code_unit, OutputIterator dest) {
// The assertion below limits the input to 0x10000..0x10FFFF. The first write stores the leading byte: code_unit
// shifted right by three times details::utf8_shift, OR-ed with byte_1_of_4. The rest of the value is handed to
// write_continuation(), which is called with number = 3.
// NOTE(review): details::utf8_shift, byte_1_of_4 and the body of write_continuation() are not visible in this
// hunk -- confirm their values/behavior against the full header.
860
866
assert (code_unit >= 0x10000U && code_unit <= 0x10FFFFU && " Code point is out-of-range for 4 byte UTF-8" );
861
867
*(dest++) = static_cast <output_type> ((code_unit >> (details::utf8_shift * 3U )) | byte_1_of_4);
862
- return transcoder::write_continuation (3U , code_unit, dest);
868
+ return transcoder::write_continuation (std:: uint_least8_t { 3 } , code_unit, dest);
863
869
}
864
870
// / Writes U+FFFD REPLACEMENT CHAR to the output and records the input as not well formed.
865
871
// /
@@ -1212,7 +1218,7 @@ template <> class transcoder<char16_t, char32_t> {
1212
1218
};
1213
1219
1214
1220
// / \brief An enumeration representing the encoding detected by transcoder<std::byte, X>.
1215
- enum class encoding {
1221
+ enum class encoding : std:: uint_least8_t {
1216
1222
unknown, // /< No encoding has yet been determined.
1217
1223
utf8, // /< The detected encoding is UTF-8.
1218
1224
utf16be, // /< The detected encoding is big-endian UTF-16.
@@ -1616,9 +1622,9 @@ template <ICUBABY_CONCEPT_UNICODE_CHAR_TYPE ToEncoding> class transcoder<std::by
1616
1622
}
1617
1623
1618
1624
// / \brief Returns a byte from the byte order marker table which corresponds to a specific state as denoted by
1619
- // / \p state_byte and byte count \p byte_number.the
1625
+ // / \p state_byte and byte count \p byte_number.
1620
1626
// /
1621
- // / \param state A valid state machine state.
1627
+ // / \param state_byte A valid state machine state.
1622
1628
// / \param byte_number The index of the byte within the byte order marker.
1623
1629
// / \returns A byte from the byte order marker table.
1624
1630
[[nodiscard]] static constexpr std::byte bom_value (std::byte const state_byte,
@@ -1637,9 +1643,8 @@ template <ICUBABY_CONCEPT_UNICODE_CHAR_TYPE ToEncoding> class transcoder<std::by
1637
1643
return enc[byte_number];
1638
1644
}
1639
1645
// / \brief Returns a byte from the byte order marker table which corresponds to a specific state as denoted by
1640
- // / the current state and byte count \p byte_number.the
1646
+ // / the current state and byte count.
1641
1647
// /
1642
- // / \param byte_number The index of the byte within the byte order marker.
1643
1648
// / \returns A byte from the byte order marker table.
1644
1649
[[nodiscard]] constexpr std::byte bom_value () const noexcept {
1645
1650
return transcoder::bom_value (static_cast <std::byte> (state_), transcoder::get_byte_no (state_));
@@ -1793,16 +1798,17 @@ template <ICUBABY_CONCEPT_UNICODE_CHAR_TYPE ToEncoding> class transcoder<std::by
1793
1798
// / \param value An input byte
1794
1799
// / \returns A native-endian 16 bit value.
1795
1800
[[nodiscard]] constexpr char16_t char16_from_big_endian_buffer (input_type const value) const noexcept {
// Byte layout: buffer_[0] supplies bits 8..15 of the result and \p value supplies bits 0..7.
// The updated expression casts the shifted operand back to std::uint_least16_t before the bitwise OR. Where
// std::uint_least16_t is narrower than int, integral promotion makes the result of << an int, so the extra cast
// returns it to the 16-bit type.
// NOTE(review): presumably added to silence an implicit-conversion warning -- confirm against the commit message.
1796
- return static_cast <char16_t > ((static_cast <std::uint_least16_t > (buffer_[0 ]) << 8U ) |
1797
- static_cast <std::uint_least16_t > (value));
1801
+ return static_cast <char16_t > (
1802
+ static_cast <std::uint_least16_t > (static_cast <std::uint_least16_t > (buffer_[0 ]) << 8U ) |
1803
+ static_cast <std::uint_least16_t > (value));
1798
1804
}
1799
1805
// / \brief Produces a native-endian 16-bit value from little endian encoded input by combining the first entry in the
1800
1806
// / buffer_ array with \p value.
1801
1807
// /
1802
1808
// / \param value An input byte
1803
1809
// / \returns A native-endian 16 bit value.
1804
1810
[[nodiscard]] constexpr char16_t char16_from_little_endian_buffer (input_type const value) const noexcept {
// Byte layout: \p value supplies bits 8..15 of the result and buffer_[0] supplies bits 0..7 (the reverse of the
// big-endian variant).
// The updated expression casts the shifted operand back to std::uint_least16_t before the bitwise OR. Where
// std::uint_least16_t is narrower than int, integral promotion makes the result of << an int, so the extra cast
// returns it to the 16-bit type.
// NOTE(review): presumably added to silence an implicit-conversion warning -- confirm against the commit message.
1805
- return static_cast <char16_t > ((static_cast <std::uint_least16_t > (value) << 8U ) |
1811
+ return static_cast <char16_t > (static_cast <std:: uint_least16_t > (static_cast <std::uint_least16_t > (value) << 8U ) |
1806
1812
static_cast <std::uint_least16_t > (buffer_[0 ]));
1807
1813
}
1808
1814
// / \brief Produces a native-endian 32-bit value from big endian encoded input by combining the entries in the
@@ -1811,10 +1817,11 @@ template <ICUBABY_CONCEPT_UNICODE_CHAR_TYPE ToEncoding> class transcoder<std::by
1811
1817
// / \param value An input byte
1812
1818
// / \returns A native-endian 32 bit value.
1813
1819
[[nodiscard]] constexpr char32_t char32_from_big_endian_buffer (input_type const value) const noexcept {
// Byte layout: buffer_[0] supplies bits 24..31, buffer_[1] bits 16..23, buffer_[2] bits 8..15 and \p value
// bits 0..7 of the result.
// The updated expression wraps every shifted operand in an additional cast to std::uint_least32_t, mirroring the
// change made to the 16-bit helpers.
// NOTE(review): on targets where std::uint_least32_t is at least as wide as int the extra cast does not change
// the value; presumably it is there for consistency or to satisfy a conversion warning -- confirm.
1814
- return static_cast <char32_t > ((static_cast <std::uint_least32_t > (buffer_[0 ]) << 24U ) |
1815
- (static_cast <std::uint_least32_t > (buffer_[1 ]) << 16U ) |
1816
- (static_cast <std::uint_least32_t > (buffer_[2 ]) << 8U ) |
1817
- static_cast <std::uint_least32_t > (value));
1820
+ return static_cast <char32_t > (
1821
+ static_cast <std::uint_least32_t > (static_cast <std::uint_least32_t > (buffer_[0 ]) << 24U ) |
1822
+ static_cast <std::uint_least32_t > (static_cast <std::uint_least32_t > (buffer_[1 ]) << 16U ) |
1823
+ static_cast <std::uint_least32_t > (static_cast <std::uint_least32_t > (buffer_[2 ]) << 8U ) |
1824
+ static_cast <std::uint_least32_t > (value));
1818
1825
}
1819
1826
// / \brief Produces a native-endian 32-bit value from little endian encoded input by combining the entries in the
1820
1827
// / buffer_ array with \p value.
0 commit comments