diff options
Added static assert of std::string::value_type size
* CodepointIterator only supports UTF-8 encoded input strings whose code units (std::string::value_type) are a single byte wide
** the static assert should prevent CodepointIterator from compiling on systems with larger char sizes while providing a helpful error message
* improved const-correctness by marking currByte (iterator dereferencing cache) and helper method arguments as const
-rw-r--r-- | src/codepoint_iterator.cc | 10 | ||||
-rw-r--r-- | src/codepoint_iterator.h | 5 | ||||
-rw-r--r-- | src/utility.h | 8 |
3 files changed, 16 insertions, 7 deletions
diff --git a/src/codepoint_iterator.cc b/src/codepoint_iterator.cc
index 59cb23b..b96423a 100644
--- a/src/codepoint_iterator.cc
+++ b/src/codepoint_iterator.cc
@@ -40,8 +40,10 @@ std::ptrdiff_t CodepointIterator::operator-(
 }
 
 char32_t CodepointIterator::operator*() {
-	std::uint8_t currByte = *(this->iterator_);
-	char32_t codePoint = 0;
+	const std::uint8_t currByte(
+		static_cast<std::uint8_t>(*(this->iterator_))
+	);
+	char32_t codePoint{};
 
 	if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
 		if ( match(currByte, dtl::CodeUnitType::THREE) ) {
@@ -94,7 +96,9 @@ char32_t CodepointIterator::operator*() {
 }
 
 CodepointIterator& CodepointIterator::operator++() {
-	std::uint8_t currByte(*(this->iterator_));
+	const std::uint8_t currByte(
+		static_cast<std::uint8_t>(*(this->iterator_))
+	);
 	std::string::difference_type offset(1);
 
 	if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h
index d1806e5..77600b7 100644
--- a/src/codepoint_iterator.h
+++ b/src/codepoint_iterator.h
@@ -10,6 +10,11 @@ namespace UTF8 {
 class CodepointIterator : public std::iterator<std::bidirectional_iterator_tag,
                                                char32_t,
                                                std::string::difference_type> {
+	static_assert(
+		sizeof(std::string::value_type) == 1,
+		"CodepointIterator only supports single-byte UTF-8 encoded input"
+	);
+
 	public:
 		CodepointIterator(std::string::const_iterator);
 		CodepointIterator(const CodepointIterator&);
diff --git a/src/utility.h b/src/utility.h
index 8aa46a0..75811f5 100644
--- a/src/utility.h
+++ b/src/utility.h
@@ -20,14 +20,14 @@ enum class CodePoint : std::uint8_t {
 	FOUR = (UINT8_MAX >> 5), // 00000111
 };
 
-inline bool match(std::uint8_t unit, CodeUnitType type) {
+inline bool match(const std::uint8_t unit, const CodeUnitType type) {
 	return unit & static_cast<std::uint8_t>(type);
 }
 
 inline void write(char32_t& point,
-                  std::uint8_t unit,
-                  CodePoint mask,
-                  std::uint8_t offset) {
+                  const std::uint8_t unit,
+                  const CodePoint mask,
+                  const std::uint8_t offset) {
 	point += (unit & static_cast<std::uint8_t>(mask)) << offset;
 }
 