From c87d8ec91f0e191dba6b744788167bb8d41d339e Mon Sep 17 00:00:00 2001 From: Adrian Kummerländer Date: Wed, 16 Apr 2014 19:53:00 +0200 Subject: Added static assert of std::string::value_type size * CodepointIterator only supports UTF-8 encoded single-byte input strings ** this should prevent CodepointIterator from compiling on systems with larger char sizes while providing a helpful error message * improved const-correctness by marking currByte (iterator dereferencing cache) and helper method arguments as const --- src/codepoint_iterator.cc | 10 +++++++--- src/codepoint_iterator.h | 5 +++++ src/utility.h | 8 ++++---- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/codepoint_iterator.cc b/src/codepoint_iterator.cc index 59cb23b..b96423a 100644 --- a/src/codepoint_iterator.cc +++ b/src/codepoint_iterator.cc @@ -40,8 +40,10 @@ std::ptrdiff_t CodepointIterator::operator-( } char32_t CodepointIterator::operator*() { - std::uint8_t currByte = *(this->iterator_); - char32_t codePoint = 0; + const std::uint8_t currByte( + static_cast<std::uint8_t>(*(this->iterator_)) + ); + char32_t codePoint{}; if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) { if ( match(currByte, dtl::CodeUnitType::THREE) ) { @@ -94,7 +96,9 @@ char32_t CodepointIterator::operator*() { } CodepointIterator& CodepointIterator::operator++() { - std::uint8_t currByte(*(this->iterator_)); + const std::uint8_t currByte( + static_cast<std::uint8_t>(*(this->iterator_)) + ); std::string::difference_type offset(1); if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) { diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h index d1806e5..77600b7 100644 --- a/src/codepoint_iterator.h +++ b/src/codepoint_iterator.h @@ -10,6 +10,11 @@ namespace UTF8 { class CodepointIterator : public std::iterator { + static_assert( + sizeof(std::string::value_type) == 1, + "CodepointIterator only supports single-byte UTF-8 encoded input" + ); + public: CodepointIterator(std::string::const_iterator); 
CodepointIterator(const CodepointIterator&); diff --git a/src/utility.h b/src/utility.h index 8aa46a0..75811f5 100644 --- a/src/utility.h +++ b/src/utility.h @@ -20,14 +20,14 @@ enum class CodePoint : std::uint8_t { FOUR = (UINT8_MAX >> 5), // 00000111 }; -inline bool match(std::uint8_t unit, CodeUnitType type) { +inline bool match(const std::uint8_t unit, const CodeUnitType type) { return unit & static_cast<std::uint8_t>(type); } inline void write(char32_t& point, - std::uint8_t unit, - CodePoint mask, - std::uint8_t offset) { + const std::uint8_t unit, + const CodePoint mask, + const std::uint8_t offset) { point += (unit & static_cast<std::uint8_t>(mask)) << offset; } -- cgit v1.2.3