aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Kummerländer 2014-04-16 19:53:00 +0200
committer Adrian Kummerländer 2014-04-16 19:53:00 +0200
commit c87d8ec91f0e191dba6b744788167bb8d41d339e (patch)
tree 5428a69adf1e035e0fe89a164445e0bc1c8e663e
parent 097c27f4106c0f5cae0fe284e1d322ed728b6d96 (diff)
downloadCodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar.gz
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar.bz2
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar.lz
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar.xz
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.tar.zst
CodepointIterator-c87d8ec91f0e191dba6b744788167bb8d41d339e.zip
Added static assert of std::string::value_type size
* CodepointIterator only supports UTF-8 encoded input strings with single-byte code units
** this should prevent CodepointIterator from compiling on systems with larger char sizes while providing a helpful error message
* improved const-correctness by marking currByte (iterator dereferencing cache) and helper method arguments as const
-rw-r--r--src/codepoint_iterator.cc10
-rw-r--r--src/codepoint_iterator.h5
-rw-r--r--src/utility.h8
3 files changed, 16 insertions, 7 deletions
diff --git a/src/codepoint_iterator.cc b/src/codepoint_iterator.cc
index 59cb23b..b96423a 100644
--- a/src/codepoint_iterator.cc
+++ b/src/codepoint_iterator.cc
@@ -40,8 +40,10 @@ std::ptrdiff_t CodepointIterator::operator-(
}
char32_t CodepointIterator::operator*() {
- std::uint8_t currByte = *(this->iterator_);
- char32_t codePoint = 0;
+ const std::uint8_t currByte(
+ static_cast<std::uint8_t>(*(this->iterator_))
+ );
+ char32_t codePoint{};
if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
if ( match(currByte, dtl::CodeUnitType::THREE) ) {
@@ -94,7 +96,9 @@ char32_t CodepointIterator::operator*() {
}
CodepointIterator& CodepointIterator::operator++() {
- std::uint8_t currByte(*(this->iterator_));
+ const std::uint8_t currByte(
+ static_cast<std::uint8_t>(*(this->iterator_))
+ );
std::string::difference_type offset(1);
if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h
index d1806e5..77600b7 100644
--- a/src/codepoint_iterator.h
+++ b/src/codepoint_iterator.h
@@ -10,6 +10,11 @@ namespace UTF8 {
class CodepointIterator : public std::iterator<std::bidirectional_iterator_tag,
char32_t,
std::string::difference_type> {
+ static_assert(
+ sizeof(std::string::value_type) == 1,
+ "CodepointIterator only supports single-byte UTF-8 encoded input"
+ );
+
public:
CodepointIterator(std::string::const_iterator);
CodepointIterator(const CodepointIterator&);
diff --git a/src/utility.h b/src/utility.h
index 8aa46a0..75811f5 100644
--- a/src/utility.h
+++ b/src/utility.h
@@ -20,14 +20,14 @@ enum class CodePoint : std::uint8_t {
FOUR = (UINT8_MAX >> 5), // 00000111
};
-inline bool match(std::uint8_t unit, CodeUnitType type) {
+inline bool match(const std::uint8_t unit, const CodeUnitType type) {
return unit & static_cast<std::uint8_t>(type);
}
inline void write(char32_t& point,
- std::uint8_t unit,
- CodePoint mask,
- std::uint8_t offset) {
+ const std::uint8_t unit,
+ const CodePoint mask,
+ const std::uint8_t offset) {
point += (unit & static_cast<std::uint8_t>(mask)) << offset;
}