aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/codepoint_iterator.cc10
-rw-r--r--src/codepoint_iterator.h5
-rw-r--r--src/utility.h8
3 files changed, 16 insertions, 7 deletions
diff --git a/src/codepoint_iterator.cc b/src/codepoint_iterator.cc
index 59cb23b..b96423a 100644
--- a/src/codepoint_iterator.cc
+++ b/src/codepoint_iterator.cc
@@ -40,8 +40,10 @@ std::ptrdiff_t CodepointIterator::operator-(
}
char32_t CodepointIterator::operator*() {
- std::uint8_t currByte = *(this->iterator_);
- char32_t codePoint = 0;
+ const std::uint8_t currByte(
+ static_cast<std::uint8_t>(*(this->iterator_))
+ );
+ char32_t codePoint{};
if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
if ( match(currByte, dtl::CodeUnitType::THREE) ) {
@@ -94,7 +96,9 @@ char32_t CodepointIterator::operator*() {
}
CodepointIterator& CodepointIterator::operator++() {
- std::uint8_t currByte(*(this->iterator_));
+ const std::uint8_t currByte(
+ static_cast<std::uint8_t>(*(this->iterator_))
+ );
std::string::difference_type offset(1);
if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h
index d1806e5..77600b7 100644
--- a/src/codepoint_iterator.h
+++ b/src/codepoint_iterator.h
@@ -10,6 +10,11 @@ namespace UTF8 {
class CodepointIterator : public std::iterator<std::bidirectional_iterator_tag,
char32_t,
std::string::difference_type> {
+ static_assert(
+ sizeof(std::string::value_type) == 1,
+ "CodepointIterator only supports single-byte UTF-8 encoded input"
+ );
+
public:
CodepointIterator(std::string::const_iterator);
CodepointIterator(const CodepointIterator&);
diff --git a/src/utility.h b/src/utility.h
index 8aa46a0..75811f5 100644
--- a/src/utility.h
+++ b/src/utility.h
@@ -20,14 +20,14 @@ enum class CodePoint : std::uint8_t {
FOUR = (UINT8_MAX >> 5), // 00000111
};
-inline bool match(std::uint8_t unit, CodeUnitType type) {
+inline bool match(const std::uint8_t unit, const CodeUnitType type) {
return unit & static_cast<std::uint8_t>(type);
}
inline void write(char32_t& point,
- std::uint8_t unit,
- CodePoint mask,
- std::uint8_t offset) {
+ const std::uint8_t unit,
+ const CodePoint mask,
+ const std::uint8_t offset) {
point += (unit & static_cast<std::uint8_t>(mask)) << offset;
}