aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Kummerländer2014-02-15 12:57:50 +0100
committerAdrian Kummerländer2014-02-15 12:57:50 +0100
commitd9dc09933635a62e188600c419fe42bcd03a3820 (patch)
treeb656f09231a403b68e95f76341eeabec1672b441
parent79a65ce58ad8f3b2b1c9eeaba4b0b4710dc09e2c (diff)
downloadCodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar.gz
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar.bz2
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar.lz
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar.xz
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.tar.zst
CodepointIterator-d9dc09933635a62e188600c419fe42bcd03a3820.zip
Removed internal codepoint caching
* it is not the responsibility of a codepoint iterator to cache the resolved codepoint for reuse ** if caching is required, the user of this class can implement it better in the context where it is required ** e.g. by implementing a "CachedIterator" template
-rw-r--r--src/codepoint_iterator.cc101
-rw-r--r--src/codepoint_iterator.h3
2 files changed, 46 insertions, 58 deletions
diff --git a/src/codepoint_iterator.cc b/src/codepoint_iterator.cc
index fb638d1..99ea866 100644
--- a/src/codepoint_iterator.cc
+++ b/src/codepoint_iterator.cc
@@ -5,20 +5,14 @@
namespace UTF8 {
CodepointIterator::CodepointIterator(std::string::const_iterator iter):
- iterator_(iter),
- dereferenced_(false),
- codepoint_(0) { }
+ iterator_(iter) { }
CodepointIterator::CodepointIterator(const CodepointIterator& src):
- iterator_(src.iterator_),
- dereferenced_(src.dereferenced_),
- codepoint_(src.codepoint_) { }
+ iterator_(src.iterator_) { }
CodepointIterator& CodepointIterator::operator=(const CodepointIterator& src) {
- this->iterator_ = src.iterator_;
- this->dereferenced_ = src.dereferenced_;
- this->codepoint_ = src.codepoint_;
-
+ this->iterator_ = src.iterator_;
+
return *this;
}
@@ -41,64 +35,60 @@ bool CodepointIterator::operator!=(
}
char32_t CodepointIterator::operator*() {
- if ( !this->dereferenced_ ) {
- uint8_t currByte = *(this->iterator_);
- this->dereferenced_ = true;
- this->codepoint_ = 0;
-
- if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
- if ( match(currByte, dtl::CodeUnitType::THREE) ) {
- if ( match(currByte, dtl::CodeUnitType::FOUR) ) {
- dtl::write(this->codepoint_,
- currByte,
- dtl::CodePoint::FOUR,
- 18);
- dtl::write(this->codepoint_,
- *(this->iterator_ + 1),
- dtl::CodePoint::CONTINUATION,
- 12);
- dtl::write(this->codepoint_,
- *(this->iterator_ + 2),
- dtl::CodePoint::CONTINUATION,
- 6);
- dtl::write(this->codepoint_,
- *(this->iterator_ + 3),
- dtl::CodePoint::CONTINUATION,
- 0);
- } else {
- dtl::write(this->codepoint_,
- currByte,
- dtl::CodePoint::THREE,
- 12);
- dtl::write(this->codepoint_,
- *(this->iterator_ + 1),
- dtl::CodePoint::CONTINUATION,
- 6);
- dtl::write(this->codepoint_,
- *(this->iterator_ + 2),
- dtl::CodePoint::CONTINUATION,
- 0);
- }
- } else {
- dtl::write(this->codepoint_,
+ uint8_t currByte = *(this->iterator_);
+ char32_t codePoint = 0;
+
+ if ( match(currByte, dtl::CodeUnitType::CONTINUATION) ) {
+ if ( match(currByte, dtl::CodeUnitType::THREE) ) {
+ if ( match(currByte, dtl::CodeUnitType::FOUR) ) {
+ dtl::write(codePoint,
currByte,
- dtl::CodePoint::TWO,
+ dtl::CodePoint::FOUR,
+ 18);
+ dtl::write(codePoint,
+ *(this->iterator_ + 1),
+ dtl::CodePoint::CONTINUATION,
+ 12);
+ dtl::write(codePoint,
+ *(this->iterator_ + 2),
+ dtl::CodePoint::CONTINUATION,
6);
- dtl::write(this->codepoint_,
+ dtl::write(codePoint,
+ *(this->iterator_ + 3),
+ dtl::CodePoint::CONTINUATION,
+ 0);
+ } else {
+ dtl::write(codePoint,
+ currByte,
+ dtl::CodePoint::THREE,
+ 12);
+ dtl::write(codePoint,
*(this->iterator_ + 1),
dtl::CodePoint::CONTINUATION,
+ 6);
+ dtl::write(codePoint,
+ *(this->iterator_ + 2),
+ dtl::CodePoint::CONTINUATION,
0);
}
} else {
- this->codepoint_ = currByte;
+ dtl::write(codePoint,
+ currByte,
+ dtl::CodePoint::TWO,
+ 6);
+ dtl::write(codePoint,
+ *(this->iterator_ + 1),
+ dtl::CodePoint::CONTINUATION,
+ 0);
}
+ } else {
+ codePoint = currByte;
}
- return this->codepoint_;
+ return codePoint;
}
CodepointIterator& CodepointIterator::operator++() {
- this->dereferenced_ = false;
uint8_t currByte = *(this->iterator_);
std::string::difference_type offset = 1;
@@ -120,7 +110,6 @@ CodepointIterator& CodepointIterator::operator++() {
}
CodepointIterator& CodepointIterator::operator--() {
- this->dereferenced_ = false;
this->iterator_.operator--();
if ( match(*(this->iterator_), dtl::CodeUnitType::CONTINUATION) ) {
diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h
index 938f53d..6c8c43d 100644
--- a/src/codepoint_iterator.h
+++ b/src/codepoint_iterator.h
@@ -34,8 +34,7 @@ class CodepointIterator : public std::iterator<std::bidirectional_iterator_tag,
private:
std::string::const_iterator iterator_;
- bool dereferenced_;
- char32_t codepoint_;
+
};
class codepoint_invalid: public std::exception {