From 4d1b9a918f8a189ba1e9887c6a9da04e7392db90 Mon Sep 17 00:00:00 2001
From: Adrian Kummerländer
Date: Sat, 5 Oct 2013 12:41:07 +0200
Subject: Initial commit

* CodepointIterator is a simple C++ iterator class which iterates through unicode codepoints in a UTF8-encoded string
* It is derived from std::iterator and implements the std::bidirectional_iterator_tag
* Dereferencing an instance of the class provides the codepoint as char32_t
* Tests require Google Test and use UTF8-samples from http://www.columbia.edu/~fdc/utf8/
---
 src/codepoint_iterator.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 src/codepoint_iterator.h

(limited to 'src/codepoint_iterator.h')

diff --git a/src/codepoint_iterator.h b/src/codepoint_iterator.h
new file mode 100644
index 0000000..938f53d
--- /dev/null
+++ b/src/codepoint_iterator.h
@@ -0,0 +1,49 @@
+#ifndef CODEPOINT_ITERATOR_H_
+#define CODEPOINT_ITERATOR_H_
+
+#include <string>
+#include <iterator>
+#include <exception>
+
+namespace UTF8 {
+
+class CodepointIterator : public std::iterator<std::bidirectional_iterator_tag,
+                                               char32_t,
+                                               std::ptrdiff_t,
+                                               const char32_t*,
+                                               const char32_t&> {
+ public:
+  CodepointIterator(std::string::const_iterator);
+  CodepointIterator(const CodepointIterator&);
+
+  CodepointIterator& operator=(const CodepointIterator&);
+
+  bool operator==(const CodepointIterator&) const;
+  bool operator==(const std::string::const_iterator&) const;
+
+  bool operator!=(const CodepointIterator&) const;
+  bool operator!=(const std::string::const_iterator&) const;
+
+  char32_t operator*();
+
+  CodepointIterator& operator++();
+  CodepointIterator& operator--();
+
+  CodepointIterator operator++(int);
+  CodepointIterator operator--(int);
+
+ private:
+  std::string::const_iterator iterator_;
+  bool dereferenced_;
+  char32_t codepoint_;
+};
+
+class codepoint_invalid: public std::exception {
+  virtual const char* what() const throw() {
+    return "codepoint_invalid";
+  }
+};
+
+}
+
+#endif  // CODEPOINT_ITERATOR_H_
-- 
cgit v1.2.3