// Provenance: reconstructed from the patch that introduced src/utility.h.
//
//   Commit:  79a65ce58ad8f3b2b1c9eeaba4b0b4710dc09e2c
//   Author:  Adrian Kummerländer
//   Date:    Sat, 15 Feb 2014 12:48:35 +0100
//   Subject: Extracted helper functions and bitmasks into separate
//            compilation unit
//
//   * utility.h and utility.cc now contain the UTF8-codepoint and unit
//     bitmasks and read / write functions
//   * Modified users of these functions and unions accordingly
//   * Added the new compilation unit to the Makefile
//   * Changed bitmask specification from plain integer literals to shift
//     expressions for better readability

#ifndef CODEPOINT_ITERATOR_UTILITY_H_
#define CODEPOINT_ITERATOR_UTILITY_H_

// Provides uint8_t and UINT8_MAX, both of which are used below.
#include <cstdint>

namespace UTF8 {
namespace dtl {

/// Single-bit masks, each obtained by shifting the top bit (128) of a byte
/// to the right by 0..3 positions.
///
/// NOTE(review): the enumerator names suggest these bits are used to classify
/// a UTF-8 code unit (continuation byte, leading byte, 3- or 4-byte sequence);
/// the actual use lives in utility.cc and should be confirmed there.
enum class CodeUnitType : uint8_t {
	CONTINUATION = (128 >> 0), // 10000000
	LEADING      = (128 >> 1), // 01000000
	THREE        = (128 >> 2), // 00100000
	FOUR         = (128 >> 3), // 00010000
};

/// Low-bit masks, each obtained by shifting UINT8_MAX (11111111) to the right
/// by 2..5 positions, i.e. keeping the lowest 6, 5, 4 or 3 bits of a byte.
///
/// NOTE(review): presumably these select the code point payload bits of a
/// continuation byte and of the leading byte of a 2-, 3- or 4-byte sequence;
/// confirm against the users of write().
enum class CodePoint : uint8_t {
	CONTINUATION = (UINT8_MAX >> 2), // 00111111
	TWO          = (UINT8_MAX >> 3), // 00011111
	THREE        = (UINT8_MAX >> 4), // 00001111
	FOUR         = (UINT8_MAX >> 5), // 00000111
};

/// Declaration only; according to the originating commit the definition is in
/// utility.cc. Takes a byte and a CodeUnitType mask and yields a bool.
/// NOTE(review): exact matching semantics are not visible from this header.
bool match(const uint8_t&, CodeUnitType&&);

/// Declaration only; according to the originating commit the definition is in
/// utility.cc. Receives a mutable char32_t, a byte, a CodePoint mask and a
/// further byte-sized argument.
/// NOTE(review): the role of the trailing uint8_t (possibly a shift offset)
/// cannot be determined from this header -- verify in utility.cc.
void write(char32_t&, const uint8_t&, CodePoint&&, const uint8_t&);

}
}

#endif  // CODEPOINT_ITERATOR_UTILITY_H_