diff options
author | Adrian Kummerlaender | 2017-10-07 15:45:37 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2017-10-07 15:45:37 +0200 |
commit | 51a422f161e7e7cc5b1093a12e004165754ec1b9 (patch) | |
tree | 5c23a69b197e614424aba3e14f59be87de6a4fac /src | |
parent | 574e79243648debf57a0d92653bd0df83398e772 (diff) | |
download | DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.gz DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.bz2 DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.lz DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.xz DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.zst DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.zip |
Introduce IndexFile class
Diffstat (limited to 'src')
-rw-r--r-- | src/index.cc | 50 | ||||
-rw-r--r-- | src/index.h | 29 | ||||
-rw-r--r-- | src/util/query.cc | 1 |
3 files changed, 80 insertions, 0 deletions
diff --git a/src/index.cc b/src/index.cc
new file mode 100644
index 0000000..c8dc6de
--- /dev/null
+++ b/src/index.cc
@@ -0,0 +1,50 @@
+#include "index.h"
+
+#include "util/query.h"
+#include "util/base64.h"
+
+#include <algorithm>
+
+namespace dictzip {
+namespace {
+
+// Splits `word<TAB>offset<TAB>length`; offset and length are passed through
+// base64_decode. NOTE(review): lines with fewer than two tabs are not rejected.
+IndexFile::Entry parse_from_line(const std::string& line) {
+	const std::size_t first_tab = line.find('\t');
+	const std::size_t last_tab = line.rfind('\t');
+
+	return IndexFile::Entry(
+		line.substr(0, first_tab),
+		base64_decode(line.substr(first_tab + 1, last_tab - (first_tab + 1))),
+		base64_decode(line.substr(last_tab + 1)));
+}
+
+} // namespace
+
+IndexFile::Entry::Entry(const std::string& line):
+	IndexFile::Entry{parse_from_line(line)} { }
+
+IndexFile::Entry::Entry(
+	const std::string& word, std::size_t offset, std::size_t length):
+	word(word),
+	offset(offset),
+	length(length) { }
+
+IndexFile::IndexFile(const std::string& path):
+	path_(path) { }
+
+// Returns one Entry per line get_lines_starting_with yields for `word`.
+std::vector<IndexFile::Entry> IndexFile::get(const std::string& word) const {
+	const std::vector<std::string> lines = get_lines_starting_with(this->path_, word);
+
+	std::vector<Entry> entries;
+	entries.reserve(lines.size());
+	for (const std::string& line : lines) {
+		entries.emplace_back(line);
+	}
+
+	return entries;
+}
+
+}
diff --git a/src/index.h b/src/index.h
new file mode 100644
index 0000000..50acb2e
--- /dev/null
+++ b/src/index.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace dictzip {
+
+// Looks up entries in the index file at the path given on construction.
+class IndexFile {
+public:
+	struct Entry { // one index record: a word plus the offset and length stored with it
+		Entry(const std::string& line); // parses a raw index line
+		Entry(const std::string& word, std::size_t offset, std::size_t length);
+
+		const std::string word;
+		const std::size_t offset;
+		const std::size_t length;
+	};
+
+	IndexFile(const std::string& path);
+	std::vector<Entry> get(const std::string& word) const; // one Entry per matched index line
+
+private:
+	const std::string path_;
+};
+
+}
diff --git a/src/util/query.cc b/src/util/query.cc
index f210613..f71b257 100644
--- 
a/src/util/query.cc +++ b/src/util/query.cc @@ -15,6 +15,7 @@ std::vector<std::string> get_lines_starting_with( return std::vector<std::string>{}; } + posix_fadvise(fileno(file), 0, 0, POSIX_FADV_SEQUENTIAL); /* sequential-access hint; a literal 1 here means POSIX_FADV_RANDOM on Linux */ std::vector<std::string> result; |