aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Adrian Kummerlaender 2017-10-07 15:45:37 +0200
committer Adrian Kummerlaender 2017-10-07 15:45:37 +0200
commit 51a422f161e7e7cc5b1093a12e004165754ec1b9 (patch)
tree 5c23a69b197e614424aba3e14f59be87de6a4fac /src
parent 574e79243648debf57a0d92653bd0df83398e772 (diff)
downloadDictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.gz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.bz2
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.lz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.xz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.zst
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.zip
Introduce IndexFile class
Diffstat (limited to 'src')
-rw-r--r-- src/index.cc 50
-rw-r--r-- src/index.h 29
-rw-r--r-- src/util/query.cc 1
3 files changed, 80 insertions, 0 deletions
diff --git a/src/index.cc b/src/index.cc
new file mode 100644
index 0000000..c8dc6de
--- /dev/null
+++ b/src/index.cc
@@ -0,0 +1,50 @@
+#include "index.h"
+
+#include "util/query.h"
+#include "util/base64.h"
+
+#include <algorithm>
+
+
+namespace dictzip {
+
+// Splits one index line into three tab-separated fields and builds an Entry:
+// the text before the first tab is the word, the text between the first and
+// the last tab is handed to base64_decode for the offset, and the text after
+// the last tab is handed to base64_decode for the length.
+// NOTE(review): nothing here rejects a line with fewer than two tabs -
+// confirm that callers only pass well-formed index lines.
+IndexFile::Entry parse_from_line(const std::string& line) {
+ const std::size_t first_tab = line.find('\t');
+ const std::size_t last_tab = line.rfind('\t');
+
+ const std::string word = line.substr(0, first_tab);
+ const std::string offset = line.substr(first_tab + 1, last_tab - (first_tab + 1));
+ const std::string length = line.substr(last_tab + 1);
+
+ return IndexFile::Entry(word, base64_decode(offset), base64_decode(length));
+}
+
+// Builds an Entry from a raw index line by delegating to the entry that
+// parse_from_line produces for it.
+IndexFile::Entry::Entry(const std::string& line):
+ Entry(parse_from_line(line)) { }
+
+IndexFile::Entry::Entry(
+ const std::string& word, std::size_t offset, std::size_t length):
+ word(word),
+ offset(offset),
+ length(length) { }
+
+IndexFile::IndexFile(const std::string& path):
+ path_(path) { }
+
+// Returns one Entry per line that get_lines_starting_with yields for `word`
+// in the index file at path_, in the same order as those lines.
+std::vector<IndexFile::Entry> IndexFile::get(const std::string& word) {
+ const std::vector<std::string> matches = get_lines_starting_with(path_, word);
+
+ std::vector<Entry> result;
+ result.reserve(matches.size());
+
+ // Entry's single-string constructor parses each raw line in place.
+ for (const std::string& match : matches) {
+  result.emplace_back(match);
+ }
+
+ return result;
+}
+
+}
diff --git a/src/index.h b/src/index.h
new file mode 100644
index 0000000..50acb2e
--- /dev/null
+++ b/src/index.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <cstdint>
+
+namespace dictzip {
+
+// Read-only view onto an index file identified by its path. Lookups are
+// served by get(), which turns matching index lines into Entry values.
+class IndexFile {
+public:
+ // One parsed index record: a word plus the offset and length stored
+ // alongside it. All fields are const, so an Entry cannot be assigned to
+ // after construction.
+ struct Entry {
+ // Parses a raw index line of the form word<TAB>offset<TAB>length, where
+ // offset and length are passed through base64_decode.
+ Entry(const std::string& line);
+ // Builds an Entry from already-decoded fields.
+ Entry(const std::string& word, std::size_t offset, std::size_t length);
+
+ const std::string word;
+ const std::size_t offset;
+ const std::size_t length;
+ };
+
+ // Remembers `path`; the file is not opened by the constructor.
+ IndexFile(const std::string& path);
+
+ // Returns an Entry for every index line that get_lines_starting_with
+ // reports for `word`.
+ std::vector<Entry> get(const std::string& word);
+
+private:
+ // Path of the index file, as handed to the constructor.
+ const std::string path_;
+
+};
+
+}
diff --git a/src/util/query.cc b/src/util/query.cc
index f210613..f71b257 100644
--- a/src/util/query.cc
+++ b/src/util/query.cc
@@ -15,6 +15,7 @@ std::vector<std::string> get_lines_starting_with(
return std::vector<std::string>{};
}
+
posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL
std::vector<std::string> result;