aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Kummerlaender 2017-10-07 15:45:37 +0200
committer Adrian Kummerlaender 2017-10-07 15:45:37 +0200
commit 51a422f161e7e7cc5b1093a12e004165754ec1b9 (patch)
tree 5c23a69b197e614424aba3e14f59be87de6a4fac
parent 574e79243648debf57a0d92653bd0df83398e772 (diff)
download DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.gz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.bz2
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.lz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.xz
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.tar.zst
DictzipQuery-51a422f161e7e7cc5b1093a12e004165754ec1b9.zip
Introduce IndexFile class
-rw-r--r-- CMakeLists.txt 1
-rw-r--r-- example.cc 17
-rw-r--r-- src/index.cc 50
-rw-r--r-- src/index.h 29
-rw-r--r-- src/util/query.cc 1
5 files changed, 88 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45d4b67..6c96f1d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,6 +13,7 @@ include_directories(
add_library(
DictzipQuery
SHARED
+ src/index.cc
src/util/base64.cc
src/util/query.cc
src/istream/stream.cc
diff --git a/example.cc b/example.cc
index e42a7fb..3727434 100644
--- a/example.cc
+++ b/example.cc
@@ -1,6 +1,5 @@
+#include "index.h"
#include "istream/stream.h"
-#include "util/base64.h"
-#include "util/query.h"
#include <string>
#include <iostream>
@@ -12,7 +11,7 @@ std::string get(const std::string& path, std::size_t offset, std::size_t length)
result.resize(length);
stream.seekg(offset);
- stream.read(result.data(), length);
+ stream.read(&result[0], length);
return result;
}
@@ -21,14 +20,12 @@ int main(int argc, char** argv) {
if ( argc != 2 ) {
std::cerr << "Empty query." << std::endl;
} else {
- // Get index entries of requested word definitions
- for ( auto& line : dictzip::get_lines_starting_with("gcide.index", argv[1]) ) {
- // Decode location in compressed archive
- const std::size_t offset = dictzip::base64_decode(dictzip::get_encoded_offset(line));
- const std::size_t length = dictzip::base64_decode(dictzip::get_encoded_length(line));
+ dictzip::IndexFile index("gcide.index");
- // Print the GCIDE definition of _Accession_
- std::cout << get("gcide.dict.dz", offset, length) << std::endl;
+ // Get index entries of requested word definitions
+ for ( auto& entry : index.get(argv[1]) ) {
+ // Print the GCIDE definition
+ std::cout << get("gcide.dict.dz", entry.offset, entry.length) << std::endl;
}
}
}
diff --git a/src/index.cc b/src/index.cc
new file mode 100644
index 0000000..c8dc6de
--- /dev/null
+++ b/src/index.cc
@@ -0,0 +1,50 @@
+#include "index.h"
+
+#include "util/query.h"
+#include "util/base64.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+
+namespace dictzip {
+
+/// Parses one index line of the form `word<TAB>offset<TAB>length`.
+///
+/// The text before the first tab becomes the word; the text between the
+/// first and the last tab and the text after the last tab are each passed
+/// through base64_decode to obtain offset and length.
+///
+/// @param line a single line of the index file
+/// @return the entry described by the line
+/// @throws std::invalid_argument if the line does not contain at least two
+///         tab separators
+IndexFile::Entry parse_from_line(const std::string& line) {
+	const std::size_t start = line.find('\t');
+	const std::size_t end   = line.rfind('\t');
+
+	// With no tab both positions are npos; with a single tab they coincide.
+	// In either case the arithmetic below would wrap around and the very
+	// same text would be decoded as both offset and length, so such lines
+	// are rejected up front instead of yielding a bogus entry.
+	if ( start == std::string::npos || start == end ) {
+		throw std::invalid_argument("Malformed index line: " + line);
+	}
+
+	return IndexFile::Entry(
+		line.substr(0, start),
+		base64_decode(line.substr(start + 1, end - (start + 1))),
+		base64_decode(line.substr(end + 1)));
+}
+
+// Builds an entry from a raw index line: the line is handed to
+// parse_from_line and this object is initialised from the resulting value.
+IndexFile::Entry::Entry(const std::string& line):
+	Entry(parse_from_line(line)) { }
+
+// Stores the given word, offset and length unchanged.
+IndexFile::Entry::Entry(
+	const std::string& word,
+	std::size_t        offset,
+	std::size_t        length
+):
+	word{word},
+	offset{offset},
+	length{length} { }
+
+// Only remembers the path; the constructor itself performs no file access.
+IndexFile::IndexFile(const std::string& path):
+	path_{path} { }
+
+// Converts every line that get_lines_starting_with yields for `word` in the
+// stored index path into an Entry, preserving the order of the lines.
+std::vector<IndexFile::Entry> IndexFile::get(const std::string& word) {
+	const std::vector<std::string> matches = get_lines_starting_with(path_, word);
+
+	std::vector<Entry> parsed;
+	parsed.reserve(matches.size());
+
+	// Each line is forwarded to the Entry(const std::string&) constructor.
+	for ( const std::string& match : matches ) {
+		parsed.emplace_back(match);
+	}
+
+	return parsed;
+}
+
+}
diff --git a/src/index.h b/src/index.h
new file mode 100644
index 0000000..50acb2e
--- /dev/null
+++ b/src/index.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <cstdint>
+
+namespace dictzip {
+
+/// Looks up entries in a dictionary index file identified by its path.
+class IndexFile {
+public:
+	/// One parsed line of the index file.
+	struct Entry {
+		/// Parses a tab-separated index line (word, offset, length); the
+		/// offset and length fields are run through base64_decode.
+		Entry(const std::string& line);
+		/// Stores the given values unchanged.
+		Entry(const std::string& word, std::size_t offset, std::size_t length);
+
+		/// Text before the first tab of the index line.
+		const std::string word;
+		/// Decoded field between the first and last tab; example.cc seeks
+		/// to this position in the dictionary stream.
+		const std::size_t offset;
+		/// Decoded field after the last tab; example.cc reads this many
+		/// characters from the dictionary stream.
+		const std::size_t length;
+	};
+
+	/// Stores `path`; the constructor does not access the file.
+	IndexFile(const std::string& path);
+
+	/// Returns one Entry per line that get_lines_starting_with reports
+	/// for `word` in the index file.
+	std::vector<Entry> get(const std::string& word);
+
+private:
+	/// Path handed to the constructor, passed on to get_lines_starting_with.
+	const std::string path_;
+
+};
+
+}
diff --git a/src/util/query.cc b/src/util/query.cc
index f210613..f71b257 100644
--- a/src/util/query.cc
+++ b/src/util/query.cc
@@ -15,6 +15,7 @@ std::vector<std::string> get_lines_starting_with(
return std::vector<std::string>{};
}
+
posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL
std::vector<std::string> result;