diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | example.cc | 17 | ||||
-rw-r--r-- | src/index.cc | 50 | ||||
-rw-r--r-- | src/index.h | 29 | ||||
-rw-r--r-- | src/util/query.cc | 1 |
5 files changed, 88 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 45d4b67..6c96f1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ include_directories( add_library( DictzipQuery SHARED + src/index.cc src/util/base64.cc src/util/query.cc src/istream/stream.cc @@ -1,6 +1,5 @@ +#include "index.h" #include "istream/stream.h" -#include "util/base64.h" -#include "util/query.h" #include <string> #include <iostream> @@ -12,7 +11,7 @@ std::string get(const std::string& path, std::size_t offset, std::size_t length) result.resize(length); stream.seekg(offset); - stream.read(result.data(), length); + stream.read(&result[0], length); return result; } @@ -21,14 +20,12 @@ int main(int argc, char** argv) { if ( argc != 2 ) { std::cerr << "Empty query." << std::endl; } else { - // Get index entries of requested word definitions - for ( auto& line : dictzip::get_lines_starting_with("gcide.index", argv[1]) ) { - // Decode location in compressed archive - const std::size_t offset = dictzip::base64_decode(dictzip::get_encoded_offset(line)); - const std::size_t length = dictzip::base64_decode(dictzip::get_encoded_length(line)); + dictzip::IndexFile index("gcide.index"); - // Print the GCIDE definition of _Accession_ - std::cout << get("gcide.dict.dz", offset, length) << std::endl; + // Get index entries of requested word definitions + for ( auto& entry : index.get(argv[1]) ) { + // Print the GCIDE definition + std::cout << get("gcide.dict.dz", entry.offset, entry.length) << std::endl; } } } diff --git a/src/index.cc b/src/index.cc new file mode 100644 index 0000000..c8dc6de --- /dev/null +++ b/src/index.cc @@ -0,0 +1,50 @@ +#include "index.h" + +#include "util/query.h" +#include "util/base64.h" + +#include <algorithm> + + +namespace dictzip { + +IndexFile::Entry parse_from_line(const std::string& line) { + const std::size_t start = line.find_first_of('\t'); + const std::size_t end = line.find_last_of('\t'); + + return IndexFile::Entry( + line.substr(0, start), + base64_decode(line.substr(start + 1, end - (start + 1))), + base64_decode(line.substr(end + 1))); +} + +IndexFile::Entry::Entry(const std::string& line): + IndexFile::Entry{parse_from_line(line)} { } + +IndexFile::Entry::Entry( + const std::string& word, std::size_t offset, std::size_t length): + word(word), + offset(offset), + length(length) { } + +IndexFile::IndexFile(const std::string& path): + path_(path) { } + +std::vector<IndexFile::Entry> IndexFile::get(const std::string& word) { + const std::vector<std::string> lines = get_lines_starting_with(this->path_, word); + + std::vector<Entry> entries; + entries.reserve(lines.size()); + + std::for_each( + lines.begin(), + lines.end(), + [&entries](const std::string& line) { + entries.emplace_back(line); + } + ); + + return entries; +} + +} diff --git a/src/index.h b/src/index.h new file mode 100644 index 0000000..50acb2e --- /dev/null +++ b/src/index.h @@ -0,0 +1,29 @@ +#pragma once + +#include <string> +#include <vector> +#include <cstdint> + +namespace dictzip { + +class IndexFile { +public: + struct Entry { + Entry(const std::string& line); + Entry(const std::string& word, std::size_t offset, std::size_t length); + + const std::string word; + const std::size_t offset; + const std::size_t length; + }; + + IndexFile(const std::string& path); + + std::vector<Entry> get(const std::string& word); + +private: + const std::string path_; + +}; + +} diff --git a/src/util/query.cc b/src/util/query.cc index f210613..f71b257 100644 --- a/src/util/query.cc +++ b/src/util/query.cc @@ -15,6 +15,7 @@ std::vector<std::string> get_lines_starting_with( return std::vector<std::string>{}; } + posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL std::vector<std::string> result; |