From 51a422f161e7e7cc5b1093a12e004165754ec1b9 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sat, 7 Oct 2017 15:45:37 +0200 Subject: Introduce IndexFile class --- CMakeLists.txt | 1 + example.cc | 17 +++++++---------- src/index.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/index.h | 29 +++++++++++++++++++++++++++++ src/util/query.cc | 1 + 5 files changed, 88 insertions(+), 10 deletions(-) create mode 100644 src/index.cc create mode 100644 src/index.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 45d4b67..6c96f1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ include_directories( add_library( DictzipQuery SHARED + src/index.cc src/util/base64.cc src/util/query.cc src/istream/stream.cc diff --git a/example.cc b/example.cc index e42a7fb..3727434 100644 --- a/example.cc +++ b/example.cc @@ -1,6 +1,5 @@ +#include "index.h" #include "istream/stream.h" -#include "util/base64.h" -#include "util/query.h" #include #include @@ -12,7 +11,7 @@ std::string get(const std::string& path, std::size_t offset, std::size_t length) result.resize(length); stream.seekg(offset); - stream.read(result.data(), length); + stream.read(&result[0], length); return result; } @@ -21,14 +20,12 @@ int main(int argc, char** argv) { if ( argc != 2 ) { std::cerr << "Empty query." << std::endl; } else { - // Get index entries of requested word definitions - for ( auto& line : dictzip::get_lines_starting_with("gcide.index", argv[1]) ) { - // Decode location in compressed archive - const std::size_t offset = dictzip::base64_decode(dictzip::get_encoded_offset(line)); - const std::size_t length = dictzip::base64_decode(dictzip::get_encoded_length(line)); + dictzip::IndexFile index("gcide.index"); - // Print the GCIDE definition of _Accession_ - std::cout << get("gcide.dict.dz", offset, length) << std::endl; + // Get index entries of requested word definitions + for ( auto& entry : index.get(argv[1]) ) { + // Print the GCIDE definition + std::cout << get("gcide.dict.dz", entry.offset, entry.length) << std::endl; } } } diff --git a/src/index.cc b/src/index.cc new file mode 100644 index 0000000..c8dc6de --- /dev/null +++ b/src/index.cc @@ -0,0 +1,50 @@ +#include "index.h" + +#include "util/query.h" +#include "util/base64.h" + +#include + + +namespace dictzip { + +IndexFile::Entry parse_from_line(const std::string& line) { + const std::size_t start = line.find_first_of('\t'); + const std::size_t end = line.find_last_of('\t'); + + return IndexFile::Entry( + line.substr(0, start), + base64_decode(line.substr(start + 1, end - (start + 1))), + base64_decode(line.substr(end + 1))); +} + +IndexFile::Entry::Entry(const std::string& line): + IndexFile::Entry{parse_from_line(line)} { } + +IndexFile::Entry::Entry( + const std::string& word, std::size_t offset, std::size_t length): + word(word), + offset(offset), + length(length) { } + +IndexFile::IndexFile(const std::string& path): + path_(path) { } + +std::vector IndexFile::get(const std::string& word) { + const std::vector lines = get_lines_starting_with(this->path_, word); + + std::vector entries; + entries.reserve(lines.size()); + + std::for_each( + lines.begin(), + lines.end(), + [&entries](const std::string& line) { + entries.emplace_back(line); + } + ); + + return entries; +} + +} diff --git a/src/index.h b/src/index.h new file mode 100644 index 0000000..50acb2e --- /dev/null +++ b/src/index.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace dictzip { + +class IndexFile { +public: + struct Entry { + Entry(const std::string& line); + Entry(const std::string& word, std::size_t offset, std::size_t length); + + const std::string word; + const std::size_t offset; + const std::size_t length; + }; + + IndexFile(const std::string& path); + + std::vector get(const std::string& word); + +private: + const std::string path_; + +}; + +} diff --git a/src/util/query.cc b/src/util/query.cc index f210613..f71b257 100644 --- a/src/util/query.cc +++ b/src/util/query.cc @@ -15,6 +15,7 @@ std::vector get_lines_starting_with( return std::vector{}; } + posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL std::vector result; -- cgit v1.2.3