diff options
-rw-r--r-- | example.cc | 18 | ||||
-rw-r--r-- | src/util/query.cc | 45 | ||||
-rw-r--r-- | src/util/query.h | 4 |
3 files changed, 36 insertions, 31 deletions
@@ -21,14 +21,14 @@ int main(int argc, char** argv) { if ( argc != 2 ) { std::cerr << "Empty query." << std::endl; } else { - // Get index entry of requested word definition - const std::string line = dictzip::get_line_starting_with("gcide.index", argv[1]); - - // Decode location in compressed archive - const std::size_t offset = dictzip::base64_decode(dictzip::get_encoded_offset(line)); - const std::size_t length = dictzip::base64_decode(dictzip::get_encoded_length(line)); - - // Print the GCIDE definition of _Accession_ - std::cout << get("gcide.dict.dz", offset, length) << std::endl; + // Get index entries of requested word definitions + for ( auto& line : dictzip::get_lines_starting_with("gcide.index", argv[1]) ) { + // Decode location in compressed archive + const std::size_t offset = dictzip::base64_decode(dictzip::get_encoded_offset(line)); + const std::size_t length = dictzip::base64_decode(dictzip::get_encoded_length(line)); + + // Print the GCIDE definition of _Accession_ + std::cout << get("gcide.dict.dz", offset, length) << std::endl; + } } } diff --git a/src/util/query.cc b/src/util/query.cc index a6fdcd3..f210613 100644 --- a/src/util/query.cc +++ b/src/util/query.cc @@ -5,40 +5,43 @@ #include <cstdio> #include <cstring> -#include <stdexcept> - namespace dictzip { -std::string get_line_starting_with( +std::vector<std::string> get_lines_starting_with( const std::string& path, const std::string& substring) { - static const auto BUFFER_SIZE = 16*1024; - FILE* file = std::fopen(path.c_str(), "r"); + if ( !file ) { + return std::vector<std::string>{}; + } + posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL - char buffer[BUFFER_SIZE + 1]; + std::vector<std::string> result; + char buffer[16*1024 + 1]; char* start_of_match = nullptr; - while( std::size_t bytes_read = std::fread(buffer, sizeof(char), BUFFER_SIZE, file) ) { - if ( bytes_read ) { - for ( char* p = buffer; - (p = static_cast<char*>(std::memchr(p, '\n', (buffer + bytes_read) - p))); - ++p ) { - if ( start_of_match == nullptr ) { - if ( std::strncmp(substring.c_str(), p+1, substring.size()) == 0 ) { - start_of_match = p+1; - } - } else { - return std::string(start_of_match, p-start_of_match); - } + while ( std::size_t n = std::fread(buffer, + sizeof(char), + sizeof(buffer) - 1, + file) ) { + for ( char* p = buffer; + (p = static_cast<char*>(std::memchr(p, '\n', (buffer + n) - p))); + ++p ) { + if ( start_of_match != nullptr ) { + result.emplace_back(start_of_match, p - start_of_match); + start_of_match = nullptr; + } + + if ( std::strncmp(substring.c_str(), p+1, substring.size()) == 0 ) { + start_of_match = p+1; } - } else { - break; } } - throw std::runtime_error("No match found"); + std::fclose(file); + + return result; } std::string get_encoded_offset(const std::string& line) { diff --git a/src/util/query.h b/src/util/query.h index 3a25206..1c7df36 100644 --- a/src/util/query.h +++ b/src/util/query.h @@ -1,10 +1,12 @@ #pragma once #include <string> +#include <vector> namespace dictzip { -std::string get_line_starting_with(const std::string& path, const std::string& substring); +std::vector<std::string> get_lines_starting_with( + const std::string& path, const std::string& substring); std::string get_encoded_offset(const std::string& line); std::string get_encoded_length(const std::string& line); |