diff options
author | Adrian Kummerlaender | 2017-10-06 16:46:31 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2017-10-06 16:47:32 +0200 |
commit | b93f4fea59dfab2c8cb06d00b32a567838982bdf (patch) | |
tree | 268e2d9f057acc72a6af42194497059bd0088b6f /src | |
parent | d951a22faf35aabe526de2588006c9381904b137 (diff) | |
download | DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar.gz DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar.bz2 DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar.lz DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar.xz DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.tar.zst DictzipQuery-b93f4fea59dfab2c8cb06d00b32a567838982bdf.zip |
Implement primitive dictzip index queries
Diffstat (limited to 'src')
-rw-r--r-- | src/istream/buffer.cc | 2 | ||||
-rw-r--r-- | src/util/query.cc | 57 | ||||
-rw-r--r-- | src/util/query.h | 12 |
3 files changed, 70 insertions, 1 deletion
diff --git a/src/istream/buffer.cc b/src/istream/buffer.cc
index 26697f4..2a195e0 100644
--- a/src/istream/buffer.cc
+++ b/src/istream/buffer.cc
@@ -20,7 +20,7 @@ void IstreamBuf::readChunk(long n) {
 	std::fseek(dictzip_file_, data_offset_ + chunkN.offset, SEEK_SET);
-	if ( fread(zBuf, 1, chunkN.size, dictzip_file_) != chunkN.size ) {
+	if ( std::fread(zBuf, 1, chunkN.size, dictzip_file_) != chunkN.size ) {
 		throw std::runtime_error("Could not read dictzip chunk.");
 	}
diff --git a/src/util/query.cc b/src/util/query.cc
new file mode 100644
index 0000000..a6fdcd3
--- /dev/null
+++ b/src/util/query.cc
@@ -0,0 +1,57 @@
+#include "query.h"
+
+#include <fcntl.h>
+
+#include <cstdio>
+#include <cstring>
+
+#include <stdexcept>
+
+namespace dictzip {
+
+std::string get_line_starting_with(
+	const std::string& path, const std::string& substring) {
+	static const auto BUFFER_SIZE = 16*1024;
+
+	FILE* file = std::fopen(path.c_str(), "r");
+
+	posix_fadvise(fileno(file), 0, 0, 1); // FDADVICE_SEQUENTIAL
+
+	char buffer[BUFFER_SIZE + 1];
+	char* start_of_match = nullptr;
+
+	while( std::size_t bytes_read = std::fread(buffer, sizeof(char), BUFFER_SIZE, file) ) {
+		if ( bytes_read ) {
+			for ( char* p = buffer;
+			      (p = static_cast<char*>(std::memchr(p, '\n', (buffer + bytes_read) - p)));
+			      ++p ) {
+				if ( start_of_match == nullptr ) {
+					if ( std::strncmp(substring.c_str(), p+1, substring.size()) == 0 ) {
+						start_of_match = p+1;
+					}
+				} else {
+					return std::string(start_of_match, p-start_of_match);
+				}
+			}
+		} else {
+			break;
+		}
+	}
+
+	throw std::runtime_error("No match found");
+}
+
+std::string get_encoded_offset(const std::string& line) {
+	const std::size_t start = line.find_first_of('\t');
+	const std::size_t end = line.find_last_of('\t');
+
+	return line.substr(start + 1, end - (start + 1));
+}
+
+std::string get_encoded_length(const std::string& line) {
+	const std::size_t start = line.find_last_of('\t');
+
+	return line.substr(start + 1);
+}
+
+}
diff --git a/src/util/query.h b/src/util/query.h
new file mode 100644
index 0000000..3a25206
--- /dev/null
+++ b/src/util/query.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <string>
+
+namespace dictzip {
+
+std::string get_line_starting_with(const std::string& path, const std::string& substring);
+
+std::string get_encoded_offset(const std::string& line);
+std::string get_encoded_length(const std::string& line);
+
+}