#include "query.h"

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <fcntl.h>

namespace dictzip {

namespace {

// Number of bytes requested from std::fread per iteration.
constexpr std::size_t kReadChunkSize = 16 * 1024;

// Deleter so that a std::unique_ptr can own a std::FILE* and close it on
// every exit path, including exceptions thrown while collecting results.
struct FileCloser {
	void operator()(std::FILE* file) const noexcept {
		std::fclose(file);
	}
};

// Returns true if the `length` bytes at `data` begin with `prefix`.
// Only bytes inside [data, data + length) are inspected.
bool has_prefix(const char* data, std::size_t length, const std::string& prefix) {
	return length >= prefix.size()
	    && std::memcmp(data, prefix.data(), prefix.size()) == 0;
}

}

// Collects every line of the file at `path` that begins with `substring`.
//
// The file is read in fixed-size chunks and split on '\n'. Returned lines do
// not include the terminating '\n'. Every line is considered, including the
// first line of the file and a final line that lacks a trailing '\n'. An
// empty `substring` matches every line.
//
// Returns an empty vector if the file cannot be opened. Read errors are not
// reported separately: reading simply stops once std::fread returns 0.
std::vector<std::string> get_lines_starting_with(
	const std::string& path,
	const std::string& substring
) {
	std::unique_ptr<std::FILE, FileCloser> file(std::fopen(path.c_str(), "r"));

	if ( !file ) {
		return std::vector<std::string>{};
	}

	// Purely advisory, so the return value is deliberately ignored. The named
	// constant is required here: the numeric values are platform-specific
	// (on Linux the literal 1 means POSIX_FADV_RANDOM, not sequential).
	static_cast<void>(
		posix_fadvise(fileno(file.get()), 0, 0, POSIX_FADV_SEQUENTIAL)
	);

	std::vector<std::string> result;
	char buffer[kReadChunkSize];

	// Beginning of a line whose '\n' has not been read yet. Empty whenever
	// the current line starts inside the chunk being scanned.
	std::string partial_line;

	while ( const std::size_t n = std::fread(buffer, sizeof(char), sizeof(buffer), file.get()) ) {
		const char* const chunk_end = buffer + n;
		const char* line_start = buffer;

		while ( const char* newline = static_cast<const char*>(
			std::memchr(line_start, '\n', static_cast<std::size_t>(chunk_end - line_start))
		) ) {
			const std::size_t length = static_cast<std::size_t>(newline - line_start);

			if ( partial_line.empty() ) {
				// The whole line lies within this chunk: test it in place and
				// copy it only if it matches.
				if ( has_prefix(line_start, length, substring) ) {
					result.emplace_back(line_start, length);
				}
			} else {
				// The line began in an earlier chunk: complete it first so the
				// prefix test sees the full line even if the prefix itself
				// straddles the chunk boundary.
				partial_line.append(line_start, length);

				if ( has_prefix(partial_line.data(), partial_line.size(), substring) ) {
					result.push_back(std::move(partial_line));
				}

				// Also resets the moved-from string to a defined, empty state.
				partial_line.clear();
			}

			line_start = newline + 1;
		}

		// Bytes after the last '\n' belong to a line that continues in the
		// next chunk (or ends at end of file).
		partial_line.append(line_start, static_cast<std::size_t>(chunk_end - line_start));
	}

	// A non-empty remainder is a final line without a trailing '\n'.
	if ( !partial_line.empty()
	  && has_prefix(partial_line.data(), partial_line.size(), substring) ) {
		result.push_back(std::move(partial_line));
	}

	return result;
}

// Returns the text between the first and the last '\t' of `line`.
//
// Degenerate inputs: with no '\t' the whole line is returned; with exactly
// one '\t' everything after that tab is returned.
std::string get_encoded_offset(const std::string& line) {
	const std::size_t first_tab = line.find('\t');
	const std::size_t last_tab  = line.rfind('\t');

	// When no tab exists, npos + 1 wraps around to 0 (unsigned arithmetic),
	// which is what yields the whole-line result described above.
	const std::size_t begin = first_tab + 1;

	return line.substr(begin, last_tab - begin);
}

// Returns the text following the last '\t' of `line`.
//
// If `line` contains no '\t' the whole line is returned (npos + 1 wraps
// around to 0).
std::string get_encoded_length(const std::string& line) {
	const std::size_t last_tab = line.rfind('\t');

	return line.substr(last_tab + 1);
}

}