diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/util/query.cc | 45 | ||||
| -rw-r--r-- | src/util/query.h | 4 | 
2 files changed, 27 insertions, 22 deletions
diff --git a/src/util/query.cc b/src/util/query.cc index a6fdcd3..f210613 100644 --- a/src/util/query.cc +++ b/src/util/query.cc @@ -5,40 +5,43 @@  #include <cstdio>  #include <cstring> -#include <stdexcept> -  namespace dictzip { -std::string get_line_starting_with( +std::vector<std::string> get_lines_starting_with(  	const std::string& path, const std::string& substring) { -	static const auto BUFFER_SIZE = 16*1024; -  	FILE* file = std::fopen(path.c_str(), "r"); +	if ( !file ) { +		return std::vector<std::string>{}; +	} +  	posix_fadvise(fileno(file), 0, 0, 1);  // FDADVICE_SEQUENTIAL -	char buffer[BUFFER_SIZE + 1]; +	std::vector<std::string> result; +	char buffer[16*1024 + 1];  	char* start_of_match = nullptr; -	while( std::size_t bytes_read = std::fread(buffer, sizeof(char), BUFFER_SIZE, file) ) { -		if ( bytes_read ) { -			for ( char* p = buffer; -			      (p = static_cast<char*>(std::memchr(p, '\n', (buffer + bytes_read) - p))); -			      ++p ) { -				if ( start_of_match == nullptr ) { -					if ( std::strncmp(substring.c_str(), p+1, substring.size()) == 0 ) { -						start_of_match = p+1; -					} -				} else { -					return std::string(start_of_match, p-start_of_match); -				} +	while ( std::size_t n = std::fread(buffer, +	                                   sizeof(char), +	                                   sizeof(buffer) - 1, +	                                   file) ) { +		for ( char* p = buffer; +			  (p = static_cast<char*>(std::memchr(p, '\n', (buffer + n) - p))); +			  ++p ) { +			if ( start_of_match != nullptr ) { +				result.emplace_back(start_of_match, p - start_of_match); +				start_of_match = nullptr; +			} + +			if ( std::strncmp(substring.c_str(), p+1, substring.size()) == 0 ) { +				start_of_match = p+1;  			} -		} else { -			break;  		}  	} -	throw std::runtime_error("No match found"); +	std::fclose(file); + +	return result;  }  std::string get_encoded_offset(const std::string& line) { diff --git a/src/util/query.h b/src/util/query.h index 3a25206..1c7df36 100644 --- a/src/util/query.h +++ b/src/util/query.h @@ -1,10 +1,12 @@  #pragma once  #include <string> +#include <vector>  namespace dictzip { -std::string get_line_starting_with(const std::string& path, const std::string& substring); +std::vector<std::string> get_lines_starting_with( +	const std::string& path, const std::string& substring);  std::string get_encoded_offset(const std::string& line);  std::string get_encoded_length(const std::string& line);  | 
