diff --git a/src/http_request.cpp b/src/http_request.cpp index be532637..6d4b0658 100644 --- a/src/http_request.cpp +++ b/src/http_request.cpp @@ -39,7 +39,7 @@ struct arguments_accumulator { }; void http_request::set_method(const std::string& method) { - this->method = string_utilities::to_upper_copy(method); + this->method = method; } #ifdef HAVE_DAUTH diff --git a/src/http_utils.cpp b/src/http_utils.cpp index 138c44ef..43b522bb 100644 --- a/src/http_utils.cpp +++ b/src/http_utils.cpp @@ -218,19 +218,15 @@ std::vector http_utils::tokenize_url(const std::string& str, const } std::string http_utils::standardize_url(const std::string& url) { - std::string n_url = url; + if (url.empty()) return url; - std::string::iterator new_end = std::unique(n_url.begin(), n_url.end(), [](char a, char b) { return (a == b) && (a == '/'); }); - n_url.erase(new_end, n_url.end()); + std::string result = url; - std::string::size_type n_url_length = n_url.length(); + auto new_end = std::unique(result.begin(), result.end(), [](char a, char b) { return (a == b) && (a == '/'); }); + result.erase(new_end, result.end()); - std::string result; - - if (n_url_length > 1 && n_url[n_url_length - 1] == '/') { - result = n_url.substr(0, n_url_length - 1); - } else { - result = n_url; + if (result.length() > 1 && result.back() == '/') { + result.pop_back(); } return result; @@ -302,13 +298,19 @@ uint16_t get_port(const struct sockaddr* sa) { } } +static inline int hex_digit_value(char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + return -1; +} + size_t http_unescape(std::string* val) { if (val->empty()) return 0; unsigned int rpos = 0; unsigned int wpos = 0; - unsigned int num; unsigned int size = val->size(); while (rpos < size && (*val)[rpos] != '\0') { @@ -319,11 +321,15 @@ size_t http_unescape(std::string* val) { rpos++; break; case '%': - if (size > rpos + 2 && ((1 == sscanf(val->substr(rpos + 1, 2).c_str(), "%2x", &num)) || (1 == sscanf(val->substr(rpos + 1, 2).c_str(), "%2X", &num)))) { - (*val)[wpos] = (unsigned char) num; - wpos++; - rpos += 3; - break; + if (size > rpos + 2) { + int hi = hex_digit_value((*val)[rpos + 1]); + int lo = hex_digit_value((*val)[rpos + 2]); + if (hi >= 0 && lo >= 0) { + (*val)[wpos] = static_cast((hi << 4) | lo); + wpos++; + rpos += 3; + break; + } } // intentional fall through! default: diff --git a/src/httpserver/details/modded_request.hpp b/src/httpserver/details/modded_request.hpp index 0ab79ada..49aae1d3 100644 --- a/src/httpserver/details/modded_request.hpp +++ b/src/httpserver/details/modded_request.hpp @@ -37,8 +37,8 @@ namespace details { struct modded_request { struct MHD_PostProcessor *pp = nullptr; - std::unique_ptr complete_uri; - std::unique_ptr standardized_url; + std::string complete_uri; + std::string standardized_url; webserver* ws = nullptr; std::shared_ptr (httpserver::http_resource::*callback)(const httpserver::http_request&); diff --git a/src/httpserver/http_request.hpp b/src/httpserver/http_request.hpp index 4c1c3323..2ff2136c 100644 --- a/src/httpserver/http_request.hpp +++ b/src/httpserver/http_request.hpp @@ -93,7 +93,8 @@ class http_request { * @return a vector of strings containing all pieces **/ const std::vector get_path_pieces() const { - return http::http_utils::tokenize_url(path); + ensure_path_pieces_cached(); + return cache->path_pieces; } /** @@ -102,9 +103,9 @@ class http_request { * @return the selected piece in form of string **/ const std::string get_path_piece(int index) const { - std::vector post_path = get_path_pieces(); - if (static_cast(post_path.size()) > index) { - return post_path[index]; + ensure_path_pieces_cached(); + if (static_cast(cache->path_pieces.size()) > index) { + return cache->path_pieces[index]; } return EMPTY; } @@ -426,7 +427,11 @@ class http_request { std::string_view get_connection_value(std::string_view key, enum MHD_ValueKind kind) const; const http::header_view_map get_headerlike_values(enum MHD_ValueKind kind) const; - // Cache certain data items on demand so we can consistently return views + // http_request objects are owned by a single connection and are not + // shared across threads. Lazy caching (path_pieces, args, etc.) is + // safe without synchronization under this invariant. + + // Cache certain data items on demand so we can consistently return views // over the data. Some things we transform before returning to the user for // simplicity (e.g. query_str, requestor), others out of necessity (arg unescaping). // Others (username, password, digested_user) MHD returns as char* that we need @@ -440,10 +445,19 @@ class http_request { std::string digested_user; #endif // HAVE_DAUTH std::map, http::arg_comparator> unescaped_args; + std::vector path_pieces; bool args_populated = false; + bool path_pieces_cached = false; }; std::unique_ptr cache = std::make_unique(); + void ensure_path_pieces_cached() const { + if (!cache->path_pieces_cached) { + cache->path_pieces = http::http_utils::tokenize_url(path); + cache->path_pieces_cached = true; + } + } + // Populate the data cache unescaped_args void populate_args() const; diff --git a/src/httpserver/webserver.hpp b/src/httpserver/webserver.hpp index e4d5e313..e7dbba7b 100644 --- a/src/httpserver/webserver.hpp +++ b/src/httpserver/webserver.hpp @@ -40,11 +40,14 @@ #include #endif +#include #include #include +#include #include #include #include +#include #include #ifdef HAVE_GNUTLS @@ -188,6 +191,16 @@ class webserver { std::shared_mutex registered_resources_mutex; std::map registered_resources; std::map registered_resources_str; + std::map registered_resources_regex; + + struct route_cache_entry { + details::http_endpoint matched_endpoint; + http_resource* resource; + }; + static constexpr size_t ROUTE_CACHE_MAX_SIZE = 256; + std::mutex route_cache_mutex; + std::list> route_cache_list; + std::unordered_map>::iterator> route_cache_map; std::shared_mutex bans_mutex; std::set bans; @@ -226,6 +239,8 @@ class webserver { MHD_Result complete_request(MHD_Connection* connection, struct details::modded_request* mr, const char* version, const char* method); + void invalidate_route_cache(); + #ifdef HAVE_GNUTLS // MHD_PskServerCredentialsCallback signature static int psk_cred_handler_func(void* cls, diff --git a/src/string_utilities.cpp b/src/string_utilities.cpp index 7170ccf6..697fbf08 100644 --- a/src/string_utilities.cpp +++ b/src/string_utilities.cpp @@ -22,8 +22,8 @@ #include #include -#include #include +#include #include namespace httpserver { @@ -45,13 +45,29 @@ const std::string to_lower_copy(const std::string& str) { const std::vector string_split(const std::string& s, char sep, bool collapse) { std::vector result; + if (s.empty()) return result; - std::istringstream buf(s); - for (std::string token; getline(buf, token, sep); ) { - if ((collapse && token != "") || !collapse) { - result.push_back(token); + std::string::size_type start = 0; + std::string::size_type end; + + while ((end = s.find(sep, start)) != std::string::npos) { + std::string token = s.substr(start, end - start); + if (!collapse || !token.empty()) { + result.push_back(std::move(token)); } + start = end + 1; } + + // Handle the last token (after the final separator) + // Only add if there's content or if not collapsing + // Note: match istringstream behavior which does not emit trailing empty token + if (start < s.size()) { + std::string token = s.substr(start); + if (!collapse || !token.empty()) { + result.push_back(std::move(token)); + } + } + return result; } diff --git a/src/webserver.cpp b/src/webserver.cpp index 547eda60..7bde4fab 100644 --- a/src/webserver.cpp +++ b/src/webserver.cpp @@ -213,11 +213,20 @@ bool webserver::register_resource(const std::string& resource, http_resource* hr std::unique_lock registered_resources_lock(registered_resources_mutex); pair::iterator, bool> result = registered_resources.insert(map::value_type(idx, hrm)); - if (!family && result.second && idx.get_url_pars().empty()) { - registered_resources_str.insert(pair(idx.get_url_complete(), result.first->second)); + if (result.second) { + bool is_exact = !family && idx.get_url_pars().empty(); + if (is_exact) { + registered_resources_str.insert(pair(idx.get_url_complete(), result.first->second)); + } + if (idx.is_regex_compiled()) { + registered_resources_regex.insert(map::value_type(idx, hrm)); + } + registered_resources_lock.unlock(); + invalidate_route_cache(); + return true; } - return result.second; + return false; } bool webserver::start(bool blocking) { @@ -386,13 +395,30 @@ bool webserver::stop() { return true; } +void webserver::invalidate_route_cache() { + std::lock_guard lock(route_cache_mutex); + route_cache_list.clear(); + route_cache_map.clear(); +} + void webserver::unregister_resource(const string& resource) { // family does not matter - it just checks the url_normalized anyhow details::http_endpoint he(resource, false, true, regex_checking); std::unique_lock registered_resources_lock(registered_resources_mutex); + + // Invalidate cache while holding registered_resources_mutex to prevent + // any thread from retrieving dangling resource pointers from the cache + // after we erase from the resource maps. + { + std::lock_guard cache_lock(route_cache_mutex); + route_cache_list.clear(); + route_cache_map.clear(); + } + registered_resources.erase(he); registered_resources.erase(he.get_url_complete()); registered_resources_str.erase(he.get_url_complete()); + registered_resources_regex.erase(he); } void webserver::ban_ip(const string& ip) { @@ -509,7 +535,7 @@ void* uri_log(void* cls, const char* uri, struct MHD_Connection *con) { std::ignore = con; auto mr = std::make_unique(); - mr->complete_uri = std::make_unique(uri); + mr->complete_uri = uri; return reinterpret_cast(mr.release()); } @@ -736,41 +762,80 @@ MHD_Result webserver::finalize_answer(MHD_Connection* connection, struct details { std::shared_lock registered_resources_lock(registered_resources_mutex); if (!single_resource) { - const char* st_url = mr->standardized_url->c_str(); + const char* st_url = mr->standardized_url.c_str(); fe = registered_resources_str.find(st_url); if (fe == registered_resources_str.end()) { if (regex_checking) { - map::iterator found_endpoint; - details::http_endpoint endpoint(st_url, false, false, false); - map::iterator it; - - size_t len = 0; - size_t tot_len = 0; - for (it = registered_resources.begin(); it != registered_resources.end(); ++it) { - size_t endpoint_pieces_len = (*it).first.get_url_pieces().size(); - size_t endpoint_tot_len = (*it).first.get_url_complete().size(); - if (!found || endpoint_pieces_len > len || (endpoint_pieces_len == len && endpoint_tot_len > tot_len)) { - if ((*it).first.match(endpoint)) { - found = true; - len = endpoint_pieces_len; - tot_len = endpoint_tot_len; - found_endpoint = it; - } + // Data needed for parameter extraction after match. + // On cache hit, we copy these while holding the cache lock + // to avoid use-after-free if another thread invalidates cache. + vector matched_url_pars; + vector matched_chunks; + + // Check the LRU route cache first + { + std::lock_guard cache_lock(route_cache_mutex); + auto cache_it = route_cache_map.find(mr->standardized_url); + if (cache_it != route_cache_map.end()) { + // Cache hit — move to front of LRU list + route_cache_list.splice(route_cache_list.begin(), route_cache_list, cache_it->second); + const route_cache_entry& cached = cache_it->second->second; + matched_url_pars = cached.matched_endpoint.get_url_pars(); + matched_chunks = cached.matched_endpoint.get_chunk_positions(); + hrm = cached.resource; + found = true; } } - if (found) { - vector url_pars = found_endpoint->first.get_url_pars(); + if (!found) { + // Cache miss — perform regex scan + map::iterator found_endpoint; + + size_t len = 0; + size_t tot_len = 0; + for (auto it = registered_resources_regex.begin(); it != registered_resources_regex.end(); ++it) { + size_t endpoint_pieces_len = it->first.get_url_pieces().size(); + size_t endpoint_tot_len = it->first.get_url_complete().size(); + if (!found || endpoint_pieces_len > len || (endpoint_pieces_len == len && endpoint_tot_len > tot_len)) { + if (it->first.match(endpoint)) { + found = true; + len = endpoint_pieces_len; + tot_len = endpoint_tot_len; + found_endpoint = it; + } + } + } - vector url_pieces = endpoint.get_url_pieces(); - vector chunks = found_endpoint->first.get_chunk_positions(); - for (unsigned int i = 0; i < url_pars.size(); i++) { - mr->dhr->set_arg(url_pars[i], url_pieces[chunks[i]]); + if (found) { + // Safe to reference: registered_resources_mutex (shared) is still held + matched_url_pars = found_endpoint->first.get_url_pars(); + matched_chunks = found_endpoint->first.get_chunk_positions(); + hrm = found_endpoint->second; + + // Store in LRU cache + { + std::lock_guard cache_lock(route_cache_mutex); + route_cache_list.emplace_front(mr->standardized_url, route_cache_entry{found_endpoint->first, hrm}); + route_cache_map[mr->standardized_url] = route_cache_list.begin(); + + if (route_cache_map.size() > ROUTE_CACHE_MAX_SIZE) { + route_cache_map.erase(route_cache_list.back().first); + route_cache_list.pop_back(); + } + } } + } - hrm = found_endpoint->second; + // Extract URL parameters from matched endpoint + if (found) { + const auto& url_pieces = endpoint.get_url_pieces(); + for (unsigned int i = 0; i < matched_url_pars.size(); i++) { + if (matched_chunks[i] >= 0 && static_cast(matched_chunks[i]) < url_pieces.size()) { + mr->dhr->set_arg(matched_url_pars[i], url_pieces[matched_chunks[i]]); + } + } } } } else { @@ -857,7 +922,7 @@ MHD_Result webserver::finalize_answer(MHD_Connection* connection, struct details MHD_Result webserver::complete_request(MHD_Connection* connection, struct details::modded_request* mr, const char* version, const char* method) { mr->ws = this; - mr->dhr->set_path(mr->standardized_url->c_str()); + mr->dhr->set_path(mr->standardized_url.c_str()); mr->dhr->set_method(method); mr->dhr->set_version(version); @@ -882,33 +947,34 @@ MHD_Result webserver::answer_to_connection(void* cls, MHD_Connection* connection std::string t_url = url; base_unescaper(&t_url, static_cast(cls)->unescaper); - mr->standardized_url = std::make_unique(http_utils::standardize_url(t_url)); + mr->standardized_url = http_utils::standardize_url(t_url); mr->has_body = false; - access_log(static_cast(cls), *(mr->complete_uri) + " METHOD: " + method); + access_log(static_cast(cls), mr->complete_uri + " METHOD: " + method); - if (0 == strcasecmp(method, http_utils::http_method_get)) { + // Case-sensitive per RFC 7230 §3.1.1: HTTP method is case-sensitive. + if (0 == strcmp(method, http_utils::http_method_get)) { mr->callback = &http_resource::render_GET; } else if (0 == strcmp(method, http_utils::http_method_post)) { mr->callback = &http_resource::render_POST; mr->has_body = true; - } else if (0 == strcasecmp(method, http_utils::http_method_put)) { + } else if (0 == strcmp(method, http_utils::http_method_put)) { mr->callback = &http_resource::render_PUT; mr->has_body = true; - } else if (0 == strcasecmp(method, http_utils::http_method_delete)) { + } else if (0 == strcmp(method, http_utils::http_method_delete)) { mr->callback = &http_resource::render_DELETE; mr->has_body = true; - } else if (0 == strcasecmp(method, http_utils::http_method_patch)) { + } else if (0 == strcmp(method, http_utils::http_method_patch)) { mr->callback = &http_resource::render_PATCH; mr->has_body = true; - } else if (0 == strcasecmp(method, http_utils::http_method_head)) { + } else if (0 == strcmp(method, http_utils::http_method_head)) { mr->callback = &http_resource::render_HEAD; - } else if (0 ==strcasecmp(method, http_utils::http_method_connect)) { + } else if (0 == strcmp(method, http_utils::http_method_connect)) { mr->callback = &http_resource::render_CONNECT; - } else if (0 == strcasecmp(method, http_utils::http_method_trace)) { + } else if (0 == strcmp(method, http_utils::http_method_trace)) { mr->callback = &http_resource::render_TRACE; - } else if (0 ==strcasecmp(method, http_utils::http_method_options)) { + } else if (0 == strcmp(method, http_utils::http_method_options)) { mr->callback = &http_resource::render_OPTIONS; }