Skip to content

Commit 17bfe16

Browse files
committed
Merge TASK-027 majors+minors (37/59 resolved, 22 deferred)
2 parents 47e926d + 3801286 commit 17bfe16

13 files changed

Lines changed: 319 additions & 138 deletions

specs/unworked_review_issues/2026-05-10_230348_task-027.md

Lines changed: 95 additions & 36 deletions
Large diffs are not rendered by default.

src/detail/webserver_dispatch.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ void webserver_impl::invalidate_route_cache() {
9898
// standardized_url only; v2's is keyed on (method, path). A
9999
// registration may invalidate both, so clear both atomically.
100100
{
101-
std::lock_guard<std::mutex> lock(route_cache_mutex);
101+
std::lock_guard<std::mutex> lock(route_cache_mutex_);
102102
route_cache_list.clear();
103103
route_cache_map.clear();
104104
}
@@ -202,11 +202,14 @@ webserver_impl::lookup_v2(http_method method, const std::string& path) {
202202
}
203203
} // table_lock released.
204204

205-
// Step 3: install into cache (cache mutex only).
205+
// Step 3: install into cache (cache mutex only). Move result.entry and
206+
// result.captured_params into the cache_value to avoid a second copy
207+
// of the shared_ptr ref-count bump and captured vector on the miss path.
208+
// result is not used after this point.
206209
if (result.found) {
207210
cache_value v;
208-
v.entry = result.entry;
209-
v.captured_params = result.captured_params;
211+
v.entry = std::move(result.entry);
212+
v.captured_params = std::move(result.captured_params);
210213
route_cache_v2.insert(key, std::move(v));
211214
}
212215

@@ -219,7 +222,7 @@ namespace detail {
219222

220223
std::optional<webserver_impl::regex_route_lookup>
221224
webserver_impl::lookup_route_cache(const std::string& key) {
222-
std::lock_guard<std::mutex> cache_lock(route_cache_mutex);
225+
std::lock_guard<std::mutex> cache_lock(route_cache_mutex_);
223226
auto cache_it = route_cache_map.find(key);
224227
if (cache_it == route_cache_map.end()) {
225228
return std::nullopt;
@@ -265,7 +268,7 @@ webserver_impl::scan_regex_routes(const detail::http_endpoint& target) {
265268
void webserver_impl::store_route_cache(const std::string& key,
266269
const detail::http_endpoint& matched,
267270
std::shared_ptr<http_resource> hrm) {
268-
std::lock_guard<std::mutex> cache_lock(route_cache_mutex);
271+
std::lock_guard<std::mutex> cache_lock(route_cache_mutex_);
269272
route_cache_list.emplace_front(key, route_cache_entry{matched, std::move(hrm)});
270273
route_cache_map[key] = route_cache_list.begin();
271274
if (route_cache_map.size() > ROUTE_CACHE_MAX_SIZE) {

src/detail/webserver_register.cpp

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -204,28 +204,24 @@ void webserver::register_resource(const std::string& resource,
204204
// 1. Validates the handler, the method set (non-empty, no count_
205205
void webserver::unregister_impl_(const string& resource, bool family) {
206206
detail::http_endpoint he(resource, family, true, regex_checking);
207-
std::unique_lock registered_resources_lock(impl_->registered_resources_mutex);
208207

209208
{
210-
std::lock_guard<std::mutex> cache_lock(impl_->route_cache_mutex);
211-
impl_->route_cache_list.clear();
212-
impl_->route_cache_map.clear();
213-
}
214-
215-
impl_->registered_resources.erase(he);
216-
impl_->registered_resources_regex.erase(he);
217-
// The string-keyed fast-path map only ever holds exact (non-family)
218-
// entries (see register_impl_). A prefix unregister has nothing to
219-
// do here.
220-
if (!family) {
221-
impl_->registered_resources_str.erase(he.get_url_complete());
209+
std::unique_lock registered_resources_lock(impl_->registered_resources_mutex);
210+
impl_->registered_resources.erase(he);
211+
impl_->registered_resources_regex.erase(he);
212+
// The string-keyed fast-path map only ever holds exact (non-family)
213+
// entries (see register_impl_). A prefix unregister has nothing to
214+
// do here.
215+
if (!family) {
216+
impl_->registered_resources_str.erase(he.get_url_complete());
217+
}
222218
}
223219

224220
// TASK-027: mirror the erasure into the v2 3-tier table. Lock order:
225-
// we already hold registered_resources_lock (v1 table); take
226-
// route_table_mutex_ next, then route_cache_mutex_ via the
227-
// route_cache::clear() helper. The discipline (table BEFORE cache)
228-
// is consistent with register_impl_ and the documented invariant.
221+
// registered_resources_mutex released above; take route_table_mutex_
222+
// next, then invalidate_route_cache() takes route_cache_mutex_.
223+
// This matches the pattern used in register_impl_ / on_methods_:
224+
// table lock released before cache is cleared.
229225
{
230226
std::unique_lock table_lock(impl_->route_table_mutex_);
231227
const std::string& key = he.get_url_complete();
@@ -245,7 +241,7 @@ void webserver::unregister_impl_(const string& resource, bool family) {
245241
impl_->regex_routes_.end());
246242
}
247243
}
248-
impl_->route_cache_v2.clear();
244+
impl_->invalidate_route_cache();
249245
}
250246

251247
void webserver::unregister_path(const string& path) {
@@ -261,24 +257,19 @@ void webserver::unregister_resource(const string& resource) {
261257
detail::http_endpoint he_exact(resource, /*family=*/false, true, regex_checking);
262258
detail::http_endpoint he_prefix(resource, /*family=*/true, true, regex_checking);
263259

264-
// Hold a single write-lock across both erasures so no request thread can
265-
// observe a partially-unregistered state (CWE-367 TOCTOU fix: the exact
266-
// entry and the prefix entry are removed atomically).
267-
std::unique_lock registered_resources_lock(impl_->registered_resources_mutex);
268-
269260
{
270-
std::lock_guard<std::mutex> cache_lock(impl_->route_cache_mutex);
271-
impl_->route_cache_list.clear();
272-
impl_->route_cache_map.clear();
261+
// Hold a single write-lock across both v1 erasures so no request thread
262+
// can observe a partially-unregistered state (CWE-367 TOCTOU fix: the
263+
// exact entry and the prefix entry are removed atomically).
264+
std::unique_lock registered_resources_lock(impl_->registered_resources_mutex);
265+
impl_->registered_resources.erase(he_exact);
266+
impl_->registered_resources.erase(he_prefix);
267+
impl_->registered_resources_regex.erase(he_exact);
268+
impl_->registered_resources_regex.erase(he_prefix);
269+
// The string-keyed fast-path map only holds exact (non-family) entries.
270+
impl_->registered_resources_str.erase(he_exact.get_url_complete());
273271
}
274272

275-
impl_->registered_resources.erase(he_exact);
276-
impl_->registered_resources.erase(he_prefix);
277-
impl_->registered_resources_regex.erase(he_exact);
278-
impl_->registered_resources_regex.erase(he_prefix);
279-
// The string-keyed fast-path map only holds exact (non-family) entries.
280-
impl_->registered_resources_str.erase(he_exact.get_url_complete());
281-
282273
// TASK-027: mirror into the v2 3-tier table. Erase under both
283274
// classifications so a prior register_path AND register_prefix on
284275
// the same path are both cleared atomically.
@@ -297,7 +288,10 @@ void webserver::unregister_resource(const string& resource) {
297288
}),
298289
impl_->regex_routes_.end());
299290
}
300-
impl_->route_cache_v2.clear();
291+
// Delegate cache clearing to invalidate_route_cache() matching the
292+
// pattern used by register_impl_ and on_methods_ (table lock released
293+
// before cache is cleared).
294+
impl_->invalidate_route_cache();
301295
}
302296

303297
// TASK-029: The v2.0 public IP-control API is the pair block_ip / unblock_ip.

src/detail/webserver_request.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ using httpserver::http::ip_representation;
9393
using httpserver::http::base_unescaper;
9494

9595

96+
namespace detail {
97+
9698
namespace {
9799

98100
// Apply one path segment to the running stack: "" / "." are skipped,
@@ -108,9 +110,13 @@ void apply_normalized_segment(std::vector<std::string>& segments,
108110
segments.push_back(seg);
109111
}
110112

111-
} // namespace
112-
113-
static std::string normalize_path(const std::string& path) {
113+
// NOTE: the caller (should_skip_auth) must receive an already-unescaped
114+
// path (i.e., no %XX sequences remain). MHD's unescaper_func runs before
115+
// should_skip_auth, so this invariant is satisfied on the dispatch path.
116+
// Double slashes (//) and trailing slashes are collapsed automatically:
117+
// apply_normalized_segment skips empty segment strings, so consecutive
118+
// '/' separators produce zero segments (same as a single '/').
119+
std::string normalize_path(const std::string& path) {
114120
std::vector<std::string> segments;
115121
std::string::size_type start = 0;
116122
if (!path.empty() && path[0] == '/') start = 1;
@@ -128,7 +134,7 @@ static std::string normalize_path(const std::string& path) {
128134
return normalized;
129135
}
130136

131-
namespace detail {
137+
} // namespace
132138

133139
bool webserver_impl::should_skip_auth(const std::string& path) const {
134140
std::string normalized = normalize_path(path);

src/detail/webserver_routes.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,9 @@ void webserver_impl::insert_fresh_v2_entry(const detail::http_endpoint& idx,
209209
auto tier = classify_route_tier(idx);
210210
switch (tier.kind) {
211211
case route_tier_kind::radix:
212-
// Unreachable: callers route url_pars-non-empty through
213-
// upsert_v2_param_route, never here.
212+
// Unreachable: upsert_v2_table_entry routes url_pars-non-empty paths
213+
// through upsert_v2_param_route before calling insert_fresh_v2_entry.
214+
__builtin_unreachable();
214215
break;
215216
case route_tier_kind::exact: {
216217
detail::route_entry entry;

src/httpserver/detail/radix_tree.hpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@
2020

2121
// TASK-027: segment-trie for the parameterized + prefix route tier.
2222
//
23-
// The architecture spec (§4.7) commits only to the OUTER shape (three-tier
24-
// + cache); the radix-tree implementation choice is intentionally left
25-
// open. A segment trie is sufficient for the 9-method / N-segment
26-
// registration shape libhttpserver supports and avoids dragging in a
27-
// vendored library (which would conflict with the project's tightly
28-
// curated source tree and LGPL-2.1 distribution).
23+
// A segment trie avoids dragging in a vendored library (which would
24+
// conflict with the project's tightly curated source tree and
25+
// LGPL-2.1 distribution). The architecture spec (§4.7) commits only
26+
// to the outer three-tier + cache shape; the implementation choice
27+
// is intentionally left open.
2928
//
3029
// Internal header — only reachable when compiling libhttpserver.
3130
#if !defined(HTTPSERVER_COMPILATION)
@@ -49,28 +48,19 @@
4948
namespace httpserver {
5049
namespace detail {
5150

52-
// radix_match: result type of radix_tree<T>::find. `entry` is a non-owning
53-
// pointer into the tree; valid until the next mutation. `captures` lists
54-
// (parameter-name, captured-value) pairs in the order the wildcards
55-
// appear along the matched path. `is_prefix_match` is true iff the match
56-
// came from a `is_prefix=true` registration that did not consume every
57-
// remaining request segment.
51+
// radix_match: result type of radix_tree<T>::find.
52+
// `entry` is a non-owning pointer into the tree; valid until the next mutation.
5853
template <typename T>
5954
struct radix_match {
6055
const T* entry = nullptr;
6156
std::vector<std::pair<std::string, std::string>> captures;
6257
bool is_prefix_match = false;
6358
};
6459

65-
// Single trie node. Children are split into:
66-
// - `children_`: keyed by the literal segment string (exact match).
67-
// - `wildcard_child_`: optional single child consuming any one segment.
68-
//
69-
// Each node may carry an `exact_terminus_` (registration with is_prefix=false
70-
// that ends here) and/or a `prefix_terminus_` (is_prefix=true). The two
71-
// are kept separately because a prefix and an exact registration may both
72-
// terminate at the same node (e.g. /static prefix + /static exact would be
73-
// a user error caught at registration time, but the storage allows it).
60+
// Single trie node. `wildcard_child_` is a single optional pointer (not
61+
// another map entry) because there can be at most one unnamed wildcard per
62+
// path level by design — two {name} siblings at the same depth are
63+
// ambiguous and rejected at registration time.
7464
template <typename T>
7565
struct radix_node {
7666
std::unordered_map<std::string, std::unique_ptr<radix_node>> children_;
@@ -221,6 +211,9 @@ class radix_tree {
221211

222212
// Remove the entry at `path`. is_prefix selects which terminus to
223213
// clear. Returns true iff a terminus was actually cleared.
214+
// NOTE: unlike find(), where descent uses the concrete request-path
215+
// segment value (e.g. "42"), remove() receives the registered pattern
216+
// (e.g. "{id}") and matches wildcard nodes by the placeholder shape.
224217
bool remove(std::string_view path, bool is_prefix) {
225218
radix_node<T>* node = root_.get();
226219
const auto segments = tokenize(path);

src/httpserver/detail/route_cache.hpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <functional>
4646
#include <list>
4747
#include <mutex>
48+
#include <stdexcept>
4849
#include <string>
4950
#include <string_view>
5051
#include <unordered_map>
@@ -69,11 +70,13 @@ struct cache_key {
6970
};
7071

7172
struct cache_key_hash {
73+
// Golden-ratio mix constant: reduces hash clustering vs. plain XOR.
74+
static constexpr std::size_t kHashMix = 0x9e3779b97f4a7c15ULL;
75+
7276
std::size_t operator()(const cache_key& k) const noexcept {
7377
std::size_t h1 = std::hash<std::string>{}(k.path);
7478
std::size_t h2 = static_cast<std::size_t>(k.method);
75-
// boost::hash_combine constant.
76-
return h1 ^ (h2 + 0x9e3779b97f4a7c15ULL + (h1 << 6) + (h1 >> 2));
79+
return h1 ^ (h2 + kHashMix + (h1 << 6) + (h1 >> 2));
7780
}
7881
};
7982

@@ -97,7 +100,12 @@ struct cache_value {
97100
class route_cache {
98101
public:
99102
explicit route_cache(std::size_t max_entries = 256)
100-
: max_entries_(max_entries) {}
103+
: max_entries_(max_entries) {
104+
if (max_entries == 0) {
105+
throw std::invalid_argument(
106+
"route_cache max_entries must be > 0");
107+
}
108+
}
101109

102110
// Find by key; returns true on hit and copies the value into `out`.
103111
// Promotes the hit to the front of the LRU list as a side effect.
@@ -124,7 +132,7 @@ class route_cache {
124132
// Compute the same hash as cache_key_hash without owning `path`.
125133
std::size_t h1 = std::hash<std::string_view>{}(path);
126134
std::size_t h2 = static_cast<std::size_t>(method);
127-
std::size_t bucket_hash = h1 ^ (h2 + 0x9e3779b97f4a7c15ULL
135+
std::size_t bucket_hash = h1 ^ (h2 + cache_key_hash::kHashMix
128136
+ (h1 << 6) + (h1 >> 2));
129137
std::lock_guard<std::mutex> lock(mutex_);
130138
// Walk the bucket manually via equal_range on the raw bucket index.
@@ -163,13 +171,15 @@ class route_cache {
163171
}
164172
}
165173

166-
void clear() noexcept {
174+
// Note: not noexcept — std::lock_guard can throw std::system_error
175+
// (though in practice it does not on POSIX under normal conditions).
176+
void clear() {
167177
std::lock_guard<std::mutex> lock(mutex_);
168178
map_.clear();
169179
list_.clear();
170180
}
171181

172-
std::size_t size() const noexcept {
182+
std::size_t size() const {
173183
std::lock_guard<std::mutex> lock(mutex_);
174184
return map_.size();
175185
}

src/httpserver/detail/route_tier.hpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <optional>
3131
// Disabling lint error on regex (the only reason it errors is because the Chromium team prefers google/re2)
3232
#include <regex> // NOLINT [build/c++11]
33+
#include <stdexcept>
3334

3435
#include "httpserver/detail/http_endpoint.hpp"
3536

@@ -78,8 +79,18 @@ inline route_tier_result classify_route_tier(const detail::http_endpoint& idx) {
7879
// pattern is trivially ^/literal$ and the exact hash tier is
7980
// faster and correct. Otherwise the path has meaningful regex
8081
// metacharacters and belongs in the regex tier.
81-
std::regex re(idx.get_url_normalized(),
82-
std::regex::extended | std::regex::icase);
82+
// Guard std::regex construction (CWE-248): a malformed pattern
83+
// throws std::regex_error. Convert to std::invalid_argument so
84+
// callers get a catchable typed exception at registration time.
85+
std::regex re;
86+
try {
87+
re = std::regex(idx.get_url_normalized(),
88+
std::regex::extended | std::regex::icase);
89+
} catch (const std::regex_error& e) {
90+
throw std::invalid_argument(
91+
std::string("invalid regex route pattern '")
92+
+ idx.get_url_normalized() + "': " + e.what());
93+
}
8394
if (std::regex_match(idx.get_url_complete(), re)) {
8495
res.kind = route_tier_kind::exact;
8596
} else {

0 commit comments

Comments
 (0)