Skip to content

Commit b49e041

Browse files
poteto and Boshen authored
[rust-compiler] Carry uninspected AST subtrees as raw JSON text (react#36730)
Stacked on react#36729 (upstream rejects cross-fork base branches, so this targets main as a draft; the first commit belongs to the parent PR. Review the last three commits. Will rebase and mark ready when react#36729 lands.)

Unmodeled AST subtrees (type annotations, class bodies, unknown statements) were stored as `serde_json::Value` trees: every node allocated through a `Map<String, Value>`, and pass-through subtrees were repeatedly traversed by code that never looks inside them. They are now `RawNode`, a newtype over `Box<RawValue>` holding the original JSON text verbatim.

Design notes, since two obvious alternatives fail:

- Bare `RawValue` fields break under `#[serde(tag = "type")]` enums: internally-tagged deserialization buffers content into serde's private `Content` tree, which `RawValue` cannot read from. `RawNode::deserialize` instead streams whatever deserializer it is handed through `serde_transcode` into a fresh JSON string, which works behind tagged enums, `flatten`, and `from_value` alike.
- Default-limit reparses break deep ASTs: internal `RawNode` reparse sites use `from_json_str_unbounded` (disables serde_json's 128-level recursion limit, matching how the top-level parse is configured); regression-tested with a 400-deep statement chain.

`parse_value` fails loudly on malformed text rather than masking corruption with `Value::Null`; RawNode holds valid JSON by construction.

Size-neutral in the shipped binary; the win is structural (no speculative `Value` trees on the hot path, pass-through subtrees stay untouched text). Verified on this exact tree: cargo workspace tests, both snap channels 1804/1804.

---------

Co-authored-by: Boshen <1430279+Boshen@users.noreply.github.com>
1 parent ae6fe8a commit b49e041

17 files changed

Lines changed: 437 additions & 278 deletions

File tree

compiler/Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

compiler/crates/react_compiler/src/entrypoint/program.rs

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -790,7 +790,7 @@ fn calls_hooks_or_creates_jsx_in_class_body(
790790
) -> bool {
791791
body.body
792792
.iter()
793-
.any(|member| calls_hooks_or_creates_jsx_in_json(member))
793+
.any(|member| calls_hooks_or_creates_jsx_in_json(&member.parse_value()))
794794
}
795795

796796
fn calls_hooks_or_creates_jsx_in_json(value: &serde_json::Value) -> bool {
@@ -930,11 +930,11 @@ fn calls_hooks_or_creates_jsx_in_pattern(pattern: &PatternLike) -> bool {
930930
/// Returns false for primitive type annotations that indicate this is NOT a component.
931931
fn is_valid_props_annotation(param: &PatternLike) -> bool {
932932
let type_annotation = match param {
933-
PatternLike::Identifier(id) => id.type_annotation.as_deref(),
934-
PatternLike::ObjectPattern(op) => op.type_annotation.as_deref(),
935-
PatternLike::ArrayPattern(ap) => ap.type_annotation.as_deref(),
936-
PatternLike::AssignmentPattern(ap) => ap.type_annotation.as_deref(),
937-
PatternLike::RestElement(re) => re.type_annotation.as_deref(),
933+
PatternLike::Identifier(id) => id.type_annotation.as_ref(),
934+
PatternLike::ObjectPattern(op) => op.type_annotation.as_ref(),
935+
PatternLike::ArrayPattern(ap) => ap.type_annotation.as_ref(),
936+
PatternLike::AssignmentPattern(ap) => ap.type_annotation.as_ref(),
937+
PatternLike::RestElement(re) => re.type_annotation.as_ref(),
938938
PatternLike::MemberExpression(_)
939939
| PatternLike::TSAsExpression(_)
940940
| PatternLike::TSSatisfiesExpression(_)
@@ -943,7 +943,7 @@ fn is_valid_props_annotation(param: &PatternLike) -> bool {
943943
| PatternLike::TypeCastExpression(_) => None,
944944
};
945945
let annot = match type_annotation {
946-
Some(val) => val,
946+
Some(raw) => raw.parse_value(),
947947
None => return true, // No annotation = valid
948948
};
949949
let annot_type = match annot.get("type").and_then(|v| v.as_str()) {
@@ -2181,7 +2181,9 @@ fn stmt_references_identifier_at_top_level(stmt: &Statement, name: &str) -> bool
21812181
// Unmodeled statements (e.g. `export = X`) can reference top-level
21822182
// bindings; scan the raw node for a matching Identifier so the
21832183
// gating reference-before-declaration analysis does not miss them.
2184-
Statement::Unknown(unknown) => raw_node_references_identifier(unknown.raw(), name),
2184+
Statement::Unknown(unknown) => {
2185+
raw_node_references_identifier(&unknown.raw().parse_value(), name)
2186+
}
21852187
_ => false,
21862188
}
21872189
}

compiler/crates/react_compiler_ast/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@ edition = "2024"
55

66
[dependencies]
77
serde = { version = "1", features = ["derive"] }
8-
serde_json = "1"
8+
serde_json = { version = "1", features = ["raw_value", "unbounded_depth"] }
9+
serde-transcode = "1"
910
indexmap = { version = "2", features = ["serde"] }
1011

1112
[dev-dependencies]

compiler/crates/react_compiler_ast/src/common.rs

Lines changed: 89 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,101 @@
11
use serde::Deserialize;
22
use serde::Serialize;
33

4+
/// An AST subtree the compiler does not model with typed nodes (type
5+
/// annotations, class bodies, parser extras). Wraps JSON text: serialization
6+
/// is verbatim pass-through and deserialization streams the subtree into text
7+
/// without retaining a `serde_json::Value` tree. Consumers that inspect these
8+
/// subtrees parse on demand via [`RawNode::parse_value`]; paths that do so
9+
/// repeatedly per traversal pay a parse each time, so cache the parsed Value
10+
/// at the call site if it shows up in profiles.
11+
///
12+
/// Deserialize is hand-implemented with a transcode rather than capturing a
13+
/// `RawValue` directly: most nodes sit under `#[serde(tag = "type")]` enums,
14+
/// whose content buffering breaks `RawValue`'s text-borrowing capture.
15+
#[derive(Debug, Clone, Serialize)]
16+
#[serde(transparent)]
17+
pub struct RawNode(pub Box<serde_json::value::RawValue>);
18+
19+
impl<'de> serde::Deserialize<'de> for RawNode {
20+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
21+
where
22+
D: serde::Deserializer<'de>,
23+
{
24+
let mut buf = Vec::new();
25+
let mut ser = serde_json::Serializer::new(&mut buf);
26+
serde_transcode::transcode(deserializer, &mut ser).map_err(serde::de::Error::custom)?;
27+
let text = String::from_utf8(buf).map_err(serde::de::Error::custom)?;
28+
serde_json::value::RawValue::from_string(text)
29+
.map(RawNode)
30+
.map_err(serde::de::Error::custom)
31+
}
32+
}
33+
34+
impl RawNode {
35+
pub fn from_value(value: &serde_json::Value) -> Self {
36+
RawNode(
37+
serde_json::value::RawValue::from_string(value.to_string())
38+
.expect("serde_json::Value always serializes to valid JSON"),
39+
)
40+
}
41+
42+
pub fn null() -> Self {
43+
RawNode(
44+
serde_json::value::RawValue::from_string("null".to_string())
45+
.expect("null is valid JSON"),
46+
)
47+
}
48+
49+
/// The raw JSON text of this subtree.
50+
pub fn get(&self) -> &str {
51+
self.0.get()
52+
}
53+
54+
/// Parse the subtree into a `serde_json::Value` for structural inspection.
55+
/// RawNode text is valid JSON by construction, so failure here means a
56+
/// broken invariant, not bad input; fail loudly rather than degrade.
57+
pub fn parse_value(&self) -> serde_json::Value {
58+
from_json_str_unbounded(self.0.get())
59+
.expect("RawNode holds valid JSON by construction")
60+
}
61+
62+
/// The node's `"type"` field, without parsing the whole subtree into a Value.
63+
pub fn type_name(&self) -> Option<String> {
64+
#[derive(Deserialize)]
65+
struct TypeProbe {
66+
#[serde(rename = "type")]
67+
type_name: Option<String>,
68+
}
69+
from_json_str_unbounded::<TypeProbe>(self.0.get())
70+
.ok()
71+
.and_then(|p| p.type_name)
72+
}
73+
}
74+
75+
/// Parse JSON text with serde_json's recursion limit disabled. Every internal
76+
/// reparse of [`RawNode`] text must go through this: the napi entrypoint
77+
/// deserializes arbitrarily deep ASTs with the limit disabled (on a 64MB
78+
/// stack), and the tolerant statement path's reparses must not quietly
79+
/// reintroduce the default limit.
80+
pub fn from_json_str_unbounded<'de, T: serde::Deserialize<'de>>(
81+
s: &'de str,
82+
) -> serde_json::Result<T> {
83+
let mut deserializer = serde_json::Deserializer::from_str(s);
84+
deserializer.disable_recursion_limit();
85+
T::deserialize(&mut deserializer)
86+
}
87+
488
/// Custom deserializer that distinguishes "field absent" from "field: null".
589
/// - JSON field absent → `None` (via `#[serde(default)]`)
6-
/// - JSON field `null` → `Some(Value::Null)`
7-
/// - JSON field with value → `Some(value)`
90+
/// - JSON field `null` → `Some(RawNode("null"))`
91+
/// - JSON field with value → `Some(raw value)`
892
///
993
/// Use with `#[serde(default, skip_serializing_if = "Option::is_none", deserialize_with = "nullable_value")]`
10-
pub fn nullable_value<'de, D>(deserializer: D) -> Result<Option<Box<serde_json::Value>>, D::Error>
94+
pub fn nullable_value<'de, D>(deserializer: D) -> Result<Option<RawNode>, D::Error>
1195
where
1296
D: serde::Deserializer<'de>,
1397
{
14-
let value = serde_json::Value::deserialize(deserializer)?;
15-
Ok(Some(Box::new(value)))
98+
RawNode::deserialize(deserializer).map(Some)
1699
}
17100

18101
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -75,7 +158,7 @@ pub struct BaseNode {
75158
#[serde(default, skip_serializing_if = "Option::is_none")]
76159
pub range: Option<(u32, u32)>,
77160
#[serde(default, skip_serializing_if = "Option::is_none")]
78-
pub extra: Option<serde_json::Value>,
161+
pub extra: Option<RawNode>,
79162
#[serde(
80163
default,
81164
skip_serializing_if = "Option::is_none",

0 commit comments

Comments
 (0)