diff --git a/Cargo.lock b/Cargo.lock index 36bc1182..8bc98f2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -886,7 +886,7 @@ dependencies = [ "quote", "regex", "rustc-hash 1.1.0", - "shlex", + "shlex 1.3.0", "syn 2.0.111", "which 4.4.2", ] @@ -1057,7 +1057,7 @@ dependencies = [ "serde_json", "serde_yaml", "serial_test", - "shlex", + "shlex 1.3.0", "tar", "temp-env", "tempfile", @@ -1090,7 +1090,6 @@ dependencies = [ "colored", "csv", "dashmap", - "devgen-tree-sitter-swift", "docx-rs", "etcetera 0.11.0", "flate2", @@ -1122,6 +1121,7 @@ dependencies = [ "serial_test", "sha2 0.10.9", "shellexpand", + "streaming-iterator", "sysinfo 0.32.1", "tar", "temp-env", @@ -1134,13 +1134,19 @@ dependencies = [ "tracing-appender", "tracing-subscriber", "tree-sitter", + "tree-sitter-c", + "tree-sitter-cpp", "tree-sitter-go", "tree-sitter-java", "tree-sitter-javascript", - "tree-sitter-kotlin", + "tree-sitter-julia", + "tree-sitter-kotlin-ng", + "tree-sitter-matlab", "tree-sitter-python", + "tree-sitter-r", "tree-sitter-ruby", "tree-sitter-rust", + "tree-sitter-swift", "umya-spreadsheet", "url", "utoipa", @@ -1694,13 +1700,14 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.106" +version = "1.2.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" dependencies = [ + "find-msvc-tools", "jobserver", "libc", - "once_cell", + "shlex 2.0.1", ] [[package]] @@ -2669,16 +2676,6 @@ version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" -[[package]] -name = "devgen-tree-sitter-swift" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55c23625b6874f93a85934eb8fe4804a87d0a7d38ff1b74fc3d7ab4a06bd92ae" -dependencies = [ - "cc", - "tree-sitter", -] - [[package]] name 
= "diatomic-waker" version = "0.2.3" @@ -3127,6 +3124,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -7487,6 +7490,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + [[package]] name = "signal-hook" version = "0.3.18" @@ -7884,6 +7893,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "string_cache" version = "0.9.0" @@ -8784,82 +8799,152 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.21.0" +version = "0.26.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "705bf7c0958d0171dd7d3a6542f2f4f21d87ed5f1dc8db52919d3a6bed9a359a" +checksum = "4dab76d0b724ba557954125188cf0633a1ca43199ced82d95c7b9c32cc3de1f3" dependencies = [ "cc", "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9b2eb57a55fed6b00812912e730b7a275cf4fe98bfd6a5d76263d4438371728" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", ] [[package]] name = "tree-sitter-go" -version = "0.21.2" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-java" -version = "0.21.0" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-javascript" -version = "0.21.4" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8710a71bc6779e33811a8067bdda3ed08bed1733296ff915e44faf60f8c533d7" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] -name = "tree-sitter-kotlin" -version = "0.3.8" +name = "tree-sitter-julia" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54ff60aeb036f5762515ceb31404512ea4f9599764bcd3857074bb82867bdd34" +checksum = "4144731a178812ee867619b1e98b3b91e54c1652304b26e5ebe3175b701de323" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-ng" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-matlab" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8d8831a78547f54860dd8467166b1ab0c466d03d43c4bb447af55c024281f7" +dependencies = [ + "cc", + "tree-sitter-language", ] [[package]] name = "tree-sitter-python" -version = "0.21.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4066c6cf678f962f8c2c4561f205945c84834cce73d981e71392624fdc390a9" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-r" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "429133cbda9f8a46e03ef3aae6abb6c3d22875f8585cad472138101bfd517255" +dependencies = [ + "cc", + "tree-sitter-language", ] [[package]] name = "tree-sitter-ruby" -version = "0.21.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0031f687c0772f2dad7b77104c43428611099a1804c81244ada21560f41f0b1" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-rust" -version = "0.21.2" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" +checksum = "439e577dbe07423ec2582ac62c7531120dbfccfa6e5f92406f93dd271a120e45" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-swift" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe36052155b9dd69ca82b3b8f1b4ccfb2d867125ac1a4db1dd7331829242668c" +dependencies = [ + "cc", + 
"tree-sitter-language", ] [[package]] diff --git a/crates/biorouter-cli/src/scenario_tests/provider_configs.rs b/crates/biorouter-cli/src/scenario_tests/provider_configs.rs index 21388c19..ca3d704c 100644 --- a/crates/biorouter-cli/src/scenario_tests/provider_configs.rs +++ b/crates/biorouter-cli/src/scenario_tests/provider_configs.rs @@ -97,6 +97,8 @@ static PROVIDER_CONFIGS: LazyLock> = LazyLock::new(|| { ProviderConfig::simple_skip("snowflake", "claude-3-7-sonnet", Some("No keys available")), ProviderConfig::simple_skip("venice", "llama-3.3-70b", Some("No keys available")), ProviderConfig::simple_skip("xai", "grok-3", Some("No keys available")), + ProviderConfig::simple_skip("zai", "glm-4.6", Some("No keys available")), + ProviderConfig::simple_skip("xiaomi_mimo", "mimo-v2.5", Some("No keys available")), ] }); diff --git a/crates/biorouter-mcp/Cargo.toml b/crates/biorouter-mcp/Cargo.toml index 5c2b05d7..d3a7794d 100644 --- a/crates/biorouter-mcp/Cargo.toml +++ b/crates/biorouter-mcp/Cargo.toml @@ -47,15 +47,21 @@ umya-spreadsheet = "2.2.3" utoipa = { version = "4.1", features = ["chrono"], optional = true } which = {workspace = true} lru = "0.16" -tree-sitter = "0.21" -tree-sitter-python = "0.21" -tree-sitter-rust = "0.21" -tree-sitter-javascript = "0.21" -tree-sitter-go = "0.21" -tree-sitter-java = "0.21" -tree-sitter-kotlin = "0.3.8" -devgen-tree-sitter-swift = "0.21.0" -tree-sitter-ruby = "0.21.0" +tree-sitter = "0.26" +streaming-iterator = "0.1" +tree-sitter-python = "0.25" +tree-sitter-rust = "0.24" +tree-sitter-javascript = "0.25" +tree-sitter-go = "0.25" +tree-sitter-java = "0.23" +tree-sitter-kotlin-ng = "1.1" +tree-sitter-swift = "0.7" +tree-sitter-ruby = "0.23" +tree-sitter-cpp = "0.23" +tree-sitter-c = "0.24" +tree-sitter-r = "1.2" +tree-sitter-julia = "0.23" +tree-sitter-matlab = "1.3" rayon = "1.10" libc = "0.2" # TODO: Fork mpatch or replace with a custom implementation using `similar` crate diff --git 
a/crates/biorouter-mcp/src/developer/analyze/languages/c.rs b/crates/biorouter-mcp/src/developer/analyze/languages/c.rs new file mode 100644 index 00000000..102a20fa --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/languages/c.rs @@ -0,0 +1,59 @@ +/// Tree-sitter query for extracting C code elements +/// +/// Targets the `tree-sitter-c` grammar. Like C++, the function name is nested +/// inside a `function_declarator`, so caller attribution uses +/// [`extract_function_name_for_kind`]. +pub const ELEMENT_QUERY: &str = r#" + ; Function definitions: int add(int a, int b) {} + (function_definition + declarator: (function_declarator + declarator: (identifier) @func)) + + ; Structs / unions / enums + (struct_specifier name: (type_identifier) @struct) + (union_specifier name: (type_identifier) @struct) + (enum_specifier name: (type_identifier) @struct) + + ; Includes + (preproc_include) @import +"#; + +/// Tree-sitter query for extracting C function calls +pub const CALL_QUERY: &str = r#" + ; Function calls: add(1, 2) + (call_expression + function: (identifier) @function.call) + + ; Member calls via function pointer fields: obj.fn() + (call_expression + function: (field_expression + field: (field_identifier) @method.call)) +"#; + +/// Recursively descend through declarator wrappers (pointer/function) to find +/// the function name identifier. +fn declarator_name(node: &tree_sitter::Node, source: &str) -> Option { + match node.kind() { + "identifier" | "field_identifier" | "type_identifier" => { + source.get(node.byte_range()).map(|s| s.to_string()) + } + _ => { + if let Some(inner) = node.child_by_field_name("declarator") { + return declarator_name(&inner, source); + } + (0..node.child_count()) + .filter_map(|i| node.child(i as u32)) + .find_map(|c| declarator_name(&c, source)) + } + } +} + +/// Extract the function name from a `function_definition` node. 
+pub fn extract_function_name_for_kind( + node: &tree_sitter::Node, + source: &str, + _kind: &str, +) -> Option { + let declarator = node.child_by_field_name("declarator")?; + declarator_name(&declarator, source) +} diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/cpp.rs b/crates/biorouter-mcp/src/developer/analyze/languages/cpp.rs new file mode 100644 index 00000000..5ea249d3 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/languages/cpp.rs @@ -0,0 +1,77 @@ +/// Tree-sitter query for extracting C++ code elements +/// +/// Targets the `tree-sitter-cpp` grammar. Function names live nested inside a +/// `function_declarator` rather than as a direct child of `function_definition`, +/// so caller attribution uses [`extract_function_name_for_kind`]. +pub const ELEMENT_QUERY: &str = r#" + ; Free functions and out-of-line method definitions: void Foo::m() {} + (function_definition + declarator: (function_declarator + declarator: (identifier) @func)) + (function_definition + declarator: (function_declarator + declarator: (field_identifier) @func)) + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (identifier) @func))) + + ; Method declarations inside a class/struct body + (field_declaration + declarator: (function_declarator + declarator: (field_identifier) @func)) + + ; Classes and structs + (class_specifier name: (type_identifier) @class) + (struct_specifier name: (type_identifier) @struct) + + ; Includes + (preproc_include) @import +"#; + +/// Tree-sitter query for extracting C++ function calls +pub const CALL_QUERY: &str = r#" + ; Free function calls: g() + (call_expression + function: (identifier) @function.call) + + ; Method calls: obj.h() + (call_expression + function: (field_expression + field: (field_identifier) @method.call)) + + ; Scoped calls: ns::k() + (call_expression + function: (qualified_identifier + name: (identifier) @scoped.call)) +"#; + +/// Recursively descend through 
declarator wrappers (pointer/reference/function) +/// to find the actual function name identifier. +fn declarator_name(node: &tree_sitter::Node, source: &str) -> Option { + match node.kind() { + "identifier" | "field_identifier" | "type_identifier" | "destructor_name" + | "operator_name" => source.get(node.byte_range()).map(|s| s.to_string()), + "qualified_identifier" => node + .child_by_field_name("name") + .and_then(|n| declarator_name(&n, source)), + _ => { + if let Some(inner) = node.child_by_field_name("declarator") { + return declarator_name(&inner, source); + } + (0..node.child_count()) + .filter_map(|i| node.child(i as u32)) + .find_map(|c| declarator_name(&c, source)) + } + } +} + +/// Extract the function name from a `function_definition` node. +pub fn extract_function_name_for_kind( + node: &tree_sitter::Node, + source: &str, + _kind: &str, +) -> Option { + let declarator = node.child_by_field_name("declarator")?; + declarator_name(&declarator, source) +} diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/go.rs b/crates/biorouter-mcp/src/developer/analyze/languages/go.rs index 9c1ec283..336c9ef7 100644 --- a/crates/biorouter-mcp/src/developer/analyze/languages/go.rs +++ b/crates/biorouter-mcp/src/developer/analyze/languages/go.rs @@ -85,7 +85,7 @@ pub fn find_method_for_receiver( while let Some(parent) = current.parent() { if parent.kind() == "method_declaration" { for i in 0..parent.child_count() { - if let Some(child) = parent.child(i) { + if let Some(child) = parent.child(i as u32) { if child.kind() == "field_identifier" { return source.get(child.byte_range()).map(|s| s.to_string()); } diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/julia.rs b/crates/biorouter-mcp/src/developer/analyze/languages/julia.rs new file mode 100644 index 00000000..60aa9728 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/languages/julia.rs @@ -0,0 +1,59 @@ +/// Tree-sitter query for extracting Julia code elements +/// +/// 
Targets the `tree-sitter-julia` grammar. Long-form function names are nested +/// in `function_definition -> signature -> call_expression -> identifier`; +/// short-form definitions (`h(x) = ...`) are assignments whose first child is a +/// call. Caller attribution uses [`extract_function_name_for_kind`]. +pub const ELEMENT_QUERY: &str = r#" + ; Long-form: function f(x) ... end + (function_definition + (signature + (call_expression + (identifier) @func))) + + ; Short-form: h(x) = x + 1 (call must be the FIRST child = the lhs) + (assignment + . + (call_expression + (identifier) @func)) + + ; Macros: macro m(x) ... end + (macro_definition + (signature + (call_expression + (identifier) @func))) + + ; Types + (struct_definition (type_head (identifier) @struct)) + (abstract_definition (type_head (identifier) @struct)) + (module_definition name: (identifier) @class) + + ; Imports + (using_statement (identifier) @import) + (import_statement (identifier) @import) +"#; + +/// Tree-sitter query for extracting Julia function calls +pub const CALL_QUERY: &str = r#" + (call_expression + (identifier) @function.call) +"#; + +/// Find the first direct child of `node` whose kind equals `kind` (no recursion; `node` itself is not tested). +fn child_of_kind<'a>(node: &tree_sitter::Node<'a>, kind: &str) -> Option> { + (0..node.child_count()) + .filter_map(|i| node.child(i as u32)) + .find(|c| c.kind() == kind) +} + +/// Extract the function name from a Julia `function_definition` node. 
+pub fn extract_function_name_for_kind( + node: &tree_sitter::Node, + source: &str, + _kind: &str, +) -> Option { + let signature = child_of_kind(node, "signature")?; + let call = child_of_kind(&signature, "call_expression")?; + let ident = child_of_kind(&call, "identifier")?; + source.get(ident.byte_range()).map(|s| s.to_string()) +} diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/kotlin.rs b/crates/biorouter-mcp/src/developer/analyze/languages/kotlin.rs index 5182fe89..4db1c396 100644 --- a/crates/biorouter-mcp/src/developer/analyze/languages/kotlin.rs +++ b/crates/biorouter-mcp/src/developer/analyze/languages/kotlin.rs @@ -1,27 +1,31 @@ /// Tree-sitter query for extracting Kotlin code elements +/// +/// Targets the `tree-sitter-kotlin-ng` grammar, whose node kinds differ from the +/// older `tree-sitter-kotlin`: identifiers are `identifier` (not `simple_identifier` +/// / `type_identifier`) and imports are `import` (not `import_header`). pub const ELEMENT_QUERY: &str = r#" ; Functions - (function_declaration (simple_identifier) @func) + (function_declaration name: (identifier) @func) - ; Classes - (class_declaration (type_identifier) @class) + ; Classes / interfaces + (class_declaration name: (identifier) @class) ; Objects (singleton classes) - (object_declaration (type_identifier) @class) + (object_declaration name: (identifier) @class) ; Imports - (import_header) @import + (import) @import "#; /// Tree-sitter query for extracting Kotlin function calls pub const CALL_QUERY: &str = r#" - ; Simple function calls + ; Simple function calls: g() (call_expression - (simple_identifier) @function.call) + (identifier) @function.call) - ; Method calls with navigation (obj.method()) + ; Method calls with navigation: obj.method() (call_expression (navigation_expression - (navigation_suffix - (simple_identifier) @method.call))) + (identifier) + (identifier) @method.call)) "#; diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/matlab.rs 
b/crates/biorouter-mcp/src/developer/analyze/languages/matlab.rs new file mode 100644 index 00000000..eabdaa2c --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/languages/matlab.rs @@ -0,0 +1,21 @@ +/// Tree-sitter query for extracting MATLAB code elements +/// +/// Targets the `tree-sitter-matlab` grammar. The function name is a direct +/// `name:` child of `function_definition`, so the default direct-child name +/// extraction works and no custom handler is required. +pub const ELEMENT_QUERY: &str = r#" + ; Functions: function y = f(x) ... end + (function_definition name: (identifier) @func) + + ; Classes: classdef Cls ... end + (class_definition name: (identifier) @class) +"#; + +/// Tree-sitter query for extracting MATLAB function calls +/// +/// Note: MATLAB syntax does not distinguish a function call from array +/// indexing, so `a(i)` is also reported as a call by the grammar. +pub const CALL_QUERY: &str = r#" + (function_call + name: (identifier) @function.call) +"#; diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/mod.rs b/crates/biorouter-mcp/src/developer/analyze/languages/mod.rs index c9bae7bc..c1321627 100644 --- a/crates/biorouter-mcp/src/developer/analyze/languages/mod.rs +++ b/crates/biorouter-mcp/src/developer/analyze/languages/mod.rs @@ -25,11 +25,16 @@ //! - Method receiver lookup: Implement `find_method_for_receiver()` to associate //! 
methods with their containing types (see Go and Ruby) +pub mod c; +pub mod cpp; pub mod go; pub mod java; pub mod javascript; +pub mod julia; pub mod kotlin; +pub mod matlab; pub mod python; +pub mod r; pub mod ruby; pub mod rust; pub mod swift; @@ -132,7 +137,7 @@ pub fn get_language_info(language: &str) -> Option { element_query: kotlin::ELEMENT_QUERY, call_query: kotlin::CALL_QUERY, reference_query: "", - function_node_kinds: &["function_declaration", "class_body"], + function_node_kinds: &["function_declaration"], function_name_kinds: &["identifier", "field_identifier", "property_identifier"], extract_function_name_handler: None, find_method_for_receiver_handler: None, @@ -163,6 +168,56 @@ pub fn get_language_info(language: &str) -> Option { find_method_for_receiver_handler: Some(ruby::find_method_for_receiver), find_receiver_type_handler: None, }), + "cpp" => Some(LanguageInfo { + element_query: cpp::ELEMENT_QUERY, + call_query: cpp::CALL_QUERY, + reference_query: "", + function_node_kinds: &["function_definition"], + function_name_kinds: &["identifier", "field_identifier"], + extract_function_name_handler: Some(cpp::extract_function_name_for_kind), + find_method_for_receiver_handler: None, + find_receiver_type_handler: None, + }), + "c" => Some(LanguageInfo { + element_query: c::ELEMENT_QUERY, + call_query: c::CALL_QUERY, + reference_query: "", + function_node_kinds: &["function_definition"], + function_name_kinds: &["identifier", "field_identifier"], + extract_function_name_handler: Some(c::extract_function_name_for_kind), + find_method_for_receiver_handler: None, + find_receiver_type_handler: None, + }), + "r" => Some(LanguageInfo { + element_query: r::ELEMENT_QUERY, + call_query: r::CALL_QUERY, + reference_query: "", + function_node_kinds: &["function_definition"], + function_name_kinds: &["identifier"], + extract_function_name_handler: Some(r::extract_function_name_for_kind), + find_method_for_receiver_handler: None, + find_receiver_type_handler: None, + 
}), + "julia" => Some(LanguageInfo { + element_query: julia::ELEMENT_QUERY, + call_query: julia::CALL_QUERY, + reference_query: "", + function_node_kinds: &["function_definition"], + function_name_kinds: &["identifier"], + extract_function_name_handler: Some(julia::extract_function_name_for_kind), + find_method_for_receiver_handler: None, + find_receiver_type_handler: None, + }), + "matlab" => Some(LanguageInfo { + element_query: matlab::ELEMENT_QUERY, + call_query: matlab::CALL_QUERY, + reference_query: "", + function_node_kinds: &["function_definition"], + function_name_kinds: &["identifier"], + extract_function_name_handler: None, + find_method_for_receiver_handler: None, + find_receiver_type_handler: None, + }), _ => None, } } diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/r.rs b/crates/biorouter-mcp/src/developer/analyze/languages/r.rs new file mode 100644 index 00000000..868946ab --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/languages/r.rs @@ -0,0 +1,43 @@ +/// Tree-sitter query for extracting R code elements +/// +/// Targets the `tree-sitter-r` grammar. R functions are anonymous +/// `function_definition` values bound by assignment, so the name is the left +/// side of the enclosing `binary_operator` (`f <- function(...)` or +/// `g = function(...)`). Caller attribution recovers it via +/// [`extract_function_name_for_kind`]. +pub const ELEMENT_QUERY: &str = r#" + ; Named function definitions: f <- function(...) / g = function(...) 
+ (binary_operator + lhs: (identifier) @func + rhs: (function_definition)) +"#; + +/// Tree-sitter query for extracting R function calls +pub const CALL_QUERY: &str = r#" + ; Plain calls: g(x), library(dplyr) + (call + function: (identifier) @function.call) + + ; Member calls via $ : obj$method() + (call + function: (extract_operator + rhs: (identifier) @method.call)) +"#; + +/// Extract the function name from an R `function_definition` node by reading the +/// identifier on the left of the enclosing assignment. +pub fn extract_function_name_for_kind( + node: &tree_sitter::Node, + source: &str, + _kind: &str, +) -> Option { + let parent = node.parent()?; + if parent.kind() == "binary_operator" { + if let Some(lhs) = parent.child_by_field_name("lhs") { + if lhs.kind() == "identifier" { + return source.get(lhs.byte_range()).map(|s| s.to_string()); + } + } + } + None +} diff --git a/crates/biorouter-mcp/src/developer/analyze/languages/ruby.rs b/crates/biorouter-mcp/src/developer/analyze/languages/ruby.rs index df9f7496..fb519863 100644 --- a/crates/biorouter-mcp/src/developer/analyze/languages/ruby.rs +++ b/crates/biorouter-mcp/src/developer/analyze/languages/ruby.rs @@ -114,7 +114,7 @@ fn find_first_method_in_class( max_depth: usize, ) -> Option { for i in 0..class_node.child_count() { - if let Some(child) = class_node.child(i) { + if let Some(child) = class_node.child(i as u32) { if child.kind() == "body_statement" { return find_method_in_body_with_depth(&child, source, 0, max_depth); } @@ -135,10 +135,10 @@ fn find_method_in_body_with_depth( } for i in 0..node.child_count() { - if let Some(child) = node.child(i) { + if let Some(child) = node.child(i as u32) { if child.kind() == "method" { for j in 0..child.child_count() { - if let Some(name_node) = child.child(j) { + if let Some(name_node) = child.child(j as u32) { if name_node.kind() == "identifier" { return source.get(name_node.byte_range()).map(|s| s.to_string()); } diff --git 
a/crates/biorouter-mcp/src/developer/analyze/languages/rust.rs b/crates/biorouter-mcp/src/developer/analyze/languages/rust.rs index bf9a6f98..c21bd6c6 100644 --- a/crates/biorouter-mcp/src/developer/analyze/languages/rust.rs +++ b/crates/biorouter-mcp/src/developer/analyze/languages/rust.rs @@ -81,7 +81,7 @@ pub fn extract_function_name_for_kind( if kind == "impl_item" { // For impl blocks, find the type being implemented for i in 0..node.child_count() { - if let Some(child) = node.child(i) { + if let Some(child) = node.child(i as u32) { if child.kind() == "type_identifier" { return source .get(child.byte_range()) @@ -109,7 +109,7 @@ pub fn find_method_for_receiver( if parent.kind() == "function_item" { // Found the function, get its name for i in 0..parent.child_count() { - if let Some(child) = parent.child(i) { + if let Some(child) = parent.child(i as u32) { if child.kind() == "identifier" { return source.get(child.byte_range()).map(|s| s.to_string()); } @@ -133,7 +133,7 @@ pub fn find_receiver_type(node: &tree_sitter::Node, source: &str) -> Option tree_sitter_python::language(), - "rust" => tree_sitter_rust::language(), - "javascript" | "typescript" => tree_sitter_javascript::language(), - "go" => tree_sitter_go::language(), - "java" => tree_sitter_java::language(), - "kotlin" => tree_sitter_kotlin::language(), - "swift" => devgen_tree_sitter_swift::language(), - "ruby" => tree_sitter_ruby::language(), + "python" => tree_sitter_python::LANGUAGE.into(), + "rust" => tree_sitter_rust::LANGUAGE.into(), + "javascript" | "typescript" => tree_sitter_javascript::LANGUAGE.into(), + "go" => tree_sitter_go::LANGUAGE.into(), + "java" => tree_sitter_java::LANGUAGE.into(), + "kotlin" => tree_sitter_kotlin_ng::LANGUAGE.into(), + "swift" => tree_sitter_swift::LANGUAGE.into(), + "ruby" => tree_sitter_ruby::LANGUAGE.into(), + "cpp" => tree_sitter_cpp::LANGUAGE.into(), + "c" => tree_sitter_c::LANGUAGE.into(), + "r" => tree_sitter_r::LANGUAGE.into(), + "julia" => 
tree_sitter_julia::LANGUAGE.into(), + "matlab" => tree_sitter_matlab::LANGUAGE.into(), _ => { tracing::warn!("Unsupported language: {}", language); return Err(ErrorData::new( @@ -94,7 +99,7 @@ impl ElementExtractor { kinds: &[&str], ) -> Option> { (0..node.child_count()) - .filter_map(|i| node.child(i)) + .filter_map(|i| node.child(i as u32)) .find(|child| kinds.contains(&child.kind())) } @@ -196,6 +201,7 @@ impl ElementExtractor { source: &str, query_str: &str, ) -> Result { + use streaming_iterator::StreamingIterator; use tree_sitter::{Query, QueryCursor}; let mut functions = Vec::new(); @@ -214,7 +220,7 @@ impl ElementExtractor { let mut cursor = QueryCursor::new(); let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - for match_ in matches.by_ref() { + while let Some(match_) = matches.next() { for capture in match_.captures { let node = capture.node; let Some(text) = source.get(node.byte_range()) else { @@ -264,6 +270,7 @@ impl ElementExtractor { language: &str, ) -> Result, ErrorData> { use crate::developer::analyze::languages; + use streaming_iterator::StreamingIterator; use tree_sitter::{Query, QueryCursor}; let mut calls = Vec::new(); @@ -287,7 +294,7 @@ impl ElementExtractor { let mut cursor = QueryCursor::new(); let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - for match_ in matches.by_ref() { + while let Some(match_) = matches.next() { for capture in match_.captures { let node = capture.node; let Some(text) = source.get(node.byte_range()) else { @@ -343,6 +350,7 @@ impl ElementExtractor { ast_recursion_limit: Option, ) -> Result, ErrorData> { use crate::developer::analyze::languages; + use streaming_iterator::StreamingIterator; use tree_sitter::{Query, QueryCursor}; let mut references = Vec::new(); @@ -366,7 +374,7 @@ impl ElementExtractor { let mut cursor = QueryCursor::new(); let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - for match_ in matches.by_ref() { + while 
let Some(match_) = matches.next() { for capture in match_.captures { let node = capture.node; let Some(text) = source.get(node.byte_range()) else { diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/c_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/c_test.rs new file mode 100644 index 00000000..51183991 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/c_test.rs @@ -0,0 +1,63 @@ +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "c").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "c", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +#include + +struct Point { int x; int y; }; + +static int add(int a, int b) { + return a + b; +} + +int main(void) { + int s = add(1, 2); + printf("%d", s); + return 0; +} +"#; + +#[test] +fn test_c_functions_structs_imports() { + let result = parse_and_extract(SAMPLE); + + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("add"), "expected add, got {funcs:?}"); + assert!(funcs.contains("main"), "expected main, got {funcs:?}"); + + let structs: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!(structs.contains("Point"), "expected Point, got {structs:?}"); + + assert!(result.imports.iter().any(|i| i.contains("stdio"))); +} + +#[test] +fn test_c_calls_and_graph() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!( + callees.contains("add"), + "expected add call, got {callees:?}" + ); + assert!( + callees.contains("printf"), + "expected printf call, got {callees:?}" + ); + + 
let graph = CallGraph::build_from_results(&[(PathBuf::from("t.c"), result)]); + let incoming = graph.find_incoming_chains("add", 2); + assert!(!incoming.is_empty(), "expected main to call add"); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/cpp_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/cpp_test.rs new file mode 100644 index 00000000..df7ebb2c --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/cpp_test.rs @@ -0,0 +1,105 @@ +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "cpp").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "cpp", "semantic", None).unwrap() +} + +fn build_graph(code: &str) -> CallGraph { + let manager = ParserManager::new(); + let tree = manager.parse(code, "cpp").unwrap(); + let result = + ElementExtractor::extract_with_depth(&tree, code, "cpp", "semantic", None).unwrap(); + CallGraph::build_from_results(&[(PathBuf::from("test.cpp"), result)]) +} + +const SAMPLE: &str = r#" +#include + +namespace ns { +class Widget { +public: + void start(); + void stop(); +}; +} + +void ns::Widget::start() { + helper(); + obj.run(); +} + +int main() { + ns::Widget w; + w.start(); + return 0; +} +"#; + +#[test] +fn test_cpp_functions_and_classes() { + let result = parse_and_extract(SAMPLE); + + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("start"), "expected start, got {funcs:?}"); + assert!(funcs.contains("stop"), "expected stop, got {funcs:?}"); + assert!(funcs.contains("main"), "expected main, got {funcs:?}"); + + let classes: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!( + 
classes.contains("Widget"), + "expected Widget, got {classes:?}" + ); + + assert!(result.imports.iter().any(|i| i.contains("vector"))); + assert_eq!(result.main_line.is_some(), true); +} + +#[test] +fn test_cpp_calls() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!( + callees.contains("helper"), + "expected helper call, got {callees:?}" + ); + assert!( + callees.contains("run"), + "expected run method call, got {callees:?}" + ); + assert!( + callees.contains("start"), + "expected start call, got {callees:?}" + ); +} + +#[test] +fn test_cpp_call_graph() { + let graph = build_graph(SAMPLE); + // main() -> w.start() + let incoming = graph.find_incoming_chains("start", 2); + assert!(!incoming.is_empty(), "expected callers of start"); + // ns::Widget::start() -> helper(), obj.run() + let outgoing = graph.find_outgoing_chains("start", 2); + assert!(!outgoing.is_empty(), "expected callees of start"); +} + +#[test] +fn test_cpp_structure_mode_counts_without_bodies() { + let manager = ParserManager::new(); + let tree = manager.parse(SAMPLE, "cpp").unwrap(); + let result = + ElementExtractor::extract_with_depth(&tree, SAMPLE, "cpp", "structure", None).unwrap(); + // structure mode clears element details + assert!(result.functions.is_empty()); + assert!(result.classes.is_empty()); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/julia_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/julia_test.rs new file mode 100644 index 00000000..8127e44c --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/julia_test.rs @@ -0,0 +1,76 @@ +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = 
ParserManager::new(); + let tree = manager.parse(code, "julia").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "julia", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +using LinearAlgebra + +function process(data) + clean(data) + transform(data) +end + +clean(x) = identity(x) + +struct Config + size::Int +end + +process(input) +"#; + +#[test] +fn test_julia_long_and_short_function_forms() { + let result = parse_and_extract(SAMPLE); + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("process"), "expected process, got {funcs:?}"); + assert!( + funcs.contains("clean"), + "expected short-form clean, got {funcs:?}" + ); +} + +#[test] +fn test_julia_structs_and_imports() { + let result = parse_and_extract(SAMPLE); + let classes: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!( + classes.contains("Config"), + "expected Config, got {classes:?}" + ); + assert!( + result.imports.iter().any(|i| i.contains("LinearAlgebra")), + "expected using import, got {:?}", + result.imports + ); +} + +#[test] +fn test_julia_calls_and_graph() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!( + callees.contains("clean"), + "expected clean call, got {callees:?}" + ); + assert!( + callees.contains("transform"), + "expected transform call, got {callees:?}" + ); + + let graph = CallGraph::build_from_results(&[(PathBuf::from("t.jl"), result)]); + let incoming = graph.find_incoming_chains("clean", 2); + assert!(!incoming.is_empty(), "expected process to call clean"); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/kotlin_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/kotlin_test.rs new file mode 100644 index 00000000..4afa4e31 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/kotlin_test.rs @@ -0,0 +1,71 @@ +// Verifies the 
analyzer still works after swapping tree-sitter-kotlin -> +// tree-sitter-kotlin-ng, whose node kinds differ (identifier vs simple_identifier, +// import vs import_header). +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "kotlin").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "kotlin", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +import kotlin.math.sqrt + +class Calculator { + fun compute(x: Int): Int { + return helper(x) + } + fun helper(y: Int): Int { + return y * 2 + } +} + +object Singleton { + fun run() {} +} + +fun main() { + val c = Calculator() + c.compute(5) +} +"#; + +#[test] +fn test_kotlin_functions_classes_objects() { + let result = parse_and_extract(SAMPLE); + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("compute"), "got {funcs:?}"); + assert!(funcs.contains("helper"), "got {funcs:?}"); + assert!(funcs.contains("run"), "got {funcs:?}"); + assert!(funcs.contains("main"), "got {funcs:?}"); + + let classes: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!(classes.contains("Calculator"), "got {classes:?}"); + assert!(classes.contains("Singleton"), "got {classes:?}"); + + assert!(!result.imports.is_empty(), "expected import captured"); +} + +#[test] +fn test_kotlin_calls_and_graph() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!(callees.contains("helper"), "got {callees:?}"); + assert!( + callees.contains("compute"), + "expected navigation call, got {callees:?}" + ); + + let graph = 
CallGraph::build_from_results(&[(PathBuf::from("t.kt"), result)]); + let incoming = graph.find_incoming_chains("helper", 2); + assert!(!incoming.is_empty(), "expected compute to call helper"); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/language_support_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/language_support_test.rs new file mode 100644 index 00000000..fd955d8f --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/language_support_test.rs @@ -0,0 +1,77 @@ +// Cross-cutting guarantees for the multi-language analyzer after the +// tree-sitter 0.26 bump: every supported language must (a) be registered with a +// non-empty element query, (b) construct a real tree-sitter parser, and (c) be +// reachable from a file extension. +use crate::developer::analyze::languages::get_language_info; +use crate::developer::analyze::parser::ParserManager; +use crate::developer::lang::get_language_identifier; +use std::path::Path; + +const ALL_LANGUAGES: &[&str] = &[ + "python", + "rust", + "javascript", + "typescript", + "go", + "java", + "kotlin", + "swift", + "ruby", + "cpp", + "c", + "r", + "julia", + "matlab", +]; + +#[test] +fn test_every_language_has_queries_and_a_working_parser() { + let manager = ParserManager::new(); + for lang in ALL_LANGUAGES { + let info = get_language_info(lang) + .unwrap_or_else(|| panic!("no LanguageInfo registered for {lang}")); + assert!( + !info.element_query.is_empty(), + "{lang} has an empty element query" + ); + assert!( + !info.call_query.is_empty(), + "{lang} has an empty call query" + ); + // Parser construction must succeed (validates the grammar links + ABI). 
+ manager + .get_or_create_parser(lang) + .unwrap_or_else(|e| panic!("failed to build parser for {lang}: {e:?}")); + } +} + +#[test] +fn test_new_file_extensions_map_to_languages() { + let cases = [ + ("foo.cpp", "cpp"), + ("foo.cc", "cpp"), + ("foo.hpp", "cpp"), + ("foo.hh", "cpp"), + ("foo.c", "c"), + ("foo.r", "r"), + ("foo.R", "r"), + ("foo.jl", "julia"), + ("foo.m", "matlab"), + ("foo.kt", "kotlin"), + ("foo.swift", "swift"), + ]; + for (file, expected) in cases { + assert_eq!( + get_language_identifier(Path::new(file)), + expected, + "extension mapping wrong for {file}" + ); + } +} + +#[test] +fn test_unsupported_language_is_rejected() { + let manager = ParserManager::new(); + assert!(manager.get_or_create_parser("cobol").is_err()); + assert!(get_language_info("cobol").is_none()); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/matlab_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/matlab_test.rs new file mode 100644 index 00000000..0e6cbdf6 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/matlab_test.rs @@ -0,0 +1,77 @@ +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "matlab").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "matlab", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +function result = process(data) + result = clean(data); +end + +function out = clean(x) + out = x * 2; +end + +process(input); +"#; + +const CLASSDEF: &str = r#" +classdef Calculator + properties + value + end + methods + function obj = compute(self) + obj = 1; + end + end +end +"#; + +#[test] +fn test_matlab_functions() { + let result = parse_and_extract(SAMPLE); + let funcs: HashSet<_> = 
result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("process"), "expected process, got {funcs:?}"); + assert!(funcs.contains("clean"), "expected clean, got {funcs:?}"); +} + +#[test] +fn test_matlab_calls_and_graph() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!( + callees.contains("clean"), + "expected clean call, got {callees:?}" + ); + + let graph = CallGraph::build_from_results(&[(PathBuf::from("t.m"), result)]); + let incoming = graph.find_incoming_chains("clean", 2); + assert!(!incoming.is_empty(), "expected process to call clean"); +} + +#[test] +fn test_matlab_classdef() { + let result = parse_and_extract(CLASSDEF); + let classes: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!( + classes.contains("Calculator"), + "expected Calculator class, got {classes:?}" + ); + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!( + funcs.contains("compute"), + "expected method compute, got {funcs:?}" + ); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/mod.rs b/crates/biorouter-mcp/src/developer/analyze/tests/mod.rs index 6da0e66d..d0776da9 100644 --- a/crates/biorouter-mcp/src/developer/analyze/tests/mod.rs +++ b/crates/biorouter-mcp/src/developer/analyze/tests/mod.rs @@ -1,13 +1,21 @@ // Test modules for the analyze tool +pub mod c_test; pub mod cache_tests; +pub mod cpp_test; pub mod fixtures; pub mod formatter_tests; pub mod go_test; pub mod graph_tests; pub mod integration_tests; +pub mod julia_test; +pub mod kotlin_test; +pub mod language_support_test; pub mod large_output_tests; +pub mod matlab_test; pub mod parser_tests; +pub mod r_test; pub mod ruby_test; pub mod rust_test; +pub mod swift_test; pub mod traversal_tests; diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/r_test.rs 
b/crates/biorouter-mcp/src/developer/analyze/tests/r_test.rs new file mode 100644 index 00000000..50a64c0e --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/r_test.rs @@ -0,0 +1,66 @@ +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "r").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "r", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +library(dplyr) + +process <- function(data) { + clean(data) + data$summarize() +} + +clean = function(x) { + x +} + +process(df) +"#; + +#[test] +fn test_r_function_definitions_both_assignment_styles() { + let result = parse_and_extract(SAMPLE); + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + // `<-` and `=` assignment forms both recognized + assert!(funcs.contains("process"), "expected process, got {funcs:?}"); + assert!(funcs.contains("clean"), "expected clean, got {funcs:?}"); +} + +#[test] +fn test_r_calls_including_dollar_member() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!( + callees.contains("clean"), + "expected clean call, got {callees:?}" + ); + assert!( + callees.contains("library"), + "expected library call, got {callees:?}" + ); + assert!( + callees.contains("summarize"), + "expected $ member call, got {callees:?}" + ); +} + +#[test] +fn test_r_call_graph_attribution() { + let result = parse_and_extract(SAMPLE); + let graph = CallGraph::build_from_results(&[(PathBuf::from("t.R"), result)]); + // process() -> clean() + let incoming = graph.find_incoming_chains("clean", 2); + assert!(!incoming.is_empty(), "expected 
process to call clean"); +} diff --git a/crates/biorouter-mcp/src/developer/analyze/tests/swift_test.rs b/crates/biorouter-mcp/src/developer/analyze/tests/swift_test.rs new file mode 100644 index 00000000..69a71565 --- /dev/null +++ b/crates/biorouter-mcp/src/developer/analyze/tests/swift_test.rs @@ -0,0 +1,67 @@ +// Verifies the analyzer still works after swapping devgen-tree-sitter-swift -> +// tree-sitter-swift (alex-pinkus grammar) under the tree-sitter 0.26 bump. +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use crate::developer::analyze::types::AnalysisResult; +use std::collections::HashSet; +use std::path::PathBuf; + +fn parse_and_extract(code: &str) -> AnalysisResult { + let manager = ParserManager::new(); + let tree = manager.parse(code, "swift").unwrap(); + ElementExtractor::extract_with_depth(&tree, code, "swift", "semantic", None).unwrap() +} + +const SAMPLE: &str = r#" +import Foundation + +class Service { + func start() { + helper() + client.fetch() + } + func helper() {} +} + +protocol Runnable {} + +func main() { + let s = Service() + s.start() +} +"#; + +#[test] +fn test_swift_functions_and_classes() { + let result = parse_and_extract(SAMPLE); + let funcs: HashSet<_> = result.functions.iter().map(|f| f.name.as_str()).collect(); + assert!(funcs.contains("start"), "got {funcs:?}"); + assert!(funcs.contains("helper"), "got {funcs:?}"); + assert!(funcs.contains("main"), "got {funcs:?}"); + + let classes: HashSet<_> = result.classes.iter().map(|c| c.name.as_str()).collect(); + assert!(classes.contains("Service"), "got {classes:?}"); + assert!( + classes.contains("Runnable"), + "expected protocol, got {classes:?}" + ); +} + +#[test] +fn test_swift_calls_and_graph() { + let result = parse_and_extract(SAMPLE); + let callees: HashSet<_> = result + .calls + .iter() + .map(|c| c.callee_name.as_str()) + .collect(); + assert!(callees.contains("helper"), "got {callees:?}"); + 
assert!( + callees.contains("fetch"), + "expected method call, got {callees:?}" + ); + + let graph = CallGraph::build_from_results(&[(PathBuf::from("t.swift"), result)]); + let incoming = graph.find_incoming_chains("start", 2); + assert!(!incoming.is_empty(), "expected main to call start"); +} diff --git a/crates/biorouter-mcp/src/developer/lang.rs b/crates/biorouter-mcp/src/developer/lang.rs index 590f065d..6a19b0ca 100644 --- a/crates/biorouter-mcp/src/developer/lang.rs +++ b/crates/biorouter-mcp/src/developer/lang.rs @@ -24,13 +24,14 @@ pub fn get_language_identifier(path: &Path) -> &'static str { Some("java") => "java", Some("cpp") | Some("cc") | Some("cxx") => "cpp", Some("c") => "c", - Some("h") | Some("hpp") => "cpp", + Some("h") | Some("hpp") | Some("hh") | Some("hxx") => "cpp", Some("rb") => "ruby", Some("php") => "php", Some("swift") => "swift", Some("kt") | Some("kts") => "kotlin", Some("scala") => "scala", - Some("r") => "r", + Some("r") | Some("R") => "r", + Some("jl") => "julia", Some("m") => "matlab", Some("pl") => "perl", Some("dockerfile") => "dockerfile", diff --git a/crates/biorouter/src/model.rs b/crates/biorouter/src/model.rs index aa2b187e..cc11366d 100644 --- a/crates/biorouter/src/model.rs +++ b/crates/biorouter/src/model.rs @@ -119,12 +119,21 @@ static MODEL_SPECIFIC_LIMITS: Lazy> = Lazy::new(|| { ("grok-code-fast-1", 256_000), ("grok-4", 1_000_000), ("grok", 131_072), + // zai (Zhipu GLM) — GLM-4.6/4.5 are 128k–200k; default to 128k + ("glm-4.7", 200_000), + ("glm-4.6", 200_000), + ("glm-5", 200_000), + ("glm", 131_072), // deepseek — V4 family is 1M ("deepseek-v4", 1_000_000), // moonshot — k2.5/k2.6 are 256k, original k2 is 128k ("kimi-k2.5", 262_144), ("kimi-k2.6", 262_144), ("kimi-k2", 131_072), + // xiaomi mimo — MiMo v2.5 family advertises ~1M; MiMo v2 family ~256k + ("mimo-v2.5", 1_000_000), // covers mimo-v2.5 and mimo-v2.5-pro + ("mimo-v2", 262_144), // covers mimo-v2-pro and mimo-v2-omni + ("mimo", 131_072), // any other mimo 
variant ] }); diff --git a/crates/biorouter/src/providers/auto_detect.rs b/crates/biorouter/src/providers/auto_detect.rs index 0513fd92..3b83ee34 100644 --- a/crates/biorouter/src/providers/auto_detect.rs +++ b/crates/biorouter/src/providers/auto_detect.rs @@ -8,6 +8,8 @@ pub async fn detect_provider_from_api_key(api_key: &str) -> Option<(String, Vec< ("google", "GOOGLE_API_KEY"), ("groq", "GROQ_API_KEY"), ("xai", "XAI_API_KEY"), + ("zai", "ZAI_API_KEY"), + ("xiaomi_mimo", "XIAOMI_MIMO_API_KEY"), // Ollama and OpenRouter don't validate keys, so they would match any input ]; diff --git a/crates/biorouter/src/providers/factory.rs b/crates/biorouter/src/providers/factory.rs index c31fc318..fdd96bb3 100644 --- a/crates/biorouter/src/providers/factory.rs +++ b/crates/biorouter/src/providers/factory.rs @@ -23,6 +23,8 @@ use super::{ versa_azure::VersaAzureProvider, versa_bedrock::VersaBedrockProvider, xai::XaiProvider, + xiaomi_mimo::XiaomiMimoProvider, + zai::ZaiProvider, }; use crate::model::ModelConfig; use crate::providers::base::ProviderType; @@ -79,6 +81,11 @@ async fn init_registry() -> RwLock { registry.register::(|m| Box::pin(TetrateProvider::from_env(m)), true); registry.register::(|m| Box::pin(VeniceProvider::from_env(m)), false); registry.register::(|m| Box::pin(XaiProvider::from_env(m)), false); + registry.register::( + |m| Box::pin(XiaomiMimoProvider::from_env(m)), + false, + ); + registry.register::(|m| Box::pin(ZaiProvider::from_env(m)), false); }); if let Err(e) = load_custom_providers_into_registry(&mut registry) { tracing::warn!("Failed to load custom providers: {}", e); @@ -315,6 +322,8 @@ mod tests { ("groq", "GROQ_API_KEY"), ("mistral", "MISTRAL_API_KEY"), ("custom_deepseek", "DEEPSEEK_API_KEY"), + ("xiaomi_mimo", "XIAOMI_MIMO_API_KEY"), + ("zai", "ZAI_API_KEY"), ]; for (name, expected_key) in cases { if let Some((meta, _)) = providers_list.iter().find(|(m, _)| m.name == name) { diff --git a/crates/biorouter/src/providers/mod.rs 
b/crates/biorouter/src/providers/mod.rs index 69ee1125..b36ded32 100644 --- a/crates/biorouter/src/providers/mod.rs +++ b/crates/biorouter/src/providers/mod.rs @@ -37,6 +37,8 @@ pub mod venice; pub mod versa_azure; pub mod versa_bedrock; pub mod xai; +pub mod xiaomi_mimo; +pub mod zai; pub use factory::{ create, create_with_default_model, create_with_named_model, providers, refresh_custom_providers, diff --git a/crates/biorouter/src/providers/openai.rs b/crates/biorouter/src/providers/openai.rs index a030f668..542c2cc1 100644 --- a/crates/biorouter/src/providers/openai.rs +++ b/crates/biorouter/src/providers/openai.rs @@ -20,6 +20,7 @@ use async_trait::async_trait; use futures::{StreamExt, TryStreamExt}; use reqwest::StatusCode; use serde_json::Value; +use std::borrow::Cow; use std::collections::HashMap; use std::io; use tokio::pin; @@ -66,6 +67,30 @@ pub const OPEN_AI_KNOWN_MODELS: &[(&str, usize)] = &[ pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models"; +/// Built-in model-id aliases for OpenAI-compatible hosts that are retiring a +/// model name, so a user's saved config keeps working after the vendor removes +/// the old id. Keyed by the API host; returns `old id -> live id`. +/// +/// DeepSeek retires `deepseek-chat` / `deepseek-reasoner` on 2026-07-24 (both +/// have been aliases of V4-Flash since the V4 launch). Rewriting them on the +/// wire makes the transition seamless for anyone still selecting the old ids — +/// including custom providers pointed at a `deepseek.com` host. Mapping both to +/// `deepseek-v4-flash` (not `-pro`) is faithful: Flash has thinking enabled by +/// default, so `deepseek-reasoner` behaviour is preserved with no cost jump. 
+fn builtin_model_aliases(host: &str) -> Option> { + let host = host.trim().to_ascii_lowercase(); + if host == "deepseek.com" || host == "api.deepseek.com" || host.ends_with(".deepseek.com") { + return Some(HashMap::from([ + ("deepseek-chat".to_string(), "deepseek-v4-flash".to_string()), + ( + "deepseek-reasoner".to_string(), + "deepseek-v4-flash".to_string(), + ), + ])); + } + None +} + #[derive(Debug, serde::Serialize)] pub struct OpenAiProvider { #[serde(skip)] @@ -77,6 +102,9 @@ pub struct OpenAiProvider { custom_headers: Option>, supports_streaming: bool, name: String, + /// `old model id -> live model id` rewrites applied just before a request is + /// sent, so retired upstream ids keep working. See [`builtin_model_aliases`]. + model_aliases: Option>, } impl OpenAiProvider { @@ -131,6 +159,7 @@ impl OpenAiProvider { custom_headers, supports_streaming: true, name: Self::metadata().name, + model_aliases: None, }) } @@ -145,6 +174,7 @@ impl OpenAiProvider { custom_headers: None, supports_streaming: true, name: Self::metadata().name, + model_aliases: None, } } @@ -160,6 +190,8 @@ impl OpenAiProvider { let url = url::Url::parse(&config.base_url) .map_err(|e| anyhow::anyhow!("Invalid base URL '{}': {}", config.base_url, e))?; + let model_aliases = builtin_model_aliases(url.host_str().unwrap_or("")); + let host = if let Some(port) = url.port() { format!( "{}://{}:{}", @@ -202,9 +234,32 @@ impl OpenAiProvider { custom_headers: config.headers, supports_streaming: config.supports_streaming.unwrap_or(true), name: config.name.clone(), + model_aliases, }) } + /// Rewrite a retired model id to its live replacement just before sending a + /// request. Returns the input untouched when no alias applies, so the common + /// path allocates nothing. 
+ fn resolve_model<'a>(&self, model_config: &'a ModelConfig) -> Cow<'a, ModelConfig> { + if let Some(target) = self + .model_aliases + .as_ref() + .and_then(|aliases| aliases.get(&model_config.model_name)) + .filter(|target| *target != &model_config.model_name) + { + tracing::debug!( + from = %model_config.model_name, + to = %target, + "remapping retired model id to its live replacement" + ); + let mut remapped = model_config.clone(); + remapped.model_name = target.clone(); + return Cow::Owned(remapped); + } + Cow::Borrowed(model_config) + } + fn uses_responses_api(model_name: &str) -> bool { model_name.starts_with("gpt-5-codex") || model_name.starts_with("gpt-5.1-codex") @@ -304,6 +359,8 @@ impl Provider for OpenAiProvider { messages: &[Message], tools: &[Tool], ) -> Result<(Message, ProviderUsage), ProviderError> { + let resolved = self.resolve_model(model_config); + let model_config = resolved.as_ref(); if Self::uses_responses_api(&model_config.model_name) { let payload = create_responses_request(model_config, system, messages, tools)?; let mut log = RequestLog::start(&self.model, &payload)?; @@ -411,11 +468,13 @@ impl Provider for OpenAiProvider { messages: &[Message], tools: &[Tool], ) -> Result { - if Self::uses_responses_api(&self.model.model_name) { - let mut payload = create_responses_request(&self.model, system, messages, tools)?; + let resolved = self.resolve_model(&self.model); + let model = resolved.as_ref(); + if Self::uses_responses_api(&model.model_name) { + let mut payload = create_responses_request(model, system, messages, tools)?; payload["stream"] = serde_json::Value::Bool(true); - let mut log = RequestLog::start(&self.model, &payload)?; + let mut log = RequestLog::start(model, &payload)?; let response = self .with_retry(|| async { @@ -446,15 +505,9 @@ impl Provider for OpenAiProvider { } })) } else { - let payload = create_request( - &self.model, - system, - messages, - tools, - &ImageFormat::OpenAi, - true, - )?; - let mut log = 
RequestLog::start(&self.model, &payload)?; + let payload = + create_request(model, system, messages, tools, &ImageFormat::OpenAi, true)?; + let mut log = RequestLog::start(model, &payload)?; let response = self .with_retry(|| async { @@ -485,6 +538,88 @@ fn parse_custom_headers(s: String) -> HashMap { .collect() } +#[cfg(test)] +mod alias_tests { + use super::*; + use crate::providers::api_client::{ApiClient, AuthMethod}; + + fn model(name: &str) -> ModelConfig { + ModelConfig::new(name).unwrap() + } + + fn provider_for_host(host: &str) -> OpenAiProvider { + let api_client = ApiClient::new( + host.to_string(), + AuthMethod::BearerToken("test".to_string()), + ) + .unwrap(); + let mut p = OpenAiProvider::new(api_client, model("deepseek-chat")); + p.model_aliases = builtin_model_aliases( + url::Url::parse(host) + .ok() + .and_then(|u| u.host_str().map(str::to_string)) + .unwrap_or_default() + .as_str(), + ); + p + } + + #[test] + fn deepseek_host_aliases_retired_ids() { + let aliases = builtin_model_aliases("api.deepseek.com").expect("deepseek host has aliases"); + assert_eq!( + aliases.get("deepseek-chat").map(String::as_str), + Some("deepseek-v4-flash") + ); + assert_eq!( + aliases.get("deepseek-reasoner").map(String::as_str), + Some("deepseek-v4-flash") + ); + } + + #[test] + fn deepseek_host_matching_is_case_insensitive_and_covers_subdomains() { + assert!(builtin_model_aliases("API.DeepSeek.com").is_some()); + assert!(builtin_model_aliases("eu.deepseek.com").is_some()); + assert!(builtin_model_aliases("deepseek.com").is_some()); + } + + #[test] + fn non_deepseek_hosts_have_no_aliases() { + assert!(builtin_model_aliases("api.openai.com").is_none()); + assert!(builtin_model_aliases("api.deepseek.com.evil.example").is_none()); + assert!(builtin_model_aliases("").is_none()); + } + + #[test] + fn resolve_model_rewrites_retired_id_only() { + let p = provider_for_host("https://api.deepseek.com"); + + let chat = model("deepseek-chat"); + 
assert_eq!(p.resolve_model(&chat).model_name, "deepseek-v4-flash"); + + let reasoner = model("deepseek-reasoner"); + assert_eq!(p.resolve_model(&reasoner).model_name, "deepseek-v4-flash"); + + // A live id is passed through untouched (no allocation/rewrite). + let v4 = model("deepseek-v4-pro"); + assert_eq!(p.resolve_model(&v4).model_name, "deepseek-v4-pro"); + } + + #[test] + fn resolve_model_is_noop_without_aliases() { + let api_client = ApiClient::new( + "https://api.openai.com".to_string(), + AuthMethod::BearerToken("test".to_string()), + ) + .unwrap(); + let p = OpenAiProvider::new(api_client, model("deepseek-chat")); + // No alias table → the (now-retired) id is left as-is. + let chat = model("deepseek-chat"); + assert_eq!(p.resolve_model(&chat).model_name, "deepseek-chat"); + } +} + #[async_trait] impl EmbeddingCapable for OpenAiProvider { async fn create_embeddings(&self, texts: Vec) -> Result>> { diff --git a/crates/biorouter/src/providers/xiaomi_mimo.rs b/crates/biorouter/src/providers/xiaomi_mimo.rs new file mode 100644 index 00000000..914fe834 --- /dev/null +++ b/crates/biorouter/src/providers/xiaomi_mimo.rs @@ -0,0 +1,203 @@ +use super::api_client::{ApiClient, AuthMethod}; +use super::errors::ProviderError; +use super::retry::ProviderRetry; +use super::utils::{ + get_model, handle_response_openai_compat, handle_status_openai_compat, stream_openai_compat, + RequestLog, +}; +use crate::conversation::message::Message; +use crate::model::ModelConfig; +use crate::providers::base::{ + ConfigKey, MessageStream, Provider, ProviderMetadata, ProviderUsage, Usage, +}; +use crate::providers::formats::openai::{create_request, get_usage, response_to_message}; +use anyhow::Result; +use async_trait::async_trait; +use rmcp::model::Tool; +use serde_json::Value; + +// Xiaomi's MiMo model family, served through an OpenAI-compatible API. 
+// +// Endpoint and auth were verified live (June 2026) against a real MiMo key: +// the default below is the Singapore "Token Plan" host, which answered +// `GET /v1/models` and `POST /v1/chat/completions` (Bearer auth, OpenAI wire +// format) with HTTP 200 and a real `mimo-v2.5` completion. MiMo serves keys +// per region/plan, so operators on a different tier set `XIAOMI_MIMO_HOST`: +// - Pay-as-you-go (`sk-` keys): https://api.xiaomimimo.com/v1 +// - Token Plan (`tp-` keys): https://token-plan-{cn,sgp,ams}.xiaomimimo.com/v1 +// The wire format (OpenAI-compatible chat/completions + Bearer auth) follows +// the same shape every other OpenAI-compatible provider here uses. +pub const XIAOMI_MIMO_API_HOST: &str = "https://token-plan-sgp.xiaomimimo.com/v1"; +pub const XIAOMI_MIMO_DEFAULT_MODEL: &str = "mimo-v2.5"; +pub const XIAOMI_MIMO_KNOWN_MODELS: &[&str] = &[ + // MiMo v2.5 family (~1M context) + "mimo-v2.5", + "mimo-v2.5-pro", + // MiMo v2 family (~256k context) + "mimo-v2-pro", + "mimo-v2-omni", +]; + +pub const XIAOMI_MIMO_DOC_URL: &str = "https://github.com/XiaomiMiMo/MiMo"; + +#[derive(serde::Serialize)] +pub struct XiaomiMimoProvider { + #[serde(skip)] + api_client: ApiClient, + model: ModelConfig, + supports_streaming: bool, + #[serde(skip)] + name: String, +} + +impl XiaomiMimoProvider { + pub async fn from_env(model: ModelConfig) -> Result { + let config = crate::config::Config::global(); + let api_key: String = config.get_secret("XIAOMI_MIMO_API_KEY")?; + let host: String = config + .get_param("XIAOMI_MIMO_HOST") + .unwrap_or_else(|_| XIAOMI_MIMO_API_HOST.to_string()); + + let auth = AuthMethod::BearerToken(api_key); + let api_client = ApiClient::new(host, auth)?; + + Ok(Self { + api_client, + model, + supports_streaming: true, + name: Self::metadata().name, + }) + } + + async fn post(&self, payload: Value) -> Result { + let response = self + .api_client + .response_post("chat/completions", &payload) + .await?; + + 
handle_response_openai_compat(response).await + } +} + +#[async_trait] +impl Provider for XiaomiMimoProvider { + fn metadata() -> ProviderMetadata { + ProviderMetadata::new( + "xiaomi_mimo", + "Xiaomi MiMo", + "Xiaomi MiMo models via an OpenAI-compatible API. Set XIAOMI_MIMO_HOST to your MiMo endpoint.", + XIAOMI_MIMO_DEFAULT_MODEL, + XIAOMI_MIMO_KNOWN_MODELS.to_vec(), + XIAOMI_MIMO_DOC_URL, + vec![ + ConfigKey::new("XIAOMI_MIMO_API_KEY", true, true, None), + ConfigKey::new("XIAOMI_MIMO_HOST", false, false, Some(XIAOMI_MIMO_API_HOST)), + ], + ) + } + + fn get_name(&self) -> &str { + &self.name + } + + fn get_model_config(&self) -> ModelConfig { + self.model.clone() + } + + #[tracing::instrument( + skip(self, model_config, system, messages, tools), + fields(model_config, input, output, input_tokens, output_tokens, total_tokens) + )] + async fn complete_with_model( + &self, + model_config: &ModelConfig, + system: &str, + messages: &[Message], + tools: &[Tool], + ) -> Result<(Message, ProviderUsage), ProviderError> { + let payload = create_request( + model_config, + system, + messages, + tools, + &super::utils::ImageFormat::OpenAi, + false, + )?; + + let mut log = RequestLog::start(&self.model, &payload)?; + let response = self.with_retry(|| self.post(payload.clone())).await?; + + let message = response_to_message(&response)?; + let usage = response.get("usage").map(get_usage).unwrap_or_else(|| { + tracing::debug!("Failed to get usage data"); + Usage::default() + }); + let response_model = get_model(&response); + log.write(&response, Some(&usage))?; + Ok((message, ProviderUsage::new(response_model, usage))) + } + + fn supports_streaming(&self) -> bool { + self.supports_streaming + } + + async fn stream( + &self, + system: &str, + messages: &[Message], + tools: &[Tool], + ) -> Result { + let payload = create_request( + &self.model, + system, + messages, + tools, + &super::utils::ImageFormat::OpenAi, + true, + )?; + let mut log = RequestLog::start(&self.model, 
&payload)?; + + let response = self + .with_retry(|| async { + let resp = self + .api_client + .response_post("chat/completions", &payload) + .await?; + handle_status_openai_compat(resp).await + }) + .await + .inspect_err(|e| { + let _ = log.error(e); + })?; + + stream_openai_compat(response, log) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_metadata_structure() { + let metadata = XiaomiMimoProvider::metadata(); + + assert_eq!(metadata.name, "xiaomi_mimo"); + assert_eq!(metadata.default_model, "mimo-v2.5"); + assert!(metadata.known_models.iter().any(|m| m.name == "mimo-v2.5")); + assert!(!metadata.known_models.is_empty()); + + assert_eq!(metadata.config_keys.len(), 2); + assert_eq!(metadata.config_keys[0].name, "XIAOMI_MIMO_API_KEY"); + assert_eq!(metadata.config_keys[1].name, "XIAOMI_MIMO_HOST"); + } + + #[tokio::test] + async fn test_registered_in_factory() { + let all = crate::providers::providers().await; + assert!( + all.iter().any(|(m, _)| m.name == "xiaomi_mimo"), + "xiaomi_mimo provider must be registered in the factory registry" + ); + } +} diff --git a/crates/biorouter/src/providers/zai.rs b/crates/biorouter/src/providers/zai.rs new file mode 100644 index 00000000..2b23f193 --- /dev/null +++ b/crates/biorouter/src/providers/zai.rs @@ -0,0 +1,205 @@ +use super::api_client::{ApiClient, AuthMethod}; +use super::errors::ProviderError; +use super::retry::ProviderRetry; +use super::utils::{ + get_model, handle_response_openai_compat, handle_status_openai_compat, stream_openai_compat, + RequestLog, +}; +use crate::conversation::message::Message; +use crate::model::ModelConfig; +use crate::providers::base::{ + ConfigKey, MessageStream, Provider, ProviderMetadata, ProviderUsage, Usage, +}; +use crate::providers::formats::openai::{create_request, get_usage, response_to_message}; +use anyhow::Result; +use async_trait::async_trait; +use rmcp::model::Tool; +use serde_json::Value; + +// z.ai is the international platform of Zhipu AI; it 
serves the GLM family of +// models through an OpenAI-compatible API (and a separate Anthropic-compatible +// surface used by Claude Code — not used here). Verified live against +// docs.z.ai (June 2026): base URL `/api/paas/v4`, Bearer-token auth. +pub const ZAI_API_HOST: &str = "https://api.z.ai/api/paas/v4"; +pub const ZAI_DEFAULT_MODEL: &str = "glm-4.6"; +pub const ZAI_KNOWN_MODELS: &[&str] = &[ + // GLM-4 family + "glm-4.7", + "glm-4.6", + "glm-4.5", + "glm-4.5-air", + // GLM-5 family + "glm-5.2", + "glm-5.1", + "glm-5", + "glm-5-turbo", +]; + +pub const ZAI_DOC_URL: &str = "https://docs.z.ai/guides/overview/pricing"; + +#[derive(serde::Serialize)] +pub struct ZaiProvider { + #[serde(skip)] + api_client: ApiClient, + model: ModelConfig, + supports_streaming: bool, + #[serde(skip)] + name: String, +} + +impl ZaiProvider { + pub async fn from_env(model: ModelConfig) -> Result { + let config = crate::config::Config::global(); + let api_key: String = config.get_secret("ZAI_API_KEY")?; + let host: String = config + .get_param("ZAI_HOST") + .unwrap_or_else(|_| ZAI_API_HOST.to_string()); + + let auth = AuthMethod::BearerToken(api_key); + let api_client = ApiClient::new(host, auth)?; + + Ok(Self { + api_client, + model, + supports_streaming: true, + name: Self::metadata().name, + }) + } + + async fn post(&self, payload: Value) -> Result { + let response = self + .api_client + .response_post("chat/completions", &payload) + .await?; + + handle_response_openai_compat(response).await + } +} + +#[async_trait] +impl Provider for ZaiProvider { + fn metadata() -> ProviderMetadata { + ProviderMetadata::new( + "zai", + "z.ai", + "GLM models from z.ai (Zhipu AI), including the GLM-4 and GLM-5 families via an OpenAI-compatible API", + ZAI_DEFAULT_MODEL, + ZAI_KNOWN_MODELS.to_vec(), + ZAI_DOC_URL, + vec![ + ConfigKey::new("ZAI_API_KEY", true, true, None), + ConfigKey::new("ZAI_HOST", false, false, Some(ZAI_API_HOST)), + ], + ) + } + + fn get_name(&self) -> &str { + &self.name + } 
+ + fn get_model_config(&self) -> ModelConfig { + self.model.clone() + } + + #[tracing::instrument( + skip(self, model_config, system, messages, tools), + fields(model_config, input, output, input_tokens, output_tokens, total_tokens) + )] + async fn complete_with_model( + &self, + model_config: &ModelConfig, + system: &str, + messages: &[Message], + tools: &[Tool], + ) -> Result<(Message, ProviderUsage), ProviderError> { + let payload = create_request( + model_config, + system, + messages, + tools, + &super::utils::ImageFormat::OpenAi, + false, + )?; + + let mut log = RequestLog::start(&self.model, &payload)?; + let response = self.with_retry(|| self.post(payload.clone())).await?; + + let message = response_to_message(&response)?; + let usage = response.get("usage").map(get_usage).unwrap_or_else(|| { + tracing::debug!("Failed to get usage data"); + Usage::default() + }); + let response_model = get_model(&response); + log.write(&response, Some(&usage))?; + Ok((message, ProviderUsage::new(response_model, usage))) + } + + fn supports_streaming(&self) -> bool { + self.supports_streaming + } + + async fn stream( + &self, + system: &str, + messages: &[Message], + tools: &[Tool], + ) -> Result { + let payload = create_request( + &self.model, + system, + messages, + tools, + &super::utils::ImageFormat::OpenAi, + true, + )?; + let mut log = RequestLog::start(&self.model, &payload)?; + + let response = self + .with_retry(|| async { + let resp = self + .api_client + .response_post("chat/completions", &payload) + .await?; + handle_status_openai_compat(resp).await + }) + .await + .inspect_err(|e| { + let _ = log.error(e); + })?; + + stream_openai_compat(response, log) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_metadata_structure() { + let metadata = ZaiProvider::metadata(); + + assert_eq!(metadata.name, "zai"); + assert_eq!(metadata.default_model, "glm-4.6"); + assert!(metadata.known_models.iter().any(|m| m.name == "glm-4.6")); + 
assert!(!metadata.known_models.is_empty()); + + assert_eq!(metadata.config_keys.len(), 2); + assert_eq!(metadata.config_keys[0].name, "ZAI_API_KEY"); + assert_eq!(metadata.config_keys[1].name, "ZAI_HOST"); + // Host default points at the OpenAI-compatible base URL. + assert_eq!( + metadata.config_keys[1].default, + Some(ZAI_API_HOST.to_string()) + ); + } + + #[tokio::test] + async fn test_registered_in_factory() { + let all = crate::providers::providers().await; + assert!( + all.iter().any(|(m, _)| m.name == "zai"), + "zai provider must be registered in the factory registry" + ); + } +} diff --git a/crates/biorouter/tests/providers.rs b/crates/biorouter/tests/providers.rs index aa0f02e9..bfe8fd9a 100644 --- a/crates/biorouter/tests/providers.rs +++ b/crates/biorouter/tests/providers.rs @@ -14,6 +14,8 @@ use biorouter::providers::openai::OPEN_AI_DEFAULT_MODEL; use biorouter::providers::sagemaker_tgi::SAGEMAKER_TGI_DEFAULT_MODEL; use biorouter::providers::snowflake::SNOWFLAKE_DEFAULT_MODEL; use biorouter::providers::xai::XAI_DEFAULT_MODEL; +use biorouter::providers::xiaomi_mimo::XIAOMI_MIMO_DEFAULT_MODEL; +use biorouter::providers::zai::ZAI_DEFAULT_MODEL; use dotenvy::dotenv; use rmcp::model::{AnnotateAble, Content, RawImageContent}; use rmcp::model::{CallToolRequestParams, Tool}; @@ -235,7 +237,10 @@ impl ProviderTester { dbg!(&result); println!("==================="); - if self.name.to_lowercase() == "ollama" { + // Ollama and Xiaomi MiMo silently truncate oversized input to their + // context window (MiMo caps at its ~1M window and returns Ok) rather + // than returning a context-length error. 
+ if matches!(self.name.to_lowercase().as_str(), "ollama" | "xiaomi_mimo") { assert!( result.is_ok(), "Expected to succeed because of default truncation" @@ -582,6 +587,22 @@ async fn test_xai_provider() -> Result<()> { test_provider("Xai", XAI_DEFAULT_MODEL, &["XAI_API_KEY"], None).await } +#[tokio::test] +async fn test_zai_provider() -> Result<()> { + test_provider("Zai", ZAI_DEFAULT_MODEL, &["ZAI_API_KEY"], None).await +} + +#[tokio::test] +async fn test_xiaomi_mimo_provider() -> Result<()> { + test_provider( + "xiaomi_mimo", + XIAOMI_MIMO_DEFAULT_MODEL, + &["XIAOMI_MIMO_API_KEY"], + None, + ) + .await +} + #[ctor::dtor] fn print_test_report() { TEST_REPORT.print_summary(); diff --git a/docs/xiaomi-mimo-integration-checklist.md b/docs/xiaomi-mimo-integration-checklist.md new file mode 100644 index 00000000..cd07156c --- /dev/null +++ b/docs/xiaomi-mimo-integration-checklist.md @@ -0,0 +1,74 @@ +# Xiaomi MiMo Provider — Integration & Verification Checklist + +Xiaomi's **MiMo** LLM family is integrated as a first-class, OpenAI-compatible +provider (`xiaomi_mimo`). This checklist enumerates every surface a user can +select a provider/model and how to verify MiMo appears and works. + +## How it's wired + +- **Native provider module:** `crates/biorouter/src/providers/xiaomi_mimo.rs` + (`XiaomiMimoProvider`), registered in `crates/biorouter/src/providers/factory.rs`. + Provider id `xiaomi_mimo`, display name **Xiaomi MiMo**, default model + `mimo-v2.5`. +- **Auth / endpoint:** Bearer `XIAOMI_MIMO_API_KEY`. Default host is the + live-verified Singapore Token-Plan endpoint + `https://token-plan-sgp.xiaomimimo.com/v1`; override with `XIAOMI_MIMO_HOST` + for another region/tier: + - Pay-as-you-go (`sk-` keys): `https://api.xiaomimimo.com/v1` + - Token Plan (`tp-` keys): `https://token-plan-{cn,sgp,ams}.xiaomimimo.com/v1` +- **Models:** `mimo-v2.5`, `mimo-v2.5-pro` (~1M ctx), `mimo-v2-pro`, + `mimo-v2-omni` (~256k ctx). 
Context limits also registered in + `crates/biorouter/src/model.rs` (`MODEL_SPECIFIC_LIMITS`). +- Because every selection surface is **registry-driven**, the provider appears + automatically once registered + configured. Only display polish + (ordering/labels/onboarding text) needed explicit wiring. + +## Surfaces — appears in every place a model can be chosen + +- [ ] **Provider config dashboard (Settings → Providers)** — appears under + *Commercial Models*. Backend-driven via `GET /config/providers`; ordering set + in `ui/desktop/src/components/settings/providers/providerOrdering.ts` + (`xiaomi_mimo`). +- [ ] **Provider configuration modal** — `XIAOMI_MIMO_API_KEY` (secret) + + optional `XIAOMI_MIMO_HOST` fields render from backend `config_keys`; labels + in `ui/desktop/src/utils/configUtils.ts`. +- [ ] **Onboarding** — listed under "Auto-detect from API key" and "View all + commercial providers"; auto-detect wired in + `crates/biorouter/src/providers/auto_detect.rs` and text in + `ui/desktop/src/components/onboarding/CommercialSetupCard.tsx`. +- [ ] **Main model selector** (bottom menu / SwitchModelModal) — once + configured, `mimo-*` models appear in the picker (backend-driven). +- [ ] **Leader/Worker mode** (`LeadWorkerSettings.tsx`) — MiMo models selectable + for both lead and worker (backend-driven). +- [ ] **Knowledge base ingestion/digestion** (`IngestModelPicker.tsx`) — MiMo + models selectable for ingest (backend-driven). +- [ ] **CLI** (`biorouter configure`) — appears in the provider list under + Commercial; usable via `biorouter run --provider xiaomi_mimo --model mimo-v2.5`. +- [ ] **TUI** — stores the selected provider/model string; no separate list. +- [ ] **Daemon/server** — `GET /config/providers`, + `GET /config/providers/xiaomi_mimo/models`, and `/config/detect-provider` + all surface it (registry-driven; no allowlist). 
+ +## Functional verification + +- [ ] **Live endpoint reachable** — `POST {host}/chat/completions` with the + key returns HTTP 200 + a `mimo-v2.5` completion. *(Verified: returned + `BIOROUTER-OK`, `reasoning_tokens: 0` with thinking disabled.)* +- [ ] **Registered in factory** — `cargo test -p biorouter --lib providers::xiaomi_mimo` + (`test_registered_in_factory`, `test_metadata_structure`). +- [ ] **Config keys correct** — `cargo test -p biorouter --lib test_openai_compatible_providers_config_keys` + (first key `XIAOMI_MIMO_API_KEY`, required + secret). *(Verified passing.)* +- [ ] **Live completion through the provider stack** — + `XIAOMI_MIMO_API_KEY=<key> cargo test -p biorouter --test providers test_xiaomi_mimo_provider -- --nocapture` + (exercises factory → `XiaomiMimoProvider::from_env` → live HTTP). +- [ ] **Context window** — `mimo-v2.5` reports ~1M tokens for token accounting + (`MODEL_SPECIFIC_LIMITS`). + +## Notes / gotchas + +- MiMo enables **thinking** by default (adds reasoning tokens/latency). The + OpenAI surface disables it via `chat_template_kwargs.enable_thinking=false`. +- A `tp-` key is bound to a single region — set `XIAOMI_MIMO_HOST` to the + matching regional endpoint if not on Singapore. +- Token accounting: the API reports cached prompt tokens separately; counts are + not directly comparable across regions/tiers. diff --git a/docs/zai-integration-checklist.md b/docs/zai-integration-checklist.md new file mode 100644 index 00000000..6e812ea7 --- /dev/null +++ b/docs/zai-integration-checklist.md @@ -0,0 +1,74 @@ +# z.ai (GLM) Provider — Integration & Verification Checklist + +z.ai (the international platform of **Zhipu AI**) is integrated as a +first-class, OpenAI-compatible provider (`zai`) serving the **GLM** model +family. This checklist enumerates every surface a user can select a +provider/model and how to verify z.ai appears and works.
(Its sibling, +`xiaomi-mimo-integration-checklist.md`, covers the MiMo provider, which is +wired identically.) + +## How it's wired + +- **Native provider module:** `crates/biorouter/src/providers/zai.rs` + (`ZaiProvider`), registered in `crates/biorouter/src/providers/factory.rs`. + Provider id `zai`, display name **z.ai**, default model `glm-4.6`. +- **Auth / endpoint:** Bearer `ZAI_API_KEY`. Default host is the + OpenAI-compatible base `https://api.z.ai/api/paas/v4`; override with + `ZAI_HOST`. (z.ai also exposes an Anthropic-compatible surface at + `https://api.z.ai/api/anthropic` used by Claude Code — not used here; we + integrate the OpenAI surface, matching the other ~16 OpenAI-compatible + providers.) +- **Models:** `glm-4.7`, `glm-4.6`, `glm-4.5`, `glm-4.5-air`, `glm-5.2`, + `glm-5.1`, `glm-5`, `glm-5-turbo`. Context limits registered in + `crates/biorouter/src/model.rs` (`MODEL_SPECIFIC_LIMITS`, `glm-*` patterns). +- Because every selection surface is **registry-driven**, the provider appears + automatically once registered + configured. Only display polish + (ordering/labels/onboarding text) needed explicit wiring. + +## Surfaces — appears in every place a model can be chosen + +- [ ] **Provider config dashboard (Settings → Providers)** — appears under + *Commercial Models*. Backend-driven via `GET /config/providers`; ordering set + in `ui/desktop/src/components/settings/providers/providerOrdering.ts` (`zai`). +- [ ] **Provider configuration modal** — `ZAI_API_KEY` (secret) + optional + `ZAI_HOST` fields render from backend `config_keys`. +- [ ] **Onboarding** — listed under "Auto-detect from API key"; auto-detect + wired in `crates/biorouter/src/providers/auto_detect.rs` and text in + `ui/desktop/src/components/onboarding/CommercialSetupCard.tsx`. +- [ ] **Main model selector** (bottom menu / SwitchModelModal) — once + configured, `glm-*` models appear in the picker (backend-driven). 
+- [ ] **Leader/Worker mode** (`LeadWorkerSettings.tsx`) — GLM models selectable + for both lead and worker (backend-driven). +- [ ] **Knowledge base ingestion/digestion** (`IngestModelPicker.tsx`) — GLM + models selectable for ingest (backend-driven). +- [ ] **CLI** (`biorouter configure`) — appears in the provider list under + Commercial (`configure_provider_dialog()` reads the registry); usable via + `biorouter run --provider zai --model glm-4.6`. +- [ ] **TUI** — stores the selected provider/model string; no separate list. +- [ ] **Daemon/server** — `GET /config/providers`, + `GET /config/providers/zai/models`, and `/config/detect-provider` all surface + it (registry-driven; no allowlist). + +## Functional verification + +- [ ] **Endpoint / auth reachable** — `GET {host}/models` with the key returns + HTTP 200. *(Verified live: key authenticates; chat returns HTTP 429 code 1113 + "insufficient balance" — auth OK, account just needs credit. A bad key + returns 401, confirming the 429 is a billing, not auth, state.)* +- [ ] **Registered in factory** — `cargo test -p biorouter --lib providers::zai` + (`test_registered_in_factory`, `test_metadata_structure`). +- [ ] **Config keys correct** — `cargo test -p biorouter --lib test_openai_compatible_providers_config_keys` + (first key `ZAI_API_KEY`, required + secret). +- [ ] **Live completion through the provider stack** (needs a funded key) — + `ZAI_API_KEY=<key> cargo test -p biorouter --test providers test_zai_provider -- --nocapture` + (exercises factory → `ZaiProvider::from_env` → live HTTP). +- [ ] **Context window** — `glm-4.6`/`glm-4.7` report ~200k tokens for token + accounting (`MODEL_SPECIFIC_LIMITS`). + +## Notes / gotchas + +- z.ai keys have the form `<id>.<secret>` — pass the whole string (the dot is + part of the key) as `ZAI_API_KEY`. +- The default endpoint is the OpenAI-compatible `/api/paas/v4` base.
Don't point + `ZAI_HOST` at the Anthropic surface (`/api/anthropic`) — the wire format + differs and this provider speaks OpenAI chat/completions. diff --git a/ui/desktop/src/components/onboarding/CommercialSetupCard.tsx b/ui/desktop/src/components/onboarding/CommercialSetupCard.tsx index 05493f22..f9f58a39 100644 --- a/ui/desktop/src/components/onboarding/CommercialSetupCard.tsx +++ b/ui/desktop/src/components/onboarding/CommercialSetupCard.tsx @@ -62,8 +62,8 @@ export default function CommercialSetupCard({

Auto-detect from API key

- Paste a key from OpenAI, Anthropic, Google, Groq, or xAI — we'll detect the provider for - you. + Paste a key from OpenAI, Anthropic, Google, Groq, xAI, z.ai, or Xiaomi MiMo — we'll detect + the provider for you.

@@ -119,7 +119,7 @@ export default function CommercialSetupCard({
    -
  • · Supported providers: OpenAI, Anthropic, Google, Groq, xAI
  • +
  • · Supported providers: OpenAI, Anthropic, Google, Groq, xAI, z.ai, Xiaomi MiMo
  • · Verify the key is active and has sufficient credits
  • · For local models, use the Local card above
diff --git a/ui/desktop/src/components/settings/providers/providerOrdering.ts b/ui/desktop/src/components/settings/providers/providerOrdering.ts index 024d4027..c847c9fe 100644 --- a/ui/desktop/src/components/settings/providers/providerOrdering.ts +++ b/ui/desktop/src/components/settings/providers/providerOrdering.ts @@ -14,6 +14,8 @@ const PRIORITY_ORDER: Record = { anthropic: 2, openai: 3, google: 4, + zai: 5, + xiaomi_mimo: 6, }; export type ProviderGroupKey = 'institutional' | 'local' | 'commercial'; diff --git a/ui/desktop/src/utils/configUtils.ts b/ui/desktop/src/utils/configUtils.ts index 0d84c859..bded6cdc 100644 --- a/ui/desktop/src/utils/configUtils.ts +++ b/ui/desktop/src/utils/configUtils.ts @@ -30,6 +30,14 @@ export const configLabels: Record = { // groq GROQ_API_KEY: 'Groq API Key', + // xiaomi mimo + XIAOMI_MIMO_API_KEY: 'Xiaomi MiMo API Key', + XIAOMI_MIMO_HOST: 'Xiaomi MiMo Host (region endpoint)', + + // zai (Zhipu GLM) + ZAI_API_KEY: 'z.ai API Key', + ZAI_HOST: 'z.ai Host', + // openrouter OPENROUTER_API_KEY: 'OpenRouter API Key', @@ -73,6 +81,8 @@ export const providerPrefixes: Record = { anthropic: ['ANTHROPIC_'], google: ['GOOGLE_'], groq: ['GROQ_'], + xiaomi_mimo: ['XIAOMI_MIMO_'], + zai: ['ZAI_'], databricks: ['DATABRICKS_'], openrouter: ['OPENROUTER_'], ollama: ['OLLAMA_'],