diff --git a/.github/workflows/editor-support.yml b/.github/workflows/editor-support.yml
new file mode 100644
index 00000000..9c5cb7c7
--- /dev/null
+++ b/.github/workflows/editor-support.yml
@@ -0,0 +1,46 @@
+name: Editor Support
+
+on:
+  pull_request:
+    paths: # run only when the workflow, the build, the sources, the LSP tests or the Zed extension change
+      - ".github/workflows/editor-support.yml"
+      - "makefile"
+      - "src/**"
+      - "test-suite/lsp-semantic-tokens.bpp"
+      - "test-suite/lsp-fixtures/**"
+      - "zed/**"
+  push:
+    branches: ["main"]
+    paths: # same filter as pull_request above
+      - ".github/workflows/editor-support.yml"
+      - "makefile"
+      - "src/**"
+      - "test-suite/lsp-semantic-tokens.bpp"
+      - "test-suite/lsp-fixtures/**"
+      - "zed/**"
+  workflow_dispatch:
+
+jobs:
+  semantic-tokens: # builds the compiler and language server, then runs `make test-lsp`
+    runs-on: ubuntu-latest
+    container:
+      image: debian:stable
+      options: --user root # the install step below calls apt-get without sudo
+    steps:
+      - name: Install dependencies
+        run: |
+          apt-get update
+          apt-get install -y build-essential flex bison git jq libutfcpp-dev nlohmann-json3-dev
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+      - uses: actions/checkout@v4 # checkout happens after git is installed and the workspace is marked safe
+      - name: Test language server
+        run: make test-lsp
+
+  zed-extension: # validates the Zed extension with `make test-zed`
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install WebAssembly target
+        run: rustup target add wasm32-wasip1
+      - name: Test Zed extension
+        run: make test-zed
diff --git a/.gitignore b/.gitignore
index 6d3e757f..cc834387 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ debian/*.7
 vscode/node_modules
 vscode/*.vsix
 vscode/dist
+zed/target
+zed/extension.wasm
diff --git a/README.md b/README.md
index 27fa8ce5..ff672608 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,9 @@ Language server-specific prerequisites:
  - `nlohmann-json3-dev`
 
 Optional:
+ - `jq` for running the language server integration tests
+ - `cargo` and the `wasm32-wasip1` Rust target for validating the Zed extension
+ - the `wasm32-wasip2` Rust target for installing the extension in current Zed releases
  - `pandoc` and `perl` for building the documentation
  - `debhelper` for building the Debian package and keeping version numbers up-to-date via `dpkg-parsechangelog`
 
@@ -86,6 +89,8 @@ $ sudo apt install build-essential flex bison libutfcpp-dev pandoc perl debhelpe
 $ make # Build the Bash++ compiler and language server, bin/bpp and bin/bpp-lsp
 $ make manpages # Build the manpages, which can then be found under debian/
 $ make test # Run the test suite to verify the compiler works correctly
+$ make test-lsp # Run the language server integration tests
+$ make test-zed # Validate the Zed extension
 ```
 
 ## Using the compiler
@@ -120,3 +125,10 @@ $ shellwatch compiled-program.sh
 ## VSCode extension
 
 The [Bash++ extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=rail5.bashpp) provides IDE language support for Bash++, including syntax highlighting and (optionally) language server features such as code completion, go-to definition, etc.
+
+## Zed extension
+
+The development extension under [`zed/`](zed/) recognizes `.bpp` files, reuses
+Zed's built-in Bash grammar for baseline highlighting, and starts an installed
+`bpp-lsp` from `PATH`. Enable Zed's `combined` semantic-token mode to layer
+Bash++ identifier highlighting from the compiler AST over the Bash syntax.
diff --git a/makefile b/makefile
index 41b8c495..737e2a92 100644
--- a/makefile
+++ b/makefile
@@ -14,6 +14,9 @@ include mk/docs.mk
 test:
 	bin/bpp -Istdlib/ test-suite/run.bpp
 
+test-lsp: bin/bpp bin/bpp-lsp # compiles and runs the LSP test script, passing the built server as its argument
+	bin/bpp -Istdlib/ test-suite/lsp-semantic-tokens.bpp bin/bpp-lsp
+
 vscode:
 	@cd vscode && $(MAKE) --no-print-directory
 
@@ -21,9 +24,18 @@ clean-vscode:
 	@cd vscode && $(MAKE) --no-print-directory clean
 	@echo "Cleaned up VSCode extension files."
 
-clean: clean-flexbison clean-lsp clean-meta clean-objects clean-bin clean-std clean-manpages clean-technical-docs clean-vscode
+zed: # builds the Zed extension crate for the wasm32-wasip1 target
+	cargo build --manifest-path zed/Cargo.toml --target wasm32-wasip1
+
+test-zed: # type-checks the Zed extension crate (cargo check) for the same target
+	cargo check --manifest-path zed/Cargo.toml --target wasm32-wasip1
+
+clean-zed: # removes the two Zed paths that .gitignore also lists
+	rm -rf zed/target zed/extension.wasm
+
+clean: clean-flexbison clean-lsp clean-meta clean-objects clean-bin clean-std clean-manpages clean-technical-docs clean-vscode clean-zed
 
-.PHONY: all test vscode clean-vscode
+.PHONY: all test test-lsp vscode clean-vscode zed test-zed clean-zed
 
 ifeq ($(filter clean%,$(MAKECMDGOALS)),)
 -include $(shell find bin -name '*.d' 2>/dev/null)
diff --git a/src/AST/BashppParser.cpp b/src/AST/BashppParser.cpp
index 78bc651b..e97318a9 100644
--- a/src/AST/BashppParser.cpp
+++ b/src/AST/BashppParser.cpp
@@ -18,8 +18,10 @@ extern void yyset_in(FILE* in_str, yyscan_t scanner);
 extern void initLexer(yyscan_t yyscanner);
 extern void destroyLexer(yyscan_t yyscanner);
+extern std::vector get_lexer_tokens(yyscan_t yyscanner); // defined in lexer.l; returns the scanner's recorded tokens by value
 
 extern bool set_display_lexer_output(bool enable, yyscan_t yyscanner);
+extern void set_collect_lexer_tokens(bool enable, yyscan_t yyscanner); // defined in lexer.l; toggles token recording
 extern void set_utf16_mode(bool enable, yyscan_t yyscanner);
 
 #include
@@ -78,6 +80,7 @@ void AST::BashppParser::_initialize_lexer() {
 	initLexer(lexer);
 	set_utf16_mode(utf16_mode, lexer);
 	set_display_lexer_output(display_lexer_output, lexer);
+	set_collect_lexer_tokens(collect_lexer_tokens, lexer); // forward the parser-level flag to the scanner
 }
 
 void AST::BashppParser::_destroy_lexer() {
@@ -97,6 +100,7 @@ void AST::BashppParser::_parse() {
 			errors,
 			lexer);
 		parser.parse(); // Returns an int, not needed by us
+		lexer_tokens = ::get_lexer_tokens(lexer); // snapshot after parsing; not reached if parse() throws (the catch below rethrows)
 	} catch (...) {
 		_destroy_lexer();
 		throw;
@@ -113,6 +117,10 @@ void AST::BashppParser::setDisplayLexerOutput(bool enabled) {
 	display_lexer_output = enabled;
 }
 
+void AST::BashppParser::setCollectLexerTokens(bool enabled) { // only stores the flag; _initialize_lexer() hands it to the scanner
+	collect_lexer_tokens = enabled;
+}
+
 void AST::BashppParser::setInputFromFilePath(const std::string& file_path) {
 	input_type = InputType::FILEPATH;
 	input_source = file_path;
@@ -144,3 +152,7 @@ std::shared_ptr AST::BashppParser::program() {
 const std::vector& AST::BashppParser::get_errors() const {
 	return errors;
 }
+
+const std::vector& AST::BashppParser::get_lexer_tokens() const { // reference to the tokens captured by the last successful _parse()
+	return lexer_tokens;
+}
diff --git a/src/AST/BashppParser.h b/src/AST/BashppParser.h
index 4bc88711..0a7951e4 100644
--- a/src/AST/BashppParser.h
+++ b/src/AST/BashppParser.h
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -32,8 +33,10 @@ class BashppParser {
 		bool utf16_mode = false; // Whether to use UTF-16 mode for character counting
 		bool display_lexer_output = false;
+		bool collect_lexer_tokens = false; // Whether the scanner should record every token it emits
 
 		std::vector errors;
+		std::vector lexer_tokens; // Tokens copied out of the scanner at the end of _parse()
 
 		std::string input_file_path = "";
 		std::vector include_chain;
@@ -56,6 +59,7 @@ class BashppParser {
 	public:
 		void setUTF16Mode(bool enabled);
 		void setDisplayLexerOutput(bool enabled);
+		void setCollectLexerTokens(bool enabled);
 
 		void setInputFromFilePath(const std::string& file_path);
 		void setInputFromFilePtr(FILE* file_ptr, const std::string& file_path);
@@ -66,6 +70,7 @@ class BashppParser {
 		std::shared_ptr program();
 
 		const std::vector& get_errors() const;
+		const std::vector& get_lexer_tokens() const;
 };
 
 } // namespace AST
diff --git a/src/AST/LexerToken.h b/src/AST/LexerToken.h
new file mode 100644
index 00000000..fee28eeb
--- /dev/null
+++ b/src/AST/LexerToken.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2026 Andrew S. Rightenburg
+ * Bash++: Bash with classes
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include
+
+#include
+
+namespace AST {
+
+/**
+ * @brief A lexer symbol and its exact source range for editor features.
+ */
+struct LexerToken {
+	std::string kind; // Bison symbol name of the token, or "COMMENT" for comments recorded by the lexer
+	std::string text; // matched source text (for single-quoted strings: the token's semantic value)
+	ParserLocation location; // source range reported by the lexer for this token
+};
+
+} // namespace AST
diff --git a/src/flexbison/lexer.l b/src/flexbison/lexer.l
index 7915f329..8678089c 100644
--- a/src/flexbison/lexer.l
+++ b/src/flexbison/lexer.l
@@ -7,11 +7,13 @@
  * SPDX-License-Identifier: GPL-3.0-or-later
  */
 #include "parser.tab.hpp" // Bison header for token types
+#include
 #include // For AST::Token
 #include
 #include
 #include
 #include
+#include
 
 /**
  GNU Bison Docs, 10.1.7.2: Complete Symbols
@@ -103,7 +105,9 @@ struct LexerState {
 struct LexerExtra {
 	LexerState lexerState;
 	ModeStack modeStack;
+	std::vector lexerTokens; // tokens recorded while collect_lexer_tokens is set; cleared by initLexer()
 	bool display_lexer_output = false;
+	bool collect_lexer_tokens = false; // off by default, so recording costs nothing unless requested
 };
 
 extern LexerExtra* yyget_extra(yyscan_t yyscanner);
@@ -137,6 +141,10 @@ void set_display_lexer_output(bool enable, yyscan_t yyscanner) {
 	get_lexer_extra(yyscanner)->display_lexer_output = enable;
 }
 
+void set_collect_lexer_tokens(bool enable, yyscan_t yyscanner) { // enable or disable token recording for this scanner instance
+	get_lexer_extra(yyscanner)->collect_lexer_tokens = enable;
+}
+
 size_t utf8_char_count(const std::string& s) {
 	size_t count = 0;
 	for (unsigned char c : s) {
@@ -291,6 +299,47 @@ void set_received_local_keyword(bool received, yyscan_t yyscanner) {
  */
 yy::parser::symbol_type maybe_get_lvalue_token(yy::parser::symbol_type token, yyscan_t yyscanner);
 
+void recordLexerToken( // append one emitted parser symbol to the scanner's token list (no-op unless collection is enabled)
+	const yy::parser::symbol_type& token, // the symbol about to be returned to the parser
+	const std::string& text, // the text of the current match (yytext)
+	yyscan_t yyscanner
+) {
+	auto* extra = get_lexer_extra(yyscanner);
+	if (!extra->collect_lexer_tokens) return;
+
+	ParserLocation location = token.location;
+	std::string recorded_text = text;
+	if (token.kind() == yy::parser::symbol_kind::S_SINGLEQUOTED_STRING) {
+		// The lexer combines several matches while assembling this token, so the
+		// semantic value retains a more accurate range than the final match.
+		const auto& value = token.value.as>();
+		location.begin.line = value.getLine(); // only the start is replaced; location.end stays as reported
+		location.begin.column = value.getCharPositionInLine();
+		recorded_text = value.getValue();
+	}
+
+	extra->lexerTokens.push_back(AST::LexerToken{
+		.kind = std::string(yy::parser::symbol_name(token.kind())),
+		.text = std::move(recorded_text),
+		.location = location
+	});
+}
+
+void recordComment( // append a comment, which never reaches the parser, as a token of kind "COMMENT"
+	const std::string& text,
+	const ParserLocation& location,
+	yyscan_t yyscanner
+) {
+	auto* extra = get_lexer_extra(yyscanner);
+	if (!extra->collect_lexer_tokens) return;
+
+	extra->lexerTokens.push_back(AST::LexerToken{
+		.kind = "COMMENT",
+		.text = text,
+		.location = location
+	});
+}
+
 void updateLexerState(yyscan_t yyscanner) {
 	thisLexerState.expecting_assignment_operator = false;
 	thisLexerState.parsed_assignment_operator = false;
@@ -448,10 +497,14 @@ ANGLEBRACKET_INCLUDE_PATH <([^>])+>
  * return yy::parser::make_AT();
  */
 #define emit(tokenType, ...) \
+	do { \
 	updateLexerState(yyscanner); \
-	return maybe_get_lvalue_token(yy::parser::make_##tokenType( \
+	auto token = maybe_get_lvalue_token(yy::parser::make_##tokenType( \
 		__VA_ARGS__ __VA_OPT__(,) current_match_location(yyscanner) \
-	), yyscanner);
+	), yyscanner); \
+	recordLexerToken(token, std::string(yytext, yyleng), yyscanner); \
+	return token; \
+	} while (false)
 
 /**
  * Helper to get the text of the current token as a std::string.
@@ -465,7 +518,7 @@ ANGLEBRACKET_INCLUDE_PATH <([^>])+>
 {
 	[ \t\n]+ { /* Ignore whitespace and newlines after a DELIM */ }
 
-	[\#][^\n]* { /* Ignore comments */ }
+	[\#][^\n]* { recordComment(std::string(yytext, yyleng), current_match_location(yyscanner), yyscanner); }
 
 	. {
 		// Any other character means we're done skipping
 		thisModeStack.pop(); // Exit SKIP_AFTER_DELIM_MODE
@@ -989,7 +1042,7 @@ function/[ \t]+[a-zA-Z_][a-zA-Z_0-9]* {
 
 {INTEGER} { emit(INTEGER, tokenText); }
 
-[ \t]*[\#][^\n]* { /* Ignore comments */ }
+[ \t]*[\#][^\n]* { recordComment(std::string(yytext, yyleng), current_match_location(yyscanner), yyscanner); }
 
 {
 	"then"/[ \t\n] {
@@ -1560,6 +1613,11 @@ extern void initLexer(yyscan_t yyscanner) {
 	thisModeStack.bind(yyscanner);
 	thisModeStack.push(SKIP_AFTER_DELIM_MODE); // Initial mode
 	thisLexerState.reset();
+	get_lexer_extra(yyscanner)->lexerTokens.clear(); // start every scan with an empty token list
+}
+
+extern std::vector get_lexer_tokens(yyscan_t yyscanner) { // returns a copy; the scanner keeps its own list
+	return get_lexer_extra(yyscanner)->lexerTokens;
 }
 
 extern void destroyLexer(yyscan_t yyscanner) {
diff --git a/src/lsp/BashppServer.h b/src/lsp/BashppServer.h
index 8c697d65..cecacebe 100644
--- a/src/lsp/BashppServer.h
+++ b/src/lsp/BashppServer.h
@@ -80,6 +80,7 @@ class BashppServer {
 		GenericResponseMessage handleRename(const GenericRequestMessage& request);
 		GenericResponseMessage handleReferences(const GenericRequestMessage& request);
 		GenericResponseMessage handleCompletion(const GenericRequestMessage& request);
+		GenericResponseMessage handleSemanticTokens(const GenericRequestMessage& request); // serves textDocument/semanticTokens/full
 
 		CompletionList handleATCompletion(const CompletionParams& params);
 		CompletionList handleDOTCompletion(const CompletionParams& params);
@@ -286,7 +287,7 @@ class BashppServer {
 		 * @brief Maps request types to the functions that handle them.
 		 *
 		 */
-		static constexpr std::array request_handlers = {{
+		static constexpr std::array request_handlers = {{
 			{"initialize", &BashppServer::handleInitialize},
 			{"textDocument/definition", &BashppServer::handleDefinition},
 			{"textDocument/completion", &BashppServer::handleCompletion},
@@ -294,6 +295,7 @@ class BashppServer {
 			{"textDocument/documentSymbol", &BashppServer::handleDocumentSymbol},
 			{"textDocument/rename", &BashppServer::handleRename},
 			{"textDocument/references", &BashppServer::handleReferences},
+			{"textDocument/semanticTokens/full", &BashppServer::handleSemanticTokens},
 			{"shutdown", &BashppServer::shutdown}
 		}};
 
diff --git a/src/lsp/ProgramPool.cpp b/src/lsp/ProgramPool.cpp
index be6fa6e9..f059e9a4 100644
--- a/src/lsp/ProgramPool.cpp
+++ b/src/lsp/ProgramPool.cpp
@@ -79,6 +79,13 @@ std::string ProgramPool::get_file_contents(const std::string& file_path) {
 	return contents;
 }
 
+std::vector ProgramPool::get_lexer_tokens(const std::string& file_path) { // returns a copy taken while holding pool_mutex
+	std::lock_guard lock(pool_mutex);
+	auto tokens = lexer_tokens.find(file_path);
+	if (tokens == lexer_tokens.end()) return {}; // no tokens recorded for this path
+	return tokens->second;
+}
+
 void ProgramPool::_remove_oldest_program() {
 	_remove_program(0);
 }
@@ -153,6 +160,7 @@ std::shared_ptr ProgramPool::_parse_program(const std::string&
 	AST::BashppParser parser;
 	parser.setUTF16Mode(utf16_mode);
+	parser.setCollectLexerTokens(true); // every parse done by the pool records lexer tokens
 
 	if (unsaved_changes.contains(file_path)) {
 		parser.setInputFromStringContents(unsaved_changes[file_path]);
@@ -161,6 +169,7 @@ std::shared_ptr ProgramPool::_parse_program(const std::string&
 	}
 
 	auto program = parser.program();
+	lexer_tokens[file_path] = parser.get_lexer_tokens(); // NOTE(review): within this diff, entries are only dropped in clean() - confirm _remove_program() erases them too
 	listener.set_parser_errors(parser.get_errors());
 	if (program == nullptr) {
 		program = std::make_shared(); // Parsing failed
@@ -382,6 +391,7 @@ void ProgramPool::clean() {
 		programs.clear();
 		program_indices.clear();
 		open_files.clear();
+		lexer_tokens.clear();
 	}
 	update_snapshot(); // Update the snapshot after cleaning
 }
diff --git a/src/lsp/ProgramPool.h b/src/lsp/ProgramPool.h
index d44be64f..592ab543 100644
--- a/src/lsp/ProgramPool.h
+++ b/src/lsp/ProgramPool.h
@@ -12,6 +12,7 @@
 #include
 
 #include
+#include
 #include
 
 #include
@@ -40,6 +41,7 @@ class ProgramPool {
 		std::unordered_map> program_indices; // Maps file paths to program indices in the pool
 		std::unordered_map open_files; // Maps file paths to whether they are currently open
 		std::unordered_map unsaved_changes; // Maps file paths to their unsaved contents
+		std::unordered_map> lexer_tokens; // Maps file paths to the lexer tokens recorded by their most recent parse
 
 		BashVersion target_bash_version = {5, 2};
 		std::recursive_mutex pool_mutex; // Mutex to protect access to the pool
@@ -102,6 +104,14 @@ class ProgramPool {
 		 * @return std::string The contents of the file.
 		 */
 		std::string get_file_contents(const std::string& file_path);
+
+		/**
+		 * @brief Get compiler lexer tokens for a parsed file.
+		 *
+		 * @param file_path The path of the parsed file.
+		 * @return std::vector A copy of the file's lexer tokens.
+		 */
+		std::vector get_lexer_tokens(const std::string& file_path);
 
 		/**
 		 * @brief Get or create a program for the given file path
diff --git a/src/lsp/README.md b/src/lsp/README.md
index 29fcd05c..68bf3192 100644
--- a/src/lsp/README.md
+++ b/src/lsp/README.md
@@ -15,9 +15,14 @@ It is designed to provide language server protocol (LSP) features such as code c
 - [✓] Workspace renaming
 - [✓] Find references
 - [✓] Document symbols
+- [✓] Semantic tokens
 - [   ] Workspace symbols
 - [   ] Code formatting
-- [   ] Semantic tokens
+
+Semantic tokens are produced from the same Flex/Bison AST used by the compiler.
+The server reports Bash++ classes, methods, data members, objects, pointers,
+parameters, and references. Editors can combine these tokens with their normal
+Bash syntax highlighting instead of relying on a second Bash++ parser.
 
## Copyright and License
diff --git a/src/lsp/handlers/handleInitialize.cpp b/src/lsp/handlers/handleInitialize.cpp
index 4e0e65f8..ccc7b9cf 100644
--- a/src/lsp/handlers/handleInitialize.cpp
+++ b/src/lsp/handlers/handleInitialize.cpp
@@ -8,6 +8,7 @@
 #include
 #include
+#include
 
 GenericResponseMessage bpp::BashppServer::handleInitialize(const GenericRequestMessage& request) {
 	InitializeRequest initialize_request = request.toSpecific();
@@ -38,6 +39,24 @@ GenericResponseMessage bpp::BashppServer::handleInitialize(const GenericRequestM
 	// Advertise that we support DocumentSymbol requests
 	result.capabilities.documentSymbolProvider = true;
 
+	SemanticTokensOptions semantic_tokens_options;
+	semantic_tokens_options.legend.tokenTypes = { // index i here is the numeric value of bpp::SemanticTokenType entry i
+		"class",
+		"method",
+		"property",
+		"variable",
+		"parameter",
+		"keyword",
+		"comment",
+		"string",
+		"operator",
+		"number",
+		"function"
+	};
+	semantic_tokens_options.legend.tokenModifiers = {"declaration"}; // bit 0, used as declaration_modifier by the collector
+	semantic_tokens_options.full = true; // only whole-document requests are advertised
+	result.capabilities.semanticTokensProvider = semantic_tokens_options;
+
 	// Planned but not yet implemented:
 	//result.capabilities.workspaceSymbolProvider = true;
 
diff --git a/src/lsp/handlers/handleSemanticTokens.cpp b/src/lsp/handlers/handleSemanticTokens.cpp
new file mode 100644
index 00000000..dd50b79d
--- /dev/null
+++ b/src/lsp/handlers/handleSemanticTokens.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2026 Andrew S. Rightenburg
+ * Bash++: Bash with classes
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include
+#include
+#include
+#include
+
+GenericResponseMessage bpp::BashppServer::handleSemanticTokens( // handles textDocument/semanticTokens/full; answers null on any lookup failure
+	const GenericRequestMessage& request
+) {
+	SemanticTokensRequestResponse response;
+	response.id = request.id;
+	SemanticTokensRequest semantic_tokens_request = // NOTE(review): conversion runs outside the try block below - confirm it cannot throw on malformed params
+		request.toSpecific();
+
+	std::string uri;
+	try {
+		uri = validateUri(semantic_tokens_request.params.textDocument.uri);
+	} catch (const std::exception& exception) {
+		log("Invalid URI in semantic tokens request: ", exception.what());
+		response.result = nullptr;
+		return response;
+	}
+
+	auto program = program_pool.get_program(uri);
+	if (program == nullptr) {
+		log("Program not found for URI: ", uri);
+		response.result = nullptr;
+		return response;
+	}
+
+	auto ast = program->get_source_file_ast(uri);
+	if (ast == nullptr) {
+		log("AST not found for URI: ", uri);
+		response.result = nullptr;
+		return response;
+	}
+
+	SemanticTokenCollector collector(
+		uri,
+		program,
+		program_pool.get_lexer_tokens(uri), // NOTE(review): fetched separately from `program`; presumably both come from the same parse - verify under concurrent edits
+		program_pool.get_utf16_mode()
+	);
+	collector.walk(ast); // the listener callbacks add the AST-derived tokens
+
+	SemanticTokens result;
+	result.data = collector.encode();
+	response.result = result;
+	return response;
+}
diff --git a/src/lsp/include/SemanticTokenCollector.cpp b/src/lsp/include/SemanticTokenCollector.cpp
new file mode 100644
index 00000000..feb97950
--- /dev/null
+++ b/src/lsp/include/SemanticTokenCollector.cpp
@@ -0,0 +1,517 @@
+/*
+ * Copyright (C) 2026 Andrew S. Rightenburg
+ * Bash++: Bash with classes
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "SemanticTokenCollector.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace bpp {
+
+namespace {
+
+std::optional classify_lexer_token(std::string_view kind) { // maps a lexer token kind to a token type; nullopt means "do not highlight"
+	if (kind == "COMMENT") return SemanticTokenType::Comment;
+	if (kind == "INTEGER") return SemanticTokenType::Number;
+	if (kind == "BASH_VAR") return SemanticTokenType::Variable;
+	if (kind == "IDENTIFIER_LVALUE" || kind == "BASH_FUNCTION_LABEL") {
+		return SemanticTokenType::Function;
+	}
+	if (kind.starts_with("KEYWORD_") || kind.starts_with("BASH_KEYWORD_")) {
+		return SemanticTokenType::Keyword;
+	}
+
+	static constexpr std::array string_tokens{
+		std::string_view("SINGLEQUOTED_STRING"),
+		std::string_view("QUOTE_BEGIN"),
+		std::string_view("QUOTE_END"),
+		std::string_view("STRING_CONTENT"),
+		std::string_view("INCLUDE_PATH")
+	};
+	if (std::ranges::contains(string_tokens, kind)) {
+		return SemanticTokenType::String;
+	}
+
+	static constexpr std::array operator_tokens{
+		std::string_view("DOUBLEAMPERSAND"),
+		std::string_view("DOUBLEPIPE"),
+		std::string_view("PIPE"),
+		std::string_view("DELIM"),
+		std::string_view("AT"),
+		std::string_view("AT_LVALUE"),
+		std::string_view("LBRACE"),
+		std::string_view("RBRACE"),
+		std::string_view("LANGLE"),
+		std::string_view("RANGLE"),
+		std::string_view("LANGLE_AMPERSAND"),
+		std::string_view("RANGLE_AMPERSAND"),
+		std::string_view("AMPERSAND_RANGLE"),
+		std::string_view("COLON"),
+		std::string_view("PLUS_EQUALS"),
+		std::string_view("EQUALS"),
+		std::string_view("ASTERISK"),
+		std::string_view("DEREFERENCE_OPERATOR"),
+		std::string_view("AMPERSAND"),
+		std::string_view("DOT"),
+		std::string_view("SUPERSHELL_START"),
+		std::string_view("SUPERSHELL_END"),
+		std::string_view("SUBSHELL_START"),
+		std::string_view("SUBSHELL_END"),
+		std::string_view("SUBSHELL_SUBSTITUTION_START"),
+		std::string_view("SUBSHELL_SUBSTITUTION_END"),
+		std::string_view("ARRAY_ASSIGNMENT_START"),
+		std::string_view("ARRAY_ASSIGNMENT_END"),
+		std::string_view("DEPRECATED_SUBSHELL_START"),
+		std::string_view("DEPRECATED_SUBSHELL_END"),
+		std::string_view("LPAREN"),
+		std::string_view("RPAREN"),
+		std::string_view("ARRAY_INDEX_START"),
+		std::string_view("ARRAY_INDEX_END"),
+		std::string_view("LBRACKET"),
+		std::string_view("RBRACKET"),
+		std::string_view("REF_START"),
+		std::string_view("REF_START_LVALUE"),
+		std::string_view("REF_END"),
+		std::string_view("BASH_VAR_START"),
+		std::string_view("BASH_VAR_END"),
+		std::string_view("HASH"),
+		std::string_view("HEREDOC_CONTENT_START"),
+		std::string_view("HERESTRING_START"),
+		std::string_view("HEREDOC_START"),
+		std::string_view("BASH_CASE_PATTERN_DELIM"),
+		std::string_view("BASH_CASE_PATTERN_TERMINATOR"),
+		std::string_view("ARITH_FOR_CONDITION_START"),
+		std::string_view("ARITH_FOR_CONDITION_END"),
+		std::string_view("INCREMENT_OPERATOR"),
+		std::string_view("DECREMENT_OPERATOR"),
+		std::string_view("COMPARISON_OPERATOR"),
+		std::string_view("BASH_FUNCTION_OPEN"),
+		std::string_view("BASH_TEST_CONDITION_START"),
+		std::string_view("BASH_TEST_CONDITION_END"),
+		std::string_view("EXCLAM"),
+		std::string_view("EXPANSION_BEGIN"),
+		std::string_view("PROCESS_SUBSTITUTION_START"),
+		std::string_view("PROCESS_SUBSTITUTION_END"),
+		std::string_view("BASH_ARITHMETIC_START"),
+		std::string_view("BASH_ARITHMETIC_END"),
+		std::string_view("BASH_53_NATIVE_SUPERSHELL_START"),
+		std::string_view("BASH_53_NATIVE_SUPERSHELL_END")
+	};
+	if (std::ranges::contains(operator_tokens, kind)) {
+		return SemanticTokenType::Operator;
+	}
+
+	return std::nullopt;
+}
+
+} // namespace
+
+SemanticTokenCollector::SemanticTokenCollector( // seeds the collector with the lexer tokens; AST tokens are added later by the listener callbacks
+	std::string source_file,
+	std::shared_ptr program,
+	const std::vector& lexer_tokens,
+	bool utf16_mode
+) :
+	source_file(std::move(source_file)),
+	program(std::move(program)),
+	utf16_mode(utf16_mode) {
+	for (const auto& token : lexer_tokens) {
+		add_lexer_token(token);
+	}
+}
+
+void SemanticTokenCollector::add_token(BashppSemanticToken semantic_token) { // inserts a token, resolving clashes with an existing token of the exact same range
+	if (semantic_token.length == 0) return;
+
+	auto existing = std::find_if(tokens.begin(), tokens.end(), // NOTE(review): linear scan per insertion, so total cost grows quadratically with the token count
+		[&semantic_token](const BashppSemanticToken& candidate) {
+			return candidate.line == semantic_token.line
+				&& candidate.start_character == semantic_token.start_character
+				&& candidate.length == semantic_token.length;
+		}
+	);
+
+	if (existing == tokens.end()) {
+		tokens.push_back(std::move(semantic_token));
+		return;
+	}
+
+	if (existing->syntax && !semantic_token.syntax) { // an AST-derived token always replaces a lexer-derived one
+		*existing = std::move(semantic_token);
+		return;
+	}
+
+	const bool existing_is_declaration = (existing->modifiers & declaration_modifier) != 0;
+	const bool new_is_declaration =
+		(semantic_token.modifiers & declaration_modifier) != 0;
+	if (new_is_declaration && !existing_is_declaration) { // otherwise the first token recorded for this range is kept
+		*existing = std::move(semantic_token);
+	}
+}
+
+void SemanticTokenCollector::add_token( // records an AST-derived token (syntax = false) at the AST token's own position
+	const AST::Token& token,
+	SemanticTokenType type,
+	uint32_t modifiers
+) {
+	const std::string& value = token.getValue();
+	if (value.empty()) return;
+
+	add_token(BashppSemanticToken{
+		.line = token.getLine(),
+		.start_character = token.getCharPositionInLine(),
+		.length = encoded_character_count(value),
+		.type = type,
+		.modifiers = modifiers,
+		.syntax = false
+	});
+}
+
+uint32_t SemanticTokenCollector::encoded_character_count(const std::string& text) const { // length of UTF-8 text in code points, or in UTF-16 code units when utf16_mode is set
+	uint32_t count = 0;
+	for (size_t index = 0; index < text.size();) {
+		const unsigned char first = static_cast(text[index]);
+		if (first == '\r') { // carriage returns never contribute to the count
+			index++;
+			continue;
+		}
+
+		size_t byte_count = 1; // lead bytes of truncated sequences and stray bytes fall through as one unit each
+		uint32_t codepoint = first;
+		if ((first & 0xe0) == 0xc0 && index + 1 < text.size()) {
+			byte_count = 2;
+			codepoint = (first & 0x1f) << 6
+				| (static_cast(text[index + 1]) & 0x3f);
+		} else if ((first & 0xf0) == 0xe0 && index + 2 < text.size()) {
+			byte_count = 3;
+			codepoint = (first & 0x0f) << 12
+				| (static_cast(text[index + 1]) & 0x3f) << 6
+				| (static_cast(text[index + 2]) & 0x3f);
+		} else if ((first & 0xf8) == 0xf0 && index + 3 < text.size()) {
+			byte_count = 4;
+			codepoint = (first & 0x07) << 18
+				| (static_cast(text[index + 1]) & 0x3f) << 12
+				| (static_cast(text[index + 2]) & 0x3f) << 6
+				| (static_cast(text[index + 3]) & 0x3f);
+		}
+
+		count += utf16_mode && codepoint >= 0x10000 ? 2 : 1; // code points above the BMP need a surrogate pair in UTF-16
+		index += byte_count;
+	}
+	return count;
+}
+
+void SemanticTokenCollector::add_lexer_token(const AST::LexerToken& token) { // classifies a lexer token and records it as one syntax token per source line
+	auto type = classify_lexer_token(token.kind);
+	if (!type.has_value() && token.kind == "CATCHALL") { // CATCHALL text is highlighted only when it is one of these operator characters
+		static constexpr std::array operator_text{
+			std::string_view("!"),
+			std::string_view("["),
+			std::string_view("]"),
+			std::string_view("+"),
+			std::string_view("-"),
+			std::string_view("*"),
+			std::string_view("/"),
+			std::string_view("%"),
+			std::string_view("="),
+			std::string_view("<"),
+			std::string_view(">"),
+			std::string_view("&"),
+			std::string_view("|")
+		};
+		if (std::ranges::contains(operator_text, token.text)) {
+			type = SemanticTokenType::Operator;
+		}
+	}
+	if (!type.has_value() || token.text.empty()) return;
+
+	std::string text = token.text;
+	ParserPosition position = token.location.begin;
+	const bool preserve_whitespace = *type == SemanticTokenType::String; // strings keep surrounding whitespace; every other kind is trimmed
+	if (!preserve_whitespace) {
+		const size_t first = text.find_first_not_of(" \t\r\n");
+		if (first == std::string::npos) return;
+
+		for (size_t index = 0; index < first; index++) { // move the start position past the skipped leading whitespace
+			if (text[index] == '\n') {
+				position.line++;
+				position.column = 0;
+			} else if (text[index] != '\r') {
+				position.column++;
+			}
+		}
+
+		const size_t last = text.find_last_not_of(" \t\r\n");
+		text = text.substr(first, last - first + 1);
+	}
+
+	size_t segment_start = 0;
+	while (segment_start <= text.size()) { // split at '\n' so that no emitted token spans more than one line
+		const size_t newline = text.find('\n', segment_start);
+		const size_t segment_end = newline == std::string::npos
+			? text.size()
+			: newline;
+		const std::string segment = text.substr(
+			segment_start,
+			segment_end - segment_start
+		);
+		const uint32_t length = encoded_character_count(segment);
+		if (length > 0) {
+			add_token(BashppSemanticToken{
+				.line = position.line,
+				.start_character = position.column,
+				.length = length,
+				.type = *type,
+				.modifiers = 0,
+				.syntax = true
+			});
+		}
+
+		if (newline == std::string::npos) break;
+		position.line++;
+		position.column = 0; // continuation segments start at the beginning of the next line
+		segment_start = newline + 1;
+	}
+}
+
+SemanticTokenType SemanticTokenCollector::classify_reference( // picks a token type from the entity the token resolves to
+	const AST::Token& token,
+	SemanticTokenType fallback // returned when resolution throws or matches none of the casts below
+) const {
+	std::shared_ptr entity;
+	try {
+		entity = resolve_entity_at(
+			source_file,
+			token.getLine(),
+			token.getCharPositionInLine(),
+			program
+		);
+	} catch (...) {
+		return fallback;
+	}
+
+	if (std::dynamic_pointer_cast(entity)) {
+		return SemanticTokenType::Method;
+	}
+	if (std::dynamic_pointer_cast(entity)) {
+		return SemanticTokenType::Property;
+	}
+	if (std::dynamic_pointer_cast(entity)) {
+		return SemanticTokenType::Parameter;
+	}
+	if (std::dynamic_pointer_cast(entity)) {
+		return SemanticTokenType::Class;
+	}
+	if (std::dynamic_pointer_cast(entity)) {
+		return SemanticTokenType::Variable;
+	}
+	return fallback;
+}
+
+void SemanticTokenCollector::enterClassDefinition(
+	const std::shared_ptr& node
+) {
+	add_token(node->CLASSNAME(), SemanticTokenType::Class, declaration_modifier);
+	if (node->PARENTCLASSNAME().has_value()) {
+		add_token(*node->PARENTCLASSNAME(), SemanticTokenType::Class); // the parent class is a reference, not a declaration
+	}
+}
+
+void SemanticTokenCollector::enterMethodDefinition(
+	const std::shared_ptr& node
+) {
+	add_token(node->NAME(), SemanticTokenType::Method, declaration_modifier);
+
+	for (const auto& parameter_token : node->PARAMETERS()) {
+		const auto& parameter = parameter_token.getValue();
+		if (parameter.type.has_value()) {
+			add_token(*parameter.type, SemanticTokenType::Class);
+		}
+		add_token(parameter.name, SemanticTokenType::Parameter, declaration_modifier);
+	}
+}
+
+void SemanticTokenCollector::enterDatamemberDeclaration(
+	const std::shared_ptr& node
+) {
+	datamember_declaration_depth++; // nested instantiations and pointer declarations are reported as properties while this is non-zero
+	if (node->TYPE().has_value()) {
+		add_token(*node->TYPE(), SemanticTokenType::Class);
+	}
+	if (node->IDENTIFIER().has_value()) {
+		add_token(*node->IDENTIFIER(), SemanticTokenType::Property, declaration_modifier);
+	}
+}
+
+void SemanticTokenCollector::exitDatamemberDeclaration(
+	const std::shared_ptr& /*node*/
+) {
+	datamember_declaration_depth--;
+}
+
+void SemanticTokenCollector::enterObjectInstantiation(
+	const std::shared_ptr& node
+) {
+	add_token(node->TYPE(), SemanticTokenType::Class);
+	add_token(
+		node->IDENTIFIER(),
+		datamember_declaration_depth > 0
+			? SemanticTokenType::Property
+			: SemanticTokenType::Variable,
+		declaration_modifier
+	);
+}
+
+void SemanticTokenCollector::enterPointerDeclaration(
+	const std::shared_ptr& node
+) {
+	add_token(node->TYPE(), SemanticTokenType::Class);
+	add_token(
+		node->IDENTIFIER(),
+		datamember_declaration_depth > 0
+			? SemanticTokenType::Property
+			: SemanticTokenType::Variable,
+		declaration_modifier
+	);
+}
+
+void SemanticTokenCollector::enterObjectReference(
+	const std::shared_ptr& node
+) {
+	if (!node->isSelfReference()) { // the leading identifier of a self reference gets no AST token
+		add_token(
+			node->IDENTIFIER(),
+			classify_reference(node->IDENTIFIER(), SemanticTokenType::Variable)
+		);
+	}
+
+	for (const auto& identifier : node->IDENTIFIERS()) {
+		add_token(
+			identifier,
+			classify_reference(identifier, SemanticTokenType::Property)
+		);
+	}
+}
+
+void SemanticTokenCollector::enterNewStatement(
+	const std::shared_ptr& node
+) {
+	add_token(node->TYPE(), SemanticTokenType::Class);
+}
+
+void SemanticTokenCollector::enterDynamicCastTarget(
+	const std::shared_ptr& node
+) {
+	if (node->TARGETTYPE().has_value()) {
+		add_token(*node->TARGETTYPE(), SemanticTokenType::Class);
+	}
+}
+
+void SemanticTokenCollector::enterPrimitiveAssignment(
+	const std::shared_ptr& node
+) {
+	add_token(node->IDENTIFIER(), SemanticTokenType::Variable);
+}
+
+void SemanticTokenCollector::enterBashFunction(
+	const std::shared_ptr& node
+) {
+	add_token(node->NAME(), SemanticTokenType::Function, declaration_modifier);
+}
+
+std::vector SemanticTokenCollector::encode() const { // flattens the collected tokens into the LSP relative encoding, five integers per token
+	std::vector semantic_tokens;
+	for (const auto& token : tokens) {
+		if (!token.syntax) semantic_tokens.push_back(token);
+	}
+	// Order the AST tokens by line once so that each lexer token below can
+	// binary-search its own line instead of rescanning every AST token.
+	std::ranges::stable_sort(semantic_tokens, {}, &BashppSemanticToken::line);
+
+	std::vector sorted_tokens = semantic_tokens;
+	for (const auto& syntax_token : tokens) {
+		if (!syntax_token.syntax) continue;
+
+		// LSP tokens cannot overlap. Preserve AST classifications by splitting
+		// broader lexer tokens into the portions that remain around them.
+		std::vector> fragments = {{
+			syntax_token.start_character,
+			syntax_token.start_character + syntax_token.length
+		}};
+
+		const auto same_line_tokens = std::ranges::equal_range(
+			semantic_tokens, syntax_token.line, {}, &BashppSemanticToken::line
+		);
+		for (const auto& semantic_token : same_line_tokens) {
+			const uint32_t semantic_start = semantic_token.start_character;
+			const uint32_t semantic_end = semantic_start + semantic_token.length;
+			std::vector> remaining_fragments;
+			for (const auto& [fragment_start, fragment_end] : fragments) {
+				if (semantic_end <= fragment_start || semantic_start >= fragment_end) { // no overlap: keep the fragment whole
+					remaining_fragments.emplace_back(fragment_start, fragment_end);
+					continue;
+				}
+				if (fragment_start < semantic_start) { // part left of the AST token survives
+					remaining_fragments.emplace_back(fragment_start, semantic_start);
+				}
+				if (semantic_end < fragment_end) { // part right of the AST token survives
+					remaining_fragments.emplace_back(semantic_end, fragment_end);
+				}
+			}
+			fragments = std::move(remaining_fragments);
+		}
+
+		for (const auto& [start, end] : fragments) {
+			if (start == end) continue;
+			auto fragment = syntax_token;
+			fragment.start_character = start;
+			fragment.length = end - start;
+			sorted_tokens.push_back(fragment);
+		}
+	}
+
+	std::ranges::sort(sorted_tokens, [](const auto& left, const auto& right) {
+		if (left.line != right.line) return left.line < right.line;
+		return left.start_character < right.start_character;
+	});
+
+	std::vector result;
+	result.reserve(sorted_tokens.size() * 5);
+
+	uint32_t previous_line = 0;
+	uint32_t previous_start_character = 0;
+	bool first_token = true;
+
+	for (const auto& token : sorted_tokens) {
+		const uint32_t delta_line = first_token ? token.line : token.line - previous_line;
+		const uint32_t delta_start = first_token || delta_line > 0 // the start is relative only to a previous token on the same line
+			? token.start_character
+			: token.start_character - previous_start_character;
+
+		result.push_back(delta_line);
+		result.push_back(delta_start);
+		result.push_back(token.length);
+		result.push_back(static_cast(token.type));
+		result.push_back(token.modifiers);
+
+		previous_line = token.line;
+		previous_start_character = token.start_character;
+		first_token = false;
+	}
+
+	return result;
+}
+
+} // namespace bpp
diff --git a/src/lsp/include/SemanticTokenCollector.h b/src/lsp/include/SemanticTokenCollector.h
new file mode 100644
index 00000000..81b60f45
--- /dev/null
+++ b/src/lsp/include/SemanticTokenCollector.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2026 Andrew S. Rightenburg
+ * Bash++: Bash with classes
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace bpp {
+
+enum class SemanticTokenType : uint32_t { // enumerator order must match legend.tokenTypes in handleInitialize; encode() sends the numeric value
+	Class,
+	Method,
+	Property,
+	Variable,
+	Parameter,
+	Keyword,
+	Comment,
+	String,
+	Operator,
+	Number,
+	Function
+};
+
+struct BashppSemanticToken { // one single-line token before delta encoding
+	uint32_t line = 0;
+	uint32_t start_character = 0;
+	uint32_t length = 0; // as computed by encoded_character_count()
+	SemanticTokenType type = SemanticTokenType::Variable;
+	uint32_t modifiers = 0; // bit set; only declaration_modifier is used
+	bool syntax = false; // true for lexer-derived tokens, false for AST-derived ones
+};
+
+class SemanticTokenCollector : public AST::BaseListener { // merges lexer tokens with AST-derived tokens for one source file
+	private:
+		static constexpr uint32_t declaration_modifier = 1; // bit 0 = "declaration", the only advertised modifier
+
+		std::string source_file;
+		std::shared_ptr program;
+		std::vector tokens; // both lexer-derived and AST-derived tokens, unique per exact range
+		size_t datamember_declaration_depth = 0; // number of data member declarations currently being walked
+		bool utf16_mode = false;
+
+		void add_token(BashppSemanticToken token);
+		void add_token(
+			const AST::Token& token,
+			SemanticTokenType type,
+			uint32_t modifiers = 0
+		);
+		SemanticTokenType classify_reference(
+			const AST::Token& token,
+			SemanticTokenType fallback
+		) const;
+		void add_lexer_token(const AST::LexerToken& token);
+		uint32_t encoded_character_count(const std::string& text) const;
+
+	public:
+		SemanticTokenCollector(
+			std::string source_file,
+			std::shared_ptr program,
+			const std::vector& lexer_tokens,
+			bool utf16_mode
+		);
+
+		void enterClassDefinition(const std::shared_ptr& node);
+		void enterMethodDefinition(const std::shared_ptr& node);
+		void enterDatamemberDeclaration(const std::shared_ptr& node);
+		void exitDatamemberDeclaration(const std::shared_ptr& node);
+		void enterObjectInstantiation(const std::shared_ptr& node);
+		void enterPointerDeclaration(const std::shared_ptr& node);
+		void enterObjectReference(const std::shared_ptr& node);
+		void enterNewStatement(const std::shared_ptr& node);
+		void enterDynamicCastTarget(const std::shared_ptr& node);
+		void enterPrimitiveAssignment(const std::shared_ptr& node);
+		void enterBashFunction(const std::shared_ptr& node);
+
+		std::vector encode() const; // call after walking the AST
+};
+
+} // namespace bpp
diff --git a/test-suite/README.md b/test-suite/README.md
index e5fb97c9..af81a1ab 100644
--- a/test-suite/README.md
+++ b/test-suite/README.md
@@ -25,6 +25,10 @@ You can run *specific* test cases by passing an argument to the script. For exam
 
 Calling `run.bpp` with no arguments will run all the test cases.
 
+Language server integration tests are also written in Bash++ and can be run
+with `make test-lsp`. These tests require `jq` to encode and inspect JSON-RPC
+messages.
+
 ## Test Suite Structure
 
 The test suite is itself written in Bash++.
diff --git a/test-suite/lsp-fixtures/malformed.bpp b/test-suite/lsp-fixtures/malformed.bpp new file mode 100644 index 00000000..8c9ec83c --- /dev/null +++ b/test-suite/lsp-fixtures/malformed.bpp @@ -0,0 +1,3 @@ +@class Broken { + @public value= + @public @method incomplete diff --git a/test-suite/lsp-fixtures/semantic-tokens.bpp b/test-suite/lsp-fixtures/semantic-tokens.bpp new file mode 100644 index 00000000..66837827 --- /dev/null +++ b/test-suite/lsp-fixtures/semantic-tokens.bpp @@ -0,0 +1,42 @@ +@class Parent { + @public inherited=0 +} + +@class Thing { + @public value=0 + @public @method act argument { + echo "$argument" + } +} + +@class Child : Parent { + @public member=1 + @public @Thing* pointerMember + @public @method run primitive @Thing* item { + @Thing object + @Thing* pointer + @Thing* allocated=@new Thing + @object.value=primitive + @object.act primitive + @this.member=@object.value + echo "😀"; @Thing unicodeObject + echo "@object.value @(echo @object.value)" + echo @dynamic_cast &@object + } +} + +@Child child +@child.run argument @nullptr + +majorVersion=@(awk -F. '{print $1}' <<< "$BASH_VERSION") +minorVersion=@(awk -F. '{print $2}' <<< "$BASH_VERSION") + +# Bash syntax must remain highlighted after supershell recovery. +in_tty=false +if [[ -t 1 ]]; then + in_tty=true +fi + +if ! grep -P -x "abc" <<< "abc"; then + echo "Bash syntax remains highlighted." +fi diff --git a/test-suite/lsp-semantic-tokens.bpp b/test-suite/lsp-semantic-tokens.bpp new file mode 100644 index 00000000..48061250 --- /dev/null +++ b/test-suite/lsp-semantic-tokens.bpp @@ -0,0 +1,437 @@ +#!/usr/bin/env bpp + +# Copyright (C) 2026 Andrew S. 
Rightenburg +# Bash++: Bash with classes +# SPDX-License-Identifier: GPL-3.0-or-later + +@class LanguageServerClient { + @private executable + @private temporaryDirectory + @private requestFifo + @private responseFifo + @private errorFile + @private processId + @private nextRequestId=1 + @private started=0 + @public response + + @public @method start executable { + @this.executable="$executable" + @this.temporaryDirectory=@(mktemp -d) + @this.requestFifo="@this.temporaryDirectory/request" + @this.responseFifo="@this.temporaryDirectory/response" + @this.errorFile="@this.temporaryDirectory/stderr" + + mkfifo "@this.requestFifo" "@this.responseFifo" + "@this.executable" --stdio -j1 \ + < "@this.requestFifo" \ + > "@this.responseFifo" \ + 2> "@this.errorFile" & + @this.processId=$! + + exec 3> "@this.requestFifo" + exec 4< "@this.responseFifo" + @this.started=1 + } + + @private @method send payload { + local contentLength + contentLength=@(LC_ALL=C printf "%s" "$payload" | wc -c | tr -d ' ') + printf "Content-Length: %s\r\n\r\n%s" "$contentLength" "$payload" >&3 + } + + @private @method readMessage { + local line="" + local contentLength="" + local message="" + + while IFS= read -r -u 4 line; do + line="${line%$'\r'}" + if [[ -z "$line" ]]; then + break + fi + case "$line" in + Content-Length:\ *) + contentLength=@(sed 's/^Content-Length: //' <<< "$line") + ;; + esac + done + + if [[ -z "$contentLength" ]]; then + echo "Language server response omitted Content-Length." >&2 + cat "@this.errorFile" >&2 + exit 1 + fi + + IFS= LC_ALL=C read -r -N "$contentLength" -u 4 message + @this.response="$message" + } + + @public @method request method params { + local requestId="@this.nextRequestId" + local methodJson + local payload + local responseId + + @this.nextRequestId=$((@this.nextRequestId + 1)) + methodJson=@(printf "%s" "$method" | jq -Rs .) 
+ payload="{\"jsonrpc\":\"2.0\",\"id\":$requestId,\"method\":$methodJson,\"params\":$params}" + @this.send "$payload" + + while true; do + @this.readMessage + responseId=@(jq -r 'if has("id") then .id else empty end' <<< "@this.response") + if [[ "$responseId" != "$requestId" ]]; then + continue + fi + if jq -e 'has("error")' <<< "@this.response" >/dev/null; then + echo "Language server request failed: $method" >&2 + jq '.error' <<< "@this.response" >&2 + exit 1 + fi + return + done + } + + @public @method notify method params { + local methodJson + local payload + + methodJson=@(printf "%s" "$method" | jq -Rs .) + payload="{\"jsonrpc\":\"2.0\",\"method\":$methodJson,\"params\":$params}" + @this.send "$payload" + } + + @public @method close { + if [[ @this.started -eq 0 ]]; then + return + fi + + @this.request "shutdown" "null" + @this.notify "exit" "null" + exec 3>&- + exec 4<&- + wait "@this.processId" + rm -rf "@this.temporaryDirectory" + @this.started=0 + } + + @public @method forceClose { + if [[ @this.started -eq 0 ]]; then + return + fi + + exec 3>&- + exec 4<&- + if kill -0 "@this.processId" 2>/dev/null; then + kill "@this.processId" 2>/dev/null + fi + wait "@this.processId" 2>/dev/null + rm -rf "@this.temporaryDirectory" + @this.started=0 + } +} + +function fail() { + echo "$1" >&2 + exit 1 +} + +function assertEquals() { + local actual="$1" + local expected="$2" + local message="$3" + + if [[ "$actual" != "$expected" ]]; then + fail "$message: expected '$expected', received '$actual'" + fi +} + +function utf16Slice() { + local line="$1" + local start="$2" + local length="$3" + + printf "%s" "$line" \ + | iconv -f UTF-8 -t UTF-16LE \ + | dd bs=2 skip="$start" count="$length" 2>/dev/null \ + | iconv -f UTF-16LE -t UTF-8 +} + +function utf16Length() { + local value="$1" + local byteLength + + byteLength=@(printf "%s" "$value" | iconv -f UTF-8 -t UTF-16LE | wc -c | tr -d ' ') + echo $((byteLength / 2)) +} + +tokenTexts=() +tokenTypes=() +tokenDeclarations=() 
+tokenLines=() +tokenStarts=() + +function decodeTokens() { + local sourceFile="$1" + local response="$2" + local encodedTokens=() + local sourceLines=() + local line=0 + local start=0 + local previousTokenLine=-1 + local previousTokenEnd=0 + local index + + tokenTexts=() + tokenTypes=() + tokenDeclarations=() + tokenLines=() + tokenStarts=() + + mapfile -t encodedTokens < <(jq -r '.result.data[]' <<< "$response") + mapfile -t sourceLines < "$sourceFile" + + if [[ $((${#encodedTokens[@]} % 5)) -ne 0 ]]; then + fail "Semantic token data length is not divisible by five." + fi + + for ((index=0; index<${#encodedTokens[@]}; index+=5)); do + local deltaLine="${encodedTokens[$index]}" + local deltaStart="${encodedTokens[$((index + 1))]}" + local length="${encodedTokens[$((index + 2))]}" + local type="${encodedTokens[$((index + 3))]}" + local modifiers="${encodedTokens[$((index + 4))]}" + + line=$((line + deltaLine)) + if [[ "$deltaLine" -gt 0 ]]; then + start="$deltaStart" + else + start=$((start + deltaStart)) + fi + if [[ "$line" -eq "$previousTokenLine" ]] \ + && [[ "$start" -lt "$previousTokenEnd" ]]; then + fail "Semantic token ranges overlap on line $line." + fi + previousTokenLine="$line" + previousTokenEnd=$((start + length)) + + local tokenText + tokenText=@(utf16Slice "${sourceLines[$line]}" "$start" "$length") + tokenTexts+=("$tokenText") + tokenTypes+=("$type") + tokenDeclarations+=("$((modifiers & 1))") + tokenLines+=("$line") + tokenStarts+=("$start") + done +} + +function tokenExists() { + local text="$1" + local type="$2" + local declaration="$3" + local index + + for ((index=0; index<${#tokenTexts[@]}; index++)); do + if [[ "${tokenTexts[$index]}" == "$text" ]] \ + && [[ "${tokenTypes[$index]}" == "$type" ]] \ + && [[ "${tokenDeclarations[$index]}" == "$declaration" ]]; then + return 0 + fi + done + return 1 +} + +function assertToken() { + local text="$1" + local type="$2" + local declaration="$3" + + if ! 
tokenExists "$text" "$type" "$declaration"; then + fail "Missing semantic token '$text' (type=$type, declaration=$declaration)." + fi +} + +function countTokens() { + local text="$1" + local type="$2" + local declaration="$3" + local count=0 + local index + + for ((index=0; index<${#tokenTexts[@]}; index++)); do + if [[ "${tokenTexts[$index]}" == "$text" ]] \ + && [[ "${tokenTypes[$index]}" == "$type" ]] \ + && [[ "${tokenDeclarations[$index]}" == "$declaration" ]]; then + count=$((count + 1)) + fi + done + echo "$count" +} + +function tokenStart() { + local text="$1" + local declaration="$2" + local index + + for ((index=0; index<${#tokenTexts[@]}; index++)); do + if [[ "${tokenTexts[$index]}" == "$text" ]] \ + && [[ "${tokenDeclarations[$index]}" == "$declaration" ]]; then + echo "${tokenStarts[$index]}" + return + fi + done + fail "Could not find the start position for token '$text'." +} + +if ! command -v jq >/dev/null; then + fail "The semantic token integration test requires jq." +fi + +executable="$1" +if [[ -z "$executable" ]]; then + executable="bin/bpp-lsp" +fi +fixtureDirectory="test-suite/lsp-fixtures" +temporaryDirectory=@(mktemp -d) +validPath="$temporaryDirectory/semantic-tokens.bpp" +malformedPath="$temporaryDirectory/malformed.bpp" +cp "$fixtureDirectory/semantic-tokens.bpp" "$validPath" +cp "$fixtureDirectory/malformed.bpp" "$malformedPath" + +validUri="file://@(realpath "$validPath")" +malformedUri="file://@(realpath "$malformedPath")" +rootUri="file://@(realpath "$temporaryDirectory")" +validSource=@(cat "$validPath") +malformedSource=@(cat "$malformedPath") +validSourceJson=@(printf "%s" "$validSource" | jq -Rs .) +malformedSourceJson=@(printf "%s" "$malformedSource" | jq -Rs .) 
+ +@LanguageServerClient client + +function cleanup() { + @client.forceClose + rm -rf "$temporaryDirectory" +} + +@client.start "$executable" +trap cleanup EXIT + +initializeParams=@(jq -cn --arg rootUri "$rootUri" '{ + processId: null, + rootUri: $rootUri, + capabilities: { + general: {positionEncodings: ["utf-16"]}, + textDocument: { + semanticTokens: { + requests: {full: true}, + tokenTypes: [], + tokenModifiers: [], + formats: ["relative"] + } + } + } +}') +@client.request "initialize" "$initializeParams" +initializeResponse="@client.response" + +assertEquals \ + "@(jq -c '.result.capabilities.semanticTokensProvider.legend.tokenTypes' <<< "$initializeResponse")" \ + '["class","method","property","variable","parameter","keyword","comment","string","operator","number","function"]' \ + "Unexpected semantic token types" +assertEquals \ + "@(jq -c '.result.capabilities.semanticTokensProvider.legend.tokenModifiers' <<< "$initializeResponse")" \ + '["declaration"]' \ + "Unexpected semantic token modifiers" +assertEquals \ + "@(jq -r '.result.capabilities.semanticTokensProvider.full' <<< "$initializeResponse")" \ + "true" \ + "Full semantic tokens were not advertised" + +didOpenParams="{\"textDocument\":{\"uri\":\"$validUri\",\"languageId\":\"bashpp\",\"version\":1,\"text\":$validSourceJson}}" +@client.notify "textDocument/didOpen" "$didOpenParams" + +semanticTokensParams="{\"textDocument\":{\"uri\":\"$validUri\"}}" +@client.request "textDocument/semanticTokens/full" "$semanticTokensParams" +decodeTokens "$validPath" "@client.response" + +# Token type indices follow the legend asserted above. 
+assertToken "Parent" 0 1 +assertToken "Parent" 0 0 +assertToken "Thing" 0 1 +assertToken "Child" 0 1 +assertToken "act" 1 1 +assertToken "act" 1 0 +assertToken "run" 1 1 +assertToken "run" 1 0 +assertToken "member" 2 1 +assertToken "member" 2 0 +assertToken "pointerMember" 2 1 +assertToken "value" 2 1 +assertToken "value" 2 0 +assertToken "primitive" 4 1 +assertToken "item" 4 1 +assertToken "object" 3 1 +assertToken "object" 3 0 +assertToken "pointer" 3 1 +assertToken "allocated" 3 1 +assertToken "unicodeObject" 3 1 +assertToken "child" 3 1 +assertToken "child" 3 0 +assertToken "# Bash syntax must remain highlighted after supershell recovery." 6 0 +assertToken "if" 5 0 +assertToken "then" 5 0 +assertToken "fi" 5 0 +assertToken "grep" 10 0 +assertToken "echo" 10 0 +assertToken "Bash syntax remains highlighted." 7 0 +assertToken "1" 9 0 +assertToken "in_tty" 3 0 + +if [[ "@(countTokens "Thing" 0 0)" -lt 7 ]]; then + fail "Class references from declarations, allocation, and casts are missing." +fi +if [[ "@(countTokens "value" 2 0)" -lt 4 ]]; then + fail "Property references inside strings or supershells are missing." +fi + +expectedUtf16Column=@(utf16Length $'\t\techo "😀"; @Thing ') +assertEquals \ + "@(tokenStart "unicodeObject" 1)" \ + "$expectedUtf16Column" \ + "Unicode token column was not reported in UTF-16 code units" + +changedSource="$validSource"$'\n\n@Thing changedObject\n' +changedSourceJson=@(printf "%s" "$changedSource" | jq -Rs .) 
+didChangeParams="{\"textDocument\":{\"uri\":\"$validUri\",\"version\":2},\"contentChanges\":[{\"text\":$changedSourceJson}]}" +@client.notify "textDocument/didChange" "$didChangeParams" + +changedPath="$temporaryDirectory/changed.bpp" +printf "%s" "$changedSource" > "$changedPath" +changedTokensFound=0 +for ((attempt=0; attempt<50; attempt++)); do + @client.request "textDocument/semanticTokens/full" "$semanticTokensParams" + decodeTokens "$changedPath" "@client.response" + if tokenExists "changedObject" 3 1; then + changedTokensFound=1 + break + fi + sleep 0.1; +done +if [[ "$changedTokensFound" -ne 1 ]]; then + fail "Semantic tokens did not reflect unsaved changes." +fi + +malformedDidOpenParams="{\"textDocument\":{\"uri\":\"$malformedUri\",\"languageId\":\"bashpp\",\"version\":1,\"text\":$malformedSourceJson}}" +@client.notify "textDocument/didOpen" "$malformedDidOpenParams" +malformedTokensParams="{\"textDocument\":{\"uri\":\"$malformedUri\"}}" +@client.request "textDocument/semanticTokens/full" "$malformedTokensParams" +if ! jq -e '.result.data | type == "array"' <<< "@client.response" >/dev/null; then + fail "Malformed source did not produce a semantic token array." +fi + +@client.close +trap - EXIT +rm -rf "$temporaryDirectory" + +echo "Bash++ semantic token integration tests passed." diff --git a/wiki/bpp-lsp.md b/wiki/bpp-lsp.md index 3b4fc1f2..7f8d326b 100644 --- a/wiki/bpp-lsp.md +++ b/wiki/bpp-lsp.md @@ -14,10 +14,14 @@ bpp-lsp [options] # DESCRIPTION -The Bash++ language server (`bpp-lsp`) provides language server protocol support for Bash++ files. It includes features such as code completion, diagnostics, and more. This package is intended for use with editors that support the language server protocol, such as Visual Studio Code or Eclipse Theia. +The Bash++ language server (`bpp-lsp`) provides language server protocol support for Bash++ files. It includes features such as code completion, diagnostics, semantic tokens, and more. 
This package is intended for use with editors that support the language server protocol, such as Visual Studio Code, Zed, or Eclipse Theia. It is not required for running Bash++ scripts, but enhances the development experience by providing advanced features for Bash++ development. +Semantic tokens are generated from the compiler's Flex/Bison AST. This keeps +class, method, property, object, pointer, parameter, and reference +classification synchronized with the language accepted by the compiler. + # OPTIONS ###### `--stdio` diff --git a/zed/Cargo.lock b/zed/Cargo.lock new file mode 100644 index 00000000..a9f00696 --- /dev/null +++ b/zed/Cargo.lock @@ -0,0 +1,817 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "auditable-serde" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7bf8143dfc3c0258df908843e169b5cc5fcf76c7718bd66135ef4a9cd558c5" +dependencies = [ + "semver", + "serde", + "serde_json", + "topological-sort", +] + +[[package]] +name = "bashpp-zed" +version = "0.1.0" +dependencies = [ + "zed_extension_api", +] + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crc32fast" +version 
= "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "displaydoc" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" 
+version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + 
+[[package]] +name = "log" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] 
+name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + +[[package]] +name = "spdx" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e17e880bafaeb362a7b751ec46bdc5b61445a188f80e0606e68167cd540fa3" +dependencies = [ + "smallvec", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "topological-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "url" +version = 
"2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "wasm-encoder" +version = "0.227.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80bb72f02e7fbf07183443b27b0f3d4144abf8c114189f2e088ed95b696a7822" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.227.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce1ef0faabbbba6674e97a56bee857ccddf942785a336c8b47b42373c922a91d" +dependencies = [ + "anyhow", + "auditable-serde", + "flate2", + "indexmap", + "serde", + "serde_derive", + "serde_json", + "spdx", + "url", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.227.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f51cad774fb3c9461ab9bccc9c62dfb7388397b5deda31bf40e8108ccd678b2" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "wit-bindgen" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10fb6648689b3929d56bbc7eb1acf70c9a42a29eb5358c67c10f54dbd5d695de" +dependencies = [ + "wit-bindgen-rt", + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92fa781d4f2ff6d3f27f3cc9b74a73327b31ca0dc4a3ef25a0ce2983e0e5af9b" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.41.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db52a11d4dfb0a59f194c064055794ee6564eb1ced88c25da2cf76e50c5621" +dependencies = [ + "bitflags", + "futures", + "once_cell", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d0809dc5ba19e2e98661bf32fc0addc5a3ca5bf3a6a7083aa6ba484085ff3ce" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad19eec017904e04c60719592a803ee5da76cb51c81e3f6fbf9457f59db49799" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.227.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "635c3adc595422cbf2341a17fb73a319669cc8d33deed3a48368a841df86b676" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.227.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddf445ed5157046e4baf56f9138c124a0824d4d1657e7204d71886ad8ce2fc11" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "yoke" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zed_extension_api" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0729d50b4ca0a7e28e590bbe32e3ca0194d97ef654961451a424c661a366fca0" +dependencies = [ + "serde", + "serde_json", + "wit-bindgen", +] + +[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/zed/Cargo.toml b/zed/Cargo.toml new file mode 100644 index 00000000..059cc44d --- /dev/null +++ b/zed/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "bashpp-zed" +version = "0.1.0" +edition = "2021" +publish = false +license = "GPL-3.0-or-later" +description = "Bash++ language support for Zed" +repository = "https://github.com/rail5/bashpp" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +zed_extension_api = "0.7.0" diff --git a/zed/LICENSE b/zed/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/zed/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/zed/README.md b/zed/README.md new file mode 100644 index 00000000..d97f79a1 --- /dev/null +++ b/zed/README.md @@ -0,0 +1,45 @@ +# Bash++ for Zed + +This extension recognizes `.bpp` files and reuses Zed's built-in Bash grammar +for baseline syntax highlighting. Bash++ identifiers are classified by +`bpp-lsp` using the compiler's Flex/Bison AST, so the extension does not +maintain a second Bash++ parser. + +## Development installation + +1. Build and install Bash++ so that `bpp-lsp` is available on `PATH`. +2. Install Rust and add the targets used by this repository and current Zed + releases: + + ```bash + rustup target add wasm32-wasip1 wasm32-wasip2 + ``` + +3. Run `make test-zed` from the repository root. +4. Open Zed's extension view, select **Install Dev Extension**, and choose this + `zed/` directory. +5. Enable combined semantic highlighting in Zed's settings: + + ```json + { + "languages": { + "Bash++": { + "semantic_tokens": "combined" + } + } + } + ``` + +Zed will use Tree-sitter Bash highlighting immediately. Once `bpp-lsp` starts, +semantic tokens add Bash++ class, method, property, variable, and parameter +highlighting. 
They also provide compiler-lexer fallback highlighting for Bash +syntax that Zed's Bash grammar cannot recover after Bash++ constructs. + +## Marketplace publication + +After the extension is ready for publication, add this repository as a +submodule in the +[`zed-industries/extensions`](https://github.com/zed-industries/extensions) +registry and set `path = "zed"` for its entry in `extensions.toml`. The +extension source remains in this repository; no separate parser or package is +required. diff --git a/zed/extension.toml b/zed/extension.toml new file mode 100644 index 00000000..421e1ad3 --- /dev/null +++ b/zed/extension.toml @@ -0,0 +1,11 @@ +id = "bashpp" +name = "Bash++" +version = "0.1.0" +schema_version = 1 +authors = ["Andrew S. Rightenburg", "Natnael Taddese"] +description = "Bash++ language support using Zed's Bash grammar and bpp-lsp" +repository = "https://github.com/rail5/bashpp" + +[language_servers.bpp-lsp] +name = "Bash++ Language Server" +languages = ["Bash++"] diff --git a/zed/languages/bashpp/brackets.scm b/zed/languages/bashpp/brackets.scm new file mode 100644 index 00000000..99d877c5 --- /dev/null +++ b/zed/languages/bashpp/brackets.scm @@ -0,0 +1,33 @@ +("(" @open + ")" @close) + +("[" @open + "]" @close) + +("{" @open + "}" @close) + +(("\"" @open + "\"" @close) + (#set! rainbow.exclude)) + +(("`" @open + "`" @close) + (#set! rainbow.exclude)) + +(("do" @open + "done" @close) + (#set! newline.only) + (#set! rainbow.exclude)) + +((case_statement + ("in" @open + "esac" @close)) + (#set! newline.only) + (#set! rainbow.exclude)) + +((if_statement + ("then" @open + "fi" @close)) + (#set! newline.only) + (#set! 
rainbow.exclude)) diff --git a/zed/languages/bashpp/config.toml b/zed/languages/bashpp/config.toml new file mode 100644 index 00000000..df50f24f --- /dev/null +++ b/zed/languages/bashpp/config.toml @@ -0,0 +1,32 @@ +name = "Bash++" +code_fence_block_name = "bashpp" +grammar = "bash" +path_suffixes = ["bpp"] +modeline_aliases = ["bashpp", "bpp"] +line_comments = ["# "] +first_line_pattern = '^#!.*\b(?:bashpp|bpp)\b' +autoclose_before = "}])" +brackets = [ + { start = "[", end = "]", close = true, newline = false }, + { start = "(", end = ")", close = true, newline = true }, + { start = "{", end = "}", close = true, newline = true }, + { start = "\"", end = "\"", close = true, newline = false, not_in = ["comment", "string"] }, + { start = "'", end = "'", close = true, newline = false, not_in = ["string", "comment"] }, + { start = "do", end = "done", close = false, newline = true, not_in = ["comment", "string"] }, + { start = "then", end = "fi", close = false, newline = true, not_in = ["comment", "string"] }, + { start = "then", end = "else", close = false, newline = true, not_in = ["comment", "string"] }, + { start = "then", end = "elif", close = false, newline = true, not_in = ["comment", "string"] }, + { start = "in", end = "esac", close = false, newline = true, not_in = ["comment", "string"] }, +] + +auto_indent_using_last_non_empty_line = false +increase_indent_pattern = "^\\s*(\\b(else|elif)\\b|([^#]+\\b(do|then|in)\\b)|([\\w\\*]+\\)))\\s*$" +decrease_indent_patterns = [ + { pattern = "^\\s*elif\\b.*", valid_after = ["if", "elif"] }, + { pattern = "^\\s*else\\b.*", valid_after = ["if", "elif", "for", "while"] }, + { pattern = "^\\s*fi\\b.*", valid_after = ["if", "elif", "else"] }, + { pattern = "^\\s*done\\b.*", valid_after = ["for", "while"] }, + { pattern = "^\\s*esac\\b.*", valid_after = ["case"] }, + { pattern = "^\\s*[\\w\\*]+\\)\\s*$", valid_after = ["case_item"] }, +] +decrease_indent_pattern = "(^|\\s+|;)(elif)\\b.*$" diff --git 
a/zed/languages/bashpp/highlights.scm b/zed/languages/bashpp/highlights.scm new file mode 100644 index 00000000..dcdf77c9 --- /dev/null +++ b/zed/languages/bashpp/highlights.scm @@ -0,0 +1,128 @@ +[ + (string) + (raw_string) + (heredoc_body) + (heredoc_start) + (heredoc_end) + (ansi_c_string) +] @string + +; TODO: HACK. Remove this override if the shared Bash grammar can recover +; cleanly from supershells containing positional expansions. It currently +; treats the rest of the command as raw strings, coloring unrelated Bash++ +; code as string contents. The LSP recolors the neutralized range using the +; compiler lexer's semantic tokens. +((command + (variable_assignment + value: (concatenation + (word) @supershell + (_)* + (raw_string) @variable))) + (#eq? @supershell "@") + (#match? @variable "\n")) + +((command + (variable_assignment + value: (concatenation + (word) @supershell)) + name: (command_name + (concatenation + (_)* + (raw_string) @variable))) + (#eq? @supershell "@") + (#match? @variable "\n")) + +(variable_name) @variable + +[ + "export" + "function" + "unset" + "local" + "declare" +] @keyword + +[ + "case" + "do" + "done" + "elif" + "else" + "esac" + "fi" + "for" + "if" + "in" + "select" + "then" + "until" + "while" +] @keyword.control + +(comment) @comment + +((program + . + (comment) @keyword.directive) + (#match? @keyword.directive "^#![ \t]*/")) + +(function_definition + name: (word) @function) + +(command_name + (word) @function) + +[ + (file_descriptor) + (number) +] @number + +(regex) @string.regex + +[ + (command_substitution) + (process_substitution) + (expansion) +] @embedded + +[ + "$" + "&&" + "||" + ">" + "<<" + ">>" + ">&" + ">&-" + "<" + "|" + "=" + "=~" + "==" + "!=" + "-o" + "-a" + "+" + "-" + "*" + "**" + "!" +] @operator + +(test_operator) @keyword.operator + +";" @punctuation.delimiter + +[ + "(" + ")" + "{" + "}" + "[" + "]" +] @punctuation.bracket + +(special_variable_name) @variable.special + +((word) @keyword + (#match? 
@keyword "^@(class|method|constructor|destructor|public|private|protected|virtual|new|delete|nullptr|include|include_once|this|super|typeof|dynamic_cast)$")) diff --git a/zed/languages/bashpp/indents.scm b/zed/languages/bashpp/indents.scm new file mode 100644 index 00000000..2072fae7 --- /dev/null +++ b/zed/languages/bashpp/indents.scm @@ -0,0 +1,20 @@ +(_ + "[" + "]" @end) @indent + +(_ + "{" + "}" @end) @indent + +(_ + "(" + ")" @end) @indent + +(function_definition) @start.function +(if_statement) @start.if +(elif_clause) @start.elif +(else_clause) @start.else +(for_statement) @start.for +(while_statement) @start.while +(case_statement) @start.case +(case_item) @start.case_item diff --git a/zed/languages/bashpp/overrides.scm b/zed/languages/bashpp/overrides.scm new file mode 100644 index 00000000..81fec9a5 --- /dev/null +++ b/zed/languages/bashpp/overrides.scm @@ -0,0 +1,2 @@ +(comment) @comment.inclusive +(string) @string diff --git a/zed/src/lib.rs b/zed/src/lib.rs new file mode 100644 index 00000000..fff78712 --- /dev/null +++ b/zed/src/lib.rs @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2026 Andrew S. Rightenburg + * Bash++: Bash with classes + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +use zed_extension_api as zed; + +struct BashppExtension; + +impl zed::Extension for BashppExtension { + fn new() -> Self { + Self + } + + fn language_server_command( + &mut self, + _language_server_id: &zed::LanguageServerId, + worktree: &zed::Worktree, + ) -> zed::Result<zed::Command> { + let command = worktree + .which("bpp-lsp") + .ok_or_else(|| "bpp-lsp must be installed and available on PATH".to_string())?; + + Ok(zed::Command { + command, + args: vec!["--stdio".to_string()], + env: Default::default(), + }) + } +} + +zed::register_extension!(BashppExtension);