From 2b32ea883fe72504eb893de2839471e601b93f87 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 19 Apr 2026 14:56:55 +0900 Subject: [PATCH 1/5] feat: Add protobuf support for WebSocket messages and refactor WebSocket handling - Updated `pyproject.toml` to include `grpcio-tools` and `protobuf` dependencies. - Added generated protobuf files for WebSocket messages in `stackchan_server/generated_protobuf`. - Implemented `protobuf_ws.py` for encoding and parsing WebSocket messages. - Refactored `speak.py` to utilize new protobuf message encoding for audio WAV messages. - Updated `ws_proxy.py` to handle protobuf messages and removed legacy struct-based message handling. - Enhanced error handling for invalid protobuf messages in WebSocket communication. --- Makefile | 26 ++ firmware/include/listening.hpp | 2 +- firmware/include/protocols.hpp | 44 ++- .../generated_protobuf/websocket-message.pb.c | 56 +++ .../generated_protobuf/websocket-message.pb.h | 370 ++++++++++++++++++ firmware/src/listening.cpp | 44 ++- firmware/src/main.cpp | 204 +++++++--- firmware/src/protocols.cpp | 163 ++++++++ platformio.ini | 1 + protobuf/websocket-message.options | 2 + protobuf/websocket-message.proto | 110 ++++++ pyproject.toml | 2 + .../generated_protobuf/__init__.py | 1 + .../websocket_message_pb2.py | 68 ++++ stackchan_server/protobuf_ws.py | 170 ++++++++ stackchan_server/speak.py | 56 +-- stackchan_server/ws_proxy.py | 214 ++++------ uv.lock | 46 +++ 18 files changed, 1314 insertions(+), 265 deletions(-) create mode 100644 firmware/lib/generated_protobuf/websocket-message.pb.c create mode 100644 firmware/lib/generated_protobuf/websocket-message.pb.h create mode 100644 firmware/src/protocols.cpp create mode 100644 protobuf/websocket-message.options create mode 100644 protobuf/websocket-message.proto create mode 100644 stackchan_server/generated_protobuf/__init__.py create mode 100644 stackchan_server/generated_protobuf/websocket_message_pb2.py create mode 
100644 stackchan_server/protobuf_ws.py diff --git a/Makefile b/Makefile index 318a6f4..761a0b1 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,12 @@ +UV ?= uv +PROTO_DIR := protobuf +PROTO_FILE := $(PROTO_DIR)/websocket-message.proto +PY_PROTO_OUT_DIR := stackchan_server/generated_protobuf +FW_PROTO_OUT_DIR := firmware/lib/generated_protobuf +NANOPB_GENERATOR := .pio/libdeps/m5stack-cores3-m5unified/Nanopb/generator/nanopb_generator.py + +.PHONY: lint lint-fix protobuf protobuf-python protobuf-firmware clean-protobuf + lint: uv run ruff check stackchan_server example_apps uv run ty check stackchan_server example_apps @@ -5,3 +14,20 @@ lint: lint-fix: uv run ruff check --fix stackchan_server example_apps uv run ty check stackchan_server example_apps + +protobuf: protobuf-python protobuf-firmware + +protobuf-python: $(PROTO_FILE) + mkdir -p $(PY_PROTO_OUT_DIR) + touch $(PY_PROTO_OUT_DIR)/__init__.py + $(UV) run python -m grpc_tools.protoc -I$(PROTO_DIR) --python_out=$(PY_PROTO_OUT_DIR) $(PROTO_FILE) + +protobuf-firmware: $(PROTO_FILE) + @test -f $(NANOPB_GENERATOR) || (echo "nanopb generator not found: $(NANOPB_GENERATOR)" && exit 1) + mkdir -p $(FW_PROTO_OUT_DIR) + $(UV) run python $(NANOPB_GENERATOR) --proto-path=$(PROTO_DIR) --output-dir=$(FW_PROTO_OUT_DIR) $(PROTO_FILE) + +clean-protobuf: + rm -f $(PY_PROTO_OUT_DIR)/websocket_message_pb2.py + rm -f stackchan_server/generated/websocket_message_pb2.py + rm -f $(FW_PROTO_OUT_DIR)/websocket-message.pb.h $(FW_PROTO_OUT_DIR)/websocket-message.pb.c diff --git a/firmware/include/listening.hpp b/firmware/include/listening.hpp index f3a45ee..cf00113 100644 --- a/firmware/include/listening.hpp +++ b/firmware/include/listening.hpp @@ -53,7 +53,7 @@ class Listening size_t ring_read_ = 0; size_t ring_available_ = 0; - uint16_t seq_counter_ = 0; + uint32_t seq_counter_ = 0; bool streaming_ = false; bool events_registered_ = false; diff --git a/firmware/include/protocols.hpp b/firmware/include/protocols.hpp index 
b39838d..feea050 100644 --- a/firmware/include/protocols.hpp +++ b/firmware/include/protocols.hpp @@ -1,15 +1,14 @@ // Protocol definitions shared between CoreS3 firmware and other components #pragma once +#include <cstddef> #include <cstdint> +#include <vector> -// WebSocket binary protocol (audio + future kinds) -// Header layout (little-endian, packed): -// - kind: uint8_t (message kind) -// - messageType: uint8_t (START/DATA/END) -// - reserved: uint8_t (0, future flags) -// - seq: uint16 (sequence number) -// - payloadBytes: uint16 (bytes following the header) +#include "../lib/generated_protobuf/websocket-message.pb.h" + +// Internal compatibility metadata for message routing after protobuf decode. +// This is no longer sent on the wire directly. enum class MessageKind : uint8_t { @@ -35,8 +34,8 @@ struct __attribute__((packed)) WsHeader uint8_t kind; // MessageKind uint8_t messageType; // MessageType uint8_t reserved; // 0 (flags/reserved) - uint16_t seq; // sequence number - uint16_t payloadBytes; // bytes following the header + uint32_t seq; // sequence number + uint32_t payloadBytes; // bytes following the header }; // payload for kind=StateCmd, messageType=DATA @@ -59,3 +58,30 @@ enum class ServoCommandOp : uint8_t MoveX = 1, MoveY = 2, }; + +constexpr size_t kProtoAudioChunkMaxBytes = 4096; +constexpr size_t kProtoServoCommandMaxCount = 255; +constexpr size_t kMaxEncodedWebSocketMessageBytes = stackchan_websocket_v1_WebSocketMessage_size; + +stackchan_websocket_v1_MessageKind toProtoMessageKind(MessageKind kind); +stackchan_websocket_v1_MessageType toProtoMessageType(MessageType type); +stackchan_websocket_v1_StackchanState toProtoState(RemoteState state); +stackchan_websocket_v1_ServoOperation toProtoServoOperation(ServoCommandOp op); + +RemoteState fromProtoState(stackchan_websocket_v1_StackchanState state); +ServoCommandOp fromProtoServoOperation(stackchan_websocket_v1_ServoOperation op); + +bool setProtoAudioChunk( + stackchan_websocket_v1_AudioChunk &chunk, + const uint8_t 
*data, + size_t data_len); +const uint8_t *getProtoAudioChunkBytes(const stackchan_websocket_v1_AudioChunk &chunk); +size_t getProtoAudioChunkSize(const stackchan_websocket_v1_AudioChunk &chunk); + +bool encodeWebSocketMessage( + const stackchan_websocket_v1_WebSocketMessage &message, + std::vector<uint8_t> &encoded); +bool decodeWebSocketMessage( + const uint8_t *data, + size_t data_len, + stackchan_websocket_v1_WebSocketMessage &message); diff --git a/firmware/lib/generated_protobuf/websocket-message.pb.c b/firmware/lib/generated_protobuf/websocket-message.pb.c new file mode 100644 index 0000000..e024c81 --- /dev/null +++ b/firmware/lib/generated_protobuf/websocket-message.pb.c @@ -0,0 +1,56 @@ +/* Automatically generated nanopb constant definitions */ +/* Generated by nanopb-0.4.9.1 */ + +#include "websocket-message.pb.h" +#if PB_PROTO_HEADER_VERSION != 40 +#error Regenerate this file with the current version of nanopb generator. +#endif + +PB_BIND(stackchan_websocket_v1_WebSocketMessage, stackchan_websocket_v1_WebSocketMessage, 4) + + +PB_BIND(stackchan_websocket_v1_AudioPcmStart, stackchan_websocket_v1_AudioPcmStart, AUTO) + + +PB_BIND(stackchan_websocket_v1_AudioPcmEnd, stackchan_websocket_v1_AudioPcmEnd, AUTO) + + +PB_BIND(stackchan_websocket_v1_AudioWavStart, stackchan_websocket_v1_AudioWavStart, AUTO) + + +PB_BIND(stackchan_websocket_v1_AudioWavEnd, stackchan_websocket_v1_AudioWavEnd, AUTO) + + +PB_BIND(stackchan_websocket_v1_AudioChunk, stackchan_websocket_v1_AudioChunk, 4) + + +PB_BIND(stackchan_websocket_v1_StateCommand, stackchan_websocket_v1_StateCommand, AUTO) + + +PB_BIND(stackchan_websocket_v1_WakeWordEvent, stackchan_websocket_v1_WakeWordEvent, AUTO) + + +PB_BIND(stackchan_websocket_v1_StateEvent, stackchan_websocket_v1_StateEvent, AUTO) + + +PB_BIND(stackchan_websocket_v1_SpeakDoneEvent, stackchan_websocket_v1_SpeakDoneEvent, AUTO) + + +PB_BIND(stackchan_websocket_v1_ServoCommandSequence, stackchan_websocket_v1_ServoCommandSequence, 2) + + 
+PB_BIND(stackchan_websocket_v1_ServoCommand, stackchan_websocket_v1_ServoCommand, AUTO) + + +PB_BIND(stackchan_websocket_v1_ServoDoneEvent, stackchan_websocket_v1_ServoDoneEvent, AUTO) + + + + + + + + + + + diff --git a/firmware/lib/generated_protobuf/websocket-message.pb.h b/firmware/lib/generated_protobuf/websocket-message.pb.h new file mode 100644 index 0000000..ce13703 --- /dev/null +++ b/firmware/lib/generated_protobuf/websocket-message.pb.h @@ -0,0 +1,370 @@ +/* Automatically generated nanopb header */ +/* Generated by nanopb-0.4.9.1 */ + +#ifndef PB_STACKCHAN_WEBSOCKET_V1_WEBSOCKET_MESSAGE_PB_H_INCLUDED +#define PB_STACKCHAN_WEBSOCKET_V1_WEBSOCKET_MESSAGE_PB_H_INCLUDED +#include <pb.h> + +#if PB_PROTO_HEADER_VERSION != 40 +#error Regenerate this file with the current version of nanopb generator. +#endif + +/* Enum definitions */ +typedef enum _stackchan_websocket_v1_MessageKind { + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_UNSPECIFIED = 0, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_PCM = 1, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_WAV = 2, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_CMD = 3, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_WAKE_WORD_EVT = 4, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_EVT = 5, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SPEAK_DONE_EVT = 6, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_CMD = 7, + stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT = 8 +} stackchan_websocket_v1_MessageKind; + +typedef enum _stackchan_websocket_v1_MessageType { + stackchan_websocket_v1_MessageType_MESSAGE_TYPE_UNSPECIFIED = 0, + stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START = 1, + stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA = 2, + stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END = 3 +} stackchan_websocket_v1_MessageType; + +typedef enum _stackchan_websocket_v1_StackchanState { + stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE = 0, + 
stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING = 1, + stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING = 2, + stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING = 3 +} stackchan_websocket_v1_StackchanState; + +typedef enum _stackchan_websocket_v1_ServoOperation { + stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP = 0, + stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X = 1, + stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y = 2 +} stackchan_websocket_v1_ServoOperation; + +/* Struct definitions */ +typedef struct _stackchan_websocket_v1_AudioPcmStart { + char dummy_field; +} stackchan_websocket_v1_AudioPcmStart; + +typedef struct _stackchan_websocket_v1_AudioPcmEnd { + char dummy_field; +} stackchan_websocket_v1_AudioPcmEnd; + +typedef struct _stackchan_websocket_v1_AudioWavStart { + uint32_t sample_rate; + uint32_t channels; +} stackchan_websocket_v1_AudioWavStart; + +typedef struct _stackchan_websocket_v1_AudioWavEnd { + char dummy_field; +} stackchan_websocket_v1_AudioWavEnd; + +typedef PB_BYTES_ARRAY_T(4096) stackchan_websocket_v1_AudioChunk_pcm_bytes_t; +typedef struct _stackchan_websocket_v1_AudioChunk { + stackchan_websocket_v1_AudioChunk_pcm_bytes_t pcm_bytes; +} stackchan_websocket_v1_AudioChunk; + +typedef struct _stackchan_websocket_v1_StateCommand { + stackchan_websocket_v1_StackchanState state; +} stackchan_websocket_v1_StateCommand; + +typedef struct _stackchan_websocket_v1_WakeWordEvent { + bool detected; +} stackchan_websocket_v1_WakeWordEvent; + +typedef struct _stackchan_websocket_v1_StateEvent { + stackchan_websocket_v1_StackchanState state; +} stackchan_websocket_v1_StateEvent; + +typedef struct _stackchan_websocket_v1_SpeakDoneEvent { + bool done; +} stackchan_websocket_v1_SpeakDoneEvent; + +typedef struct _stackchan_websocket_v1_ServoCommand { + stackchan_websocket_v1_ServoOperation op; + int32_t angle; /* used by MOVE_X / MOVE_Y */ + int32_t duration_ms; /* used by 
all operations */ +} stackchan_websocket_v1_ServoCommand; + +typedef struct _stackchan_websocket_v1_ServoCommandSequence { + pb_size_t commands_count; + stackchan_websocket_v1_ServoCommand commands[255]; +} stackchan_websocket_v1_ServoCommandSequence; + +typedef struct _stackchan_websocket_v1_ServoDoneEvent { + bool done; +} stackchan_websocket_v1_ServoDoneEvent; + +/* One WebSocket binary frame carries exactly one WebSocketMessage. + + Instead of concatenating two protobuf messages such as Header + Body, + this envelope keeps the routing metadata and the typed body together in a + single protobuf message. The `kind` / `message_type` fields preserve the + current protocol semantics, while `body` provides strongly typed payloads + for Python and firmware implementations. */ +typedef struct _stackchan_websocket_v1_WebSocketMessage { + stackchan_websocket_v1_MessageKind kind; + stackchan_websocket_v1_MessageType message_type; + uint32_t seq; /* current implementation uses uint16, but proto uses uint32 */ + pb_size_t which_body; + union { + stackchan_websocket_v1_AudioPcmStart audio_pcm_start; + stackchan_websocket_v1_AudioChunk audio_pcm_data; + stackchan_websocket_v1_AudioPcmEnd audio_pcm_end; + stackchan_websocket_v1_AudioWavStart audio_wav_start; + stackchan_websocket_v1_AudioChunk audio_wav_data; + stackchan_websocket_v1_AudioWavEnd audio_wav_end; + stackchan_websocket_v1_StateCommand state_cmd; + stackchan_websocket_v1_WakeWordEvent wake_word_evt; + stackchan_websocket_v1_StateEvent state_evt; + stackchan_websocket_v1_SpeakDoneEvent speak_done_evt; + stackchan_websocket_v1_ServoCommandSequence servo_cmd; + stackchan_websocket_v1_ServoDoneEvent servo_done_evt; + } body; +} stackchan_websocket_v1_WebSocketMessage; + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Helper constants for enums */ +#define _stackchan_websocket_v1_MessageKind_MIN stackchan_websocket_v1_MessageKind_MESSAGE_KIND_UNSPECIFIED +#define _stackchan_websocket_v1_MessageKind_MAX 
stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT +#define _stackchan_websocket_v1_MessageKind_ARRAYSIZE ((stackchan_websocket_v1_MessageKind)(stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT+1)) + +#define _stackchan_websocket_v1_MessageType_MIN stackchan_websocket_v1_MessageType_MESSAGE_TYPE_UNSPECIFIED +#define _stackchan_websocket_v1_MessageType_MAX stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END +#define _stackchan_websocket_v1_MessageType_ARRAYSIZE ((stackchan_websocket_v1_MessageType)(stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END+1)) + +#define _stackchan_websocket_v1_StackchanState_MIN stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE +#define _stackchan_websocket_v1_StackchanState_MAX stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING +#define _stackchan_websocket_v1_StackchanState_ARRAYSIZE ((stackchan_websocket_v1_StackchanState)(stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING+1)) + +#define _stackchan_websocket_v1_ServoOperation_MIN stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP +#define _stackchan_websocket_v1_ServoOperation_MAX stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y +#define _stackchan_websocket_v1_ServoOperation_ARRAYSIZE ((stackchan_websocket_v1_ServoOperation)(stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y+1)) + +#define stackchan_websocket_v1_WebSocketMessage_kind_ENUMTYPE stackchan_websocket_v1_MessageKind +#define stackchan_websocket_v1_WebSocketMessage_message_type_ENUMTYPE stackchan_websocket_v1_MessageType + + + + + + +#define stackchan_websocket_v1_StateCommand_state_ENUMTYPE stackchan_websocket_v1_StackchanState + + +#define stackchan_websocket_v1_StateEvent_state_ENUMTYPE stackchan_websocket_v1_StackchanState + + + +#define stackchan_websocket_v1_ServoCommand_op_ENUMTYPE stackchan_websocket_v1_ServoOperation + + + +/* Initializer values for message structs */ +#define stackchan_websocket_v1_WebSocketMessage_init_default 
{_stackchan_websocket_v1_MessageKind_MIN, _stackchan_websocket_v1_MessageType_MIN, 0, 0, {stackchan_websocket_v1_AudioPcmStart_init_default}} +#define stackchan_websocket_v1_AudioPcmStart_init_default {0} +#define stackchan_websocket_v1_AudioPcmEnd_init_default {0} +#define stackchan_websocket_v1_AudioWavStart_init_default {0, 0} +#define stackchan_websocket_v1_AudioWavEnd_init_default {0} +#define stackchan_websocket_v1_AudioChunk_init_default {{0, {0}}} +#define stackchan_websocket_v1_StateCommand_init_default {_stackchan_websocket_v1_StackchanState_MIN} +#define stackchan_websocket_v1_WakeWordEvent_init_default {0} +#define stackchan_websocket_v1_StateEvent_init_default {_stackchan_websocket_v1_StackchanState_MIN} +#define stackchan_websocket_v1_SpeakDoneEvent_init_default {0} +#define stackchan_websocket_v1_ServoCommandSequence_init_default {0, {stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, 
stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default, stackchan_websocket_v1_ServoCommand_init_default}} +#define stackchan_websocket_v1_ServoCommand_init_default {_stackchan_websocket_v1_ServoOperation_MIN, 0, 0} +#define stackchan_websocket_v1_ServoDoneEvent_init_default {0} +#define stackchan_websocket_v1_WebSocketMessage_init_zero {_stackchan_websocket_v1_MessageKind_MIN, _stackchan_websocket_v1_MessageType_MIN, 0, 0, 
{stackchan_websocket_v1_AudioPcmStart_init_zero}} +#define stackchan_websocket_v1_AudioPcmStart_init_zero {0} +#define stackchan_websocket_v1_AudioPcmEnd_init_zero {0} +#define stackchan_websocket_v1_AudioWavStart_init_zero {0, 0} +#define stackchan_websocket_v1_AudioWavEnd_init_zero {0} +#define stackchan_websocket_v1_AudioChunk_init_zero {{0, {0}}} +#define stackchan_websocket_v1_StateCommand_init_zero {_stackchan_websocket_v1_StackchanState_MIN} +#define stackchan_websocket_v1_WakeWordEvent_init_zero {0} +#define stackchan_websocket_v1_StateEvent_init_zero {_stackchan_websocket_v1_StackchanState_MIN} +#define stackchan_websocket_v1_SpeakDoneEvent_init_zero {0} +#define stackchan_websocket_v1_ServoCommandSequence_init_zero {0, {stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, 
stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero, stackchan_websocket_v1_ServoCommand_init_zero}} +#define stackchan_websocket_v1_ServoCommand_init_zero {_stackchan_websocket_v1_ServoOperation_MIN, 0, 0} +#define stackchan_websocket_v1_ServoDoneEvent_init_zero {0} + +/* Field tags (for use in manual encoding/decoding) */ +#define stackchan_websocket_v1_AudioWavStart_sample_rate_tag 1 +#define stackchan_websocket_v1_AudioWavStart_channels_tag 2 +#define stackchan_websocket_v1_AudioChunk_pcm_bytes_tag 1 +#define stackchan_websocket_v1_StateCommand_state_tag 1 +#define stackchan_websocket_v1_WakeWordEvent_detected_tag 1 +#define stackchan_websocket_v1_StateEvent_state_tag 1 +#define stackchan_websocket_v1_SpeakDoneEvent_done_tag 1 +#define stackchan_websocket_v1_ServoCommand_op_tag 1 +#define stackchan_websocket_v1_ServoCommand_angle_tag 2 +#define stackchan_websocket_v1_ServoCommand_duration_ms_tag 3 +#define stackchan_websocket_v1_ServoCommandSequence_commands_tag 1 +#define stackchan_websocket_v1_ServoDoneEvent_done_tag 1 +#define stackchan_websocket_v1_WebSocketMessage_kind_tag 1 +#define stackchan_websocket_v1_WebSocketMessage_message_type_tag 2 +#define 
stackchan_websocket_v1_WebSocketMessage_seq_tag 3 +#define stackchan_websocket_v1_WebSocketMessage_audio_pcm_start_tag 10 +#define stackchan_websocket_v1_WebSocketMessage_audio_pcm_data_tag 11 +#define stackchan_websocket_v1_WebSocketMessage_audio_pcm_end_tag 12 +#define stackchan_websocket_v1_WebSocketMessage_audio_wav_start_tag 20 +#define stackchan_websocket_v1_WebSocketMessage_audio_wav_data_tag 21 +#define stackchan_websocket_v1_WebSocketMessage_audio_wav_end_tag 22 +#define stackchan_websocket_v1_WebSocketMessage_state_cmd_tag 30 +#define stackchan_websocket_v1_WebSocketMessage_wake_word_evt_tag 31 +#define stackchan_websocket_v1_WebSocketMessage_state_evt_tag 32 +#define stackchan_websocket_v1_WebSocketMessage_speak_done_evt_tag 33 +#define stackchan_websocket_v1_WebSocketMessage_servo_cmd_tag 34 +#define stackchan_websocket_v1_WebSocketMessage_servo_done_evt_tag 35 + +/* Struct field encoding specification for nanopb */ +#define stackchan_websocket_v1_WebSocketMessage_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, UENUM, kind, 1) \ +X(a, STATIC, SINGULAR, UENUM, message_type, 2) \ +X(a, STATIC, SINGULAR, UINT32, seq, 3) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_pcm_start,body.audio_pcm_start), 10) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_pcm_data,body.audio_pcm_data), 11) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_pcm_end,body.audio_pcm_end), 12) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_wav_start,body.audio_wav_start), 20) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_wav_data,body.audio_wav_data), 21) \ +X(a, STATIC, ONEOF, MESSAGE, (body,audio_wav_end,body.audio_wav_end), 22) \ +X(a, STATIC, ONEOF, MESSAGE, (body,state_cmd,body.state_cmd), 30) \ +X(a, STATIC, ONEOF, MESSAGE, (body,wake_word_evt,body.wake_word_evt), 31) \ +X(a, STATIC, ONEOF, MESSAGE, (body,state_evt,body.state_evt), 32) \ +X(a, STATIC, ONEOF, MESSAGE, (body,speak_done_evt,body.speak_done_evt), 33) \ +X(a, STATIC, ONEOF, MESSAGE, (body,servo_cmd,body.servo_cmd), 34) \ +X(a, STATIC, 
ONEOF, MESSAGE, (body,servo_done_evt,body.servo_done_evt), 35) +#define stackchan_websocket_v1_WebSocketMessage_CALLBACK NULL +#define stackchan_websocket_v1_WebSocketMessage_DEFAULT NULL +#define stackchan_websocket_v1_WebSocketMessage_body_audio_pcm_start_MSGTYPE stackchan_websocket_v1_AudioPcmStart +#define stackchan_websocket_v1_WebSocketMessage_body_audio_pcm_data_MSGTYPE stackchan_websocket_v1_AudioChunk +#define stackchan_websocket_v1_WebSocketMessage_body_audio_pcm_end_MSGTYPE stackchan_websocket_v1_AudioPcmEnd +#define stackchan_websocket_v1_WebSocketMessage_body_audio_wav_start_MSGTYPE stackchan_websocket_v1_AudioWavStart +#define stackchan_websocket_v1_WebSocketMessage_body_audio_wav_data_MSGTYPE stackchan_websocket_v1_AudioChunk +#define stackchan_websocket_v1_WebSocketMessage_body_audio_wav_end_MSGTYPE stackchan_websocket_v1_AudioWavEnd +#define stackchan_websocket_v1_WebSocketMessage_body_state_cmd_MSGTYPE stackchan_websocket_v1_StateCommand +#define stackchan_websocket_v1_WebSocketMessage_body_wake_word_evt_MSGTYPE stackchan_websocket_v1_WakeWordEvent +#define stackchan_websocket_v1_WebSocketMessage_body_state_evt_MSGTYPE stackchan_websocket_v1_StateEvent +#define stackchan_websocket_v1_WebSocketMessage_body_speak_done_evt_MSGTYPE stackchan_websocket_v1_SpeakDoneEvent +#define stackchan_websocket_v1_WebSocketMessage_body_servo_cmd_MSGTYPE stackchan_websocket_v1_ServoCommandSequence +#define stackchan_websocket_v1_WebSocketMessage_body_servo_done_evt_MSGTYPE stackchan_websocket_v1_ServoDoneEvent + +#define stackchan_websocket_v1_AudioPcmStart_FIELDLIST(X, a) \ + +#define stackchan_websocket_v1_AudioPcmStart_CALLBACK NULL +#define stackchan_websocket_v1_AudioPcmStart_DEFAULT NULL + +#define stackchan_websocket_v1_AudioPcmEnd_FIELDLIST(X, a) \ + +#define stackchan_websocket_v1_AudioPcmEnd_CALLBACK NULL +#define stackchan_websocket_v1_AudioPcmEnd_DEFAULT NULL + +#define stackchan_websocket_v1_AudioWavStart_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, 
UINT32, sample_rate, 1) \ +X(a, STATIC, SINGULAR, UINT32, channels, 2) +#define stackchan_websocket_v1_AudioWavStart_CALLBACK NULL +#define stackchan_websocket_v1_AudioWavStart_DEFAULT NULL + +#define stackchan_websocket_v1_AudioWavEnd_FIELDLIST(X, a) \ + +#define stackchan_websocket_v1_AudioWavEnd_CALLBACK NULL +#define stackchan_websocket_v1_AudioWavEnd_DEFAULT NULL + +#define stackchan_websocket_v1_AudioChunk_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, BYTES, pcm_bytes, 1) +#define stackchan_websocket_v1_AudioChunk_CALLBACK NULL +#define stackchan_websocket_v1_AudioChunk_DEFAULT NULL + +#define stackchan_websocket_v1_StateCommand_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, UENUM, state, 1) +#define stackchan_websocket_v1_StateCommand_CALLBACK NULL +#define stackchan_websocket_v1_StateCommand_DEFAULT NULL + +#define stackchan_websocket_v1_WakeWordEvent_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, BOOL, detected, 1) +#define stackchan_websocket_v1_WakeWordEvent_CALLBACK NULL +#define stackchan_websocket_v1_WakeWordEvent_DEFAULT NULL + +#define stackchan_websocket_v1_StateEvent_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, UENUM, state, 1) +#define stackchan_websocket_v1_StateEvent_CALLBACK NULL +#define stackchan_websocket_v1_StateEvent_DEFAULT NULL + +#define stackchan_websocket_v1_SpeakDoneEvent_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, BOOL, done, 1) +#define stackchan_websocket_v1_SpeakDoneEvent_CALLBACK NULL +#define stackchan_websocket_v1_SpeakDoneEvent_DEFAULT NULL + +#define stackchan_websocket_v1_ServoCommandSequence_FIELDLIST(X, a) \ +X(a, STATIC, REPEATED, MESSAGE, commands, 1) +#define stackchan_websocket_v1_ServoCommandSequence_CALLBACK NULL +#define stackchan_websocket_v1_ServoCommandSequence_DEFAULT NULL +#define stackchan_websocket_v1_ServoCommandSequence_commands_MSGTYPE stackchan_websocket_v1_ServoCommand + +#define stackchan_websocket_v1_ServoCommand_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, UENUM, op, 1) \ +X(a, STATIC, SINGULAR, SINT32, angle, 2) \ +X(a, 
STATIC, SINGULAR, SINT32, duration_ms, 3) +#define stackchan_websocket_v1_ServoCommand_CALLBACK NULL +#define stackchan_websocket_v1_ServoCommand_DEFAULT NULL + +#define stackchan_websocket_v1_ServoDoneEvent_FIELDLIST(X, a) \ +X(a, STATIC, SINGULAR, BOOL, done, 1) +#define stackchan_websocket_v1_ServoDoneEvent_CALLBACK NULL +#define stackchan_websocket_v1_ServoDoneEvent_DEFAULT NULL + +extern const pb_msgdesc_t stackchan_websocket_v1_WebSocketMessage_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_AudioPcmStart_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_AudioPcmEnd_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_AudioWavStart_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_AudioWavEnd_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_AudioChunk_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_StateCommand_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_WakeWordEvent_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_StateEvent_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_SpeakDoneEvent_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_ServoCommandSequence_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_ServoCommand_msg; +extern const pb_msgdesc_t stackchan_websocket_v1_ServoDoneEvent_msg; + +/* Defines for backwards compatibility with code written before nanopb-0.4.0 */ +#define stackchan_websocket_v1_WebSocketMessage_fields &stackchan_websocket_v1_WebSocketMessage_msg +#define stackchan_websocket_v1_AudioPcmStart_fields &stackchan_websocket_v1_AudioPcmStart_msg +#define stackchan_websocket_v1_AudioPcmEnd_fields &stackchan_websocket_v1_AudioPcmEnd_msg +#define stackchan_websocket_v1_AudioWavStart_fields &stackchan_websocket_v1_AudioWavStart_msg +#define stackchan_websocket_v1_AudioWavEnd_fields &stackchan_websocket_v1_AudioWavEnd_msg +#define stackchan_websocket_v1_AudioChunk_fields &stackchan_websocket_v1_AudioChunk_msg +#define stackchan_websocket_v1_StateCommand_fields 
&stackchan_websocket_v1_StateCommand_msg +#define stackchan_websocket_v1_WakeWordEvent_fields &stackchan_websocket_v1_WakeWordEvent_msg +#define stackchan_websocket_v1_StateEvent_fields &stackchan_websocket_v1_StateEvent_msg +#define stackchan_websocket_v1_SpeakDoneEvent_fields &stackchan_websocket_v1_SpeakDoneEvent_msg +#define stackchan_websocket_v1_ServoCommandSequence_fields &stackchan_websocket_v1_ServoCommandSequence_msg +#define stackchan_websocket_v1_ServoCommand_fields &stackchan_websocket_v1_ServoCommand_msg +#define stackchan_websocket_v1_ServoDoneEvent_fields &stackchan_websocket_v1_ServoDoneEvent_msg + +/* Maximum encoded size of messages (where known) */ +#define STACKCHAN_WEBSOCKET_V1_WEBSOCKET_MESSAGE_PB_H_MAX_SIZE stackchan_websocket_v1_WebSocketMessage_size +#define stackchan_websocket_v1_AudioChunk_size 4099 +#define stackchan_websocket_v1_AudioPcmEnd_size 0 +#define stackchan_websocket_v1_AudioPcmStart_size 0 +#define stackchan_websocket_v1_AudioWavEnd_size 0 +#define stackchan_websocket_v1_AudioWavStart_size 12 +#define stackchan_websocket_v1_ServoCommandSequence_size 4080 +#define stackchan_websocket_v1_ServoCommand_size 14 +#define stackchan_websocket_v1_ServoDoneEvent_size 2 +#define stackchan_websocket_v1_SpeakDoneEvent_size 2 +#define stackchan_websocket_v1_StateCommand_size 2 +#define stackchan_websocket_v1_StateEvent_size 2 +#define stackchan_websocket_v1_WakeWordEvent_size 2 +#define stackchan_websocket_v1_WebSocketMessage_size 4113 + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/firmware/src/listening.cpp b/firmware/src/listening.cpp index 5ca9193..2aa9d77 100644 --- a/firmware/src/listening.cpp +++ b/firmware/src/listening.cpp @@ -4,6 +4,11 @@ #include #include +namespace +{ +stackchan_websocket_v1_WebSocketMessage g_listening_tx_message = stackchan_websocket_v1_WebSocketMessage_init_zero; +} + Listening::Listening(WebSocketsClient &ws, StateMachine &sm, int sampleRate) : ws_(ws), state_(sm), 
sample_rate_(sampleRate), chunk_samples_(static_cast(sampleRate) / 8), @@ -189,19 +194,38 @@ bool Listening::sendPacket(MessageType type, const int16_t *samples, size_t samp return false; } - WsHeader header{}; - header.kind = static_cast(MessageKind::AudioPcm); - header.messageType = static_cast(type); - header.reserved = 0; - header.seq = seq_counter_++; - header.payloadBytes = static_cast(sampleCount * sizeof(int16_t)); + auto &message = g_listening_tx_message; + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + message.kind = toProtoMessageKind(MessageKind::AudioPcm); + message.message_type = toProtoMessageType(type); + message.seq = seq_counter_++; + + switch (type) + { + case MessageType::START: + message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_start_tag; + break; + case MessageType::DATA: + message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_data_tag; + if (!setProtoAudioChunk( + message.body.audio_pcm_data, + reinterpret_cast(samples), + sampleCount * sizeof(int16_t))) + { + return false; + } + break; + case MessageType::END: + message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_end_tag; + break; + default: + return false; + } std::vector packet; - packet.resize(sizeof(WsHeader) + header.payloadBytes); - memcpy(packet.data(), &header, sizeof(WsHeader)); - if (header.payloadBytes > 0 && samples != nullptr) + if (!encodeWebSocketMessage(message, packet)) { - memcpy(packet.data() + sizeof(WsHeader), samples, header.payloadBytes); + return false; } ws_.sendBIN(packet.data(), packet.size()); diff --git a/firmware/src/main.cpp b/firmware/src/main.cpp index bb0f904..ca932b3 100644 --- a/firmware/src/main.cpp +++ b/firmware/src/main.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "config.h" #include "../include/protocols.hpp" @@ -37,9 +38,11 @@ static BodyServo servo; // Protocol types are defined in include/protocols.hpp namespace { -uint16_t g_uplink_seq = 0; 
+uint32_t g_uplink_seq = 0; uint32_t g_last_comm_ms = 0; constexpr uint32_t kCommTimeoutMs = 60000; +stackchan_websocket_v1_WebSocketMessage g_tx_message = stackchan_websocket_v1_WebSocketMessage_init_zero; +stackchan_websocket_v1_WebSocketMessage g_rx_message = stackchan_websocket_v1_WebSocketMessage_init_zero; void markCommunicationActive() { @@ -64,36 +67,41 @@ void handleCommunicationTimeout() } } -bool sendUplinkPacket(MessageKind kind, MessageType msgType, const uint8_t *payload, size_t payload_len) +bool sendUplinkMessage(const stackchan_websocket_v1_WebSocketMessage &message) { if ((WiFi.status() != WL_CONNECTED) || !wsClient.isConnected()) { return false; } - WsHeader header{}; - header.kind = static_cast(kind); - header.messageType = static_cast(msgType); - header.reserved = 0; - header.seq = g_uplink_seq++; - header.payloadBytes = static_cast(payload_len); - std::vector packet; - packet.resize(sizeof(WsHeader) + payload_len); - memcpy(packet.data(), &header, sizeof(WsHeader)); - if (payload_len > 0 && payload != nullptr) + if (!encodeWebSocketMessage(message, packet)) { - memcpy(packet.data() + sizeof(WsHeader), payload, payload_len); + return false; } + wsClient.sendBIN(packet.data(), packet.size()); markCommunicationActive(); return true; } +void appendInt16Le(std::vector &payload, int16_t value) +{ + size_t start = payload.size(); + payload.resize(start + sizeof(value)); + memcpy(payload.data() + start, &value, sizeof(value)); +} + void notifyWakeWordDetected() { - const uint8_t payload = 1; // detected - if (!sendUplinkPacket(MessageKind::WakeWordEvt, MessageType::DATA, &payload, sizeof(payload))) + auto &message = g_tx_message; + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + message.kind = toProtoMessageKind(MessageKind::WakeWordEvt); + message.message_type = toProtoMessageType(MessageType::DATA); + message.seq = g_uplink_seq++; + message.which_body = stackchan_websocket_v1_WebSocketMessage_wake_word_evt_tag; + 
message.body.wake_word_evt.detected = true; + if (!sendUplinkMessage(message)) { log_w("Failed to send WakeWordEvt"); } @@ -101,17 +109,29 @@ void notifyWakeWordDetected() void notifyCurrentState(StateMachine::State state) { - const uint8_t payload = static_cast(state); - if (!sendUplinkPacket(MessageKind::StateEvt, MessageType::DATA, &payload, sizeof(payload))) + auto &message = g_tx_message; + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + message.kind = toProtoMessageKind(MessageKind::StateEvt); + message.message_type = toProtoMessageType(MessageType::DATA); + message.seq = g_uplink_seq++; + message.which_body = stackchan_websocket_v1_WebSocketMessage_state_evt_tag; + message.body.state_evt.state = static_cast(static_cast(state)); + if (!sendUplinkMessage(message)) { - log_w("Failed to send StateEvt state=%u", static_cast(payload)); + log_w("Failed to send StateEvt state=%u", static_cast(state)); } } void notifySpeakDone() { - const uint8_t payload = 1; // done - if (!sendUplinkPacket(MessageKind::SpeakDoneEvt, MessageType::DATA, &payload, sizeof(payload))) + auto &message = g_tx_message; + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + message.kind = toProtoMessageKind(MessageKind::SpeakDoneEvt); + message.message_type = toProtoMessageType(MessageType::DATA); + message.seq = g_uplink_seq++; + message.which_body = stackchan_websocket_v1_WebSocketMessage_speak_done_evt_tag; + message.body.speak_done_evt.done = true; + if (!sendUplinkMessage(message)) { log_w("Failed to send SpeakDoneEvt"); } @@ -119,22 +139,22 @@ void notifySpeakDone() void notifyServoDone() { - const uint8_t payload = 1; // done - if (!sendUplinkPacket(MessageKind::ServoDoneEvt, MessageType::DATA, &payload, sizeof(payload))) + auto &message = g_tx_message; + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + message.kind = toProtoMessageKind(MessageKind::ServoDoneEvt); + message.message_type = toProtoMessageType(MessageType::DATA); + message.seq = 
g_uplink_seq++; + message.which_body = stackchan_websocket_v1_WebSocketMessage_servo_done_evt_tag; + message.body.servo_done_evt.done = true; + if (!sendUplinkMessage(message)) { log_w("Failed to send ServoDoneEvt"); } } -bool applyRemoteStateCommand(const uint8_t *body, size_t bodyLen) +bool applyRemoteStateCommand(const stackchan_websocket_v1_StateCommand &command) { - if (body == nullptr || bodyLen < 1) - { - log_w("StateCmd payload too short: %u", static_cast(bodyLen)); - return false; - } - - RemoteState target = static_cast(body[0]); + RemoteState target = fromProtoState(command.state); switch (target) { case RemoteState::Idle: @@ -150,14 +170,54 @@ bool applyRemoteStateCommand(const uint8_t *body, size_t bodyLen) stateMachine.setState(StateMachine::Speaking); return true; default: - log_w("Unknown remote state: %u", static_cast(body[0])); + log_w("Unknown remote state"); return false; } } -bool applyServoCommand(const uint8_t *body, size_t bodyLen) +bool applyServoCommand(const stackchan_websocket_v1_ServoCommandSequence &sequence) { - if (!servo.enqueueSequence(body, bodyLen)) + if (sequence.commands_count > kProtoServoCommandMaxCount) + { + log_w("ServoCmd count too large: %u", static_cast(sequence.commands_count)); + return false; + } + + std::vector payload; + payload.reserve(1 + sequence.commands_count * 4); + payload.push_back(static_cast(sequence.commands_count)); + + for (pb_size_t i = 0; i < sequence.commands_count; ++i) + { + const auto &command = sequence.commands[i]; + const ServoCommandOp op = fromProtoServoOperation(command.op); + + if (command.duration_ms < std::numeric_limits::min() || + command.duration_ms > std::numeric_limits::max()) + { + log_w("ServoCmd duration out of range at command=%u", static_cast(i)); + return false; + } + + payload.push_back(static_cast(op)); + if (op == ServoCommandOp::Sleep) + { + appendInt16Le(payload, static_cast(command.duration_ms)); + continue; + } + + if (command.angle < std::numeric_limits::min() || + 
command.angle > std::numeric_limits::max()) + { + log_w("ServoCmd angle out of range at command=%u", static_cast(i)); + return false; + } + + payload.push_back(static_cast(static_cast(command.angle))); + appendInt16Le(payload, static_cast(command.duration_ms)); + } + + if (!servo.enqueueSequence(payload.data(), payload.size())) { log_w("Failed to apply servo command"); return false; @@ -202,49 +262,75 @@ void handleWsEvent(WStype_t type, uint8_t *payload, size_t length) case WStype_BIN: { markCommunicationActive(); - if (length < sizeof(WsHeader)) + auto &rx = g_rx_message; + rx = stackchan_websocket_v1_WebSocketMessage_init_zero; + if (!decodeWebSocketMessage(payload, length, rx)) { - // M5.Display.println("WS bin too short"); - log_i("WS bin too short: %d", (int)length); + log_i("WS protobuf decode failed: %d", (int)length); break; } - WsHeader rx{}; - memcpy(&rx, payload, sizeof(WsHeader)); - size_t rx_payload_len = length - sizeof(WsHeader); - if (rx_payload_len != rx.payloadBytes) - { - // M5.Display.println("WS payload len mismatch"); - log_i("WS payload len mismatch: expected=%u got=%u", (unsigned)rx.payloadBytes, (unsigned)rx_payload_len); - break; - } + log_i("WS protobuf kind=%u len=%d", (unsigned)rx.kind, (int)length); - const uint8_t *body = payload + sizeof(WsHeader); - log_i("WS bin kind=%u len=%d", (unsigned)rx.kind, (int)length); - - switch (static_cast(rx.kind)) + switch (rx.kind) + { + case stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_WAV: { - case MessageKind::AudioWav: - speaking.handleWavMessage(rx, body, rx_payload_len); + WsHeader compat{}; + compat.kind = static_cast(MessageKind::AudioWav); + compat.messageType = static_cast(rx.message_type); + compat.seq = rx.seq; + + if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_start_tag) + { + uint8_t body[6]{}; + uint32_t sample_rate = rx.body.audio_wav_start.sample_rate; + uint16_t channels = 
static_cast(rx.body.audio_wav_start.channels); + memcpy(body, &sample_rate, sizeof(sample_rate)); + memcpy(body + sizeof(sample_rate), &channels, sizeof(channels)); + compat.payloadBytes = sizeof(body); + speaking.handleWavMessage(compat, body, sizeof(body)); + } + else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_data_tag) + { + size_t body_len = getProtoAudioChunkSize(rx.body.audio_wav_data); + compat.payloadBytes = body_len; + speaking.handleWavMessage(compat, getProtoAudioChunkBytes(rx.body.audio_wav_data), body_len); + } + else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_end_tag) + { + compat.payloadBytes = 0; + speaking.handleWavMessage(compat, nullptr, 0); + } + else + { + log_w("AudioWav protobuf body mismatch type=%u body=%u", (unsigned)rx.message_type, (unsigned)rx.which_body); + } break; - case MessageKind::StateCmd: - if (static_cast(rx.messageType) == MessageType::DATA) + } + case stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_CMD: + if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_state_cmd_tag) { - applyRemoteStateCommand(body, rx_payload_len); + applyRemoteStateCommand(rx.body.state_cmd); } else { - log_w("StateCmd unsupported msgType=%u", static_cast(rx.messageType)); + log_w("StateCmd protobuf body mismatch type=%u body=%u", (unsigned)rx.message_type, (unsigned)rx.which_body); } break; - case MessageKind::ServoCmd: - if (static_cast(rx.messageType) == MessageType::DATA) + case stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_CMD: + if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA && + rx.which_body == stackchan_websocket_v1_WebSocketMessage_servo_cmd_tag) { - applyServoCommand(body, rx_payload_len); + 
applyServoCommand(rx.body.servo_cmd); } else { - log_w("ServoCmd unsupported msgType=%u", static_cast(rx.messageType)); + log_w("ServoCmd protobuf body mismatch type=%u body=%u", (unsigned)rx.message_type, (unsigned)rx.which_body); } break; default: diff --git a/firmware/src/protocols.cpp b/firmware/src/protocols.cpp new file mode 100644 index 0000000..8b61312 --- /dev/null +++ b/firmware/src/protocols.cpp @@ -0,0 +1,163 @@ +#include "../include/protocols.hpp" + +#include + +#include +#include + +stackchan_websocket_v1_MessageKind toProtoMessageKind(MessageKind kind) +{ + switch (kind) + { + case MessageKind::AudioPcm: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_PCM; + case MessageKind::AudioWav: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_WAV; + case MessageKind::StateCmd: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_CMD; + case MessageKind::WakeWordEvt: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_WAKE_WORD_EVT; + case MessageKind::StateEvt: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_EVT; + case MessageKind::SpeakDoneEvt: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SPEAK_DONE_EVT; + case MessageKind::ServoCmd: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_CMD; + case MessageKind::ServoDoneEvt: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT; + default: + return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_UNSPECIFIED; + } +} + +stackchan_websocket_v1_MessageType toProtoMessageType(MessageType type) +{ + switch (type) + { + case MessageType::START: + return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START; + case MessageType::DATA: + return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; + case MessageType::END: + return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END; + default: + return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_UNSPECIFIED; + } +} + +stackchan_websocket_v1_StackchanState 
toProtoState(RemoteState state) +{ + switch (state) + { + case RemoteState::Idle: + return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE; + case RemoteState::Listening: + return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING; + case RemoteState::Thinking: + return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING; + case RemoteState::Speaking: + return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING; + default: + return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE; + } +} + +stackchan_websocket_v1_ServoOperation toProtoServoOperation(ServoCommandOp op) +{ + switch (op) + { + case ServoCommandOp::Sleep: + return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP; + case ServoCommandOp::MoveX: + return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X; + case ServoCommandOp::MoveY: + return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y; + default: + return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP; + } +} + +RemoteState fromProtoState(stackchan_websocket_v1_StackchanState state) +{ + switch (state) + { + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE: + return RemoteState::Idle; + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING: + return RemoteState::Listening; + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING: + return RemoteState::Thinking; + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING: + return RemoteState::Speaking; + default: + return RemoteState::Idle; + } +} + +ServoCommandOp fromProtoServoOperation(stackchan_websocket_v1_ServoOperation op) +{ + switch (op) + { + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP: + return ServoCommandOp::Sleep; + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X: + return ServoCommandOp::MoveX; + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y: + return ServoCommandOp::MoveY; 
+ default: + return ServoCommandOp::Sleep; + } +} + +bool setProtoAudioChunk( + stackchan_websocket_v1_AudioChunk &chunk, + const uint8_t *data, + size_t data_len) +{ + if (data_len > kProtoAudioChunkMaxBytes) + { + return false; + } + + chunk.pcm_bytes.size = static_cast(data_len); + if (data_len > 0 && data != nullptr) + { + memcpy(chunk.pcm_bytes.bytes, data, data_len); + } + return true; +} + +const uint8_t *getProtoAudioChunkBytes(const stackchan_websocket_v1_AudioChunk &chunk) +{ + return chunk.pcm_bytes.bytes; +} + +size_t getProtoAudioChunkSize(const stackchan_websocket_v1_AudioChunk &chunk) +{ + return chunk.pcm_bytes.size; +} + +bool encodeWebSocketMessage( + const stackchan_websocket_v1_WebSocketMessage &message, + std::vector &encoded) +{ + encoded.assign(kMaxEncodedWebSocketMessageBytes, 0); + pb_ostream_t stream = pb_ostream_from_buffer(encoded.data(), encoded.size()); + if (!pb_encode(&stream, stackchan_websocket_v1_WebSocketMessage_fields, &message)) + { + encoded.clear(); + return false; + } + encoded.resize(stream.bytes_written); + return true; +} + +bool decodeWebSocketMessage( + const uint8_t *data, + size_t data_len, + stackchan_websocket_v1_WebSocketMessage &message) +{ + message = stackchan_websocket_v1_WebSocketMessage_init_zero; + pb_istream_t stream = pb_istream_from_buffer(data, data_len); + return pb_decode(&stream, stackchan_websocket_v1_WebSocketMessage_fields, &message); +} diff --git a/platformio.ini b/platformio.ini index 8543ec1..394794a 100644 --- a/platformio.ini +++ b/platformio.ini @@ -20,6 +20,7 @@ lib_deps = Links2004/WebSockets@^2.7.2 ESP32Async/AsyncTCP@^3.4.10 madhephaestus/ESP32Servo@^3.1.3 + nanopb/Nanopb@^0.4.91 https://github.com/74th/ESP-SR-For-M5Unified.git@1.0.0 https://github.com/mongonta0716/SCServo.git diff --git a/protobuf/websocket-message.options b/protobuf/websocket-message.options new file mode 100644 index 0000000..233e71d --- /dev/null +++ b/protobuf/websocket-message.options @@ -0,0 +1,2 @@ 
+stackchan.websocket.v1.AudioChunk.pcm_bytes max_size:4096 +stackchan.websocket.v1.ServoCommandSequence.commands max_count:255 diff --git a/protobuf/websocket-message.proto b/protobuf/websocket-message.proto new file mode 100644 index 0000000..d1de065 --- /dev/null +++ b/protobuf/websocket-message.proto @@ -0,0 +1,110 @@ +syntax = "proto3"; + +package stackchan.websocket.v1; + +// One WebSocket binary frame carries exactly one WebSocketMessage. +// +// Instead of concatenating two protobuf messages such as Header + Body, +// this envelope keeps the routing metadata and the typed body together in a +// single protobuf message. The `kind` / `message_type` fields preserve the +// current protocol semantics, while `body` provides strongly typed payloads +// for Python and firmware implementations. +message WebSocketMessage { + MessageKind kind = 1; + MessageType message_type = 2; + uint32 seq = 3; // current implementation uses uint16, but proto uses uint32 + + oneof body { + AudioPcmStart audio_pcm_start = 10; + AudioChunk audio_pcm_data = 11; + AudioPcmEnd audio_pcm_end = 12; + + AudioWavStart audio_wav_start = 20; + AudioChunk audio_wav_data = 21; + AudioWavEnd audio_wav_end = 22; + + StateCommand state_cmd = 30; + WakeWordEvent wake_word_evt = 31; + StateEvent state_evt = 32; + SpeakDoneEvent speak_done_evt = 33; + ServoCommandSequence servo_cmd = 34; + ServoDoneEvent servo_done_evt = 35; + } +} + +enum MessageKind { + MESSAGE_KIND_UNSPECIFIED = 0; + MESSAGE_KIND_AUDIO_PCM = 1; + MESSAGE_KIND_AUDIO_WAV = 2; + MESSAGE_KIND_STATE_CMD = 3; + MESSAGE_KIND_WAKE_WORD_EVT = 4; + MESSAGE_KIND_STATE_EVT = 5; + MESSAGE_KIND_SPEAK_DONE_EVT = 6; + MESSAGE_KIND_SERVO_CMD = 7; + MESSAGE_KIND_SERVO_DONE_EVT = 8; +} + +enum MessageType { + MESSAGE_TYPE_UNSPECIFIED = 0; + MESSAGE_TYPE_START = 1; + MESSAGE_TYPE_DATA = 2; + MESSAGE_TYPE_END = 3; +} + +enum StackchanState { + STACKCHAN_STATE_IDLE = 0; + STACKCHAN_STATE_LISTENING = 1; + STACKCHAN_STATE_THINKING = 2; + 
STACKCHAN_STATE_SPEAKING = 3; +} + +enum ServoOperation { + SERVO_OPERATION_SLEEP = 0; + SERVO_OPERATION_MOVE_X = 1; + SERVO_OPERATION_MOVE_Y = 2; +} + +message AudioPcmStart {} + +message AudioPcmEnd {} + +message AudioWavStart { + uint32 sample_rate = 1; + uint32 channels = 2; +} + +message AudioWavEnd {} + +message AudioChunk { + bytes pcm_bytes = 1; +} + +message StateCommand { + StackchanState state = 1; +} + +message WakeWordEvent { + bool detected = 1; +} + +message StateEvent { + StackchanState state = 1; +} + +message SpeakDoneEvent { + bool done = 1; +} + +message ServoCommandSequence { + repeated ServoCommand commands = 1; +} + +message ServoCommand { + ServoOperation op = 1; + sint32 angle = 2; // used by MOVE_X / MOVE_Y + sint32 duration_ms = 3; // used by all operations +} + +message ServoDoneEvent { + bool done = 1; +} diff --git a/pyproject.toml b/pyproject.toml index 7f1a290..90638f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,10 +16,12 @@ dependencies = [ "voicevox-client>=1.1.0", "python-dotenv>=1.2.1", "pydantic-settings>=2.13.1", + "protobuf>=6.33.3", ] [dependency-groups] dev = [ + "grpcio-tools>=1.76.0", "ruff>=0.15.2", "ty>=0.0.17", ] diff --git a/stackchan_server/generated_protobuf/__init__.py b/stackchan_server/generated_protobuf/__init__.py new file mode 100644 index 0000000..79429d2 --- /dev/null +++ b/stackchan_server/generated_protobuf/__init__.py @@ -0,0 +1 @@ +"""Generated protobuf modules for StackChan WebSocket messages.""" \ No newline at end of file diff --git a/stackchan_server/generated_protobuf/websocket_message_pb2.py b/stackchan_server/generated_protobuf/websocket_message_pb2.py new file mode 100644 index 0000000..985a939 --- /dev/null +++ b/stackchan_server/generated_protobuf/websocket_message_pb2.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: websocket-message.proto +# Protobuf Python Version: 6.31.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 6, + 31, + 1, + '', + 'websocket-message.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x17websocket-message.proto\x12\x16stackchan.websocket.v1\"\x8c\x07\n\x10WebSocketMessage\x12\x31\n\x04kind\x18\x01 \x01(\x0e\x32#.stackchan.websocket.v1.MessageKind\x12\x39\n\x0cmessage_type\x18\x02 \x01(\x0e\x32#.stackchan.websocket.v1.MessageType\x12\x0b\n\x03seq\x18\x03 \x01(\r\x12@\n\x0f\x61udio_pcm_start\x18\n \x01(\x0b\x32%.stackchan.websocket.v1.AudioPcmStartH\x00\x12<\n\x0e\x61udio_pcm_data\x18\x0b \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_pcm_end\x18\x0c \x01(\x0b\x32#.stackchan.websocket.v1.AudioPcmEndH\x00\x12@\n\x0f\x61udio_wav_start\x18\x14 \x01(\x0b\x32%.stackchan.websocket.v1.AudioWavStartH\x00\x12<\n\x0e\x61udio_wav_data\x18\x15 \x01(\x0b\x32\".stackchan.websocket.v1.AudioChunkH\x00\x12<\n\raudio_wav_end\x18\x16 \x01(\x0b\x32#.stackchan.websocket.v1.AudioWavEndH\x00\x12\x39\n\tstate_cmd\x18\x1e \x01(\x0b\x32$.stackchan.websocket.v1.StateCommandH\x00\x12>\n\rwake_word_evt\x18\x1f \x01(\x0b\x32%.stackchan.websocket.v1.WakeWordEventH\x00\x12\x37\n\tstate_evt\x18 \x01(\x0b\x32\".stackchan.websocket.v1.StateEventH\x00\x12@\n\x0espeak_done_evt\x18! 
\x01(\x0b\x32&.stackchan.websocket.v1.SpeakDoneEventH\x00\x12\x41\n\tservo_cmd\x18\" \x01(\x0b\x32,.stackchan.websocket.v1.ServoCommandSequenceH\x00\x12@\n\x0eservo_done_evt\x18# \x01(\x0b\x32&.stackchan.websocket.v1.ServoDoneEventH\x00\x42\x06\n\x04\x62ody\"\x0f\n\rAudioPcmStart\"\r\n\x0b\x41udioPcmEnd\"6\n\rAudioWavStart\x12\x13\n\x0bsample_rate\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\"\r\n\x0b\x41udioWavEnd\"\x1f\n\nAudioChunk\x12\x11\n\tpcm_bytes\x18\x01 \x01(\x0c\"E\n\x0cStateCommand\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"!\n\rWakeWordEvent\x12\x10\n\x08\x64\x65tected\x18\x01 \x01(\x08\"C\n\nStateEvent\x12\x35\n\x05state\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.StackchanState\"\x1e\n\x0eSpeakDoneEvent\x12\x0c\n\x04\x64one\x18\x01 \x01(\x08\"N\n\x14ServoCommandSequence\x12\x36\n\x08\x63ommands\x18\x01 \x03(\x0b\x32$.stackchan.websocket.v1.ServoCommand\"f\n\x0cServoCommand\x12\x32\n\x02op\x18\x01 \x01(\x0e\x32&.stackchan.websocket.v1.ServoOperation\x12\r\n\x05\x61ngle\x18\x02 \x01(\x11\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x11\"\x1e\n\x0eServoDoneEvent\x12\x0c\n\x04\x64one\x18\x01 
\x01(\x08*\x99\x02\n\x0bMessageKind\x12\x1c\n\x18MESSAGE_KIND_UNSPECIFIED\x10\x00\x12\x1a\n\x16MESSAGE_KIND_AUDIO_PCM\x10\x01\x12\x1a\n\x16MESSAGE_KIND_AUDIO_WAV\x10\x02\x12\x1a\n\x16MESSAGE_KIND_STATE_CMD\x10\x03\x12\x1e\n\x1aMESSAGE_KIND_WAKE_WORD_EVT\x10\x04\x12\x1a\n\x16MESSAGE_KIND_STATE_EVT\x10\x05\x12\x1f\n\x1bMESSAGE_KIND_SPEAK_DONE_EVT\x10\x06\x12\x1a\n\x16MESSAGE_KIND_SERVO_CMD\x10\x07\x12\x1f\n\x1bMESSAGE_KIND_SERVO_DONE_EVT\x10\x08*p\n\x0bMessageType\x12\x1c\n\x18MESSAGE_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12MESSAGE_TYPE_START\x10\x01\x12\x15\n\x11MESSAGE_TYPE_DATA\x10\x02\x12\x14\n\x10MESSAGE_TYPE_END\x10\x03*\x85\x01\n\x0eStackchanState\x12\x18\n\x14STACKCHAN_STATE_IDLE\x10\x00\x12\x1d\n\x19STACKCHAN_STATE_LISTENING\x10\x01\x12\x1c\n\x18STACKCHAN_STATE_THINKING\x10\x02\x12\x1c\n\x18STACKCHAN_STATE_SPEAKING\x10\x03*c\n\x0eServoOperation\x12\x19\n\x15SERVO_OPERATION_SLEEP\x10\x00\x12\x1a\n\x16SERVO_OPERATION_MOVE_X\x10\x01\x12\x1a\n\x16SERVO_OPERATION_MOVE_Y\x10\x02\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'websocket_message_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + _globals['_MESSAGEKIND']._serialized_start=1522 + _globals['_MESSAGEKIND']._serialized_end=1803 + _globals['_MESSAGETYPE']._serialized_start=1805 + _globals['_MESSAGETYPE']._serialized_end=1917 + _globals['_STACKCHANSTATE']._serialized_start=1920 + _globals['_STACKCHANSTATE']._serialized_end=2053 + _globals['_SERVOOPERATION']._serialized_start=2055 + _globals['_SERVOOPERATION']._serialized_end=2154 + _globals['_WEBSOCKETMESSAGE']._serialized_start=52 + _globals['_WEBSOCKETMESSAGE']._serialized_end=960 + _globals['_AUDIOPCMSTART']._serialized_start=962 + _globals['_AUDIOPCMSTART']._serialized_end=977 + _globals['_AUDIOPCMEND']._serialized_start=979 + _globals['_AUDIOPCMEND']._serialized_end=992 + 
_globals['_AUDIOWAVSTART']._serialized_start=994 + _globals['_AUDIOWAVSTART']._serialized_end=1048 + _globals['_AUDIOWAVEND']._serialized_start=1050 + _globals['_AUDIOWAVEND']._serialized_end=1063 + _globals['_AUDIOCHUNK']._serialized_start=1065 + _globals['_AUDIOCHUNK']._serialized_end=1096 + _globals['_STATECOMMAND']._serialized_start=1098 + _globals['_STATECOMMAND']._serialized_end=1167 + _globals['_WAKEWORDEVENT']._serialized_start=1169 + _globals['_WAKEWORDEVENT']._serialized_end=1202 + _globals['_STATEEVENT']._serialized_start=1204 + _globals['_STATEEVENT']._serialized_end=1271 + _globals['_SPEAKDONEEVENT']._serialized_start=1273 + _globals['_SPEAKDONEEVENT']._serialized_end=1303 + _globals['_SERVOCOMMANDSEQUENCE']._serialized_start=1305 + _globals['_SERVOCOMMANDSEQUENCE']._serialized_end=1383 + _globals['_SERVOCOMMAND']._serialized_start=1385 + _globals['_SERVOCOMMAND']._serialized_end=1487 + _globals['_SERVODONEEVENT']._serialized_start=1489 + _globals['_SERVODONEEVENT']._serialized_end=1519 +# @@protoc_insertion_point(module_scope) diff --git a/stackchan_server/protobuf_ws.py b/stackchan_server/protobuf_ws.py new file mode 100644 index 0000000..94b808c --- /dev/null +++ b/stackchan_server/protobuf_ws.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +from collections.abc import Sequence +from enum import StrEnum +from typing import Any, Literal, cast + +from .generated_protobuf import websocket_message_pb2 as _ws_pb2 + +ws_pb2: Any = _ws_pb2 + +ServoMoveType: type[StrEnum] | None = None +ServoWaitType: type[StrEnum] | None = None +ServoMoveCommand = tuple[Literal["move_x", "move_y"] | StrEnum, int, int] +ServoSleepCommand = tuple[Literal["sleep"] | StrEnum, int] +ServoCommand = ServoMoveCommand | ServoSleepCommand + + +def _ensure_range(value: int, *, minimum: int, maximum: int, label: str) -> int: + if not minimum <= value <= maximum: + raise ValueError(f"{label} must be between {minimum} and {maximum}: {value}") + return value + + +def 
parse_websocket_message(data: bytes) -> Any: + message = ws_pb2.WebSocketMessage() + message.ParseFromString(data) + return message + + +def _new_message(kind: int, message_type: int, seq: int) -> Any: + return ws_pb2.WebSocketMessage(kind=kind, message_type=message_type, seq=seq) + + +def encode_audio_pcm_start_message(seq: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_PCM, + ws_pb2.MESSAGE_TYPE_START, + seq, + ) + message.audio_pcm_start.SetInParent() + return message.SerializeToString() + + +def encode_audio_pcm_data_message(seq: int, pcm_bytes: bytes) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_PCM, + ws_pb2.MESSAGE_TYPE_DATA, + seq, + ) + message.audio_pcm_data.pcm_bytes = pcm_bytes + return message.SerializeToString() + + +def encode_audio_pcm_end_message(seq: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_PCM, + ws_pb2.MESSAGE_TYPE_END, + seq, + ) + message.audio_pcm_end.SetInParent() + return message.SerializeToString() + + +def encode_audio_wav_start_message(seq: int, *, sample_rate: int, channels: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_WAV, + ws_pb2.MESSAGE_TYPE_START, + seq, + ) + message.audio_wav_start.sample_rate = int(sample_rate) + message.audio_wav_start.channels = int(channels) + return message.SerializeToString() + + +def encode_audio_wav_data_message(seq: int, pcm_bytes: bytes) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_WAV, + ws_pb2.MESSAGE_TYPE_DATA, + seq, + ) + message.audio_wav_data.pcm_bytes = pcm_bytes + return message.SerializeToString() + + +def encode_audio_wav_end_message(seq: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_AUDIO_WAV, + ws_pb2.MESSAGE_TYPE_END, + seq, + ) + message.audio_wav_end.SetInParent() + return message.SerializeToString() + + +def encode_state_command_message(seq: int, state_id: int) -> bytes: + message = _new_message( + ws_pb2.MESSAGE_KIND_STATE_CMD, + ws_pb2.MESSAGE_TYPE_DATA, + 
seq, + ) + message.state_cmd.state = int(state_id) + return message.SerializeToString() + + +def encode_servo_command_message(seq: int, commands: Sequence[ServoCommand]) -> bytes: + normalized = list(commands) + _ensure_range(len(normalized), minimum=0, maximum=255, label="servo command count") + + message = _new_message( + ws_pb2.MESSAGE_KIND_SERVO_CMD, + ws_pb2.MESSAGE_TYPE_DATA, + seq, + ) + + for index, command in enumerate(normalized): + encoded = message.servo_cmd.commands.add() + if len(command) == 2: + name, raw_duration_ms = cast(ServoSleepCommand, command) + if str(name) != "sleep": + raise ValueError(f"unsupported servo command at index {index}: {name}") + encoded.op = ws_pb2.SERVO_OPERATION_SLEEP + encoded.duration_ms = _ensure_range( + int(raw_duration_ms), + minimum=-32768, + maximum=32767, + label="sleep duration", + ) + continue + + if len(command) == 3: + name, raw_angle, raw_duration_ms = cast(ServoMoveCommand, command) + if str(name) not in ("move_x", "move_y"): + raise ValueError(f"unsupported servo command at index {index}: {name}") + encoded.op = ( + ws_pb2.SERVO_OPERATION_MOVE_X + if str(name) == "move_x" + else ws_pb2.SERVO_OPERATION_MOVE_Y + ) + encoded.angle = _ensure_range( + int(raw_angle), + minimum=-128, + maximum=127, + label="servo angle", + ) + encoded.duration_ms = _ensure_range( + int(raw_duration_ms), + minimum=-32768, + maximum=32767, + label="servo duration", + ) + continue + + raise ValueError(f"unsupported servo command at index {index}: {command}") + + return message.SerializeToString() + + +__all__ = [ + "ServoCommand", + "encode_audio_pcm_data_message", + "encode_audio_pcm_end_message", + "encode_audio_pcm_start_message", + "encode_audio_wav_data_message", + "encode_audio_wav_end_message", + "encode_audio_wav_start_message", + "encode_servo_command_message", + "encode_state_command_message", + "parse_websocket_message", + "ws_pb2", +] diff --git a/stackchan_server/speak.py b/stackchan_server/speak.py index b748a37..743ae36 
100644 --- a/stackchan_server/speak.py +++ b/stackchan_server/speak.py @@ -2,7 +2,6 @@ import asyncio import io -import struct import wave from datetime import UTC, datetime from logging import getLogger @@ -12,6 +11,11 @@ from fastapi import WebSocket, WebSocketDisconnect from .listen import TimeoutError +from .protobuf_ws import ( + encode_audio_wav_data_message, + encode_audio_wav_end_message, + encode_audio_wav_start_message, +) from .types import AudioFormat, SpeechSynthesizer, StreamingSpeechSynthesizer logger = getLogger(__name__) @@ -22,11 +26,11 @@ def __init__( self, *, websocket: WebSocket, - ws_header_fmt: str, - wav_kind: int, - start_msg_type: int, - data_msg_type: int, - end_msg_type: int, + ws_header_fmt: str | None = None, + wav_kind: int | None = None, + start_msg_type: int | None = None, + data_msg_type: int | None = None, + end_msg_type: int | None = None, down_wav_chunk: int, down_segment_millis: int, down_segment_stagger_millis: int, @@ -36,11 +40,6 @@ def __init__( debug_recording: bool, ) -> None: self.ws = websocket - self.ws_header_fmt = ws_header_fmt - self.wav_kind = wav_kind - self.start_msg_type = start_msg_type - self.data_msg_type = data_msg_type - self.end_msg_type = end_msg_type self.down_wav_chunk = down_wav_chunk self.down_segment_millis = down_segment_millis self.down_segment_stagger_millis = down_segment_stagger_millis @@ -309,40 +308,21 @@ async def _send_segment( next_seq: Callable[[], int], ) -> None: logger.info("Sending segment bytes=%d", len(segment_pcm)) - start_payload = struct.pack(" int: - if not minimum <= value <= maximum: - raise ValueError(f"{label} must be between {minimum} and {maximum}: {value}") - return value - - -def _encode_servo_commands(commands: Sequence[ServoCommand]) -> bytes: - normalized = list(commands) - _ensure_range(len(normalized), minimum=0, maximum=255, label="servo command count") - - payload = bytearray() - payload.append(len(normalized)) - - for index, command in enumerate(normalized): - if 
len(command) == 2: - sleep_command = cast(ServoSleepCommand, command) - name, raw_duration_ms = sleep_command - name = str(name) - if name != "sleep": - raise ValueError( - f"unsupported servo command at index {index}: {name}" - ) - duration_ms = _ensure_range( - int(raw_duration_ms), - minimum=-32768, - maximum=32767, - label="sleep duration", - ) - payload.append(_ServoOp.SLEEP) - payload.extend(struct.pack(" None: await self.send_state_command(FirmwareState.IDLE) async def move_servo(self, commands: Sequence[ServoCommand]) -> None: - payload = _encode_servo_commands(commands) previous_counter = self._servo_sent_counter target_counter = previous_counter + 1 self._servo_sent_counter = target_counter self._pending_servo_wait_targets.append(target_counter) try: - await self._send_packet(_WsKind.SERVO_CMD, _WsMsgType.DATA, payload) + await self.ws.send_bytes( + encode_servo_command_message(self._next_down_seq(), commands) + ) except Exception: if ( self._pending_servo_wait_targets @@ -285,60 +205,65 @@ async def start_talking(self, text: str) -> None: async def _receive_loop(self) -> None: try: while True: - message = await self.ws.receive_bytes() - if len(message) < _WS_HEADER_SIZE: - await self.ws.close(code=1003, reason="header too short") + raw_message = await self.ws.receive_bytes() + try: + message = parse_websocket_message(raw_message) + except DecodeError: + await self.ws.close(code=1003, reason="invalid protobuf message") break - kind, msg_type, _reserved, _seq, payload_bytes = struct.unpack( - _WS_HEADER_FMT, message[:_WS_HEADER_SIZE] - ) - - payload = message[_WS_HEADER_SIZE:] - if payload_bytes != len(payload): - await self.ws.close(code=1003, reason="payload length mismatch") - break + if message.kind == ws_pb2.MESSAGE_KIND_AUDIO_PCM: + body_name = message.WhichOneof("body") - if kind == _WsKind.PCM: - if msg_type == _WsMsgType.START: + if ( + message.message_type == ws_pb2.MESSAGE_TYPE_START + and body_name == "audio_pcm_start" + ): if not await 
self._listener.handle_start(self.ws): break continue - if msg_type == _WsMsgType.DATA: + if ( + message.message_type == ws_pb2.MESSAGE_TYPE_DATA + and body_name == "audio_pcm_data" + ): + payload = bytes(message.audio_pcm_data.pcm_bytes) if not await self._listener.handle_data( - self.ws, payload_bytes, payload + self.ws, len(payload), payload ): break continue - if msg_type == _WsMsgType.END: + if ( + message.message_type == ws_pb2.MESSAGE_TYPE_END + and body_name == "audio_pcm_end" + ): await self._listener.handle_end( self.ws, - payload_bytes=payload_bytes, - payload=payload, + payload_bytes=0, + payload=b"", send_state_command=self.send_state_command, thinking_state=FirmwareState.THINKING, ) continue - await self.ws.close(code=1003, reason="unknown PCM msg type") + await self.ws.close(code=1003, reason="unknown PCM protobuf body") break - if kind == _WsKind.WAKEWORD_EVT: - self._handle_wakeword_event(msg_type, payload) + if message.kind == ws_pb2.MESSAGE_KIND_WAKE_WORD_EVT: + self._handle_wakeword_event(message) continue - if kind == _WsKind.STATE_EVT: - self._handle_state_event(msg_type, payload) + if message.kind == ws_pb2.MESSAGE_KIND_STATE_EVT: + self._handle_state_event(message) continue - if kind == _WsKind.SPEAK_DONE_EVT: - self._handle_speak_done_event(msg_type, payload) + if message.kind == ws_pb2.MESSAGE_KIND_SPEAK_DONE_EVT: + self._handle_speak_done_event(message) continue - if kind == _WsKind.SERVO_DONE_EVT: - self._handle_servo_done_event(msg_type, payload) + if message.kind == ws_pb2.MESSAGE_KIND_SERVO_DONE_EVT: + self._handle_servo_done_event(message) continue await self.ws.close(code=1003, reason="unsupported kind") @@ -348,20 +273,22 @@ async def _receive_loop(self) -> None: finally: self._closed = True - def _handle_wakeword_event(self, msg_type: int, payload: bytes) -> None: - if msg_type != _WsMsgType.DATA: + def _handle_wakeword_event(self, message: Any) -> None: + if message.message_type != ws_pb2.MESSAGE_TYPE_DATA: return - if 
len(payload) < 1: + if message.WhichOneof("body") != "wake_word_evt": + return + if not message.wake_word_evt.detected: return logger.info("Received wakeword event") self._wakeword_event.set() - def _handle_state_event(self, msg_type: int, payload: bytes) -> None: - if msg_type != _WsMsgType.DATA: + def _handle_state_event(self, message: Any) -> None: + if message.message_type != ws_pb2.MESSAGE_TYPE_DATA: return - if len(payload) < 1: + if message.WhichOneof("body") != "state_evt": return - raw_state = int(payload[0]) + raw_state = int(message.state_evt.state) try: state = FirmwareState(raw_state) self._current_firmware_state = state @@ -369,38 +296,29 @@ def _handle_state_event(self, msg_type: int, payload: bytes) -> None: except ValueError: logger.info("Received firmware state=%d", raw_state) - def _handle_speak_done_event(self, msg_type: int, payload: bytes) -> None: - if msg_type != _WsMsgType.DATA: + def _handle_speak_done_event(self, message: Any) -> None: + if message.message_type != ws_pb2.MESSAGE_TYPE_DATA: + return + if message.WhichOneof("body") != "speak_done_evt": return - if len(payload) < 1: + if not message.speak_done_evt.done: return self._speaker.handle_speak_done_event() - def _handle_servo_done_event(self, msg_type: int, payload: bytes) -> None: - if msg_type != _WsMsgType.DATA: + def _handle_servo_done_event(self, message: Any) -> None: + if message.message_type != ws_pb2.MESSAGE_TYPE_DATA: + return + if message.WhichOneof("body") != "servo_done_evt": return - if len(payload) < 1: + if not message.servo_done_evt.done: return self._servo_done_counter += 1 logger.info("Received servo done event") async def _send_state_command(self, state_id: int | FirmwareState) -> None: - payload = struct.pack(" None: - hdr = struct.pack( - _WS_HEADER_FMT, - int(kind), - int(msg_type), - 0, - self._down_seq, - len(payload), + await self.ws.send_bytes( + encode_state_command_message(self._next_down_seq(), int(state_id)) ) - await self.ws.send_bytes(hdr + payload) 
- self._down_seq += 1 async def _wait_for_counter( self, diff --git a/uv.lock b/uv.lock index 2540b7b..1a6db32 100644 --- a/uv.lock +++ b/uv.lock @@ -549,6 +549,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/cc/27ba60ad5a5f2067963e6a858743500df408eb5855e98be778eaef8c9b02/grpcio_status-1.76.0-py3-none-any.whl", hash = "sha256:380568794055a8efbbd8871162df92012e0228a5f6dffaf57f2a00c534103b18", size = 14425 }, ] +[[package]] +name = "grpcio-tools" +version = "1.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/77/17d60d636ccd86a0db0eccc24d02967bbc3eea86b9db7324b04507ebaa40/grpcio_tools-1.76.0.tar.gz", hash = "sha256:ce80169b5e6adf3e8302f3ebb6cb0c3a9f08089133abca4b76ad67f751f5ad88", size = 5390807 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/01/b16fe73f129df49811d886dc99d3813a33cf4d1c6e101252b81c895e929f/grpcio_tools-1.76.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:ff48969f81858397ef33a36b326f2dbe2053a48b254593785707845db73c8f44", size = 2546312 }, + { url = "https://files.pythonhosted.org/packages/25/17/2594c5feb76bb0b25bfbf91ec1075b276e1b2325e4bc7ea649a7b5dbf353/grpcio_tools-1.76.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa2f030fd0ef17926026ee8e2b700e388d3439155d145c568fa6b32693277613", size = 5839627 }, + { url = "https://files.pythonhosted.org/packages/c7/c6/097b1aa26fbf72fb3cdb30138a2788529e4f10d8759de730a83f5c06726e/grpcio_tools-1.76.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bacbf3c54f88c38de8e28f8d9b97c90b76b105fb9ddef05d2c50df01b32b92af", size = 2592817 }, + { url = "https://files.pythonhosted.org/packages/03/78/d1d985b48592a674509a85438c1a3d4c36304ddfc99d1b05d27233b51062/grpcio_tools-1.76.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = 
"sha256:0d4e4afe9a0e3c24fad2f1af45f98cf8700b2bfc4d790795756ba035d2ea7bdc", size = 2905186 }, + { url = "https://files.pythonhosted.org/packages/b9/0e/770afbb47f0b5f594b93a7b46a95b892abda5eebe60efb511e96cee52170/grpcio_tools-1.76.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fbbd4e1fc5af98001ceef5e780e8c10921d94941c3809238081e73818ef707f1", size = 2656188 }, + { url = "https://files.pythonhosted.org/packages/3d/2b/017c2fcf4c5d3cf00cf7d5ce21eb88521de0d89bdcf26538ad2862ec6d07/grpcio_tools-1.76.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b05efe5a59883ab8292d596657273a60e0c3e4f5a9723c32feb9fc3a06f2f3ef", size = 3109141 }, + { url = "https://files.pythonhosted.org/packages/e9/5f/2495f88e3d50c6f2c2da2752bad4fa3a30c52ece6c9d8b0c636cd8b1430b/grpcio_tools-1.76.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:be483b90e62b7892eb71fa1fc49750bee5b2ee35b5ec99dd2b32bed4bedb5d71", size = 3657892 }, + { url = "https://files.pythonhosted.org/packages/5e/1d/c4f39d31b19d9baf35d900bf3f969ce1c842f63a8560c8003ed2e5474760/grpcio_tools-1.76.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:630cd7fd3e8a63e20703a7ad816979073c2253e591b5422583c27cae2570de73", size = 3324778 }, + { url = "https://files.pythonhosted.org/packages/b4/b6/35ee3a6e4af85a93da28428f81f4b29bcb36f6986b486ad71910fcc02e25/grpcio_tools-1.76.0-cp313-cp313-win32.whl", hash = "sha256:eb2567280f9f6da5444043f0e84d8408c7a10df9ba3201026b30e40ef3814736", size = 993084 }, + { url = "https://files.pythonhosted.org/packages/f3/7a/5bd72344d86ee860e5920c9a7553cfe3bc7b1fce79f18c00ac2497f5799f/grpcio_tools-1.76.0-cp313-cp313-win_amd64.whl", hash = "sha256:0071b1c0bd0f5f9d292dca4efab32c92725d418e57f9c60acdc33c0172af8b53", size = 1158151 }, + { url = "https://files.pythonhosted.org/packages/f0/c0/aa20eebe8f3553b7851643e9c88d237c3a6ca30ade646897e25dbb27be99/grpcio_tools-1.76.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:c53c5719ef2a435997755abde3826ba4087174bd432aa721d8fac781fcea79e4", 
size = 2546297 }, + { url = "https://files.pythonhosted.org/packages/d9/98/6af702804934443c1d0d4d27d21b990d92d22ddd1b6bec6b056558cbbffa/grpcio_tools-1.76.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:e3db1300d7282264639eeee7243f5de7e6a7c0283f8bf05d66c0315b7b0f0b36", size = 5839804 }, + { url = "https://files.pythonhosted.org/packages/ea/8d/7725fa7b134ef8405ffe0a37c96eeb626e5af15d70e1bdac4f8f1abf842e/grpcio_tools-1.76.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b018a4b7455a7e8c16d0fdb3655a6ba6c9536da6de6c5d4f11b6bb73378165b", size = 2593922 }, + { url = "https://files.pythonhosted.org/packages/de/ff/5b6b5012c79fa72f9107dc13f7226d9ce7e059ea639fd8c779e0dd284386/grpcio_tools-1.76.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:ec6e4de3866e47cfde56607b1fae83ecc5aa546e06dec53de11f88063f4b5275", size = 2905327 }, + { url = "https://files.pythonhosted.org/packages/24/01/2691d369ea462cd6b6c92544122885ca01f7fa5ac75dee023e975e675858/grpcio_tools-1.76.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b8da4d828883913f1852bdd67383713ae5c11842f6c70f93f31893eab530aead", size = 2656214 }, + { url = "https://files.pythonhosted.org/packages/6a/e7/3f8856e6ec3dd492336a91572993344966f237b0e3819fbe96437b19d313/grpcio_tools-1.76.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5c120c2cf4443121800e7f9bcfe2e94519fa25f3bb0b9882359dd3b252c78a7b", size = 3109889 }, + { url = "https://files.pythonhosted.org/packages/f3/e4/ce5248072e47db276dc7e069e93978dcde490c959788ce7cce8081d0bfdc/grpcio_tools-1.76.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8b7df5591d699cd9076065f1f15049e9c3597e0771bea51c8c97790caf5e4197", size = 3657939 }, + { url = "https://files.pythonhosted.org/packages/f6/df/81ff88af93c52135e425cd5ec9fe8b186169c7d5f9e0409bdf2bbedc3919/grpcio_tools-1.76.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:a25048c5f984d33e3f5b6ad7618e98736542461213ade1bd6f2fcfe8ce804e3d", size = 3324752 }, + { url = "https://files.pythonhosted.org/packages/35/3d/f6b83044afbf6522254a3b509515a00fed16a819c87731a478dbdd1d35c1/grpcio_tools-1.76.0-cp314-cp314-win32.whl", hash = "sha256:4b77ce6b6c17869858cfe14681ad09ed3a8a80e960e96035de1fd87f78158740", size = 1015578 }, + { url = "https://files.pythonhosted.org/packages/95/4d/31236cddb7ffb09ba4a49f4f56d2608fec3bbb21c7a0a975d93bca7cd22e/grpcio_tools-1.76.0-cp314-cp314-win_amd64.whl", hash = "sha256:2ccd2c8d041351cc29d0fc4a84529b11ee35494a700b535c1f820b642f2a72fc", size = 1190242 }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1187,6 +1220,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649 }, ] +[[package]] +name = "setuptools" +version = "82.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -1412,6 +1454,7 @@ dependencies = [ { name = "fastapi" }, { name = "google-cloud-speech" }, { name = "google-genai" }, + { name = "protobuf" }, { name = "pydantic-settings" }, { name = "python-dotenv" }, { name = "uvicorn", extra = ["standard"] }, @@ -1420,6 +1463,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "grpcio-tools" }, { name = 
"ruff" }, { name = "ty" }, ] @@ -1432,6 +1476,7 @@ requires-dist = [ { name = "fastapi", specifier = ">=0.128.0" }, { name = "google-cloud-speech", specifier = ">=2.35.0" }, { name = "google-genai", specifier = ">=1.59.0" }, + { name = "protobuf", specifier = ">=6.33.3" }, { name = "pydantic-settings", specifier = ">=2.13.1" }, { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.40.0" }, @@ -1440,6 +1485,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "grpcio-tools", specifier = ">=1.76.0" }, { name = "ruff", specifier = ">=0.15.2" }, { name = "ty", specifier = ">=0.0.17" }, ] From 595e765cef8815a31edf7d97c3413320a3b3a6d6 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 19 Apr 2026 15:14:59 +0900 Subject: [PATCH 2/5] =?UTF-8?q?feat:=20Protobuf=E3=83=A1=E3=83=83=E3=82=BB?= =?UTF-8?q?=E3=83=BC=E3=82=B8=E3=82=BF=E3=82=A4=E3=83=97=E3=81=AB=E5=9F=BA?= =?UTF-8?q?=E3=81=A5=E3=81=8FWebSocket=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E5=87=A6=E7=90=86=E3=81=AE=E6=9B=B4=E6=96=B0=E3=81=A8?= =?UTF-8?q?=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- firmware/include/listening.hpp | 2 +- firmware/include/protocols.hpp | 60 ---------------- firmware/include/servo.hpp | 2 +- firmware/include/speaking.hpp | 8 ++- firmware/src/listening.cpp | 20 +++--- firmware/src/main.cpp | 53 ++++++-------- firmware/src/protocols.cpp | 104 --------------------------- firmware/src/servo.cpp | 20 +++--- firmware/src/speaking.cpp | 125 +++++++++++++++------------------ 9 files changed, 103 insertions(+), 291 deletions(-) diff --git a/firmware/include/listening.hpp b/firmware/include/listening.hpp index cf00113..0e18ba8 100644 --- a/firmware/include/listening.hpp +++ b/firmware/include/listening.hpp @@ -36,7 +36,7 @@ class Listening 
private: void updateLevelStats(const int16_t *samples, size_t sampleCount); - bool sendPacket(MessageType type, const int16_t *samples, size_t sampleCount); + bool sendPacket(stackchan_websocket_v1_MessageType type, const int16_t *samples, size_t sampleCount); void ringPush(const int16_t *src, size_t samples); size_t ringPop(int16_t *dst, size_t samples); diff --git a/firmware/include/protocols.hpp b/firmware/include/protocols.hpp index feea050..89cb7d6 100644 --- a/firmware/include/protocols.hpp +++ b/firmware/include/protocols.hpp @@ -7,70 +7,10 @@ #include "../lib/generated_protobuf/websocket-message.pb.h" -// Internal compatibility metadata for message routing after protobuf decode. -// This is no longer sent on the wire directly. - -enum class MessageKind : uint8_t -{ - AudioPcm = 1, // uplink PCM16LE stream (client -> server) - AudioWav = 2, // downlink WAV bytes (server -> client) - StateCmd = 3, // state transition command (server -> client) - WakeWordEvt = 4, // wake word event (client -> server) - StateEvt = 5, // current state event (client -> server) - SpeakDoneEvt = 6, // speaking completed event (client -> server) - ServoCmd = 7, // servo command sequence (server -> client) - ServoDoneEvt = 8, // servo sequence completed event (client -> server) -}; - -enum class MessageType : uint8_t -{ - START = 1, - DATA = 2, - END = 3, -}; - -struct __attribute__((packed)) WsHeader -{ - uint8_t kind; // MessageKind - uint8_t messageType; // MessageType - uint8_t reserved; // 0 (flags/reserved) - uint32_t seq; // sequence number - uint32_t payloadBytes; // bytes following the header -}; - -// payload for kind=StateCmd, messageType=DATA -// 1 byte: target state id (matches StateMachine::State) -enum class RemoteState : uint8_t -{ - Idle = 0, - Listening = 1, - Thinking = 2, - Speaking = 3, -}; - -// payload for kind=ServoCmd, messageType=DATA -// -// command op=Sleep: -// command op=MoveX/Y: -enum class ServoCommandOp : uint8_t -{ - Sleep = 0, - MoveX = 1, - MoveY = 
2, -}; - constexpr size_t kProtoAudioChunkMaxBytes = 4096; constexpr size_t kProtoServoCommandMaxCount = 255; constexpr size_t kMaxEncodedWebSocketMessageBytes = stackchan_websocket_v1_WebSocketMessage_size; -stackchan_websocket_v1_MessageKind toProtoMessageKind(MessageKind kind); -stackchan_websocket_v1_MessageType toProtoMessageType(MessageType type); -stackchan_websocket_v1_StackchanState toProtoState(RemoteState state); -stackchan_websocket_v1_ServoOperation toProtoServoOperation(ServoCommandOp op); - -RemoteState fromProtoState(stackchan_websocket_v1_StackchanState state); -ServoCommandOp fromProtoServoOperation(stackchan_websocket_v1_ServoOperation op); - bool setProtoAudioChunk( stackchan_websocket_v1_AudioChunk &chunk, const uint8_t *data, diff --git a/firmware/include/servo.hpp b/firmware/include/servo.hpp index f9b4e3b..de7aafd 100644 --- a/firmware/include/servo.hpp +++ b/firmware/include/servo.hpp @@ -51,7 +51,7 @@ class BodyServo struct Step { - ServoCommandOp op; + stackchan_websocket_v1_ServoOperation op; int8_t angle = 0; int16_t duration_ms = 0; }; diff --git a/firmware/include/speaking.hpp b/firmware/include/speaking.hpp index fbf90c1..e3630b3 100644 --- a/firmware/include/speaking.hpp +++ b/firmware/include/speaking.hpp @@ -19,8 +19,10 @@ class Speaking void begin(); void end(); - // Process one WS audio message of kind AudioWav - void handleWavMessage(const WsHeader &hdr, const uint8_t *body, size_t bodyLen); + // Process AudioWav protobuf messages. 
+ void handleWavStart(uint32_t seq, uint32_t sampleRate, uint16_t channels); + void handleWavData(uint32_t seq, const uint8_t *body, size_t bodyLen); + void handleWavEnd(uint32_t seq); // Called from main loop to progress playback state void loop(); @@ -37,7 +39,7 @@ class Speaking bool playing_ = false; bool mic_was_enabled_ = false; bool streaming_ = false; - uint16_t next_seq_ = 0; + uint32_t next_seq_ = 0; uint32_t sample_rate_ = 24000; uint16_t channels_ = 1; std::function on_speak_finished_; diff --git a/firmware/src/listening.cpp b/firmware/src/listening.cpp index 2aa9d77..edb2e35 100644 --- a/firmware/src/listening.cpp +++ b/firmware/src/listening.cpp @@ -58,7 +58,7 @@ bool Listening::startStreaming() last_level_ = 0; silence_since_ms_ = 0; streaming_ = true; - return sendPacket(MessageType::START, nullptr, 0); + return sendPacket(stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START, nullptr, 0); } bool Listening::stopStreaming() @@ -79,7 +79,7 @@ bool Listening::stopStreaming() { size_t chunk = std::min({chunk_samples_, to_send, tail_capacity}); size_t sent = ringPop(tail_buf.data(), chunk); - if (!sendPacket(MessageType::DATA, tail_buf.data(), sent)) + if (!sendPacket(stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA, tail_buf.data(), sent)) { ok = false; break; @@ -89,7 +89,7 @@ bool Listening::stopStreaming() } streaming_ = false; - ok = sendPacket(MessageType::END, nullptr, 0) && ok; + ok = sendPacket(stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END, nullptr, 0) && ok; return ok; } @@ -119,7 +119,7 @@ void Listening::loop() } size_t got = ringPop(send_buf.data(), chunk_samples_); - if (!sendPacket(MessageType::DATA, send_buf.data(), got)) + if (!sendPacket(stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA, send_buf.data(), got)) { streaming_ = false; log_i("WS send failed (data)"); @@ -187,7 +187,7 @@ bool Listening::shouldStopForSilence() const return elapsed >= kSilenceDurationMs; } -bool Listening::sendPacket(MessageType type, const 
int16_t *samples, size_t sampleCount) +bool Listening::sendPacket(stackchan_websocket_v1_MessageType type, const int16_t *samples, size_t sampleCount) { if ((WiFi.status() != WL_CONNECTED) || !ws_.isConnected()) { @@ -196,16 +196,16 @@ bool Listening::sendPacket(MessageType type, const int16_t *samples, size_t samp auto &message = g_listening_tx_message; message = stackchan_websocket_v1_WebSocketMessage_init_zero; - message.kind = toProtoMessageKind(MessageKind::AudioPcm); - message.message_type = toProtoMessageType(type); + message.kind = stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_PCM; + message.message_type = type; message.seq = seq_counter_++; switch (type) { - case MessageType::START: + case stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START: message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_start_tag; break; - case MessageType::DATA: + case stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA: message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_data_tag; if (!setProtoAudioChunk( message.body.audio_pcm_data, @@ -215,7 +215,7 @@ bool Listening::sendPacket(MessageType type, const int16_t *samples, size_t samp return false; } break; - case MessageType::END: + case stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END: message.which_body = stackchan_websocket_v1_WebSocketMessage_audio_pcm_end_tag; break; default: diff --git a/firmware/src/main.cpp b/firmware/src/main.cpp index ca932b3..8d41cca 100644 --- a/firmware/src/main.cpp +++ b/firmware/src/main.cpp @@ -96,8 +96,8 @@ void notifyWakeWordDetected() { auto &message = g_tx_message; message = stackchan_websocket_v1_WebSocketMessage_init_zero; - message.kind = toProtoMessageKind(MessageKind::WakeWordEvt); - message.message_type = toProtoMessageType(MessageType::DATA); + message.kind = stackchan_websocket_v1_MessageKind_MESSAGE_KIND_WAKE_WORD_EVT; + message.message_type = stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; message.seq = g_uplink_seq++; 
message.which_body = stackchan_websocket_v1_WebSocketMessage_wake_word_evt_tag; message.body.wake_word_evt.detected = true; @@ -111,8 +111,8 @@ void notifyCurrentState(StateMachine::State state) { auto &message = g_tx_message; message = stackchan_websocket_v1_WebSocketMessage_init_zero; - message.kind = toProtoMessageKind(MessageKind::StateEvt); - message.message_type = toProtoMessageType(MessageType::DATA); + message.kind = stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_EVT; + message.message_type = stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; message.seq = g_uplink_seq++; message.which_body = stackchan_websocket_v1_WebSocketMessage_state_evt_tag; message.body.state_evt.state = static_cast(static_cast(state)); @@ -126,8 +126,8 @@ void notifySpeakDone() { auto &message = g_tx_message; message = stackchan_websocket_v1_WebSocketMessage_init_zero; - message.kind = toProtoMessageKind(MessageKind::SpeakDoneEvt); - message.message_type = toProtoMessageType(MessageType::DATA); + message.kind = stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SPEAK_DONE_EVT; + message.message_type = stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; message.seq = g_uplink_seq++; message.which_body = stackchan_websocket_v1_WebSocketMessage_speak_done_evt_tag; message.body.speak_done_evt.done = true; @@ -141,8 +141,8 @@ void notifyServoDone() { auto &message = g_tx_message; message = stackchan_websocket_v1_WebSocketMessage_init_zero; - message.kind = toProtoMessageKind(MessageKind::ServoDoneEvt); - message.message_type = toProtoMessageType(MessageType::DATA); + message.kind = stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT; + message.message_type = stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; message.seq = g_uplink_seq++; message.which_body = stackchan_websocket_v1_WebSocketMessage_servo_done_evt_tag; message.body.servo_done_evt.done = true; @@ -154,19 +154,18 @@ void notifyServoDone() bool applyRemoteStateCommand(const 
stackchan_websocket_v1_StateCommand &command) { - RemoteState target = fromProtoState(command.state); - switch (target) + switch (command.state) { - case RemoteState::Idle: + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE: stateMachine.setState(StateMachine::Idle); return true; - case RemoteState::Listening: + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING: stateMachine.setState(StateMachine::Listening); return true; - case RemoteState::Thinking: + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING: stateMachine.setState(StateMachine::Thinking); return true; - case RemoteState::Speaking: + case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING: stateMachine.setState(StateMachine::Speaking); return true; default: @@ -190,7 +189,7 @@ bool applyServoCommand(const stackchan_websocket_v1_ServoCommandSequence &sequen for (pb_size_t i = 0; i < sequence.commands_count; ++i) { const auto &command = sequence.commands[i]; - const ServoCommandOp op = fromProtoServoOperation(command.op); + const auto op = command.op; if (command.duration_ms < std::numeric_limits::min() || command.duration_ms > std::numeric_limits::max()) @@ -200,7 +199,7 @@ bool applyServoCommand(const stackchan_websocket_v1_ServoCommandSequence &sequen } payload.push_back(static_cast(op)); - if (op == ServoCommandOp::Sleep) + if (op == stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP) { appendInt16Le(payload, static_cast(command.duration_ms)); continue; @@ -276,34 +275,24 @@ void handleWsEvent(WStype_t type, uint8_t *payload, size_t length) { case stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_WAV: { - WsHeader compat{}; - compat.kind = static_cast(MessageKind::AudioWav); - compat.messageType = static_cast(rx.message_type); - compat.seq = rx.seq; - if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START && rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_start_tag) { - uint8_t 
body[6]{}; - uint32_t sample_rate = rx.body.audio_wav_start.sample_rate; - uint16_t channels = static_cast(rx.body.audio_wav_start.channels); - memcpy(body, &sample_rate, sizeof(sample_rate)); - memcpy(body + sizeof(sample_rate), &channels, sizeof(channels)); - compat.payloadBytes = sizeof(body); - speaking.handleWavMessage(compat, body, sizeof(body)); + speaking.handleWavStart( + rx.seq, + rx.body.audio_wav_start.sample_rate, + static_cast(rx.body.audio_wav_start.channels)); } else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA && rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_data_tag) { size_t body_len = getProtoAudioChunkSize(rx.body.audio_wav_data); - compat.payloadBytes = body_len; - speaking.handleWavMessage(compat, getProtoAudioChunkBytes(rx.body.audio_wav_data), body_len); + speaking.handleWavData(rx.seq, getProtoAudioChunkBytes(rx.body.audio_wav_data), body_len); } else if (rx.message_type == stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END && rx.which_body == stackchan_websocket_v1_WebSocketMessage_audio_wav_end_tag) { - compat.payloadBytes = 0; - speaking.handleWavMessage(compat, nullptr, 0); + speaking.handleWavEnd(rx.seq); } else { diff --git a/firmware/src/protocols.cpp b/firmware/src/protocols.cpp index 8b61312..16af592 100644 --- a/firmware/src/protocols.cpp +++ b/firmware/src/protocols.cpp @@ -5,110 +5,6 @@ #include #include -stackchan_websocket_v1_MessageKind toProtoMessageKind(MessageKind kind) -{ - switch (kind) - { - case MessageKind::AudioPcm: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_PCM; - case MessageKind::AudioWav: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_AUDIO_WAV; - case MessageKind::StateCmd: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_CMD; - case MessageKind::WakeWordEvt: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_WAKE_WORD_EVT; - case MessageKind::StateEvt: - return 
stackchan_websocket_v1_MessageKind_MESSAGE_KIND_STATE_EVT; - case MessageKind::SpeakDoneEvt: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SPEAK_DONE_EVT; - case MessageKind::ServoCmd: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_CMD; - case MessageKind::ServoDoneEvt: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_SERVO_DONE_EVT; - default: - return stackchan_websocket_v1_MessageKind_MESSAGE_KIND_UNSPECIFIED; - } -} - -stackchan_websocket_v1_MessageType toProtoMessageType(MessageType type) -{ - switch (type) - { - case MessageType::START: - return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_START; - case MessageType::DATA: - return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_DATA; - case MessageType::END: - return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_END; - default: - return stackchan_websocket_v1_MessageType_MESSAGE_TYPE_UNSPECIFIED; - } -} - -stackchan_websocket_v1_StackchanState toProtoState(RemoteState state) -{ - switch (state) - { - case RemoteState::Idle: - return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE; - case RemoteState::Listening: - return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING; - case RemoteState::Thinking: - return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING; - case RemoteState::Speaking: - return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING; - default: - return stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE; - } -} - -stackchan_websocket_v1_ServoOperation toProtoServoOperation(ServoCommandOp op) -{ - switch (op) - { - case ServoCommandOp::Sleep: - return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP; - case ServoCommandOp::MoveX: - return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X; - case ServoCommandOp::MoveY: - return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y; - default: - return stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP; - } -} - 
-RemoteState fromProtoState(stackchan_websocket_v1_StackchanState state) -{ - switch (state) - { - case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_IDLE: - return RemoteState::Idle; - case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_LISTENING: - return RemoteState::Listening; - case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_THINKING: - return RemoteState::Thinking; - case stackchan_websocket_v1_StackchanState_STACKCHAN_STATE_SPEAKING: - return RemoteState::Speaking; - default: - return RemoteState::Idle; - } -} - -ServoCommandOp fromProtoServoOperation(stackchan_websocket_v1_ServoOperation op) -{ - switch (op) - { - case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP: - return ServoCommandOp::Sleep; - case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X: - return ServoCommandOp::MoveX; - case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y: - return ServoCommandOp::MoveY; - default: - return ServoCommandOp::Sleep; - } -} - bool setProtoAudioChunk( stackchan_websocket_v1_AudioChunk &chunk, const uint8_t *data, diff --git a/firmware/src/servo.cpp b/firmware/src/servo.cpp index bd1f826..2777d02 100644 --- a/firmware/src/servo.cpp +++ b/firmware/src/servo.cpp @@ -134,13 +134,13 @@ void BodyServo::loop() bool finished = false; switch (step.op) { - case ServoCommandOp::Sleep: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP: finished = static_cast(now - sleep_deadline_ms_) >= 0; break; - case ServoCommandOp::MoveX: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X: finished = !axis_x_.moving; break; - case ServoCommandOp::MoveY: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y: finished = !axis_y_.moving; break; default: @@ -187,13 +187,13 @@ bool BodyServo::enqueueSequence(const uint8_t *payload, size_t payload_len) return false; } - const ServoCommandOp op = static_cast(payload[offset++]); + const auto op = static_cast(payload[offset++]); Step 
step{}; step.op = op; switch (op) { - case ServoCommandOp::Sleep: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP: if (offset + sizeof(int16_t) > payload_len) { log_w("ServoCmd sleep truncated at command=%u", static_cast(i)); @@ -202,8 +202,8 @@ bool BodyServo::enqueueSequence(const uint8_t *payload, size_t payload_len) step.duration_ms = readInt16Le(payload + offset); offset += sizeof(int16_t); break; - case ServoCommandOp::MoveX: - case ServoCommandOp::MoveY: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y: if (offset + sizeof(int8_t) + sizeof(int16_t) > payload_len) { log_w("ServoCmd move truncated at command=%u", static_cast(i)); @@ -380,13 +380,13 @@ void BodyServo::startCurrentStep(uint32_t now) step_started_ = true; switch (step.op) { - case ServoCommandOp::Sleep: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_SLEEP: sleep_deadline_ms_ = now + clampDuration(step.duration_ms); break; - case ServoCommandOp::MoveX: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_X: startMove(axis_x_, step.angle, step.duration_ms); break; - case ServoCommandOp::MoveY: + case stackchan_websocket_v1_ServoOperation_SERVO_OPERATION_MOVE_Y: startMove(axis_y_, step.angle, step.duration_ms); break; default: diff --git a/firmware/src/speaking.cpp b/firmware/src/speaking.cpp index 24da288..c1bb8d9 100644 --- a/firmware/src/speaking.cpp +++ b/firmware/src/speaking.cpp @@ -1,5 +1,4 @@ #include "speaking.hpp" -#include #include void Speaking::reset() @@ -37,91 +36,77 @@ void Speaking::end() reset(); } -void Speaking::handleWavMessage(const WsHeader &hdr, const uint8_t *body, size_t bodyLen) +void Speaking::handleWavStart(uint32_t seq, uint32_t sampleRate, uint16_t channels) { - auto msgType = static_cast(hdr.messageType); + current_buffer_ = (current_buffer_ + 1) % 3; + std::vector &buf = buffer_[current_buffer_]; + buf.clear(); + playing_ = false; + 
streaming_ = true; + next_seq_ = seq + 1; + state_.setState(StateMachine::Speaking); - if (msgType == MessageType::START) + if (sampleRate > 0) { - current_buffer_ = (current_buffer_ + 1) % 3; - std::vector &buf = buffer_[current_buffer_]; - buf.clear(); - playing_ = false; - streaming_ = true; - next_seq_ = hdr.seq + 1; - state_.setState(StateMachine::Speaking); - - // START payload (optional): - if (body && bodyLen >= 6) - { - uint32_t sr = 0; - uint16_t ch = 1; - memcpy(&sr, body, sizeof(sr)); - memcpy(&ch, body + sizeof(sr), sizeof(ch)); - if (sr > 0) - { - sample_rate_ = sr; - } - if (ch > 0) - { - channels_ = ch; - } - log_i("TTS meta: sample_rate=%u channels=%u", (unsigned)sample_rate_, (unsigned)channels_); - } - else - { - log_w("TTS START without meta, fallback sr=%u ch=%u", (unsigned)sample_rate_, (unsigned)channels_); - } - log_i("TTS stream start seq=%u", (unsigned)hdr.seq); - return; + sample_rate_ = sampleRate; + } + if (channels > 0) + { + channels_ = channels; } - if (msgType == MessageType::DATA) + log_i("TTS meta: sample_rate=%u channels=%u", (unsigned)sample_rate_, (unsigned)channels_); + log_i("TTS stream start seq=%u", (unsigned)seq); +} + +void Speaking::handleWavData(uint32_t seq, const uint8_t *body, size_t bodyLen) +{ + if (!streaming_) { - if (!streaming_) - { - return; - } + return; + } - std::vector &buf = buffer_[current_buffer_]; + std::vector &buf = buffer_[current_buffer_]; - if (hdr.seq != next_seq_) - { - log_w("TTS seq gap: got=%u expected=%u", (unsigned)hdr.seq, (unsigned)next_seq_); - // TCP 前提で再送しない。検知だけして次を受ける。 - next_seq_ = hdr.seq + 1; - } - else - { - next_seq_++; - } + if (seq != next_seq_) + { + log_w("TTS seq gap: got=%u expected=%u", (unsigned)seq, (unsigned)next_seq_); + // TCP 前提で再送しない。検知だけして次を受ける。 + next_seq_ = seq + 1; + } + else + { + next_seq_++; + } - buf.insert(buf.end(), body, body + bodyLen); - log_d("TTS chunk size=%u recv=%u", (unsigned)bodyLen, (unsigned)buf.size()); + buf.insert(buf.end(), body, body + 
bodyLen); + log_d("TTS chunk size=%u recv=%u", (unsigned)bodyLen, (unsigned)buf.size()); +} + +void Speaking::handleWavEnd(uint32_t seq) +{ + if (!streaming_) + { return; } - if (msgType == MessageType::END) + if (seq != next_seq_) { - if (!streaming_) - { - return; - } + log_w("TTS end seq gap: got=%u expected=%u", (unsigned)seq, (unsigned)next_seq_); + } - std::vector &buf = buffer_[current_buffer_]; - streaming_ = false; - next_seq_ = 0; + std::vector &buf = buffer_[current_buffer_]; + streaming_ = false; + next_seq_ = 0; - if (!buf.empty()) - { - playing_ = true; + if (!buf.empty()) + { + playing_ = true; - const int16_t *samples = reinterpret_cast(buf.data()); - size_t sample_len = buf.size() / sizeof(int16_t); - bool stereo = channels_ > 1; - M5.Speaker.playRaw(samples, sample_len, sample_rate_, stereo, 1, 0); - } - return; + const int16_t *samples = reinterpret_cast(buf.data()); + size_t sample_len = buf.size() / sizeof(int16_t); + bool stereo = channels_ > 1; + M5.Speaker.playRaw(samples, sample_len, sample_rate_, stereo, 1, 0); } } From 5db30a5123b2f2d32d987b42aa87660c510d3732 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 19 Apr 2026 15:16:08 +0900 Subject: [PATCH 3/5] =?UTF-8?q?feat:=20SpeakHandler=E3=81=AE=E3=82=B3?= =?UTF-8?q?=E3=83=B3=E3=82=B9=E3=83=88=E3=83=A9=E3=82=AF=E3=82=BF=E3=81=8B?= =?UTF-8?q?=E3=82=89=E6=9C=AA=E4=BD=BF=E7=94=A8=E3=81=AE=E5=BC=95=E6=95=B0?= =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stackchan_server/speak.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/stackchan_server/speak.py b/stackchan_server/speak.py index 743ae36..6153576 100644 --- a/stackchan_server/speak.py +++ b/stackchan_server/speak.py @@ -26,11 +26,6 @@ def __init__( self, *, websocket: WebSocket, - ws_header_fmt: str | None = None, - wav_kind: int | None = None, - start_msg_type: int | None = None, - data_msg_type: int | None = 
None, - end_msg_type: int | None = None, down_wav_chunk: int, down_segment_millis: int, down_segment_stagger_millis: int, From 0b363e9218a9e7184335454215c07ce264f99395 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 19 Apr 2026 15:30:47 +0900 Subject: [PATCH 4/5] =?UTF-8?q?feat:=20=E3=83=97=E3=83=AD=E3=83=88?= =?UTF-8?q?=E3=82=B3=E3=83=AB=E9=96=A2=E9=80=A3=E3=81=AE=E5=9E=8B=E3=82=92?= =?UTF-8?q?stackchan=E3=81=8B=E3=82=89wsproto=E3=81=AB=E5=A4=89=E6=9B=B4?= =?UTF-8?q?=E3=81=97=E3=80=81protobuf=E3=83=A1=E3=83=83=E3=82=BB=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=AE=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC=E3=83=89?= =?UTF-8?q?/=E3=83=87=E3=82=B3=E3=83=BC=E3=83=89=E5=87=A6=E7=90=86?= =?UTF-8?q?=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AGENTS.md | 49 ++++++----- docs/websocket_protocols_ja.md | 149 +++++++++++++++++---------------- 2 files changed, 108 insertions(+), 90 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6283f41..26b7ae4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,6 +5,7 @@ ## 全体像 - CoreS3 側は `firmware/`、Python サーバー側は `stackchan_server/`。 +- WebSocket の on-wire 形式は手書きバイナリヘッダではなく `protobuf/websocket-message.proto` で定義した protobuf。 - 音声 uplink は `AudioPcm`、音声 downlink は `AudioWav`(実体は raw PCM)。 - サーバーは FastAPI を公開し、WebSocket と REST API の両方を持つ。 - サーボ制御が追加済みで、WebSocket プロトコルには `ServoCmd` / `ServoDoneEvt` がある。 @@ -12,42 +13,49 @@ ## 状態遷移の要点 - ファームウェア状態: `Idle`, `Listening`, `Thinking`, `Speaking`, `Disconnected` -- サーバーから指示できるのは `StateCmd` の `0..3` (`Idle`〜`Speaking`) +- サーバーから指示できるのは `StateCmd` の `Idle` / `Listening` / `Thinking` / `Speaking` - `Disconnected` はファームウェア内部状態で、WebSocket 切断時に入る - `WakeWordEvt` を受けるか、REST API の wakeword 擬似発火で talk session が始まる ## WebSocket プロトコル要約 -- 共通ヘッダ: `WsHeader` (``, packed, little-endian) +- 1 WebSocket binary frame = 1 protobuf `WebSocketMessage` +- protobuf 定義: `protobuf/websocket-message.proto` +- package: 
`stackchan.websocket.v1` +- envelope fields + - `kind` + - `message_type` + - `seq` + - `oneof body` - `kind` - - `1=AudioPcm` - - `2=AudioWav` - - `3=StateCmd` - - `4=WakeWordEvt` - - `5=StateEvt` - - `6=SpeakDoneEvt` - - `7=ServoCmd` - - `8=ServoDoneEvt` + - `AudioPcm` + - `AudioWav` + - `StateCmd` + - `WakeWordEvt` + - `StateEvt` + - `SpeakDoneEvt` + - `ServoCmd` + - `ServoDoneEvt` - `messageType` - - `1=START` - - `2=DATA` - - `3=END` + - `START` + - `DATA` + - `END` ### 現行挙動 - `AudioPcm` - PCM16LE / 16kHz / 1ch - - `START -> DATA* -> END` + - `AudioPcmStart -> AudioChunk* -> AudioPcmEnd` - `DATA` は 2000 samples(4000 bytes, 約 125ms)ごと - 無音 3 秒で自動終了 - `AudioWav` - 名前に反して WAV コンテナではなく PCM ストリーム - - `START` payload は `` + - `AudioWavStart.sample_rate` / `AudioWavStart.channels` を送る - `DATA` chunk は既定 4096 bytes - 約 2 秒セグメントで送信し、2 本目は約 1 秒後に先行開始 - `ServoCmd` - - payload: `` - - op: `0=Sleep`, `1=MoveX`, `2=MoveY` + - `ServoCommandSequence.commands[]` + - op: `Sleep`, `MoveX`, `MoveY` - 新規コマンド受信時は実行中シーケンスを置き換える ## サーバー側 (`stackchan_server/`) @@ -82,15 +90,16 @@ - `src/main.cpp` - Wi-Fi 接続後、`/ws/stackchan` に接続 - - `AudioWav`, `StateCmd`, `ServoCmd` を受信処理 + - protobuf `WebSocketMessage` を decode して `AudioWav`, `StateCmd`, `ServoCmd` を受信処理 - 通信が 60 秒止まると `Thinking` / `Speaking` から `Idle` に戻す - `src/listening.cpp` - マイク読み取り 256 サンプル単位 - 2 秒リングバッファ + - protobuf の `AudioPcmStart/Data/End` を送信 - 無音 3 秒で停止 - `src/speaking.cpp` - - 3 本バッファで TTS セグメント受信 - - `END` 後に `M5.Speaker.playRaw()` で再生 + - 3 本バッファで protobuf `AudioWavStart/Data/End` を受信 + - `AudioWavEnd` 後に `M5.Speaker.playRaw()` で再生 - 再生完了時に `SpeakDoneEvt` - `src/servo.cpp` - `ServoCmd` を非同期実行 diff --git a/docs/websocket_protocols_ja.md b/docs/websocket_protocols_ja.md index c417ab2..b816c13 100644 --- a/docs/websocket_protocols_ja.md +++ b/docs/websocket_protocols_ja.md @@ -2,47 +2,56 @@ コーディングエージェント向け指示: このディレクトリにはプロトコルのみを記述し、CPP、Pythonの実装コードの例を記述する必要はありません。どんなプロトコルが実装されているか確認するために用います。 --> -# WebSocket バイナリプロトコル仕様 
+# WebSocket protobuf プロトコル仕様 -このドキュメントは、CoreS3 ファームウェアと Python サーバーがやり取りする WebSocket バイナリプロトコルの現行実装をまとめたものです。 +このドキュメントは、CoreS3 ファームウェアと Python サーバーがやり取りする WebSocket プロトコルの現行実装をまとめたものです。 -## 共通ヘッダ +現行実装では、1 回の WebSocket binary frame に 1 つの protobuf `WebSocketMessage` を格納します。 -共通ヘッダ `WsHeader` は `firmware/include/protocols.hpp` で定義されています。 +## protobuf 定義 -- packed -- little-endian -- 構造: `` +- proto file: `protobuf/websocket-message.proto` +- package: `stackchan.websocket.v1` +- top-level message: `WebSocketMessage` + +### `WebSocketMessage` | フィールド | 型 | 説明 | | --- | --- | --- | -| `kind` | `uint8` | メッセージ種別 | -| `messageType` | `uint8` | `1=START`, `2=DATA`, `3=END` | -| `reserved` | `uint8` | 現在は常に `0` | -| `seq` | `uint16` | 送信側でインクリメントするシーケンス番号 | -| `payloadBytes` | `uint16` | ヘッダ直後に続く payload のバイト数 | - -### `kind` 一覧 - -| kind | 名前 | 方向 | 用途 | -| --- | --- | --- | --- | -| `1` | `AudioPcm` | CoreS3 → Server | マイク音声 PCM ストリーム | -| `2` | `AudioWav` | Server → CoreS3 | TTS 音声 PCM ストリーム | -| `3` | `StateCmd` | Server → CoreS3 | 状態遷移指示 | -| `4` | `WakeWordEvt` | CoreS3 → Server | ウェイクワード検出通知 | -| `5` | `StateEvt` | CoreS3 → Server | 現在状態通知 | -| `6` | `SpeakDoneEvt` | CoreS3 → Server | 音声再生完了通知 | -| `7` | `ServoCmd` | Server → CoreS3 | サーボ動作シーケンス指示 | -| `8` | `ServoDoneEvt` | CoreS3 → Server | サーボ動作完了通知 | - -## `AudioPcm` (`kind=1`) +| `kind` | `MessageKind` | メッセージ種別 | +| `message_type` | `MessageType` | `START` / `DATA` / `END` | +| `seq` | `uint32` | 送信側でインクリメントするシーケンス番号 | +| `body` | `oneof` | `kind` / `message_type` に対応する typed body | + +### `MessageKind` 一覧 + +| 名前 | 方向 | 用途 | +| --- | --- | --- | +| `AudioPcm` | CoreS3 → Server | マイク音声 PCM ストリーム | +| `AudioWav` | Server → CoreS3 | TTS 音声 PCM ストリーム | +| `StateCmd` | Server → CoreS3 | 状態遷移指示 | +| `WakeWordEvt` | CoreS3 → Server | ウェイクワード検出通知 | +| `StateEvt` | CoreS3 → Server | 現在状態通知 | +| `SpeakDoneEvt` | CoreS3 → Server | 音声再生完了通知 | +| `ServoCmd` | Server → CoreS3 | サーボ動作シーケンス指示 | +| `ServoDoneEvt` | CoreS3 → 
Server | サーボ動作完了通知 | + +### `MessageType` 一覧 + +| 名前 | 用途 | +| --- | --- | +| `START` | ストリームまたはセグメント開始 | +| `DATA` | データ本体 | +| `END` | ストリームまたはセグメント終了 | + +## マイク入力 `AudioPcm` - 方向: CoreS3 → Server - フォーマット: PCM16LE / 16kHz / 1ch -- シーケンス: `START` → `DATA` 複数回 → `END` -- `START` payload: なし -- `DATA` payload: PCM16LE 生データ -- `END` payload: 現行ファームウェアではなし +- シーケンス: `AudioPcmStart` → `AudioChunk` 複数回 → `AudioPcmEnd` +- `START` body: `AudioPcmStart {}` +- `DATA` body: `AudioChunk { bytes pcm_bytes; }` +- `END` body: `AudioPcmEnd {}` ### 現行実装メモ @@ -53,19 +62,20 @@ - 無音判定は平均絶対振幅 `<= 200` が 3 秒継続したときに発火します。 - 停止時は未送信サンプルを `DATA` で flush してから `END` を送ります。 -## `AudioWav` (`kind=2`) +## スピーカ再生 `AudioWav` - 方向: Server → CoreS3 - 名前は `AudioWav` ですが、実際に送っているのは WAV コンテナではなく PCM16LE ストリームです。 -- 1 セグメントの流れは `START` → `DATA` 複数回 → `END` です。 +- 1 セグメントの流れは `AudioWavStart` → `AudioChunk` 複数回 → `AudioWavEnd` です。 -### payload 形式 +### body 形式 -| messageType | payload | +| messageType | body | | --- | --- | -| `START` | `` | -| `DATA` | PCM16LE 生データ | -| `END` | なし | +| `START` | `AudioWavStart { sample_rate, channels }` | +| `DATA` | `AudioChunk { bytes pcm_bytes; }` | +| `DATA` | `AudioChunk { pcm_bytes }` | +| `END` | `AudioWavEnd {}` | ### 現行実装メモ @@ -75,18 +85,18 @@ - CoreS3 は 3 本の受信バッファを持ち、`END` 到達後に `M5.Speaker.playRaw()` で再生します。 - `seq` の欠損は検知しますが、TCP 前提のため再送制御は行いません。 -## `StateCmd` (`kind=3`) +## 状態指示 `StateCmd` - 方向: Server → CoreS3 - `messageType`: `DATA` のみ -- payload: 1 byte の target state id +- body: `StateCommand { state }` -| 値 | 状態 | -| --- | --- | -| `0` | `Idle` | -| `1` | `Listening` | -| `2` | `Thinking` | -| `3` | `Speaking` | +利用する状態名: + +- `Idle` +- `Listening` +- `Thinking` +- `Speaking` ### 現行実装メモ @@ -94,54 +104,53 @@ - 音声 uplink の `END` を受けると、Server は `Thinking` を指示します。 - `proxy.speak()` 完了後、Server は `Idle` を指示します。 -## `WakeWordEvt` (`kind=4`) +## ウェイクワード検出 `WakeWordEvt` - 方向: CoreS3 → Server - `messageType`: `DATA` のみ -- payload: 1 byte (`1=detected`) +- 
body: `WakeWordEvent { detected }` - `Idle` 中のウェイクワード検出をサーバー側に通知します。 - REST API の `POST /v1/stackchan/{ip}/wakeword` は、このイベントをサーバー内部で擬似発火させます。 -## `StateEvt` (`kind=5`) +## 状態通知 `StateEvt` - 方向: CoreS3 → Server - `messageType`: `DATA` のみ -- payload: 1 byte の current state id +- body: `StateEvent { state }` -| 値 | 状態 | -| --- | --- | -| `0` | `Idle` | -| `1` | `Listening` | -| `2` | `Thinking` | -| `3` | `Speaking` | +利用する状態名: + +- `Idle` +- `Listening` +- `Thinking` +- `Speaking` - CoreS3 は状態遷移の entry hook で送信します。 - WebSocket 切断中は `Disconnected` 状態になりますが、切断時は uplink 送信できないため `StateEvt` では通知されません。 -## `SpeakDoneEvt` (`kind=6`) +## 発話完了通知 `SpeakDoneEvt` - 方向: CoreS3 → Server - `messageType`: `DATA` のみ -- payload: 1 byte (`1=done`) +- body: `SpeakDoneEvent { done }` - CoreS3 側の音声再生完了を通知します。 - Server はこの通知を待って `proxy.speak()` を完了させます。 -## `ServoCmd` (`kind=7`) +## サーボ動作指示 `ServoCmd` - 方向: Server → CoreS3 - `messageType`: `DATA` のみ -- payload はサーボ動作シーケンス全体です。 +- body: `ServoCommandSequence { commands }` -### payload 構造 +### body 構造 -- 先頭 1 byte: `` -- 続いて `command_count` 個のコマンド +- `commands` は最大 255 個まで(`protobuf/websocket-message.options` で nanopb の `max_count:255` を指定) -| op | 名前 | payload | -| --- | --- | --- | -| `0` | `Sleep` | `` | -| `1` | `MoveX` | `` | -| `2` | `MoveY` | `` | +| 名前 | `ServoCommand` のフィールド | +| --- | --- | +| `Sleep` | `op`, `duration_ms` | +| `MoveX` | `op`, `angle`, `duration_ms` | +| `MoveY` | `op`, `angle`, `duration_ms` | ### 現行実装メモ @@ -150,10 +159,10 @@ - `duration_ms <= 0` は即時反映になります。 - 新しい `ServoCmd` を受けると、実行中シーケンスは置き換えられます。 -## `ServoDoneEvt` (`kind=8`) +## サーボ動作完了通知 `ServoDoneEvt` - 方向: CoreS3 → Server - `messageType`: `DATA` のみ -- payload: 1 byte (`1=done`) +- body: `ServoDoneEvent { done }` - 直前に受信したサーボシーケンスの完了通知です。 - Server は `proxy.wait_servo_complete()` でこの完了を待てます。 From 137bdbd8b4e0a9894e8035bbc705358e03c4b7a0 Mon Sep 17 00:00:00 2001 From: Atsushi Morimoto <74th.tech@gmail.com> Date: Sun, 19 Apr 2026 15:33:39 +0900 Subject: 
[PATCH 5/5] feat: Add instructions for writing commit messages in English --- .github/copilot-instructions.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..9086770 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1 @@ +コミットメッセージは英語で書いてください。日本語のコミットメッセージは避けてください。