From 350a81b3d893a6d23e8df27a3e9b8a407158ca07 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Fri, 17 Apr 2026 13:14:05 +0000 Subject: [PATCH 1/9] wip --- ...-04-17-atomic-action-abstraction-design.md | 1367 +++++++++++++++++ .../lab/sim/atomic_actions/__init__.py | 58 + embodichain/lab/sim/atomic_actions/actions.py | 546 +++++++ embodichain/lab/sim/atomic_actions/core.py | 371 +++++ embodichain/lab/sim/atomic_actions/engine.py | 328 ++++ tests/sim/atomic_actions/__init__.py | 17 + tests/sim/atomic_actions/test_core.py | 229 +++ 7 files changed, 2916 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md create mode 100644 embodichain/lab/sim/atomic_actions/__init__.py create mode 100644 embodichain/lab/sim/atomic_actions/actions.py create mode 100644 embodichain/lab/sim/atomic_actions/core.py create mode 100644 embodichain/lab/sim/atomic_actions/engine.py create mode 100644 tests/sim/atomic_actions/__init__.py create mode 100644 tests/sim/atomic_actions/test_core.py diff --git a/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md b/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md new file mode 100644 index 00000000..97230290 --- /dev/null +++ b/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md @@ -0,0 +1,1367 @@ +# Atomic Action Abstraction Design for Embodied AI Motion Generation + +**Date:** 2026-04-17 +**Status:** Design Draft + +--- + +## 1. Overview + +This document describes the design of an **atomic action abstraction layer** for embodied AI motion generation. The design supports both demo simulation and data generation within gym environments, providing a consistent interface for agentic workflows while maintaining extensibility for new motion primitives. + +--- + +## 2. Design Principles + +### 2.1 Leverage Existing Infrastructure +- **MotionGenerator**: Use existing motion planning with TOPPRA and interpolation +- **Solvers**: Integrate IK/FK solvers (BaseSolver, PinkSolver, etc.) +- **Warp**: Utilize GPU-accelerated trajectory interpolation from `warp` module + +### 2.2 Object Semantics +Atomic actions should consider: +- **Semantic labels**: Object categories (e.g., "apple", "bottle") +- **Affordance data**: Grasp poses, interaction points +- **Geometry**: Shape, dimensions, collision boundaries + +### 2.3 Consistent Interface +- Unified API across all motion primitives +- Standard parameter types (torch.Tensor, np.ndarray) +- Predictable return types (PlanResult from motion planning) + +### 2.4 Extensibility +- Registry-based motion primitive registration +- Plugin architecture for custom actions +- Strategy pattern for interaction behaviors + +--- + +## 3. 
Architecture + +### 3.1 Core Components + +``` +┌─────────────────────────────────────────────────────────────┐ +│ AtomicActionEngine │ +├─────────────────────────────────────────────────────────────┤ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ +│ │ Semantic │ │ Motion │ │ Affordance │ │ +│ │ Analyzer │ │ Planner │ │ Provider │ │ +│ └──────────────┘ └──────────────┘ └──────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + ▼ ▼ ▼ +┌───────────────┐ ┌────────────────┐ ┌──────────────────┐ +│ ReachAction │ │ GraspAction │ │ MoveAction │ +└───────────────┘ └────────────────┘ └──────────────────┘ +``` + +### 3.2 Class Hierarchy + +```python +# Base class for all atomic actions +AtomicAction(ABC) + ├── ReachAction + ├── GraspAction + ├── ReleaseAction + ├── MoveAction + ├── RotateAction + ├── PushAction + └── CustomAction + +# Configuration classes +ActionConfig(ABC) + ├── ReachConfig + ├── GraspConfig + └── ... + +# Uses existing PlanResult from embodichain.lab.sim.planners +PlanResult (existing) + ├── success: bool + ├── positions: torch.Tensor # [T, DOF] + ├── xpos_list: torch.Tensor # [T, 4, 4] + ├── final_pose: torch.Tensor + └── execution_time: float +``` + +--- + +## 4. Core API Design + +### 4.1 Base AtomicAction Interface + +```python +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Optional, Dict, Any, List +import torch +import numpy as np + +from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType +from embodichain.utils import configclass + + +# ============================================================================= +# Affordance Classes +# ============================================================================= + +@dataclass +class Affordance: + """Base class for affordance data. + + Affordance represents interaction possibilities for an object. + This is the base class for specific affordance types. + """ + object_label: str = "" + """Label of the object this affordance belongs to.""" + + def get_batch_size(self) -> int: + """Return the batch size of this affordance data.""" + return 1 + + +@dataclass +class GraspPose(Affordance): + """Grasp pose affordance containing a batch of 4x4 transformation matrices. + + Each grasp pose represents a valid end-effector pose for grasping the object. + Multiple poses may be available for different grasp types (pinch, power, etc.) + or approach directions. + """ + poses: torch.Tensor = field(default_factory=lambda: torch.eye(4).unsqueeze(0)) + """Batch of grasp poses with shape [B, 4, 4]. + + Each pose is a 4x4 homogeneous transformation matrix representing + the end-effector pose in the object's local coordinate frame. + """ + + grasp_types: List[str] = field(default_factory=lambda: ["default"]) + """List of grasp type labels for each pose in the batch. + + Examples: "pinch", "power", "hook", "spherical", etc. + Length must match the batch dimension of `poses`. + """ + + confidence_scores: Optional[torch.Tensor] = None + """Optional confidence scores for each grasp pose with shape [B]. + + Higher values indicate more reliable/ stable grasps. + Used for grasp selection when multiple options exist. + """ + + def get_batch_size(self) -> int: + """Return the number of grasp poses in this affordance.""" + return self.poses.shape[0] + + def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: + """Get grasp pose by type label. 
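
        Returns the first pose whose entry in ``grasp_types`` matches the
        requested label.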
+ + Args: + grasp_type: Type of grasp (e.g., "pinch", "power") + + Returns: + 4x4 pose tensor if found, None otherwise + """ + if grasp_type in self.grasp_types: + idx = self.grasp_types.index(grasp_type) + return self.poses[idx] + return None + + def get_best_grasp(self) -> torch.Tensor: + """Get the best grasp pose based on confidence scores. + + Returns: + 4x4 pose tensor with highest confidence + """ + if self.confidence_scores is not None: + best_idx = torch.argmax(self.confidence_scores) + return self.poses[best_idx] + return self.poses[0] # Default to first if no scores available + + +@dataclass +class InteractionPoints(Affordance): + """Interaction points affordance containing a batch of 3D positions. + + Interaction points define specific locations on an object surface + that can be used for contact-based interactions (pushing, poking, + touching) rather than full grasping. + """ + points: torch.Tensor = field(default_factory=lambda: torch.zeros(1, 3)) + """Batch of 3D interaction points with shape [B, 3]. + + Each point is a 3D coordinate in the object's local coordinate frame. + """ + + normals: Optional[torch.Tensor] = None + """Optional surface normals at each interaction point with shape [B, 3]. + + Normals indicate the surface orientation at each point, + useful for determining approach directions. + """ + + point_types: List[str] = field(default_factory=lambda: ["contact"]) + """List of interaction types for each point. + + Examples: "push", "poke", "touch", "support", "handle", etc. + Length must match the batch dimension of `points`. + """ + + contact_regions: Optional[List[str]] = None + """Optional labels for object regions each point belongs to. + + Examples: "handle", "face", "edge", "corner", "center" + """ + + def get_batch_size(self) -> int: + """Return the number of interaction points in this affordance.""" + return self.points.shape[0] + + def get_points_by_type(self, point_type: str) -> Optional[torch.Tensor]: + """Get all points of a specific interaction type. + + Args: + point_type: Type of interaction (e.g., "push", "handle") + + Returns: + Tensor of points with shape [N, 3] if found, None otherwise + """ + indices = [i for i, t in enumerate(self.point_types) if t == point_type] + if indices: + return self.points[indices] + return None + + def get_approach_direction(self, point_idx: int) -> torch.Tensor: + """Get recommended approach direction for a given point. + + Args: + point_idx: Index of the point + + Returns: + 3D approach direction vector (normalized) + """ + if self.normals is not None: + # Approach from the opposite direction of the surface normal + return -self.normals[point_idx] + # Default: approach from positive z + return torch.tensor([0, 0, 1], dtype=self.points.dtype, device=self.points.device) + + +# ============================================================================= +# ObjectSemantics +# ============================================================================= + +@dataclass +class ObjectSemantics: + """Semantic information about interaction target. + + This class encapsulates all semantic and geometric information about + an object needed for intelligent interaction planning. 
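
    Instances are typically produced by ``RigidObject.get_object_semantics()``
    (see Section 4.5) rather than constructed by hand.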
+ """ + label: str + """Object category label (e.g., 'apple', 'bottle').""" + + affordance: Affordance + """Affordance data (GraspPose, InteractionPoints, etc.).""" + + geometry: Dict[str, Any] + """Geometric information including bounding box, mesh data.""" + + properties: Dict[str, Any] + """Physical properties: mass, friction, etc.""" + + uid: Optional[str] = None + """Optional unique identifier for the object instance.""" + + +# ============================================================================= +# ActionCfg and AtomicAction +# ============================================================================= + +@configclass +class ActionCfg: + """Configuration for atomic actions.""" + control_part: str = "left_arm" + """Control part name for the action.""" + + interpolation_type: str = "linear" + """Interpolation type: 'linear', 'cubic', or 'toppra'.""" + + velocity_limit: Optional[float] = None + """Maximum velocity for trajectory.""" + + acceleration_limit: Optional[float] = None + """Maximum acceleration for trajectory.""" + + +class AtomicAction(ABC): + """Abstract base class for atomic actions. + + All atomic actions use PlanResult from embodichain.lab.sim.planners + as the return type for execute() method, ensuring consistency with + the existing motion planning infrastructure. + """ + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = torch.device("cuda"), + ): + self.motion_generator = motion_generator + self.robot = robot + self.control_part = control_part + self.device = device + + @abstractmethod + def execute( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> PlanResult: + """Execute the atomic action. + + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration [DOF] + **kwargs: Additional action-specific parameters + + Returns: + PlanResult with trajectory (positions, velocities, accelerations), + end-effector poses (xpos_list), and success status. + Use result.positions for joint trajectory [T, DOF]. + Use result.xpos_list for EE poses [T, 4, 4]. + """ + pass + + @abstractmethod + def validate( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Validate if the action is feasible without executing. + + This method performs a quick feasibility check (e.g., IK solvability) + without generating a full trajectory. + + Returns: + True if action appears feasible, False otherwise + """ + pass + + def _get_current_qpos(self) -> torch.Tensor: + """Get current joint configuration from robot.""" + return self.robot.get_qpos()[0] # Assuming single environment + + def _ik_solve( + self, + target_pose: torch.Tensor, + qpos_seed: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """Solve IK for target pose. + + Args: + target_pose: Target pose [4, 4] + qpos_seed: Seed configuration [DOF] + + Returns: + Joint configuration [DOF] + + Raises: + RuntimeError: If IK fails to find a solution + """ + if qpos_seed is None: + qpos_seed = self._get_current_qpos() + + success, qpos = self.robot.compute_ik( + pose=target_pose.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + + if not success.all(): + raise RuntimeError(f"IK failed for target pose: {target_pose}") + + return qpos.squeeze(0) + + def _fk_compute(self, qpos: torch.Tensor) -> torch.Tensor: + """Compute forward kinematics. 
+ + Args: + qpos: Joint configuration [DOF] or [B, DOF] + + Returns: + End-effector pose [4, 4] or [B, 4, 4] + """ + if qpos.dim() == 1: + qpos = qpos.unsqueeze(0) + + xpos = self.robot.compute_fk( + qpos=qpos, + name=self.control_part, + to_matrix=True, + ) + + return xpos.squeeze(0) if xpos.shape[0] == 1 else xpos + + def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tensor: + """Apply offset to pose in local frame. + + Args: + pose: Base pose [4, 4] + offset: Offset in local frame [3] + + Returns: + Pose with offset applied [4, 4] + """ + result = pose.clone() + result[:3, 3] += pose[:3, :3] @ offset + return result + + def plan_trajectory( + self, + target_states: List["PlanState"], + options: Optional["MotionGenOptions"] = None, + ) -> "PlanResult": + """Plan trajectory using motion generator.""" + if options is None: + options = MotionGenOptions(control_part=self.control_part) + return self.motion_generator.generate(target_states, options) +``` + +### 4.2 ReachAction Implementation + +```python +class ReachAction(AtomicAction): + """Atomic action for reaching a target pose or object.""" + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = torch.device("cuda"), + interpolation_type: str = "linear", # "linear", "cubic", "toppra" + ): + super().__init__(motion_generator, robot, control_part, device) + self.interpolation_type = interpolation_type + + def execute( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + approach_offset: Optional[torch.Tensor] = None, + use_affordance: bool = True, + **kwargs + ) -> PlanResult: + """Execute reach action. + + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration + approach_offset: Offset for pre-grasp approach [x, y, z] + use_affordance: Whether to use object's affordance data + + Returns: + PlanResult with trajectory and execution status + """ + # Resolve target pose from ObjectSemantics if needed + if isinstance(target, ObjectSemantics): + target_pose = self._resolve_target_pose(target, use_affordance) + else: + target_pose = target + + # Apply approach offset if specified + if approach_offset is not None: + approach_pose = self._apply_offset(target_pose, approach_offset) + else: + approach_pose = target_pose + + # Get current state if not provided + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # Create plan states + target_states = [ + PlanState( + qpos=start_qpos, + move_type=MoveType.JOINT_MOVE + ), + PlanState( + xpos=approach_pose, + move_type=MoveType.EEF_MOVE + ), + ] + + # Plan trajectory + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=self.interpolation_type == "linear", + interpolate_position_step=0.002, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + ), + ) + + result = self.plan_trajectory(target_states, options) + + # Return PlanResult directly + return result + + def validate( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Check if the reach action is feasible.""" + try: + # Quick IK feasibility check + if isinstance(target, ObjectSemantics): + target_pose = self._resolve_target_pose(target, use_affordance=True) + else: + target_pose = target + + # Attempt IK + qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + 
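            # compute_ik expects batched inputs, hence the unsqueeze to a batch of one.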
            success, _ = self.robot.compute_ik(
                pose=target_pose.unsqueeze(0),
                qpos_seed=qpos_seed.unsqueeze(0),
                name=self.control_part,
            )
            return success.all().item()
        except Exception:
            return False

    def _resolve_target_pose(
        self,
        semantics: ObjectSemantics,
        use_affordance: bool
    ) -> torch.Tensor:
        """Resolve target pose from object semantics."""
        if use_affordance and isinstance(semantics.affordance, GraspPose):
            # Use the highest-confidence grasp pose from the affordance data
            grasp_pose = semantics.affordance.get_best_grasp()
            object_pose = self._get_object_pose(semantics.label)
            target_pose = object_pose @ grasp_pose
        else:
            # Default to object center with approach direction
            object_pose = self._get_object_pose(semantics.label)
            approach_offset = torch.tensor([0, 0, 0.05], device=self.device)
            target_pose = object_pose.clone()
            target_pose[:3, 3] += approach_offset

        return target_pose

    def _get_object_pose(self, label: str) -> torch.Tensor:
        """Get current pose of object by label."""
        # Implementation depends on the environment's object management
        raise NotImplementedError(
            "_get_object_pose must be implemented by a subclass or "
            "provided with environment-specific object management"
        )
```

### 4.3 GraspAction Implementation

```python
class GraspAction(AtomicAction):
    """Atomic action for grasping objects."""

    def __init__(
        self,
        motion_generator: "MotionGenerator",
        robot: "Robot",
        control_part: str,
        device: torch.device = torch.device("cuda"),
        pre_grasp_distance: float = 0.05,
        approach_direction: str = "z",  # "x", "y", "z", or "custom"
    ):
        super().__init__(motion_generator, robot, control_part, device)
        self.pre_grasp_distance = pre_grasp_distance
        self.approach_direction = approach_direction

    def execute(
        self,
        target: ObjectSemantics,
        start_qpos: Optional[torch.Tensor] = None,
        use_affordance: bool = True,
        grasp_type: str = "default",  # "default", "pinch", "power"
        **kwargs
    ) -> PlanResult:
        """Execute grasp action.

        Args:
            target: ObjectSemantics with grasp affordances
            start_qpos: Starting joint configuration
            use_affordance: Whether to use precomputed grasp poses
            grasp_type: Type of grasp to execute

        Returns:
            PlanResult with trajectory and execution status
        """
        # Resolve grasp pose
        grasp_pose = self._resolve_grasp_pose(target, use_affordance, grasp_type)

        # Compute pre-grasp pose (approach position)
        pre_grasp_pose = self._compute_pre_grasp_pose(grasp_pose)

        # Get current state
        if start_qpos is None:
            start_qpos = self._get_current_qpos()

        # Build trajectory plan states
        target_states = [
            PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE),
            PlanState(xpos=pre_grasp_pose, move_type=MoveType.EEF_MOVE),
            PlanState(xpos=grasp_pose, move_type=MoveType.EEF_MOVE),
        ]

        # Plan trajectory
        options = MotionGenOptions(
            control_part=self.control_part,
            is_interpolate=True,
            is_linear=False,
            interpolate_position_step=0.001,
            plan_opts=ToppraPlanOptions(
                sample_interval=kwargs.get("sample_interval", 30),
            ),
        )

        result = self.plan_trajectory(target_states, options)

        # Return PlanResult directly - it contains all trajectory data
        return result

    def validate(
        self,
        target: ObjectSemantics,
        start_qpos: Optional[torch.Tensor] = None,
        **kwargs
    ) -> bool:
        """Validate if grasp is feasible."""
        try:
            grasp_pose = self._resolve_grasp_pose(target, use_affordance=True, grasp_type="default")
            qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos()
            success, _ = self.robot.compute_ik(
                pose=grasp_pose.unsqueeze(0),
                qpos_seed=qpos_seed.unsqueeze(0),
                name=self.control_part,
            )
            return success.all().item()
        except Exception:
            return False

    def _resolve_grasp_pose(
        self,
        semantics: ObjectSemantics,
        use_affordance: bool,
        grasp_type: str
    ) -> torch.Tensor:
        """Resolve grasp pose from object semantics."""
        if use_affordance and isinstance(semantics.affordance, GraspPose):
            grasp_pose = semantics.affordance.get_grasp_by_type(grasp_type)
            if grasp_pose is None:
                grasp_pose = semantics.affordance.get_best_grasp()

            # Transform from the object's local frame to the world frame
            object_pose = self._get_object_pose(semantics.label)
            return object_pose @ grasp_pose

        # Fallback: compute grasp pose from geometry
        return self._compute_grasp_from_geometry(semantics)

    def _compute_pre_grasp_pose(self, grasp_pose: torch.Tensor) -> torch.Tensor:
        """Compute pre-grasp pose with offset."""
        offset = torch.zeros(3, device=self.device)
        if self.approach_direction == "z":
            offset[2] = -self.pre_grasp_distance
        elif self.approach_direction == "x":
            offset[0] = -self.pre_grasp_distance
        elif self.approach_direction == "y":
            offset[1] = -self.pre_grasp_distance

        pre_grasp = grasp_pose.clone()
        pre_grasp[:3, 3] += grasp_pose[:3, :3] @ offset
        return pre_grasp
```

### 4.4 MoveAction Implementation

```python
class MoveAction(AtomicAction):
    """Atomic action for moving to a target position."""

    def __init__(
        self,
        motion_generator: "MotionGenerator",
        robot: "Robot",
        control_part: str,
        device: torch.device = torch.device("cuda"),
        move_type: str = "cartesian",  # "cartesian", "joint"
        interpolation: str = "linear",  # "linear", "cubic", "toppra"
    ):
        super().__init__(motion_generator, robot, control_part, device)
        self.move_type = move_type
        self.interpolation = interpolation

    def execute(
        self,
        target: Union[torch.Tensor, ObjectSemantics],
        start_qpos: Optional[torch.Tensor] = None,
        offset: Optional[torch.Tensor] = None,
        velocity_limit: Optional[float] =
None, + acceleration_limit: Optional[float] = None, + **kwargs + ) -> PlanResult: + """Execute move action. + + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration + offset: Optional offset from target + velocity_limit: Max velocity for trajectory + acceleration_limit: Max acceleration for trajectory + """ + # Resolve target + if isinstance(target, ObjectSemantics): + target_pose = self._get_object_pose(target.label) + else: + target_pose = target + + # Apply offset if specified + if offset is not None: + target_pose = self._apply_offset(target_pose, offset) + + # Get start state + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # Create plan states based on move type + if self.move_type == "cartesian": + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), + ] + is_linear = self.interpolation == "linear" + else: # joint space + target_qpos = self._ik_solve(target_pose, start_qpos) + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(qpos=target_qpos, move_type=MoveType.JOINT_MOVE), + ] + is_linear = False + + # Configure motion generation + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=is_linear, + interpolate_position_step=0.002, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + velocity_limit=velocity_limit, + acceleration_limit=acceleration_limit, + ), + ) + + result = self.plan_trajectory(target_states, options) + + # Return PlanResult directly - it contains all trajectory data + return result +``` + +### 4.5 RigidObject Extension + +```python +class RigidObject(BatchEntity): + """RigidObject with ObjectSemantics support.""" + + def get_object_semantics(self, env_id: int = 0) -> ObjectSemantics: + """Return ObjectSemantics for this rigid object. + + This method aggregates object label, affordance data, geometry, + and physical properties into a unified semantic representation + for intelligent interaction planning. 
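
        When no precomputed affordance is stored on the entity, a default
        ``GraspPose`` affordance is derived from the object's bounding box.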
+ + Args: + env_id: Environment index for batched objects (default: 0) + + Returns: + ObjectSemantics containing all semantic information + + Example: + >>> obj = sim.get_rigid_object("apple_01") + >>> semantics = obj.get_object_semantics() + >>> print(semantics.label) # "apple" + >>> print(semantics.affordance.poses.shape) # [B, 4, 4] + """ + # Get object label from configuration or user data + label = self._get_object_label(env_id) + + # Get or compute affordance data + affordance = self._get_affordance_data(env_id) + + # Get geometry information + geometry = self._get_geometry_data(env_id) + + # Get physical properties + properties = self._get_physical_properties(env_id) + + return ObjectSemantics( + label=label, + affordance=affordance, + geometry=geometry, + properties=properties, + uid=self._entities[env_id].get_user_id() if env_id < len(self._entities) else None, + ) + + def _get_object_label(self, env_id: int) -> str: + """Extract object label from configuration or metadata.""" + # Try to get from object user data + entity = self._entities[env_id] if env_id < len(self._entities) else None + if entity: + # Check for label in user data + label = entity.get_user_data().get("label", None) if hasattr(entity, "get_user_data") else None + if label: + return label + + # Fallback: derive from object name or class + class_name = self.__class__.__name__.lower() + if "cube" in class_name: + return "cube" + elif "sphere" in class_name: + return "sphere" + elif "cylinder" in class_name: + return "cylinder" + + return "unknown" + + def _get_affordance_data(self, env_id: int) -> Affordance: + """Get or compute affordance data for the object.""" + # Check if precomputed affordance data exists + entity = self._entities[env_id] if env_id < len(self._entities) else None + if entity and hasattr(entity, "get_user_data"): + user_data = entity.get_user_data() + if "affordance" in user_data: + return user_data["affordance"] + + # Compute default affordance based on geometry + geometry = self._get_geometry_data(env_id) + bbox = geometry.get("bounding_box", [0.1, 0.1, 0.1]) + + # Create default grasp poses based on bounding box + # Generate poses for top, front, and side grasps + poses = [] + # Top grasp + pose_top = torch.eye(4) + pose_top[2, 3] = bbox[2] / 2 + 0.05 # Offset above object + poses.append(pose_top) + + # Front grasp + pose_front = torch.eye(4) + pose_front[0, 3] = bbox[0] / 2 + 0.05 + poses.append(pose_front) + + poses_tensor = torch.stack(poses) + + return GraspPose( + poses=poses_tensor, + grasp_types=["top", "front"], + object_label=self._get_object_label(env_id), + ) + + def _get_geometry_data(self, env_id: int) -> Dict[str, Any]: + """Get geometric information about the object.""" + # Get bounding box from entity + entity = self._entities[env_id] if env_id < len(self._entities) else None + bbox = [0.1, 0.1, 0.1] # Default + + if entity: + try: + # Try to get bounding box from entity + if hasattr(entity, "get_bounding_box"): + bbox = entity.get_bounding_box() + except Exception: + pass + + # Get mesh information if available + mesh_info = {} + if entity and hasattr(entity, "get_mesh_count"): + mesh_info["mesh_count"] = entity.get_mesh_count() + + return { + "bounding_box": bbox, + "volume": bbox[0] * bbox[1] * bbox[2], + "mesh_info": mesh_info, + } + + def _get_physical_properties(self, env_id: int) -> Dict[str, Any]: + """Get physical properties of the object.""" + entity = self._entities[env_id] if env_id < len(self._entities) else None + + properties = { + "mass": 1.0, + "friction": 
0.5, + "restitution": 0.1, + "body_type": self.body_type, + } + + if entity and hasattr(entity, "get_physical_body"): + try: + phys_body = entity.get_physical_body() + properties["mass"] = phys_body.get_mass() + properties["friction"] = phys_body.get_dynamic_friction() + except Exception: + pass + + return properties + + +# ============================================================================= +# Action Engine +# ============================================================================= + +class AtomicActionEngine: + """Central engine for managing and executing atomic actions.""" + + def __init__( + self, + robot: "Robot", + motion_generator: "MotionGenerator", + device: torch.device = torch.device("cuda"), + ): + self.robot = robot + self.motion_generator = motion_generator + self.device = device + + # Registry of action instances + self._actions: Dict[str, AtomicAction] = {} + + # Semantic analyzer for object understanding + self._semantic_analyzer = SemanticAnalyzer() + + # Initialize default actions + self._init_default_actions() + + def _init_default_actions(self): + """Initialize default atomic action instances.""" + control_parts = self.robot.control_parts or ["default"] + + for part in control_parts: + self.register_action( + f"reach_{part}", + ReachAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + self.register_action( + f"grasp_{part}", + GraspAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + self.register_action( + f"move_{part}", + MoveAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + + def register_action(self, name: str, action: AtomicAction): + """Register a custom atomic action.""" + self._actions[name] = action + + def execute( + self, + action_name: str, + target: Union[torch.Tensor, str, ObjectSemantics], + control_part: Optional[str] = None, + **kwargs + ) -> PlanResult: + """Execute an atomic action. + + Args: + action_name: Name of registered action + target: Target pose, object label, or ObjectSemantics + control_part: Robot control part to use + **kwargs: Additional action parameters + + Returns: + PlanResult with trajectory (positions, velocities, accelerations), + end-effector poses (xpos_list), and success status. + """ + # Resolve action + if control_part: + action_key = f"{action_name}_{control_part}" + else: + action_key = action_name + + if action_key not in self._actions: + raise ValueError(f"Unknown action: {action_key}") + + action = self._actions[action_key] + + # Resolve target to ObjectSemantics if string label provided + if isinstance(target, str): + target = self._semantic_analyzer.analyze(target) + + # Execute action - returns PlanResult directly + return action.execute(target, **kwargs) + + def validate( + self, + action_name: str, + target: Union[torch.Tensor, str, ObjectSemantics], + control_part: Optional[str] = None, + **kwargs + ) -> bool: + """Validate if an action is feasible without executing.""" + if control_part: + action_key = f"{action_name}_{control_part}" + else: + action_key = action_name + + if action_key not in self._actions: + return False + + action = self._actions[action_key] + + if isinstance(target, str): + target = self._semantic_analyzer.analyze(target) + + return action.validate(target, **kwargs) +``` + +--- + +## 5. 
Usage Examples + +### 5.1 Basic Usage + +```python +from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg +from embodichain.lab.sim.objects import Robot +from embodichain.lab.gym.envs import BaseEnv + +# Initialize components +env: BaseEnv = ... # Your environment +robot: Robot = env.robot +motion_gen = MotionGenerator( + cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid)) +) + +# Create action engine +engine = AtomicActionEngine( + robot=robot, + motion_generator=motion_gen, + device=env.device, +) + +# Execute reach action +result = engine.execute( + action_name="reach", + target=target_pose, # [4, 4] tensor + control_part="left_arm", + approach_offset=torch.tensor([0, 0, -0.05]), +) + +if result.success: + trajectory = result.trajectory # [T, DOF] + env.execute_trajectory(trajectory) +``` + +### 5.2 Using Object Semantics + +```python +# Define object semantics +apple_semantics = ObjectSemantics( + label="apple", + affordance_data={ + "grasp_poses": { + "default": grasp_pose_1, # [4, 4] + "pinch": grasp_pose_2, + }, + "interaction_points": [point_1, point_2], + }, + geometry={ + "bounding_box": [0.08, 0.08, 0.08], + "mesh": "apple_mesh.obj", + }, + properties={ + "mass": 0.15, + "friction": 0.5, + }, +) + +# Execute grasp with semantics +result = engine.execute( + action_name="grasp", + target=apple_semantics, + control_part="right_arm", + grasp_type="pinch", +) +``` + +### 5.3 Custom Atomic Action + +```python +class RotateEEFAction(AtomicAction): + """Custom action to rotate end-effector.""" + + def execute( + self, + target: torch.Tensor, # Rotation angle in degrees + start_qpos: Optional[torch.Tensor] = None, + axis: str = "z", + **kwargs + ) -> PlanResult: + # Get current pose + current_qpos = start_qpos or self._get_current_qpos() + current_pose = self.robot.compute_fk( + qpos=current_qpos.unsqueeze(0), + name=self.control_part, + to_matrix=True, + ).squeeze(0) + + # Apply rotation + angle_rad = torch.deg2rad(target) + rotation = self._create_rotation_matrix(angle_rad, axis) + target_pose = current_pose.clone() + target_pose[:3, :3] = rotation @ current_pose[:3, :3] + + # Plan motion + target_states = [ + PlanState(qpos=current_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), + ] + + result = self.plan_trajectory(target_states) + + # Return PlanResult directly - it contains all trajectory data + return result + +# Register custom action +engine.register_action( + "rotate_eef", + RotateEEFAction( + motion_generator=motion_gen, + robot=robot, + control_part="left_arm", + device=device, + ) +) +``` + +--- + +## 6. 
Integration with Agentic Workflows + +### 6.1 LLM Action Selection + +```python +class ActionSelector: + """Select actions based on LLM reasoning.""" + + def __init__(self, action_engine: AtomicActionEngine): + self.engine = action_engine + + def select_action( + self, + task_description: str, + scene_observation: Dict, + available_objects: List[ObjectSemantics], + ) -> Tuple[str, Dict]: + """Select appropriate action based on task and scene.""" + # Parse task to determine required action + if "pick" in task_description.lower(): + action_name = "grasp" + target = self._resolve_object(task_description, available_objects) + params = {"grasp_type": "default"} + elif "reach" in task_description.lower(): + action_name = "reach" + target = self._resolve_target_pose(task_description, scene_observation) + params = {} + elif "place" in task_description.lower(): + action_name = "move" + target = self._resolve_placement(task_description, scene_observation) + params = {} + else: + raise ValueError(f"Cannot determine action for task: {task_description}") + + return action_name, {"target": target, **params} + + def _resolve_object( + self, + task_description: str, + available_objects: List[ObjectSemantics], + ) -> ObjectSemantics: + """Resolve object reference from task description.""" + # Use LLM or keyword matching to identify object + pass +``` + +### 6.2 Action Composition + +```python +class ActionComposer: + """Compose multiple atomic actions into complex behaviors.""" + + def __init__(self, engine: AtomicActionEngine): + self.engine = engine + + def pick_and_place( + self, + pick_object: ObjectSemantics, + place_pose: torch.Tensor, + control_part: str = "right_arm", + ) -> List[PlanResult]: + """Compose pick and place behavior.""" + results = [] + + # 1. Reach pre-grasp + result = self.engine.execute( + "reach", + target=pick_object, + control_part=control_part, + approach_offset=torch.tensor([0, 0, -0.08]), + ) + results.append(result) + if not result.success: + return results + + # 2. Grasp + result = self.engine.execute( + "grasp", + target=pick_object, + control_part=control_part, + ) + results.append(result) + if not result.success: + return results + + # 3. Lift + current_qpos = result.trajectory[-1] if result.trajectory is not None else None + lift_pose = self._get_lift_pose(pick_object) + result = self.engine.execute( + "move", + target=lift_pose, + control_part=control_part, + start_qpos=current_qpos, + ) + results.append(result) + if not result.success: + return results + + # 4. Move to place + result = self.engine.execute( + "move", + target=place_pose, + control_part=control_part, + ) + results.append(result) + if not result.success: + return results + + # 5. Release + result = self.engine.execute( + "release", + target=place_pose, + control_part=control_part, + ) + results.append(result) + + return results +``` + +--- + +## 7. 
Extensibility + +### 7.1 Custom Action Registration + +```python +# Define custom action +class PushAction(AtomicAction): + def execute(self, target, **kwargs): + # Implementation + pass + +# Register at runtime +from embodichain.lab.sim.atomic_actions import register_action + +register_action( + name="push", + action_class=PushAction, + motion_generator=motion_gen, + robot=robot, + control_part="right_arm", +) + +# Use in engine +engine.execute("push", target=object_semantics) +``` + +### 7.2 Plugin Architecture + +```python +class ActionPlugin(ABC): + """Base class for action plugins.""" + + @abstractmethod + def get_actions(self) -> Dict[str, Type[AtomicAction]]: + """Return mapping of action names to classes.""" + pass + + @abstractmethod + def get_configurators(self) -> Dict[str, Any]: + """Return configuration helpers.""" + pass + +# Example plugin +class ManipulationActionsPlugin(ActionPlugin): + def get_actions(self): + return { + "twist": TwistAction, + "slide": SlideAction, + "insert": InsertAction, + } +``` + +--- + +## 8. Summary + +This atomic action abstraction design provides: + +1. **Unified Interface**: All atomic actions inherit from `AtomicAction` with consistent `execute()` and `validate()` methods + +2. **Semantic Awareness**: Object semantics (label, affordance, geometry) are first-class citizens + +3. **Motion Planning Integration**: Leverages existing `MotionGenerator`, solvers, and warp interpolation + +4. **Agentic Workflow Support**: Easy composition into complex behaviors and LLM integration + +5. **Extensibility**: Registry-based action registration and plugin architecture + +The design bridges low-level motion planning with high-level agent reasoning, enabling both precise control and semantic task execution. diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py new file mode 100644 index 00000000..cb1ea4c4 --- /dev/null +++ b/embodichain/lab/sim/atomic_actions/__init__.py @@ -0,0 +1,58 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +"""Atomic action abstraction layer for embodied AI motion generation. + +This module provides a unified interface for atomic actions like reach, grasp, +move, etc., with support for semantic object understanding and extensible +custom action registration. 
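
Example (sketch; assumes ``robot``, ``motion_gen``, and a 4x4 ``pose`` tensor
have been constructed elsewhere):

    >>> from embodichain.lab.sim.atomic_actions import AtomicActionEngine
    >>> engine = AtomicActionEngine(robot=robot, motion_generator=motion_gen)
    >>> result = engine.execute("reach", target=pose, control_part="left_arm")
    >>> trajectory = result.positions  # [T, DOF] joint trajectory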
+""" + +from .core import ( + Affordance, + GraspPose, + InteractionPoints, + ObjectSemantics, + ActionCfg, + AtomicAction, +) +from .actions import ( + ReachAction, + GraspAction, + ReleaseAction, + MoveAction, +) +from .engine import AtomicActionEngine, register_action, unregister_action, get_registered_actions + +__all__ = [ + # Core classes + "Affordance", + "GraspPose", + "InteractionPoints", + "ObjectSemantics", + "ActionCfg", + "AtomicAction", + # Action implementations + "ReachAction", + "GraspAction", + "ReleaseAction", + "MoveAction", + # Engine + "AtomicActionEngine", + "register_action", + "unregister_action", + "get_registered_actions", +] diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py new file mode 100644 index 00000000..342055cd --- /dev/null +++ b/embodichain/lab/sim/atomic_actions/actions.py @@ -0,0 +1,546 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import torch +from typing import Optional, Union, TYPE_CHECKING, Any + +from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType +from embodichain.lab.sim.planners.motion_generator import MotionGenOptions +from embodichain.lab.sim.planners.toppra_planner import ToppraPlanOptions +from .core import AtomicAction, ObjectSemantics + +if TYPE_CHECKING: + from embodichain.lab.sim.planners import MotionGenerator + from embodichain.lab.sim.objects import Robot + + +# ============================================================================= +# Reach Action +# ============================================================================= + + +class ReachAction(AtomicAction): + """Atomic action for reaching a target pose or object.""" + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = torch.device("cuda"), + interpolation_type: str = "linear", # "linear", "cubic", "toppra" + ): + super().__init__(motion_generator, robot, control_part, device) + self.interpolation_type = interpolation_type + + def execute( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + approach_offset: Optional[torch.Tensor] = None, + use_affordance: bool = True, + **kwargs + ) -> PlanResult: + """Execute reach action. 
+ + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration + approach_offset: Offset for pre-grasp approach [x, y, z] + use_affordance: Whether to use object's affordance data + + Returns: + PlanResult with trajectory and execution status + """ + # Resolve target pose from ObjectSemantics if needed + if isinstance(target, ObjectSemantics): + target_pose = self._resolve_target_pose(target, use_affordance) + else: + target_pose = target + + # Apply approach offset if specified + if approach_offset is not None: + approach_pose = self._apply_offset(target_pose, approach_offset) + else: + approach_pose = target_pose + + # Get current state if not provided + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # Create plan states + target_states = [ + PlanState( + qpos=start_qpos, + move_type=MoveType.JOINT_MOVE + ), + PlanState( + xpos=approach_pose, + move_type=MoveType.EEF_MOVE + ), + ] + + # Plan trajectory + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=self.interpolation_type == "linear", + interpolate_position_step=0.002, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + ), + ) + + result = self.plan_trajectory(target_states, options) + + # Return PlanResult directly + return result + + def validate( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Check if the reach action is feasible.""" + try: + # Quick IK feasibility check + if isinstance(target, ObjectSemantics): + target_pose = self._resolve_target_pose(target, use_affordance=True) + else: + target_pose = target + + # Attempt IK + qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + success, _ = self.robot.compute_ik( + pose=target_pose.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + return success.all().item() + except Exception: + return False + + def _resolve_target_pose( + self, + semantics: ObjectSemantics, + use_affordance: bool + ) -> torch.Tensor: + """Resolve target pose from object semantics.""" + from .core import GraspPose + + if use_affordance and isinstance(semantics.affordance, GraspPose): + # Use precomputed grasp pose from affordance data + grasp_pose = semantics.affordance.get_best_grasp() + object_pose = self._get_object_pose(semantics.label) + target_pose = object_pose @ grasp_pose + else: + # Default to object center with approach direction + object_pose = self._get_object_pose(semantics.label) + approach_offset = torch.tensor([0, 0, 0.05], device=self.device) + target_pose = object_pose.clone() + target_pose[:3, 3] += approach_offset + + return target_pose + + def _get_object_pose(self, label: str) -> torch.Tensor: + """Get current pose of object by label.""" + # Implementation depends on environment's object management + # This is a placeholder - should be implemented based on environment + raise NotImplementedError( + "_get_object_pose must be implemented by subclass or " + "provided with environment-specific object management" + ) + + +# ============================================================================= +# Grasp Action +# ============================================================================= + + +class GraspAction(AtomicAction): + """Atomic action for grasping objects.""" + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = 
torch.device("cuda"), + pre_grasp_distance: float = 0.05, + approach_direction: str = "z", # "x", "y", "z", or "custom" + ): + super().__init__(motion_generator, robot, control_part, device) + self.pre_grasp_distance = pre_grasp_distance + self.approach_direction = approach_direction + + def execute( + self, + target: ObjectSemantics, + start_qpos: Optional[torch.Tensor] = None, + use_affordance: bool = True, + grasp_type: str = "default", # "default", "pinch", "power" + **kwargs + ) -> PlanResult: + """Execute grasp action. + + Args: + target: ObjectSemantics with grasp affordances + start_qpos: Starting joint configuration + use_affordance: Whether to use precomputed grasp poses + grasp_type: Type of grasp to execute + """ + # Resolve grasp pose + grasp_pose = self._resolve_grasp_pose(target, use_affordance, grasp_type) + + # Compute pre-grasp pose (approach position) + pre_grasp_pose = self._compute_pre_grasp_pose(grasp_pose) + + # Get current state + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # Build trajectory plan states + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=pre_grasp_pose, move_type=MoveType.EEF_MOVE), + PlanState(xpos=grasp_pose, move_type=MoveType.EEF_MOVE), + ] + + # Plan trajectory + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=False, + interpolate_position_step=0.001, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + ), + ) + + result = self.plan_trajectory(target_states, options) + + # Return PlanResult directly - it contains all trajectory data + return result + + def validate( + self, + target: ObjectSemantics, + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Validate if grasp is feasible.""" + try: + grasp_pose = self._resolve_grasp_pose(target, use_affordance=True, grasp_type="default") + qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + success, _ = self.robot.compute_ik( + pose=grasp_pose.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + return success.all().item() + except Exception: + return False + + def _resolve_grasp_pose( + self, + semantics: ObjectSemantics, + use_affordance: bool, + grasp_type: str + ) -> torch.Tensor: + """Resolve grasp pose from object semantics.""" + from .core import GraspPose + + if use_affordance and isinstance(semantics.affordance, GraspPose): + grasp_pose_affordance = semantics.affordance + grasp_pose = grasp_pose_affordance.get_grasp_by_type(grasp_type) + if grasp_pose is None: + grasp_pose = grasp_pose_affordance.get_best_grasp() + + # Transform to world frame + object_pose = self._get_object_pose(semantics.label) + return object_pose @ grasp_pose + + # Fallback: compute grasp pose from geometry + return self._compute_grasp_from_geometry(semantics) + + def _compute_pre_grasp_pose(self, grasp_pose: torch.Tensor) -> torch.Tensor: + """Compute pre-grasp pose with offset.""" + offset = torch.zeros(3, device=self.device) + if self.approach_direction == "z": + offset[2] = -self.pre_grasp_distance + elif self.approach_direction == "x": + offset[0] = -self.pre_grasp_distance + elif self.approach_direction == "y": + offset[1] = -self.pre_grasp_distance + + pre_grasp = grasp_pose.clone() + pre_grasp[:3, 3] += grasp_pose[:3, :3] @ offset + return pre_grasp + + def _get_object_pose(self, label: str) -> torch.Tensor: + """Get current pose of object by label.""" + raise NotImplementedError( + 
"_get_object_pose must be implemented by subclass or " + "provided with environment-specific object management" + ) + + def _compute_grasp_from_geometry(self, semantics: ObjectSemantics) -> torch.Tensor: + """Compute grasp pose from object geometry.""" + # Get object pose + object_pose = self._get_object_pose(semantics.label) + + # Get bounding box from geometry + bbox = semantics.geometry.get("bounding_box", [0.1, 0.1, 0.1]) + + # Default top-down grasp + grasp_offset = torch.eye(4, device=self.device) + grasp_offset[2, 3] = bbox[2] / 2 + 0.02 # Slightly above object + + return object_pose @ grasp_offset + + +# ============================================================================= +# Move Action +# ============================================================================= + + +class MoveAction(AtomicAction): + """Atomic action for moving to a target position.""" + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = torch.device("cuda"), + move_type: str = "cartesian", # "cartesian", "joint" + interpolation: str = "linear", # "linear", "cubic", "toppra" + ): + super().__init__(motion_generator, robot, control_part, device) + self.move_type = move_type + self.interpolation = interpolation + + def execute( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + offset: Optional[torch.Tensor] = None, + velocity_limit: Optional[float] = None, + acceleration_limit: Optional[float] = None, + **kwargs + ) -> PlanResult: + """Execute move action. + + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration + offset: Optional offset from target + velocity_limit: Max velocity for trajectory + acceleration_limit: Max acceleration for trajectory + """ + # Resolve target + if isinstance(target, ObjectSemantics): + target_pose = self._get_object_pose(target.label) + else: + target_pose = target + + # Apply offset if specified + if offset is not None: + target_pose = self._apply_offset(target_pose, offset) + + # Get start state + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # Create plan states based on move type + if self.move_type == "cartesian": + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), + ] + is_linear = self.interpolation == "linear" + else: # joint space + target_qpos = self._ik_solve(target_pose, start_qpos) + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(qpos=target_qpos, move_type=MoveType.JOINT_MOVE), + ] + is_linear = False + + # Configure motion generation + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=is_linear, + interpolate_position_step=0.002, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + velocity_limit=velocity_limit, + acceleration_limit=acceleration_limit, + ), + ) + + result = self.plan_trajectory(target_states, options) + + # Return PlanResult directly - it contains all trajectory data + return result + + def validate( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Validate if move action is feasible.""" + try: + if isinstance(target, ObjectSemantics): + target_pose = self._get_object_pose(target.label) + else: + target_pose = target + + qpos_seed = start_qpos if start_qpos is not 
None else self._get_current_qpos() + + if self.move_type == "joint": + # For joint space moves, we need IK solvability + self._ik_solve(target_pose, qpos_seed) + else: + # For cartesian moves, just check IK + success, _ = self.robot.compute_ik( + pose=target_pose.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + if not success.all(): + return False + + return True + except Exception: + return False + + def _get_object_pose(self, label: str) -> torch.Tensor: + """Get current pose of object by label.""" + raise NotImplementedError( + "_get_object_pose must be implemented by subclass or " + "provided with environment-specific object management" + ) + + +# ============================================================================= +# Release Action +# ============================================================================= + + +class ReleaseAction(AtomicAction): + """Atomic action for releasing an object.""" + + def execute( + self, + target: Optional[Union[torch.Tensor, ObjectSemantics]] = None, + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> PlanResult: + """Execute release action. + + Args: + target: Optional target pose after release (for place operations) + start_qpos: Starting joint configuration + + Returns: + PlanResult with trajectory (may be empty for simple release) + """ + # Get current state + if start_qpos is None: + start_qpos = self._get_current_qpos() + + # If target is specified, move to that pose first + if target is not None: + if isinstance(target, ObjectSemantics): + # Move above the object + target_pose = self._get_object_pose(target.label) + approach_offset = torch.tensor([0, 0, 0.1], device=self.device) + target_pose = self._apply_offset(target_pose, approach_offset) + else: + target_pose = target + + target_states = [ + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), + ] + + options = MotionGenOptions( + control_part=self.control_part, + is_interpolate=True, + is_linear=False, + interpolate_position_step=0.002, + plan_opts=ToppraPlanOptions( + sample_interval=kwargs.get("sample_interval", 30), + ), + ) + + result = self.plan_trajectory(target_states, options) + else: + # Simple release - return success with current state + result = PlanResult( + success=True, + positions=start_qpos.unsqueeze(0), + ) + + # Open gripper (if applicable) + # This would be robot-specific and should be implemented by subclasses + self._open_gripper() + + return result + + def validate( + self, + target: Optional[Union[torch.Tensor, ObjectSemantics]] = None, + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Validate if release action is feasible.""" + # Release is generally always feasible + # If target is specified, validate that we can reach it + if target is not None and isinstance(target, torch.Tensor): + try: + qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + success, _ = self.robot.compute_ik( + pose=target.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + return success.all().item() + except Exception: + return False + return True + + def _open_gripper(self) -> None: + """Open the gripper to release the object. + + This is a placeholder method that should be implemented by subclasses + based on the specific robot hardware or simulation environment. 
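
        A typical override would command the gripper joints of the active
        control part to their open positions.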
+        """
+        # Placeholder - should be implemented by subclass
+        pass
+
+    def _get_object_pose(self, label: str) -> torch.Tensor:
+        """Get current pose of object by label."""
+        raise NotImplementedError(
+            "_get_object_pose must be implemented by subclass or "
+            "provided with environment-specific object management"
+        )
diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py
new file mode 100644
index 00000000..67d27ec7
--- /dev/null
+++ b/embodichain/lab/sim/atomic_actions/core.py
@@ -0,0 +1,371 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import torch
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+
+from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
+from embodichain.utils import configclass
+
+if TYPE_CHECKING:
+    from embodichain.lab.sim.planners import MotionGenerator
+    from embodichain.lab.sim.objects import Robot
+
+
+# =============================================================================
+# Affordance Classes
+# =============================================================================
+
+
+@dataclass
+class Affordance:
+    """Base class for affordance data.
+
+    Affordance represents interaction possibilities for an object.
+    This is the base class for specific affordance types.
+    """
+
+    object_label: str = ""
+    """Label of the object this affordance belongs to."""
+
+    def get_batch_size(self) -> int:
+        """Return the batch size of this affordance data."""
+        return 1
+
+
+@dataclass
+class GraspPose(Affordance):
+    """Grasp pose affordance containing a batch of 4x4 transformation matrices.
+
+    Each grasp pose represents a valid end-effector pose for grasping the object.
+    Multiple poses may be available for different grasp types (pinch, power, etc.)
+    or approach directions.
+    """
+
+    poses: torch.Tensor = field(default_factory=lambda: torch.eye(4).unsqueeze(0))
+    """Batch of grasp poses with shape [B, 4, 4].
+
+    Each pose is a 4x4 homogeneous transformation matrix representing
+    the end-effector pose in the object's local coordinate frame.
+    """
+
+    grasp_types: List[str] = field(default_factory=lambda: ["default"])
+    """List of grasp type labels for each pose in the batch.
+
+    Examples: "pinch", "power", "hook", "spherical", etc.
+    Length must match the batch dimension of `poses`.
+    """
+
+    confidence_scores: Optional[torch.Tensor] = None
+    """Optional confidence scores for each grasp pose with shape [B].
+
+    Higher values indicate more reliable and stable grasps.
+    Used for grasp selection when multiple options exist.
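+
+    Example (illustrative values; the higher-scored grasp is selected)::
+
+        grasp = GraspPose(
+            poses=torch.eye(4).repeat(2, 1, 1),
+            grasp_types=["pinch", "power"],
+            confidence_scores=torch.tensor([0.3, 0.9]),
+        )
+        best = grasp.get_best_grasp()  # returns poses[1]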
+ """ + + def get_batch_size(self) -> int: + """Return the number of grasp poses in this affordance.""" + return self.poses.shape[0] + + def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: + """Get grasp pose by type label. + + Args: + grasp_type: Type of grasp (e.g., "pinch", "power") + + Returns: + 4x4 pose tensor if found, None otherwise + """ + if grasp_type in self.grasp_types: + idx = self.grasp_types.index(grasp_type) + return self.poses[idx] + return None + + def get_best_grasp(self) -> torch.Tensor: + """Get the best grasp pose based on confidence scores. + + Returns: + 4x4 pose tensor with highest confidence + """ + if self.confidence_scores is not None: + best_idx = torch.argmax(self.confidence_scores) + return self.poses[best_idx] + return self.poses[0] # Default to first if no scores available + + +@dataclass +class InteractionPoints(Affordance): + """Interaction points affordance containing a batch of 3D positions. + + Interaction points define specific locations on an object surface + that can be used for contact-based interactions (pushing, poking, + touching) rather than full grasping. + """ + + points: torch.Tensor = field(default_factory=lambda: torch.zeros(1, 3)) + """Batch of 3D interaction points with shape [B, 3]. + + Each point is a 3D coordinate in the object's local coordinate frame. + """ + + normals: Optional[torch.Tensor] = None + """Optional surface normals at each interaction point with shape [B, 3]. + + Normals indicate the surface orientation at each point, + useful for determining approach directions. + """ + + point_types: List[str] = field(default_factory=lambda: ["contact"]) + """List of interaction types for each point. + + Examples: "push", "poke", "touch", "support", "handle", etc. + Length must match the batch dimension of `points`. + """ + + contact_regions: Optional[List[str]] = None + """Optional labels for object regions each point belongs to. + + Examples: "handle", "face", "edge", "corner", "center" + """ + + def get_batch_size(self) -> int: + """Return the number of interaction points in this affordance.""" + return self.points.shape[0] + + def get_points_by_type(self, point_type: str) -> Optional[torch.Tensor]: + """Get all points of a specific interaction type. + + Args: + point_type: Type of interaction (e.g., "push", "handle") + + Returns: + Tensor of points with shape [N, 3] if found, None otherwise + """ + indices = [i for i, t in enumerate(self.point_types) if t == point_type] + if indices: + return self.points[indices] + return None + + def get_approach_direction(self, point_idx: int) -> torch.Tensor: + """Get recommended approach direction for a given point. + + Args: + point_idx: Index of the point + + Returns: + 3D approach direction vector (normalized) + """ + if self.normals is not None: + # Approach from the opposite direction of the surface normal + return -self.normals[point_idx] + # Default: approach from positive z + return torch.tensor([0, 0, 1], dtype=self.points.dtype, device=self.points.device) + + +# ============================================================================= +# ObjectSemantics +# ============================================================================= + + +@dataclass +class ObjectSemantics: + """Semantic information about interaction target. + + This class encapsulates all semantic and geometric information about + an object needed for intelligent interaction planning. 
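+
+    Example (a hand-built instance; the geometry and property values are
+    illustrative only)::
+
+        semantics = ObjectSemantics(
+            label="bottle",
+            affordance=GraspPose(object_label="bottle"),
+            geometry={"bounding_box": [0.06, 0.06, 0.2]},
+            properties={"mass": 0.5, "friction": 0.8},
+            uid="bottle_001",
+        )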
+ """ + + label: str + """Object category label (e.g., 'apple', 'bottle').""" + + affordance: Affordance + """Affordance data (GraspPose, InteractionPoints, etc.).""" + + geometry: Dict[str, Any] + """Geometric information including bounding box, mesh data.""" + + properties: Dict[str, Any] + """Physical properties: mass, friction, etc.""" + + uid: Optional[str] = None + """Optional unique identifier for the object instance.""" + + +# ============================================================================= +# ActionCfg and AtomicAction +# ============================================================================= + + +@configclass +class ActionCfg: + """Configuration for atomic actions.""" + + control_part: str = "left_arm" + """Control part name for the action.""" + + interpolation_type: str = "linear" + """Interpolation type: 'linear', 'cubic', or 'toppra'.""" + + velocity_limit: Optional[float] = None + """Maximum velocity for trajectory.""" + + acceleration_limit: Optional[float] = None + """Maximum acceleration for trajectory.""" + + +class AtomicAction(ABC): + """Abstract base class for atomic actions. + + All atomic actions use PlanResult from embodichain.lab.sim.planners + as the return type for execute() method, ensuring consistency with + the existing motion planning infrastructure. + """ + + def __init__( + self, + motion_generator: "MotionGenerator", + robot: "Robot", + control_part: str, + device: torch.device = torch.device("cuda"), + ): + self.motion_generator = motion_generator + self.robot = robot + self.control_part = control_part + self.device = device + + @abstractmethod + def execute( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> PlanResult: + """Execute the atomic action. + + Args: + target: Target pose [4, 4] or ObjectSemantics + start_qpos: Starting joint configuration [DOF] + **kwargs: Additional action-specific parameters + + Returns: + PlanResult with trajectory (positions, velocities, accelerations), + end-effector poses (xpos_list), and success status. + Use result.positions for joint trajectory [T, DOF]. + Use result.xpos_list for EE poses [T, 4, 4]. + """ + pass + + @abstractmethod + def validate( + self, + target: Union[torch.Tensor, ObjectSemantics], + start_qpos: Optional[torch.Tensor] = None, + **kwargs + ) -> bool: + """Validate if the action is feasible without executing. + + This method performs a quick feasibility check (e.g., IK solvability) + without generating a full trajectory. + + Returns: + True if action appears feasible, False otherwise + """ + pass + + def _get_current_qpos(self) -> torch.Tensor: + """Get current joint configuration from robot.""" + return self.robot.get_qpos()[0] # Assuming single environment + + def _ik_solve( + self, + target_pose: torch.Tensor, + qpos_seed: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """Solve IK for target pose. + + Args: + target_pose: Target pose [4, 4] + qpos_seed: Seed configuration [DOF] + + Returns: + Joint configuration [DOF] + + Raises: + RuntimeError: If IK fails to find a solution + """ + if qpos_seed is None: + qpos_seed = self._get_current_qpos() + + success, qpos = self.robot.compute_ik( + pose=target_pose.unsqueeze(0), + qpos_seed=qpos_seed.unsqueeze(0), + name=self.control_part, + ) + + if not success.all(): + raise RuntimeError(f"IK failed for target pose: {target_pose}") + + return qpos.squeeze(0) + + def _fk_compute(self, qpos: torch.Tensor) -> torch.Tensor: + """Compute forward kinematics. 
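+
+        A thin wrapper over ``robot.compute_fk``: a batch dimension is added
+        for single-configuration inputs and squeezed back out of the result.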
+ + Args: + qpos: Joint configuration [DOF] or [B, DOF] + + Returns: + End-effector pose [4, 4] or [B, 4, 4] + """ + if qpos.dim() == 1: + qpos = qpos.unsqueeze(0) + + xpos = self.robot.compute_fk( + qpos=qpos, + name=self.control_part, + to_matrix=True, + ) + + return xpos.squeeze(0) if xpos.shape[0] == 1 else xpos + + def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tensor: + """Apply offset to pose in local frame. + + Args: + pose: Base pose [4, 4] + offset: Offset in local frame [3] + + Returns: + Pose with offset applied [4, 4] + """ + result = pose.clone() + result[:3, 3] += pose[:3, :3] @ offset + return result + + def plan_trajectory( + self, + target_states: List[PlanState], + options: Optional["MotionGenOptions"] = None, + ) -> "PlanResult": + """Plan trajectory using motion generator.""" + from embodichain.lab.sim.planners import MotionGenOptions + + if options is None: + options = MotionGenOptions(control_part=self.control_part) + return self.motion_generator.generate(target_states, options) diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py new file mode 100644 index 00000000..1d58a4f4 --- /dev/null +++ b/embodichain/lab/sim/atomic_actions/engine.py @@ -0,0 +1,328 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from __future__ import annotations + +import torch +from typing import Dict, List, Optional, Type, Union, TYPE_CHECKING + +from embodichain.lab.sim.planners import PlanResult +from .core import AtomicAction, ObjectSemantics, ActionCfg + +if TYPE_CHECKING: + from embodichain.lab.sim.planners import MotionGenerator + from embodichain.lab.sim.objects import Robot + + +# ============================================================================= +# Global Action Registry +# ============================================================================= + +_global_action_registry: Dict[str, Type[AtomicAction]] = {} +_global_action_configs: Dict[str, Type[ActionCfg]] = {} + + +def register_action( + name: str, + action_class: Type[AtomicAction], + config_class: Optional[Type[ActionCfg]] = None, +) -> None: + """Register a custom atomic action class globally. + + This function allows registration of custom action types that can then + be instantiated by the AtomicActionEngine. + + Args: + name: Unique identifier for the action type + action_class: The AtomicAction subclass to register + config_class: Optional configuration class for the action + + Example: + >>> class MyCustomAction(AtomicAction): + ... def execute(self, target, **kwargs): + ... # Implementation + ... pass + ... def validate(self, target, **kwargs): + ... 
return True + >>> register_action("my_custom", MyCustomAction) + """ + _global_action_registry[name] = action_class + if config_class is not None: + _global_action_configs[name] = config_class + + +def unregister_action(name: str) -> None: + """Unregister an action type. + + Args: + name: The action type identifier to remove + """ + _global_action_registry.pop(name, None) + _global_action_configs.pop(name, None) + + +def get_registered_actions() -> Dict[str, Type[AtomicAction]]: + """Get all registered action types. + + Returns: + Dictionary mapping action names to their classes + """ + return _global_action_registry.copy() + + +# ============================================================================= +# Semantic Analyzer +# ============================================================================= + + +class SemanticAnalyzer: + """Analyzes objects and provides ObjectSemantics for atomic actions.""" + + def __init__(self): + self._object_cache: Dict[str, ObjectSemantics] = {} + + def analyze(self, label: str) -> ObjectSemantics: + """Analyze object by label and return ObjectSemantics. + + This is a placeholder implementation that should be extended + with actual object detection and affordance computation. + + Args: + label: Object category label (e.g., "apple", "bottle") + + Returns: + ObjectSemantics containing affordance data + """ + # Check cache first + if label in self._object_cache: + return self._object_cache[label] + + # Create default semantics (placeholder implementation) + from .core import GraspPose, InteractionPoints + + # Generate default grasp poses based on object type + default_poses = torch.eye(4).unsqueeze(0) + default_poses[0, 2, 3] = 0.1 # Default offset + + grasp_affordance = GraspPose( + object_label=label, + poses=default_poses, + grasp_types=["default"], + ) + + # Default interaction points + interaction_affordance = InteractionPoints( + object_label=label, + points=torch.zeros(1, 3), + point_types=["contact"], + ) + + semantics = ObjectSemantics( + label=label, + affordance=grasp_affordance, + geometry={"bounding_box": [0.1, 0.1, 0.1]}, + properties={"mass": 1.0, "friction": 0.5}, + ) + + # Cache and return + self._object_cache[label] = semantics + return semantics + + def clear_cache(self) -> None: + """Clear the object semantics cache.""" + self._object_cache.clear() + + +# ============================================================================= +# Atomic Action Engine +# ============================================================================= + + +class AtomicActionEngine: + """Central engine for managing and executing atomic actions.""" + + def __init__( + self, + robot: "Robot", + motion_generator: "MotionGenerator", + device: torch.device = torch.device("cuda"), + ): + self.robot = robot + self.motion_generator = motion_generator + self.device = device + + # Registry of action instances + self._actions: Dict[str, AtomicAction] = {} + + # Semantic analyzer for object understanding + self._semantic_analyzer = SemanticAnalyzer() + + # Initialize default actions + self._init_default_actions() + + def _init_default_actions(self): + """Initialize default atomic action instances.""" + from .actions import ReachAction, GraspAction, MoveAction, ReleaseAction + + control_parts = getattr(self.robot, 'control_parts', None) or ["default"] + + for part in control_parts: + self.register_action( + f"reach_{part}", + ReachAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + 
self.register_action( + f"grasp_{part}", + GraspAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + self.register_action( + f"move_{part}", + MoveAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + self.register_action( + f"release_{part}", + ReleaseAction( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + ) + + # Register action classes for dynamic instantiation + for action_name, action_class in _global_action_registry.items(): + # Don't override default actions + if action_name not in ["reach", "grasp", "move", "release"]: + for part in control_parts: + action_key = f"{action_name}_{part}" + if action_key not in self._actions: + # Create instance with default config + try: + instance = action_class( + motion_generator=self.motion_generator, + robot=self.robot, + control_part=part, + device=self.device, + ) + self._actions[action_key] = instance + except Exception: + # Skip if instantiation fails + pass + + def register_action(self, name: str, action: AtomicAction): + """Register a custom atomic action.""" + self._actions[name] = action + + def unregister_action(self, name: str) -> bool: + """Unregister an action by name. + + Args: + name: Name of the action to unregister + + Returns: + True if action was found and removed, False otherwise + """ + if name in self._actions: + del self._actions[name] + return True + return False + + def get_action_names(self) -> List[str]: + """Get list of registered action names.""" + return list(self._actions.keys()) + + def execute( + self, + action_name: str, + target: Union[torch.Tensor, str, ObjectSemantics], + control_part: Optional[str] = None, + **kwargs + ) -> PlanResult: + """Execute an atomic action. + + Args: + action_name: Name of registered action + target: Target pose, object label, or ObjectSemantics + control_part: Robot control part to use + **kwargs: Additional action parameters + + Returns: + PlanResult with trajectory (positions, velocities, accelerations), + end-effector poses (xpos_list), and success status. 
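+
+        Example (assumes the default actions were registered for a
+        ``"left_arm"`` control part)::
+
+            result = engine.execute("reach", "apple", control_part="left_arm")
+            if result.success:
+                joint_traj = result.positions  # [T, DOF]
+                eef_poses = result.xpos_list   # [T, 4, 4]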
+ """ + # Resolve action + if control_part: + action_key = f"{action_name}_{control_part}" + else: + action_key = action_name + + if action_key not in self._actions: + raise ValueError(f"Unknown action: {action_key}") + + action = self._actions[action_key] + + # Resolve target to ObjectSemantics if string label provided + if isinstance(target, str): + target = self._semantic_analyzer.analyze(target) + + # Execute action - returns PlanResult directly + return action.execute(target, **kwargs) + + def validate( + self, + action_name: str, + target: Union[torch.Tensor, str, ObjectSemantics], + control_part: Optional[str] = None, + **kwargs + ) -> bool: + """Validate if an action is feasible without executing.""" + if control_part: + action_key = f"{action_name}_{control_part}" + else: + action_key = action_name + + if action_key not in self._actions: + return False + + action = self._actions[action_key] + + if isinstance(target, str): + target = self._semantic_analyzer.analyze(target) + + return action.validate(target, **kwargs) + + def get_semantic_analyzer(self) -> SemanticAnalyzer: + """Get the semantic analyzer for object understanding.""" + return self._semantic_analyzer + + def set_semantic_analyzer(self, analyzer: SemanticAnalyzer) -> None: + """Set a custom semantic analyzer.""" + self._semantic_analyzer = analyzer diff --git a/tests/sim/atomic_actions/__init__.py b/tests/sim/atomic_actions/__init__.py new file mode 100644 index 00000000..0671165d --- /dev/null +++ b/tests/sim/atomic_actions/__init__.py @@ -0,0 +1,17 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +"""Tests for atomic actions module.""" diff --git a/tests/sim/atomic_actions/test_core.py b/tests/sim/atomic_actions/test_core.py new file mode 100644 index 00000000..2b9cbb13 --- /dev/null +++ b/tests/sim/atomic_actions/test_core.py @@ -0,0 +1,229 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ----------------------------------------------------------------------------
+
+import torch
+import pytest
+
+# PlanResult is needed by the stub actions defined in the registry tests below.
+from embodichain.lab.sim.planners import PlanResult
+from embodichain.lab.sim.atomic_actions import (
+    Affordance,
+    GraspPose,
+    InteractionPoints,
+    ObjectSemantics,
+    ActionCfg,
+    register_action,
+    unregister_action,
+    get_registered_actions,
+)
+
+
+class TestAffordance:
+    """Test affordance base class and subclasses."""
+
+    def test_affordance_base(self):
+        """Test base affordance class."""
+        aff = Affordance(object_label="test_object")
+        assert aff.object_label == "test_object"
+        assert aff.get_batch_size() == 1
+
+    def test_grasp_pose_default(self):
+        """Test GraspPose with default values."""
+        grasp = GraspPose(object_label="bottle")
+        assert grasp.object_label == "bottle"
+        assert grasp.poses.shape == (1, 4, 4)
+        assert grasp.grasp_types == ["default"]
+        assert grasp.get_batch_size() == 1
+
+    def test_grasp_pose_multiple(self):
+        """Test GraspPose with multiple poses."""
+        poses = torch.stack([
+            torch.eye(4),
+            torch.eye(4),
+            torch.eye(4),
+        ])
+        grasp = GraspPose(
+            object_label="bottle",
+            poses=poses,
+            grasp_types=["pinch", "power", "hook"],
+        )
+        assert grasp.get_batch_size() == 3
+
+        # Test get_grasp_by_type
+        pinch_pose = grasp.get_grasp_by_type("pinch")
+        assert pinch_pose is not None
+        assert torch.allclose(pinch_pose, torch.eye(4))
+
+        nonexistent = grasp.get_grasp_by_type("nonexistent")
+        assert nonexistent is None
+
+    def test_grasp_pose_best_grasp(self):
+        """Test get_best_grasp method."""
+        poses = torch.stack([
+            torch.eye(4),
+            torch.eye(4) * 2,
+        ])
+        confidence = torch.tensor([0.7, 0.9])
+        grasp = GraspPose(
+            poses=poses,
+            grasp_types=["low_conf", "high_conf"],
+            confidence_scores=confidence,
+        )
+
+        best = grasp.get_best_grasp()
+        # Should return the second pose (higher confidence)
+        assert torch.allclose(best, poses[1])
+
+    def test_interaction_points(self):
+        """Test InteractionPoints class."""
+        points = torch.tensor([
+            [0.1, 0.0, 0.0],
+            [0.0, 0.1, 0.0],
+            [0.0, 0.0, 0.1],
+        ])
+        normals = torch.tensor([
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+            [0.0, 0.0, 1.0],
+        ])
+        interaction = InteractionPoints(
+            object_label="cube",
+            points=points,
+            normals=normals,
+            point_types=["push", "poke", "touch"],
+        )
+
+        assert interaction.get_batch_size() == 3
+
+        # Test get_points_by_type
+        push_points = interaction.get_points_by_type("push")
+        assert push_points is not None
+        assert torch.allclose(push_points, points[0:1])
+
+        nonexistent = interaction.get_points_by_type("nonexistent")
+        assert nonexistent is None
+
+        # Test get_approach_direction
+        approach = interaction.get_approach_direction(0)
+        assert torch.allclose(approach, torch.tensor([-1.0, 0.0, 0.0]))
+
+    def test_interaction_points_no_normals(self):
+        """Test InteractionPoints without normals."""
+        points = torch.tensor([[0.1, 0.2, 0.3]])
+        interaction = InteractionPoints(points=points)
+
+        # Default approach direction should be +z
+        approach = interaction.get_approach_direction(0)
+        assert torch.allclose(approach, torch.tensor([0.0, 0.0, 1.0]))
+
+
+class TestObjectSemantics:
+    """Test ObjectSemantics dataclass."""
+
+    def test_basic_creation(self):
+        """Test basic ObjectSemantics creation."""
+        affordance = GraspPose(object_label="bottle")
+        semantics = ObjectSemantics(
+            label="bottle",
+            affordance=affordance,
+            geometry={"bounding_box": [0.1, 0.2, 0.3]},
+            properties={"mass": 0.5, "friction": 0.8},
+            uid="bottle_001",
+        )
+
+        assert semantics.label == "bottle"
+        assert semantics.uid == "bottle_001"
+        assert 
semantics.affordance.object_label == "bottle" + assert semantics.properties["mass"] == 0.5 + + def test_no_uid(self): + """Test ObjectSemantics without UID.""" + affordance = GraspPose() + semantics = ObjectSemantics( + label="apple", + affordance=affordance, + geometry={}, + properties={}, + ) + + assert semantics.uid is None + + +class TestActionCfg: + """Test ActionCfg dataclass.""" + + def test_defaults(self): + """Test ActionCfg default values.""" + cfg = ActionCfg() + assert cfg.control_part == "left_arm" + assert cfg.interpolation_type == "linear" + assert cfg.velocity_limit is None + assert cfg.acceleration_limit is None + + def test_custom_values(self): + """Test ActionCfg with custom values.""" + cfg = ActionCfg( + control_part="right_arm", + interpolation_type="toppra", + velocity_limit=0.5, + acceleration_limit=1.0, + ) + assert cfg.control_part == "right_arm" + assert cfg.interpolation_type == "toppra" + assert cfg.velocity_limit == 0.5 + assert cfg.acceleration_limit == 1.0 + + +class TestActionRegistry: + """Test action registry functions.""" + + def test_register_and_unregister(self): + """Test registering and unregistering actions.""" + from embodichain.lab.sim.atomic_actions import AtomicAction + + class TestAction(AtomicAction): + def execute(self, target, **kwargs): + return PlanResult(success=True) + + def validate(self, target, **kwargs): + return True + + # Register + register_action("test", TestAction) + assert "test" in get_registered_actions() + + # Unregister + unregister_action("test") + assert "test" not in get_registered_actions() + + def test_get_registered_actions_copy(self): + """Test that get_registered_actions returns a copy.""" + from embodichain.lab.sim.atomic_actions import AtomicAction + + initial = get_registered_actions() + + class DummyAction(AtomicAction): + def execute(self, target, **kwargs): + return PlanResult(success=True) + + def validate(self, target, **kwargs): + return True + + register_action("dummy", DummyAction) + + # Original should not contain the new action + assert "dummy" not in initial + + # Cleanup + unregister_action("dummy") From 1874fd0223cc24933e949165c1b82b54101f70c6 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Sun, 19 Apr 2026 12:40:53 +0000 Subject: [PATCH 2/9] wip --- .../lab/sim/atomic_actions/__init__.py | 7 +- embodichain/lab/sim/atomic_actions/actions.py | 54 +++++------ embodichain/lab/sim/atomic_actions/core.py | 95 ++++--------------- embodichain/lab/sim/atomic_actions/engine.py | 14 +-- tests/sim/atomic_actions/test_core.py | 46 +++++---- 5 files changed, 87 insertions(+), 129 deletions(-) diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py index cb1ea4c4..f3f92da7 100644 --- a/embodichain/lab/sim/atomic_actions/__init__.py +++ b/embodichain/lab/sim/atomic_actions/__init__.py @@ -35,7 +35,12 @@ ReleaseAction, MoveAction, ) -from .engine import AtomicActionEngine, register_action, unregister_action, get_registered_actions +from .engine import ( + AtomicActionEngine, + register_action, + unregister_action, + get_registered_actions, +) __all__ = [ # Core classes diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py index 342055cd..d15a7eff 100644 --- a/embodichain/lab/sim/atomic_actions/actions.py +++ b/embodichain/lab/sim/atomic_actions/actions.py @@ -54,7 +54,7 @@ def execute( start_qpos: Optional[torch.Tensor] = None, approach_offset: Optional[torch.Tensor] = None, use_affordance: bool = True, - 
**kwargs + **kwargs, ) -> PlanResult: """Execute reach action. @@ -85,14 +85,8 @@ def execute( # Create plan states target_states = [ - PlanState( - qpos=start_qpos, - move_type=MoveType.JOINT_MOVE - ), - PlanState( - xpos=approach_pose, - move_type=MoveType.EEF_MOVE - ), + PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), + PlanState(xpos=approach_pose, move_type=MoveType.EEF_MOVE), ] # Plan trajectory @@ -115,7 +109,7 @@ def validate( self, target: Union[torch.Tensor, ObjectSemantics], start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> bool: """Check if the reach action is feasible.""" try: @@ -126,7 +120,9 @@ def validate( target_pose = target # Attempt IK - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + qpos_seed = ( + start_qpos if start_qpos is not None else self._get_current_qpos() + ) success, _ = self.robot.compute_ik( pose=target_pose.unsqueeze(0), qpos_seed=qpos_seed.unsqueeze(0), @@ -137,9 +133,7 @@ def validate( return False def _resolve_target_pose( - self, - semantics: ObjectSemantics, - use_affordance: bool + self, semantics: ObjectSemantics, use_affordance: bool ) -> torch.Tensor: """Resolve target pose from object semantics.""" from .core import GraspPose @@ -195,7 +189,7 @@ def execute( start_qpos: Optional[torch.Tensor] = None, use_affordance: bool = True, grasp_type: str = "default", # "default", "pinch", "power" - **kwargs + **kwargs, ) -> PlanResult: """Execute grasp action. @@ -242,12 +236,16 @@ def validate( self, target: ObjectSemantics, start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> bool: """Validate if grasp is feasible.""" try: - grasp_pose = self._resolve_grasp_pose(target, use_affordance=True, grasp_type="default") - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + grasp_pose = self._resolve_grasp_pose( + target, use_affordance=True, grasp_type="default" + ) + qpos_seed = ( + start_qpos if start_qpos is not None else self._get_current_qpos() + ) success, _ = self.robot.compute_ik( pose=grasp_pose.unsqueeze(0), qpos_seed=qpos_seed.unsqueeze(0), @@ -258,10 +256,7 @@ def validate( return False def _resolve_grasp_pose( - self, - semantics: ObjectSemantics, - use_affordance: bool, - grasp_type: str + self, semantics: ObjectSemantics, use_affordance: bool, grasp_type: str ) -> torch.Tensor: """Resolve grasp pose from object semantics.""" from .core import GraspPose @@ -343,7 +338,6 @@ def execute( offset: Optional[torch.Tensor] = None, velocity_limit: Optional[float] = None, acceleration_limit: Optional[float] = None, - **kwargs ) -> PlanResult: """Execute move action. @@ -405,7 +399,7 @@ def validate( self, target: Union[torch.Tensor, ObjectSemantics], start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> bool: """Validate if move action is feasible.""" try: @@ -414,7 +408,9 @@ def validate( else: target_pose = target - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + qpos_seed = ( + start_qpos if start_qpos is not None else self._get_current_qpos() + ) if self.move_type == "joint": # For joint space moves, we need IK solvability @@ -453,7 +449,7 @@ def execute( self, target: Optional[Union[torch.Tensor, ObjectSemantics]] = None, start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> PlanResult: """Execute release action. 
@@ -511,14 +507,16 @@ def validate( self, target: Optional[Union[torch.Tensor, ObjectSemantics]] = None, start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> bool: """Validate if release action is feasible.""" # Release is generally always feasible # If target is specified, validate that we can reach it if target is not None and isinstance(target, torch.Tensor): try: - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() + qpos_seed = ( + start_qpos if start_qpos is not None else self._get_current_qpos() + ) success, _ = self.robot.compute_ik( pose=target.unsqueeze(0), qpos_seed=qpos_seed.unsqueeze(0), diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py index 67d27ec7..75b67943 100644 --- a/embodichain/lab/sim/atomic_actions/core.py +++ b/embodichain/lab/sim/atomic_actions/core.py @@ -45,7 +45,8 @@ class Affordance: object_label: str = "" """Label of the object this affordance belongs to.""" - def get_batch_size(self) -> int: + @property + def batch_size(self) -> int: """Return the batch size of this affordance data.""" return 1 @@ -66,38 +67,18 @@ class GraspPose(Affordance): the end-effector pose in the object's local coordinate frame. """ - grasp_types: List[str] = field(default_factory=lambda: ["default"]) - """List of grasp type labels for each pose in the batch. - - Examples: "pinch", "power", "hook", "spherical", etc. - Length must match the batch dimension of `poses`. - """ - - confidence_scores: Optional[torch.Tensor] = None + confidence_scores: torch.Tensor | None = None """Optional confidence scores for each grasp pose with shape [B]. Higher values indicate more reliable/ stable grasps. Used for grasp selection when multiple options exist. """ - def get_batch_size(self) -> int: + @property + def batch_size(self) -> int: """Return the number of grasp poses in this affordance.""" return self.poses.shape[0] - def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: - """Get grasp pose by type label. - - Args: - grasp_type: Type of grasp (e.g., "pinch", "power") - - Returns: - 4x4 pose tensor if found, None otherwise - """ - if grasp_type in self.grasp_types: - idx = self.grasp_types.index(grasp_type) - return self.poses[idx] - return None - def get_best_grasp(self) -> torch.Tensor: """Get the best grasp pose based on confidence scores. @@ -125,44 +106,24 @@ class InteractionPoints(Affordance): Each point is a 3D coordinate in the object's local coordinate frame. """ - normals: Optional[torch.Tensor] = None + normals: torch.Tensor | None = None """Optional surface normals at each interaction point with shape [B, 3]. Normals indicate the surface orientation at each point, useful for determining approach directions. """ - point_types: List[str] = field(default_factory=lambda: ["contact"]) - """List of interaction types for each point. - - Examples: "push", "poke", "touch", "support", "handle", etc. - Length must match the batch dimension of `points`. - """ - - contact_regions: Optional[List[str]] = None + contact_regions: torch.Tensor | None = None """Optional labels for object regions each point belongs to. Examples: "handle", "face", "edge", "corner", "center" """ - def get_batch_size(self) -> int: + @property + def batch_size(self) -> int: """Return the number of interaction points in this affordance.""" return self.points.shape[0] - def get_points_by_type(self, point_type: str) -> Optional[torch.Tensor]: - """Get all points of a specific interaction type. 
- - Args: - point_type: Type of interaction (e.g., "push", "handle") - - Returns: - Tensor of points with shape [N, 3] if found, None otherwise - """ - indices = [i for i, t in enumerate(self.point_types) if t == point_type] - if indices: - return self.points[indices] - return None - def get_approach_direction(self, point_idx: int) -> torch.Tensor: """Get recommended approach direction for a given point. @@ -176,7 +137,9 @@ def get_approach_direction(self, point_idx: int) -> torch.Tensor: # Approach from the opposite direction of the surface normal return -self.normals[point_idx] # Default: approach from positive z - return torch.tensor([0, 0, 1], dtype=self.points.dtype, device=self.points.device) + return torch.tensor( + [0, 0, 1], dtype=self.points.dtype, device=self.points.device + ) # ============================================================================= @@ -192,7 +155,7 @@ class ObjectSemantics: an object needed for intelligent interaction planning. """ - label: str + label: str = "none" """Object category label (e.g., 'apple', 'bottle').""" affordance: Affordance @@ -221,13 +184,7 @@ class ActionCfg: """Control part name for the action.""" interpolation_type: str = "linear" - """Interpolation type: 'linear', 'cubic', or 'toppra'.""" - - velocity_limit: Optional[float] = None - """Maximum velocity for trajectory.""" - - acceleration_limit: Optional[float] = None - """Maximum acceleration for trajectory.""" + """Interpolation type: 'linear', 'cubic'.""" class AtomicAction(ABC): @@ -240,22 +197,18 @@ class AtomicAction(ABC): def __init__( self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), + motion_generator: MotionGenerator, ): self.motion_generator = motion_generator - self.robot = robot - self.control_part = control_part - self.device = device + self.robot = motion_generator.robot + self.device = self.robot.device @abstractmethod def execute( self, target: Union[torch.Tensor, ObjectSemantics], start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> PlanResult: """Execute the atomic action. @@ -277,7 +230,7 @@ def validate( self, target: Union[torch.Tensor, ObjectSemantics], start_qpos: Optional[torch.Tensor] = None, - **kwargs + **kwargs, ) -> bool: """Validate if the action is feasible without executing. @@ -289,14 +242,8 @@ def validate( """ pass - def _get_current_qpos(self) -> torch.Tensor: - """Get current joint configuration from robot.""" - return self.robot.get_qpos()[0] # Assuming single environment - def _ik_solve( - self, - target_pose: torch.Tensor, - qpos_seed: Optional[torch.Tensor] = None + self, target_pose: torch.Tensor, qpos_seed: Optional[torch.Tensor] = None ) -> torch.Tensor: """Solve IK for target pose. 
@@ -311,7 +258,7 @@ def _ik_solve( RuntimeError: If IK fails to find a solution """ if qpos_seed is None: - qpos_seed = self._get_current_qpos() + qpos_seed = self.robot.get_qpos() success, qpos = self.robot.compute_ik( pose=target_pose.unsqueeze(0), diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py index 1d58a4f4..b152adfa 100644 --- a/embodichain/lab/sim/atomic_actions/engine.py +++ b/embodichain/lab/sim/atomic_actions/engine.py @@ -177,7 +177,7 @@ def _init_default_actions(self): """Initialize default atomic action instances.""" from .actions import ReachAction, GraspAction, MoveAction, ReleaseAction - control_parts = getattr(self.robot, 'control_parts', None) or ["default"] + control_parts = getattr(self.robot, "control_parts", None) or ["default"] for part in control_parts: self.register_action( @@ -187,7 +187,7 @@ def _init_default_actions(self): robot=self.robot, control_part=part, device=self.device, - ) + ), ) self.register_action( f"grasp_{part}", @@ -196,7 +196,7 @@ def _init_default_actions(self): robot=self.robot, control_part=part, device=self.device, - ) + ), ) self.register_action( f"move_{part}", @@ -205,7 +205,7 @@ def _init_default_actions(self): robot=self.robot, control_part=part, device=self.device, - ) + ), ) self.register_action( f"release_{part}", @@ -214,7 +214,7 @@ def _init_default_actions(self): robot=self.robot, control_part=part, device=self.device, - ) + ), ) # Register action classes for dynamic instantiation @@ -264,7 +264,7 @@ def execute( action_name: str, target: Union[torch.Tensor, str, ObjectSemantics], control_part: Optional[str] = None, - **kwargs + **kwargs, ) -> PlanResult: """Execute an atomic action. @@ -301,7 +301,7 @@ def validate( action_name: str, target: Union[torch.Tensor, str, ObjectSemantics], control_part: Optional[str] = None, - **kwargs + **kwargs, ) -> bool: """Validate if an action is feasible without executing.""" if control_part: diff --git a/tests/sim/atomic_actions/test_core.py b/tests/sim/atomic_actions/test_core.py index 2b9cbb13..bbb02a36 100644 --- a/tests/sim/atomic_actions/test_core.py +++ b/tests/sim/atomic_actions/test_core.py @@ -48,11 +48,13 @@ def test_grasp_pose_default(self): def test_grasp_pose_multiple(self): """Test GraspPose with multiple poses.""" - poses = torch.stack([ - torch.eye(4), - torch.eye(4), - torch.eye(4), - ]) + poses = torch.stack( + [ + torch.eye(4), + torch.eye(4), + torch.eye(4), + ] + ) grasp = GraspPose( object_label="bottle", poses=poses, @@ -70,10 +72,12 @@ def test_grasp_pose_multiple(self): def test_grasp_pose_best_grasp(self): """Test get_best_grasp method.""" - poses = torch.stack([ - torch.eye(4), - torch.eye(4) * 2, - ]) + poses = torch.stack( + [ + torch.eye(4), + torch.eye(4) * 2, + ] + ) confidence = torch.tensor([0.7, 0.9]) grasp = GraspPose( poses=poses, @@ -87,16 +91,20 @@ def test_grasp_pose_best_grasp(self): def test_interaction_points(self): """Test InteractionPoints class.""" - points = torch.tensor([ - [0.1, 0.0, 0.0], - [0.0, 0.1, 0.0], - [0.0, 0.0, 0.1], - ]) - normals = torch.tensor([ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ]) + points = torch.tensor( + [ + [0.1, 0.0, 0.0], + [0.0, 0.1, 0.0], + [0.0, 0.0, 0.1], + ] + ) + normals = torch.tensor( + [ + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0], + ] + ) interaction = InteractionPoints( object_label="cube", points=points, From 7cc7ec203671fb301331788a7b8968ba6b0feaf1 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Mon, 20 Apr 2026 02:18:46 
+0000 Subject: [PATCH 3/9] Fix atomic actions core module implementation bugs - Fix dataclass field ordering in ObjectSemantics (non-default follows default) - Convert batch_size property to get_batch_size() method for consistency - Add missing grasp_types field and get_grasp_by_type() method to GraspPose - Add missing point_types field and get_points_by_type() method to InteractionPoints - Add missing velocity_limit and acceleration_limit fields to ActionCfg Co-Authored-By: Claude Opus 4.6 --- embodichain/lab/sim/atomic_actions/core.py | 64 +++++++++++++++++----- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py index 75b67943..42337fa3 100644 --- a/embodichain/lab/sim/atomic_actions/core.py +++ b/embodichain/lab/sim/atomic_actions/core.py @@ -45,8 +45,7 @@ class Affordance: object_label: str = "" """Label of the object this affordance belongs to.""" - @property - def batch_size(self) -> int: + def get_batch_size(self) -> int: """Return the batch size of this affordance data.""" return 1 @@ -67,6 +66,13 @@ class GraspPose(Affordance): the end-effector pose in the object's local coordinate frame. """ + grasp_types: List[str] = field(default_factory=lambda: ["default"]) + """List of grasp type labels for each pose in the batch. + + Examples: "pinch", "power", "hook", "spherical", etc. + Length must match the batch dimension of `poses`. + """ + confidence_scores: torch.Tensor | None = None """Optional confidence scores for each grasp pose with shape [B]. @@ -74,11 +80,24 @@ class GraspPose(Affordance): Used for grasp selection when multiple options exist. """ - @property - def batch_size(self) -> int: + def get_batch_size(self) -> int: """Return the number of grasp poses in this affordance.""" return self.poses.shape[0] + def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: + """Get grasp pose by type label. + + Args: + grasp_type: Type of grasp (e.g., "pinch", "power") + + Returns: + 4x4 pose tensor if found, None otherwise + """ + if grasp_type in self.grasp_types: + idx = self.grasp_types.index(grasp_type) + return self.poses[idx] + return None + def get_best_grasp(self) -> torch.Tensor: """Get the best grasp pose based on confidence scores. @@ -113,14 +132,27 @@ class InteractionPoints(Affordance): useful for determining approach directions. """ - contact_regions: torch.Tensor | None = None - """Optional labels for object regions each point belongs to. + point_types: List[str] = field(default_factory=list) + """Optional labels for each point's interaction type. - Examples: "handle", "face", "edge", "corner", "center" + Examples: "push", "poke", "touch", "pinch" """ - @property - def batch_size(self) -> int: + def get_points_by_type(self, point_type: str) -> torch.Tensor | None: + """Get points by their interaction type. + + Args: + point_type: Type of interaction (e.g., "push", "poke") + + Returns: + Tensor of points if found, None otherwise + """ + if point_type in self.point_types: + indices = [i for i, t in enumerate(self.point_types) if t == point_type] + return self.points[indices] + return None + + def get_batch_size(self) -> int: """Return the number of interaction points in this affordance.""" return self.points.shape[0] @@ -155,18 +187,18 @@ class ObjectSemantics: an object needed for intelligent interaction planning. 
""" - label: str = "none" - """Object category label (e.g., 'apple', 'bottle').""" - affordance: Affordance """Affordance data (GraspPose, InteractionPoints, etc.).""" geometry: Dict[str, Any] """Geometric information including bounding box, mesh data.""" - properties: Dict[str, Any] + properties: Dict[str, Any] = field(default_factory=dict) """Physical properties: mass, friction, etc.""" + label: str = "none" + """Object category label (e.g., 'apple', 'bottle').""" + uid: Optional[str] = None """Optional unique identifier for the object instance.""" @@ -186,6 +218,12 @@ class ActionCfg: interpolation_type: str = "linear" """Interpolation type: 'linear', 'cubic'.""" + velocity_limit: Optional[float] = None + """Optional velocity limit for the motion.""" + + acceleration_limit: Optional[float] = None + """Optional acceleration limit for the motion.""" + class AtomicAction(ABC): """Abstract base class for atomic actions. From 74d95899870a2167bcc0e0fcceb9f42ac3a5b47a Mon Sep 17 00:00:00 2001 From: Chen Jian Date: Thu, 30 Apr 2026 23:53:16 +0800 Subject: [PATCH 4/9] atomic action (#243) Co-authored-by: chenjian Co-authored-by: yuecideng Co-authored-by: Copilot --- .../lab/sim/atomic_actions/__init__.py | 17 +- embodichain/lab/sim/atomic_actions/actions.py | 941 ++++++++++-------- embodichain/lab/sim/atomic_actions/core.py | 248 +++-- embodichain/lab/sim/atomic_actions/engine.py | 302 +++--- .../lab/sim/planners/motion_generator.py | 11 +- .../lab/sim/planners/toppra_planner.py | 8 +- .../graspkit/pg_grasp/antipodal_generator.py | 2 +- scripts/tutorials/sim/atom_action.py | 291 ++++++ tests/sim/atomic_actions/test_atom_actions.py | 257 +++++ tests/sim/atomic_actions/test_core.py | 237 ----- 10 files changed, 1431 insertions(+), 883 deletions(-) create mode 100644 scripts/tutorials/sim/atom_action.py create mode 100644 tests/sim/atomic_actions/test_atom_actions.py delete mode 100644 tests/sim/atomic_actions/test_core.py diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py index f3f92da7..a154cf89 100644 --- a/embodichain/lab/sim/atomic_actions/__init__.py +++ b/embodichain/lab/sim/atomic_actions/__init__.py @@ -23,17 +23,18 @@ from .core import ( Affordance, - GraspPose, InteractionPoints, ObjectSemantics, ActionCfg, AtomicAction, ) from .actions import ( - ReachAction, - GraspAction, - ReleaseAction, MoveAction, + PickUpAction, + PlaceAction, + MoveActionCfg, + PickUpActionCfg, + PlaceActionCfg, ) from .engine import ( AtomicActionEngine, @@ -51,10 +52,12 @@ "ActionCfg", "AtomicAction", # Action implementations - "ReachAction", - "GraspAction", - "ReleaseAction", "MoveAction", + "PickUpAction", + "PlaceAction", + "MoveActionCfg", + "PickUpActionCfg", + "PlaceActionCfg", # Engine "AtomicActionEngine", "register_action", diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py index d15a7eff..ae8b1212 100644 --- a/embodichain/lab/sim/atomic_actions/actions.py +++ b/embodichain/lab/sim/atomic_actions/actions.py @@ -22,523 +22,624 @@ from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType from embodichain.lab.sim.planners.motion_generator import MotionGenOptions from embodichain.lab.sim.planners.toppra_planner import ToppraPlanOptions -from .core import AtomicAction, ObjectSemantics +from .core import AtomicAction, ObjectSemantics, AntipodalAffordance, ActionCfg +from embodichain.utils import logger +from embodichain.utils import configclass +from 
embodichain.lab.sim.utility.action_utils import interpolate_with_distance +import numpy as np if TYPE_CHECKING: from embodichain.lab.sim.planners import MotionGenerator from embodichain.lab.sim.objects import Robot -# ============================================================================= -# Reach Action -# ============================================================================= +@configclass +class MoveActionCfg(ActionCfg): + name: str = "move" + """Name of the action, used for identification and logging.""" + sample_interval: int = 50 + """Number of waypoints to sample for the motion trajectory. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead.""" -class ReachAction(AtomicAction): - """Atomic action for reaching a target pose or object.""" +class MoveAction(AtomicAction): def __init__( self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - interpolation_type: str = "linear", # "linear", "cubic", "toppra" + motion_generator: MotionGenerator, + cfg: MoveActionCfg | None = None, ): - super().__init__(motion_generator, robot, control_part, device) - self.interpolation_type = interpolation_type - - def execute( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - approach_offset: Optional[torch.Tensor] = None, - use_affordance: bool = True, - **kwargs, - ) -> PlanResult: - """Execute reach action. - + """ + Initialize the atomic action. Args: - target: Target pose [4, 4] or ObjectSemantics - start_qpos: Starting joint configuration - approach_offset: Offset for pre-grasp approach [x, y, z] - use_affordance: Whether to use object's affordance data - - Returns: - PlanResult with trajectory and execution status + motion_generator: The motion generator instance to use for planning. + cfg: Configuration for the action. """ - # Resolve target pose from ObjectSemantics if needed + super().__init__( + motion_generator, cfg=cfg if cfg is not None else MoveActionCfg() + ) + + self.n_envs = self.robot.get_qpos().shape[0] + self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part) + self.dof = len(self.arm_joint_ids) + + def _resolve_pose_target( + self, + target: Union[ObjectSemantics, torch.Tensor], + *, + action_name: str, + ) -> tuple[bool, torch.Tensor]: + """Resolve a pose target into a batched homogeneous transform tensor.""" if isinstance(target, ObjectSemantics): - target_pose = self._resolve_target_pose(target, use_affordance) - else: - target_pose = target + logger.log_error( + f"{action_name} currently does not support ObjectSemantics target. 
" + f"Please provide target pose as torch.Tensor of shape (4, 4) or " + f"(n_envs, 4, 4)", + NotImplementedError, + ) + if not isinstance(target, torch.Tensor): + logger.log_error( + "Target must be either ObjectSemantics or torch.Tensor of shape " + f"(4, 4) or ({self.n_envs}, 4, 4)", + TypeError, + ) - # Apply approach offset if specified - if approach_offset is not None: - approach_pose = self._apply_offset(target_pose, approach_offset) - else: - approach_pose = target_pose + if target.shape == (4, 4): + target = target.unsqueeze(0).repeat(self.n_envs, 1, 1) + if target.shape != (self.n_envs, 4, 4): + logger.log_error( + f"Target tensor must have shape (4, 4) or ({self.n_envs}, 4, 4), but got {target.shape}", + ValueError, + ) + return True, target - # Get current state if not provided + def _resolve_start_qpos( + self, + start_qpos: Optional[torch.Tensor], + arm_dof: Optional[int] = None, + ) -> torch.Tensor: + """Resolve planning start joint positions into batched arm joint positions.""" + arm_dof = self.dof if arm_dof is None else arm_dof if start_qpos is None: - start_qpos = self._get_current_qpos() + start_qpos = self.robot.get_qpos(name=self.cfg.control_part) + if start_qpos.shape == (arm_dof,): + start_qpos = start_qpos.unsqueeze(0).repeat(self.n_envs, 1) + if start_qpos.shape != (self.n_envs, arm_dof): + logger.log_error( + f"start_qpos must have shape ({self.n_envs}, {arm_dof}), but got {start_qpos.shape}", + ValueError, + ) + return start_qpos - # Create plan states - target_states = [ - PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), - PlanState(xpos=approach_pose, move_type=MoveType.EEF_MOVE), - ] + def _compute_three_phase_waypoints( + self, + hand_interp_steps: int, + *, + first_phase_name: str, + third_phase_name: str, + first_phase_ratio: float = 0.6, + ) -> tuple[int, int, int]: + """Split total sample interval into motion, hand interpolation, and motion phases.""" + first_phase_waypoint = int( + np.round(self.cfg.sample_interval - hand_interp_steps) * first_phase_ratio + ) + if first_phase_waypoint < 2: + logger.log_error( + f"Not enough waypoints for {first_phase_name} trajectory. " + "Please increase sample_interval or decrease hand_interp_steps.", + ValueError, + ) + second_phase_waypoint = hand_interp_steps + third_phase_waypoint = ( + self.cfg.sample_interval - first_phase_waypoint - second_phase_waypoint + ) + if third_phase_waypoint < 2: + logger.log_error( + f"Not enough waypoints for {third_phase_name} trajectory. 
" + "Please increase sample_interval or decrease hand_interp_steps.", + ValueError, + ) + return first_phase_waypoint, second_phase_waypoint, third_phase_waypoint - # Plan trajectory - options = MotionGenOptions( - control_part=self.control_part, + def _build_motion_gen_options( + self, + start_qpos: torch.Tensor, + sample_interval: int, + ) -> MotionGenOptions: + """Build default motion generation options for an atomic action.""" + return MotionGenOptions( + start_qpos=start_qpos[0], + control_part=self.cfg.control_part, is_interpolate=True, - is_linear=self.interpolation_type == "linear", - interpolate_position_step=0.002, + is_linear=False, + interpolate_position_step=0.001, plan_opts=ToppraPlanOptions( - sample_interval=kwargs.get("sample_interval", 30), + sample_interval=sample_interval, ), ) - result = self.plan_trajectory(target_states, options) - - # Return PlanResult directly - return result - - def validate( + def _plan_arm_trajectory( self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - **kwargs, - ) -> bool: - """Check if the reach action is feasible.""" - try: - # Quick IK feasibility check - if isinstance(target, ObjectSemantics): - target_pose = self._resolve_target_pose(target, use_affordance=True) - else: - target_pose = target - - # Attempt IK - qpos_seed = ( - start_qpos if start_qpos is not None else self._get_current_qpos() - ) - success, _ = self.robot.compute_ik( - pose=target_pose.unsqueeze(0), - qpos_seed=qpos_seed.unsqueeze(0), - name=self.control_part, + target_states_list: list[list[PlanState]], + start_qpos: torch.Tensor, + n_waypoints: int, + arm_dof: Optional[int] = None, + ) -> tuple[bool, torch.Tensor]: + """Plan batched arm trajectories for all environments.""" + arm_dof = self.dof if arm_dof is None else arm_dof + + n_state = len(target_states_list[0]) + xpos_traj = torch.zeros( + size=(self.n_envs, n_state, 4, 4), dtype=torch.float32, device=self.device + ) + for i, target_states in enumerate(target_states_list): + for j, target_state in enumerate(target_states): + # [env_i, state_j, 4, 4] + xpos_traj[i, j] = target_state.xpos + + trajectory = torch.zeros( + size=(self.n_envs, n_state, arm_dof), + dtype=torch.float32, + device=self.device, + ) + qpos_seed = start_qpos + for j in range(n_state): + is_success, qpos = self.robot.compute_ik( + pose=xpos_traj[:, j], name=self.cfg.control_part, joint_seed=qpos_seed ) - return success.all().item() - except Exception: - return False - - def _resolve_target_pose( - self, semantics: ObjectSemantics, use_affordance: bool - ) -> torch.Tensor: - """Resolve target pose from object semantics.""" - from .core import GraspPose - - if use_affordance and isinstance(semantics.affordance, GraspPose): - # Use precomputed grasp pose from affordance data - grasp_pose = semantics.affordance.get_best_grasp() - object_pose = self._get_object_pose(semantics.label) - target_pose = object_pose @ grasp_pose - else: - # Default to object center with approach direction - object_pose = self._get_object_pose(semantics.label) - approach_offset = torch.tensor([0, 0, 0.05], device=self.device) - target_pose = object_pose.clone() - target_pose[:3, 3] += approach_offset - - return target_pose - - def _get_object_pose(self, label: str) -> torch.Tensor: - """Get current pose of object by label.""" - # Implementation depends on environment's object management - # This is a placeholder - should be implemented based on environment - raise NotImplementedError( - "_get_object_pose must be implemented by 
subclass or " - "provided with environment-specific object management" + if not is_success: + logger.log_warning( + f"Failed to compute IK for target state {j} in some environments. " + "The resulting trajectory may be invalid." + ) + return False, trajectory + else: + trajectory[:, j] = qpos + qpos_seed = qpos + trajectory = torch.concatenate([start_qpos.unsqueeze(1), trajectory], dim=1) + interp_traj = interpolate_with_distance( + trajectory=trajectory, interp_num=n_waypoints, device=self.device ) + return True, interp_traj - -# ============================================================================= -# Grasp Action -# ============================================================================= - - -class GraspAction(AtomicAction): - """Atomic action for grasping objects.""" - - def __init__( + def _interpolate_hand_qpos( self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - pre_grasp_distance: float = 0.05, - approach_direction: str = "z", # "x", "y", "z", or "custom" - ): - super().__init__(motion_generator, robot, control_part, device) - self.pre_grasp_distance = pre_grasp_distance - self.approach_direction = approach_direction + start_hand_qpos: torch.Tensor, + end_hand_qpos: torch.Tensor, + n_waypoints: int, + ) -> torch.Tensor: + """Interpolate hand joint positions between two gripper states.""" + weights = torch.linspace(0, 1, steps=n_waypoints, device=self.device) + hand_qpos_list = [ + torch.lerp(start_hand_qpos, end_hand_qpos, weight) for weight in weights + ] + return torch.stack(hand_qpos_list, dim=0) def execute( self, - target: ObjectSemantics, + target: Union[ObjectSemantics, torch.Tensor], start_qpos: Optional[torch.Tensor] = None, - use_affordance: bool = True, - grasp_type: str = "default", # "default", "pinch", "power" **kwargs, - ) -> PlanResult: - """Execute grasp action. + ) -> tuple[bool, torch.Tensor, list[float]]: + """execute pick up action Args: - target: ObjectSemantics with grasp affordances - start_qpos: Starting joint configuration - use_affordance: Whether to use precomputed grasp poses - grasp_type: Type of grasp to execute - """ - # Resolve grasp pose - grasp_pose = self._resolve_grasp_pose(target, use_affordance, grasp_type) - - # Compute pre-grasp pose (approach position) - pre_grasp_pose = self._compute_pre_grasp_pose(grasp_pose) + target (ObjectSemantics): object semantics containing grasp affordance and entity information + start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None. 
+@configclass
+class PickUpActionCfg(MoveActionCfg):
+    name: str = "pick_up"
+    """Name of the action, used for 
identification and logging.""" - def _get_object_pose(self, label: str) -> torch.Tensor: - """Get current pose of object by label.""" - raise NotImplementedError( - "_get_object_pose must be implemented by subclass or " - "provided with environment-specific object management" - ) + hand_open_qpos: torch.Tensor | None = None + """[hand_dof,] of float. Joint positions for open hand state. Must be specified for PickUpAction.""" - def _compute_grasp_from_geometry(self, semantics: ObjectSemantics) -> torch.Tensor: - """Compute grasp pose from object geometry.""" - # Get object pose - object_pose = self._get_object_pose(semantics.label) + hand_close_qpos: torch.Tensor | None = None + """[hand_dof,] of float. Joint positions for closed hand state. Must be specified for PickUpAction.""" - # Get bounding box from geometry - bbox = semantics.geometry.get("bounding_box", [0.1, 0.1, 0.1]) + hand_control_part: str = "hand" + """Name of the robot part that controls the hand joints. Must correspond to a valid control part in the robot definition.""" - # Default top-down grasp - grasp_offset = torch.eye(4, device=self.device) - grasp_offset[2, 3] = bbox[2] / 2 + 0.02 # Slightly above object + pre_grasp_distance: float = 0.15 + """Distance to offset back from the grasp pose along the approach direction to get the pre-grasp pose. Should be large enough to avoid collision during approach, but not too large to cause unnecessary detour.""" - return object_pose @ grasp_offset + approach_direction: torch.Tensor = torch.tensor([0, 0, -1], dtype=torch.float32) + """Direction from which the gripper approaches the object for grasping, expressed in the object local frame. Should be a unit vector. Default is [0, 0, -1], which means approaching from above along the negative z-axis.""" + lift_height: float = 0.1 + """Height to lift the object after grasping, expressed in meters. Should be large enough to avoid collision with the environment, but not too large to cause unnecessary motion.""" -# ============================================================================= -# Move Action -# ============================================================================= + sample_interval: int = 80 + """Number of waypoints to sample for the entire pick up motion trajectory, including approach, hand closing, and lifting. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead.""" + hand_interp_steps: int = 5 + """Number of waypoints to interpolate for the hand closing motion. Should be at least 2 to ensure smooth interpolation between open and closed hand states, but not too large to cause unnecessary computation overhead.""" -class MoveAction(AtomicAction): - """Atomic action for moving to a target position.""" +class PickUpAction(MoveAction): def __init__( self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - move_type: str = "cartesian", # "cartesian", "joint" - interpolation: str = "linear", # "linear", "cubic", "toppra" + motion_generator: MotionGenerator, + cfg: PickUpActionCfg | None = None, ): - super().__init__(motion_generator, robot, control_part, device) - self.move_type = move_type - self.interpolation = interpolation + """ + Initialize the atomic action. + Args: + motion_generator: The motion generator instance to use for planning. + cfg: Configuration for the action. 
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else PickUpActionCfg()
+        )
+        self.approach_direction = self.cfg.approach_direction.to(self.device)
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PickUpActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PickUpActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)

     def execute(
         self,
-        target: Union[torch.Tensor, ObjectSemantics],
+        target: Union[ObjectSemantics, torch.Tensor],
         start_qpos: Optional[torch.Tensor] = None,
-        offset: Optional[torch.Tensor] = None,
-        velocity_limit: Optional[float] = None,
-        acceleration_limit: Optional[float] = None,
-    ) -> PlanResult:
-        """Execute move action.
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[int]]:
+        """Execute the pick up action.

         Args:
-            target: Target pose [4, 4] or ObjectSemantics
-            start_qpos: Starting joint configuration
-            offset: Optional offset from target
-            velocity_limit: Max velocity for trajectory
-            acceleration_limit: Max acceleration for trajectory
+            target (Union[ObjectSemantics, torch.Tensor]): target object semantics or target pose for grasping
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.
+
+        Returns:
+            tuple[bool, torch.Tensor, list[int]]:
+                is_success,
+                trajectory of shape (n_envs, n_waypoints, dof),
+                joint_ids corresponding to trajectory
         """
-        # Resolve target
+
+        # Resolve grasp pose
         if isinstance(target, ObjectSemantics):
-            target_pose = self._get_object_pose(target.label)
+            is_success, grasp_xpos, open_length = self._resolve_grasp_pose(target)
         else:
-            target_pose = target
-
-        # Apply offset if specified
-        if offset is not None:
-            target_pose = self._apply_offset(target_pose, offset)
+            is_success, grasp_xpos = self._resolve_pose_target(
+                target, action_name=self.__class__.__name__
+            )

-        # Get start state
-        if start_qpos is None:
-            start_qpos = self._get_current_qpos()
+        # TODO: fall back to a default approach pose when no valid grasp is found
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve grasp pose for pick up action; aborting."
+            )
+            return False, torch.empty(0), self.joint_ids
+
+        # Compute pre-grasp pose
+        # TODO: only for parallel gripper, approach in negative grasp z direction
+        grasp_z = grasp_xpos[:, :3, 2]
+        pre_grasp_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=-grasp_z * self.cfg.pre_grasp_distance,
+        )
+        # Resolve planning start qpos
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+
+        # Compute waypoint number for each phase
+        n_approach_waypoint, n_close_waypoint, n_lift_waypoint = (
+            self._compute_three_phase_waypoints(
+                self.cfg.hand_interp_steps,
+                first_phase_name="approach",
+                third_phase_name="lift",
+            )
+        )

-        # Create plan states based on move type
-        if self.move_type == "cartesian":
-            target_states = [
-                PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE),
-                PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE),
+        # Get pick trajectory
+        target_states_list = [
+            [
+                PlanState(xpos=pre_grasp_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE),
             ]
-            is_linear = self.interpolation == "linear"
-        else:  # joint space
-            target_qpos = self._ik_solve(target_pose, start_qpos)
-            target_states = [
-                PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE),
-                PlanState(qpos=target_qpos, move_type=MoveType.JOINT_MOVE),
+            for i in range(self.n_envs)
+        ]
+        pick_trajectory = torch.zeros(
+            size=(self.n_envs, n_approach_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_approach_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan approach trajectory.")
+            return False, pick_trajectory, self.joint_ids
+        pick_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Pad open-hand joint positions onto the approach trajectory
+        pick_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        # Get hand closing trajectory
+        grasp_qpos = pick_trajectory[
+            :, -1, : self.arm_dof
+        ]  # The last waypoint of the approach trajectory is the grasp configuration
+        hand_close_path = self._interpolate_hand_qpos(
+            self.hand_open_qpos,
+            self.hand_close_qpos,
+            n_close_waypoint,
+        )
+        hand_close_trajectory = torch.zeros(
+            size=(self.n_envs, n_close_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        hand_close_trajectory[:, :, : self.arm_dof] = grasp_qpos.unsqueeze(1)
+        hand_close_trajectory[:, :, self.arm_dof :] = hand_close_path
+
+        # Get lift trajectory
+        lift_trajectory = torch.zeros(
+            size=(self.n_envs, n_lift_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        lift_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
             ]
-            is_linear = False
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            grasp_qpos,
+            n_lift_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan lift trajectory.")
+            return False, lift_trajectory, self.joint_ids
+        lift_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Pad closed-hand joint positions onto the lift trajectory
+        lift_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+
+        # Concatenate the approach, hand-closing, and lift phases
+        trajectory = torch.cat(
+            [pick_trajectory, hand_close_trajectory, lift_trajectory], dim=1
+        )
+        return True, trajectory, self.joint_ids

-        # Configure motion generation
-        options = MotionGenOptions(
-            control_part=self.control_part,
-            is_interpolate=True,
-            is_linear=is_linear,
-            interpolate_position_step=0.002,
-            plan_opts=ToppraPlanOptions(
-                sample_interval=kwargs.get("sample_interval", 30),
-                velocity_limit=velocity_limit,
-                acceleration_limit=acceleration_limit,
-            ),
+    def _resolve_grasp_pose(
+        self, semantics: ObjectSemantics
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        if not isinstance(semantics.affordance, AntipodalAffordance):
+            logger.log_error(
+                "Grasp pose affordance must be of type AntipodalAffordance"
+            )
+        if semantics.entity is None:
+            logger.log_error(
+                "ObjectSemantics must be associated with an entity to get object pose"
+            )
+        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
+
+        is_success, grasp_xpos, open_length = semantics.affordance.get_best_grasp_poses(
+            obj_poses=obj_poses, approach_direction=self.approach_direction
         )
+        return is_success, grasp_xpos, open_length

-        result = self.plan_trajectory(target_states, options)
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for pick up action
+        return True
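+
+# A PickUpAction sketch (illustrative; the gripper joint values are examples
+# for a two-finger parallel gripper, and `motion_gen` / `mug_semantics` follow
+# scripts/tutorials/sim/atom_action.py):
+#
+#   pick = PickUpAction(
+#       motion_gen,
+#       cfg=PickUpActionCfg(
+#           hand_open_qpos=torch.tensor([0.0, 0.0]),
+#           hand_close_qpos=torch.tensor([0.025, 0.025]),
+#       ),
+#   )
+#   ok, traj, joint_ids = pick.execute(target=mug_semantics)
+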

-        # Return PlanResult directly - it contains all trajectory data
-        return result

-    def validate(
-        self,
-        target: Union[torch.Tensor, ObjectSemantics],
-        start_qpos: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> bool:
-        """Validate if move action is feasible."""
-        try:
-            if isinstance(target, ObjectSemantics):
-                target_pose = self._get_object_pose(target.label)
-            else:
-                target_pose = target
+@configclass
+class PlaceActionCfg(MoveActionCfg):
+    name: str = "place"
+    """Name of the action, used for identification and logging."""

-            qpos_seed = (
-                start_qpos if start_qpos is not None else self._get_current_qpos()
-            )
+    hand_open_qpos: torch.Tensor | None = None
+    """[hand_dof,] of float. Joint positions for open hand state. Must be specified for PlaceAction."""

-            if self.move_type == "joint":
-                # For joint space moves, we need IK solvability
-                self._ik_solve(target_pose, qpos_seed)
-            else:
-                # For cartesian moves, just check IK
-                success, _ = self.robot.compute_ik(
-                    pose=target_pose.unsqueeze(0),
-                    qpos_seed=qpos_seed.unsqueeze(0),
-                    name=self.control_part,
-                )
-                if not success.all():
-                    return False
+    hand_close_qpos: torch.Tensor | None = None
+    """[hand_dof,] of float. Joint positions for closed hand state. Must be specified for PlaceAction."""

-            return True
-        except Exception:
-            return False
+    hand_control_part: str = "hand"
+    """Name of the robot part that controls the hand joints. Must correspond to a valid control part in the robot definition."""

-    def _get_object_pose(self, label: str) -> torch.Tensor:
-        """Get current pose of object by label."""
-        raise NotImplementedError(
-            "_get_object_pose must be implemented by subclass or "
-            "provided with environment-specific object management"
-        )
+    lift_height: float = 0.1
+    """Height above the place pose from which the gripper descends and to which it retreats after releasing, in meters. Should be large enough to avoid collision with the environment, but not too large to cause unnecessary motion."""

+    sample_interval: int = 80
+    """Number of waypoints to sample for the entire place motion trajectory, including descent, hand opening, and retreat. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead."""

-# =============================================================================
-# Release Action
-# =============================================================================
+    hand_interp_steps: int = 5
+    """Number of waypoints to interpolate for the hand opening motion. Should be at least 2 to ensure smooth interpolation between closed and open hand states, but not too large to cause unnecessary computation overhead."""

-class ReleaseAction(AtomicAction):
-    """Atomic action for releasing an object."""
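+
+# Waypoint budgeting: `sample_interval` spans all three phases and
+# `hand_interp_steps` of it is reserved for the gripper phase, so
+# `sample_interval` must comfortably exceed `hand_interp_steps`;
+# `_compute_three_phase_waypoints` raises a ValueError otherwise.
+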
+class PlaceAction(MoveAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: PlaceActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else PlaceActionCfg()
+        )
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PlaceActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PlaceActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)

     def execute(
         self,
-        target: Optional[Union[torch.Tensor, ObjectSemantics]] = None,
+        target: Union[ObjectSemantics, torch.Tensor],
         start_qpos: Optional[torch.Tensor] = None,
         **kwargs,
-    ) -> PlanResult:
-        """Execute release action.
+    ) -> tuple[bool, torch.Tensor, list[int]]:
+        """Execute the place action.

         Args:
-            target: Optional target pose after release (for place operations)
-            start_qpos: Starting joint configuration
+            target (Union[ObjectSemantics, torch.Tensor]): place target pose(s) or
+                object semantics from which the place pose is resolved
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.

         Returns:
-            PlanResult with trajectory (may be empty for simple release)
+            tuple[bool, torch.Tensor, list[int]]:
+                is_success,
+                trajectory of shape (n_envs, n_waypoints, dof),
+                joint_ids corresponding to trajectory
         """
-        # Get current state
-        if start_qpos is None:
-            start_qpos = self._get_current_qpos()
-
-        # If target is specified, move to that pose first
-        if target is not None:
-            if isinstance(target, ObjectSemantics):
-                # Move above the object
-                target_pose = self._get_object_pose(target.label)
-                approach_offset = torch.tensor([0, 0, 0.1], device=self.device)
-                target_pose = self._apply_offset(target_pose, approach_offset)
-            else:
-                target_pose = target
-
-            target_states = [
-                PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE),
-                PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE),
-            ]
+        is_success, place_xpos = self._resolve_pose_target(
+            target, action_name=self.__class__.__name__
+        )
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)

-            options = MotionGenOptions(
-                control_part=self.control_part,
-                is_interpolate=True,
-                is_linear=False,
-                interpolate_position_step=0.002,
-                plan_opts=ToppraPlanOptions(
-                    sample_interval=kwargs.get("sample_interval", 30),
-                ),
+        # TODO: add a fallback strategy when the place pose cannot be resolved
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve place pose for place action; aborting."
             )
-
-            result = self.plan_trajectory(target_states, options)
-        else:
-            # Simple release - return success with current state
-            result = PlanResult(
-                success=True,
-                positions=start_qpos.unsqueeze(0),
+            return False, torch.empty(0), self.joint_ids
+
+        # Compute waypoint number for each phase
+        n_down_waypoint, n_open_waypoint, n_lift_waypoint = (
+            self._compute_three_phase_waypoints(
+                self.cfg.hand_interp_steps,
+                first_phase_name="approach",
+                third_phase_name="lift",
             )
+        )
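+        # Phase layout mirrors PickUpAction in reverse: descend from
+        # `lift_height` above the place pose, open the hand over
+        # `n_open_waypoint` steps, then retreat back up to the lifted pose.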
-        # Open gripper (if applicable)
-        # This would be robot-specific and should be implemented by subclasses
-        self._open_gripper()
-
-        return result
+        down_trajectory = torch.zeros(
+            size=(self.n_envs, n_down_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        lift_xpos = self._apply_offset(
+            pose=place_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=place_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_down_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan down trajectory.")
+            return False, down_trajectory, self.joint_ids
+        down_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Pad closed-hand joint positions onto the descent trajectory
+        down_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+
+        # Get hand opening trajectory
+        reach_qpos = down_trajectory[
+            :, -1, : self.arm_dof
+        ]  # The last waypoint of the descent trajectory is the place configuration
+        hand_open_path = self._interpolate_hand_qpos(
+            self.hand_close_qpos,
+            self.hand_open_qpos,
+            n_open_waypoint,
+        )
+        hand_open_trajectory = torch.zeros(
+            size=(self.n_envs, n_open_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        hand_open_trajectory[:, :, : self.arm_dof] = reach_qpos.unsqueeze(1)
+        hand_open_trajectory[:, :, self.arm_dof :] = hand_open_path
+
+        # Get retreat trajectory
+        back_trajectory = torch.zeros(
+            size=(self.n_envs, n_lift_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            reach_qpos,
+            n_lift_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan back trajectory.")
+            return False, back_trajectory, self.joint_ids
+        back_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Pad open-hand joint positions onto the retreat trajectory
+        back_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        # Concatenate the descent, hand-opening, and retreat phases
+        trajectory = torch.cat(
+            [down_trajectory, hand_open_trajectory, back_trajectory], dim=1
+        )
+        return True, trajectory, self.joint_ids

-    def validate(
-        self,
-        target: Optional[Union[torch.Tensor, ObjectSemantics]] = None,
-        start_qpos: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> bool:
-        """Validate if release action is feasible."""
-        # Release is generally always feasible
-        # If target is specified, validate that we can reach it
-        if target is not None and isinstance(target, torch.Tensor):
-            try:
-                qpos_seed = (
-                    start_qpos if start_qpos is not None else self._get_current_qpos()
-                )
-                success, _ = self.robot.compute_ik(
-                    pose=target.unsqueeze(0),
-                    qpos_seed=qpos_seed.unsqueeze(0),
-                    name=self.control_part,
-                )
-                return success.all().item()
-            except Exception:
-                return False
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for place action
         return True

-    def _open_gripper(self) -> None:
-        """Open the gripper to release the object.
-
-        This is a placeholder method that should be implemented by subclasses
-        based on the specific robot hardware or simulation environment.
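+
+# Chaining pick and place directly (illustrative sketch; `motion_gen`,
+# `mug_semantics` and `place_xpos` are assumed to exist as in
+# scripts/tutorials/sim/atom_action.py):
+#
+#   ok_pick, pick_traj, joint_ids = pick.execute(target=mug_semantics)
+#   arm_qpos = pick_traj[:, -1, : pick.arm_dof]
+#   ok_place, place_traj, _ = place.execute(target=place_xpos, start_qpos=arm_qpos)
+#   trajectory = torch.cat([pick_traj, place_traj], dim=1)
+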
- """ - # Placeholder - should be implemented by subclass - pass - - def _get_object_pose(self, label: str) -> torch.Tensor: - """Get current pose of object by label.""" - raise NotImplementedError( - "_get_object_pose must be implemented by subclass or " - "provided with environment-specific object management" - ) diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py index 42337fa3..c16cebb0 100644 --- a/embodichain/lab/sim/atomic_actions/core.py +++ b/embodichain/lab/sim/atomic_actions/core.py @@ -24,8 +24,20 @@ from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType from embodichain.utils import configclass +from embodichain.toolkits.graspkit.pg_grasp import ( + GraspGenerator, + GraspGeneratorCfg, +) +from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import ( + GripperCollisionCfg, +) +from embodichain.lab.sim.common import BatchEntity +from embodichain.utils import logger +from embodichain.lab.sim.sim_manager import SimulationManager +from embodichain.lab.sim.cfg import MarkerCfg + if TYPE_CHECKING: - from embodichain.lab.sim.planners import MotionGenerator + from embodichain.lab.sim.planners import MotionGenerator, MotionGenOptions from embodichain.lab.sim.objects import Robot @@ -45,69 +57,146 @@ class Affordance: object_label: str = "" """Label of the object this affordance belongs to.""" - def get_batch_size(self) -> int: - """Return the batch size of this affordance data.""" - return 1 - + geometry: Dict[str, Any] = field(default_factory=dict) + """Geometry dictionary shared with ObjectSemantics. -@dataclass -class GraspPose(Affordance): - """Grasp pose affordance containing a batch of 4x4 transformation matrices. - - Each grasp pose represents a valid end-effector pose for grasping the object. - Multiple poses may be available for different grasp types (pinch, power, etc.) - or approach directions. + The mesh payload is expected to be stored in: + - ``mesh_vertices``: torch.Tensor with shape [N, 3] + - ``mesh_triangles``: torch.Tensor with shape [M, 3] """ - poses: torch.Tensor = field(default_factory=lambda: torch.eye(4).unsqueeze(0)) - """Batch of grasp poses with shape [B, 4, 4]. + custom_config: Dict[str, Any] = field(default_factory=dict) + """User-defined configuration payload for affordance creation and usage.""" - Each pose is a 4x4 homogeneous transformation matrix representing - the end-effector pose in the object's local coordinate frame. - """ + @property + def mesh_vertices(self) -> torch.Tensor | None: + """Get mesh vertices from geometry. - grasp_types: List[str] = field(default_factory=lambda: ["default"]) - """List of grasp type labels for each pose in the batch. + Returns: + Mesh vertices tensor [N, 3], or None if unavailable. - Examples: "pinch", "power", "hook", "spherical", etc. - Length must match the batch dimension of `poses`. - """ + Raises: + TypeError: If ``mesh_vertices`` exists but is not a torch tensor. + """ + vertices = self.geometry.get("mesh_vertices") + if vertices is None: + return None + if not isinstance(vertices, torch.Tensor): + raise TypeError("geometry['mesh_vertices'] must be a torch.Tensor") + return vertices - confidence_scores: torch.Tensor | None = None - """Optional confidence scores for each grasp pose with shape [B]. + @property + def mesh_triangles(self) -> torch.Tensor | None: + """Get mesh triangles from geometry. - Higher values indicate more reliable/ stable grasps. - Used for grasp selection when multiple options exist. 
- """ + Returns: + Mesh triangle index tensor [M, 3], or None if unavailable. - def get_batch_size(self) -> int: - """Return the number of grasp poses in this affordance.""" - return self.poses.shape[0] + Raises: + TypeError: If ``mesh_triangles`` exists but is not a torch tensor. + """ + triangles = self.geometry.get("mesh_triangles") + if triangles is None: + return None + if not isinstance(triangles, torch.Tensor): + raise TypeError("geometry['mesh_triangles'] must be a torch.Tensor") + return triangles - def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: - """Get grasp pose by type label. + def set_custom_config(self, key: str, value: Any) -> None: + """Set a custom affordance configuration value.""" + self.custom_config[key] = value - Args: - grasp_type: Type of grasp (e.g., "pinch", "power") + def get_custom_config(self, key: str, default: Any = None) -> Any: + """Get a custom affordance configuration value.""" + return self.custom_config.get(key, default) - Returns: - 4x4 pose tensor if found, None otherwise - """ - if grasp_type in self.grasp_types: - idx = self.grasp_types.index(grasp_type) - return self.poses[idx] - return None + def get_batch_size(self) -> int: + """Return the batch size of this affordance data.""" + return 1 - def get_best_grasp(self) -> torch.Tensor: - """Get the best grasp pose based on confidence scores. - Returns: - 4x4 pose tensor with highest confidence - """ - if self.confidence_scores is not None: - best_idx = torch.argmax(self.confidence_scores) - return self.poses[best_idx] - return self.poses[0] # Default to first if no scores available +@dataclass +class AntipodalAffordance(Affordance): + generator: GraspGenerator | None = None + """Grasp generator instance, initialized lazily when needed.""" + + force_reannotate: bool = False + """Whether to force re-annotation of grasp generator on each access.""" + + is_draw_grasp_xpos: bool = False + """Whether to visualize grasp poses in the simulator.""" + + def _init_generator(self): + if ( + self.geometry.get("mesh_vertices", None) is None + or self.geometry.get("mesh_triangles", None) is None + ): + logger.log_error( + "Mesh vertices and triangles must be provided in geometry to initialize AntipodalAffordance." 
+ ) + self.generator = GraspGenerator( + vertices=self.geometry.get("mesh_vertices"), + triangles=self.geometry.get("mesh_triangles"), + cfg=self.custom_config.get("generator_cfg", None), + gripper_collision_cfg=self.custom_config.get("gripper_collision_cfg", None), + ) + if self.force_reannotate: + self.generator.annotate() + else: + if self.generator._hit_point_pairs is None: + self.generator.annotate() + + def get_best_grasp_poses( + self, + obj_poses: torch.Tensor, + approach_direction: torch.Tensor = torch.tensor( + [0, 0, -1], dtype=torch.float32 + ), + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + if self.generator is None: + self._init_generator() + + grasp_xpos_list = [] + is_success_list = [] + open_length_list = [] + for i, obj_pose in enumerate(obj_poses): + is_success, grasp_xpos, open_length = self.generator.get_grasp_poses( + obj_pose, approach_direction + ) + if is_success: + grasp_xpos_list.append(grasp_xpos.unsqueeze(0)) + else: + logger.log_warning(f"No valid grasp pose found for {i}-th object.") + grasp_xpos_list.append( + torch.eye( + 4, dtype=torch.float32, device=self.generator.device + ).unsqueeze(0) + ) # Default to identity pose if no grasp found + is_success_list.append(is_success) + open_length_list.append(open_length) + is_success = torch.tensor( + is_success_list, dtype=torch.bool, device=self.generator.device + ) + grasp_xpos = torch.concatenate(grasp_xpos_list, dim=0) # [B, 4, 4] + open_length = torch.tensor( + open_length_list, dtype=torch.float32, device=self.generator.device + ) + if self.is_draw_grasp_xpos: + self._draw_grasp_xpos(grasp_xpos, open_length) + return is_success, grasp_xpos, open_length + + def _draw_grasp_xpos(self, grasp_xpos: torch.Tensor, open_length: torch.Tensor): + sim = SimulationManager.get_instance() + axis_xpos = [] + for i in range(grasp_xpos.shape[0]): + axis_xpos.append(grasp_xpos[i].to("cpu").numpy()) + sim.draw_marker( + cfg=MarkerCfg( + name="grasp_xpos", + axis_xpos=axis_xpos, + axis_len=0.05, + ) + ) @dataclass @@ -199,8 +288,17 @@ class ObjectSemantics: label: str = "none" """Object category label (e.g., 'apple', 'bottle').""" - uid: Optional[str] = None - """Optional unique identifier for the object instance.""" + entity: BatchEntity | None = None + """Optional reference to the underlying simulation entity representing this object.""" + + def __post_init__(self) -> None: + """Bind affordance metadata to this semantic object. + + The affordance shares the same geometry dict instance as + ``ObjectSemantics.geometry`` so mesh tensors are authored in one place. + """ + self.affordance.object_label = self.label + self.affordance.geometry = self.geometry # ============================================================================= @@ -212,7 +310,10 @@ class ObjectSemantics: class ActionCfg: """Configuration for atomic actions.""" - control_part: str = "left_arm" + name: str = "default" + """Name of the action, used for identification and logging.""" + + control_part: str = "arm" """Control part name for the action.""" interpolation_type: str = "linear" @@ -236,9 +337,18 @@ class AtomicAction(ABC): def __init__( self, motion_generator: MotionGenerator, + cfg: ActionCfg = ActionCfg(), ): + """ + Initialize the atomic action. + Args: + motion_generator: The motion generator instance to use for planning. + cfg: Configuration for the action. 
+        """
         self.motion_generator = motion_generator
+        self.cfg = cfg
         self.robot = motion_generator.robot
+        self.control_part = cfg.control_part
         self.device = self.robot.device

     @abstractmethod
@@ -247,21 +357,19 @@ def execute(
         target: Union[torch.Tensor, ObjectSemantics],
         start_qpos: Optional[torch.Tensor] = None,
         **kwargs,
-    ) -> PlanResult:
-        """Execute the atomic action.
+    ) -> tuple[bool, torch.Tensor, list[int]]:
+        """Execute the atomic action.

         Args:
-            target: Target pose [4, 4] or ObjectSemantics
-            start_qpos: Starting joint configuration [DOF]
-            **kwargs: Additional action-specific parameters
+            target (Union[torch.Tensor, ObjectSemantics]): target pose(s) or
+                object semantics for the action
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.

         Returns:
-            PlanResult with trajectory (positions, velocities, accelerations),
-            end-effector poses (xpos_list), and success status.
-            Use result.positions for joint trajectory [T, DOF].
-            Use result.xpos_list for EE poses [T, 4, 4].
+            tuple[bool, torch.Tensor, list[int]]:
+                is_success,
+                trajectory of shape (n_envs, n_waypoints, dof),
+                joint_ids corresponding to trajectory
         """
-        pass

     @abstractmethod
     def validate(
@@ -333,14 +441,20 @@ def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tenso
-        """Apply offset to pose in local frame.
+        """Apply a translational offset to a batch of poses in the world frame.

         Args:
-            pose: Base pose [4, 4]
-            offset: Offset in local frame [3]
+            pose: Base pose [N, 4, 4]
+            offset: Offset in the world frame [N, 3] or [3]

         Returns:
-            Pose with offset applied [4, 4]
+            Pose with offset applied [N, 4, 4]
         """
+        if pose.ndim != 3 or pose.shape[1:] != (4, 4):
+            logger.log_error("pose must have shape [N, 4, 4]")
+        if offset.ndim == 1:
+            offset = offset.unsqueeze(0)
+        if offset.ndim != 2 or offset.shape[1] != 3:
+            logger.log_error("offset must have shape [N, 3] or [3]")
         result = pose.clone()
-        result[:3, 3] += pose[:3, :3] @ offset
+        result[:, :3, 3] += offset
         return result
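+
+    # Offset example: lift a batch of grasp poses 15 cm along world +z
+    # (illustrative; `grasp_xpos` is any [N, 4, 4] pose batch):
+    #
+    #   lifted = self._apply_offset(
+    #       pose=grasp_xpos,
+    #       offset=torch.tensor([0.0, 0.0, 1.0], device=self.device) * 0.15,
+    #   )
+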
     def plan_trajectory(
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
index b152adfa..72ff7ea3 100644
--- a/embodichain/lab/sim/atomic_actions/engine.py
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -17,9 +17,10 @@
 from __future__ import annotations

 import torch
-from typing import Dict, List, Optional, Type, Union, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

 from embodichain.lab.sim.planners import PlanResult
+from embodichain.utils import logger
 from .core import AtomicAction, ObjectSemantics, ActionCfg

 if TYPE_CHECKING:
@@ -94,7 +95,13 @@ class SemanticAnalyzer:
     def __init__(self):
         self._object_cache: Dict[str, ObjectSemantics] = {}

-    def analyze(self, label: str) -> ObjectSemantics:
+    def analyze(
+        self,
+        label: str,
+        geometry: Optional[Dict[str, Any]] = None,
+        custom_config: Optional[Dict[str, Any]] = None,
+        use_cache: bool = True,
+    ) -> ObjectSemantics:
         """Analyze object by label and return ObjectSemantics.

         This is a placeholder implementation that should be extended

         Args:
             label: Object category label (e.g., "apple", "bottle")
+            geometry: Optional geometry payload. Can include mesh tensors:
+                ``mesh_vertices`` [N, 3] and ``mesh_triangles`` [M, 3].
+            custom_config: Optional user-defined affordance configuration.
+            use_cache: Whether to use cached semantics when available.

         Returns:
             ObjectSemantics containing affordance data
         """
-        # Check cache first
-        if label in self._object_cache:
+        # Only use cache for default analyze path
+        if (
+            use_cache
+            and geometry is None
+            and custom_config is None
+            and label in self._object_cache
+        ):
             return self._object_cache[label]

         # Create default semantics (placeholder implementation)
-        from .core import GraspPose, InteractionPoints
+        from .core import AntipodalAffordance

-        # Generate default grasp poses based on object type
-        default_poses = torch.eye(4).unsqueeze(0)
-        default_poses[0, 2, 3] = 0.1  # Default offset
-
-        grasp_affordance = GraspPose(
+        default_geometry: Dict[str, Any] = {"bounding_box": [0.1, 0.1, 0.1]}
+        if geometry is not None:
+            default_geometry.update(geometry)
+
+        # AntipodalAffordance carries no precomputed pose fields; grasp poses
+        # are generated lazily from the mesh geometry on first request.
+        grasp_affordance = AntipodalAffordance(
             object_label=label,
-            poses=default_poses,
-            grasp_types=["default"],
-        )
-
-        # Default interaction points
-        interaction_affordance = InteractionPoints(
-            object_label=label,
-            points=torch.zeros(1, 3),
-            point_types=["contact"],
+            custom_config=custom_config or {},
         )

         semantics = ObjectSemantics(
             label=label,
             affordance=grasp_affordance,
-            geometry={"bounding_box": [0.1, 0.1, 0.1]},
+            geometry=default_geometry,
             properties={"mass": 1.0, "friction": 0.5},
         )

-        # Cache and return
-        self._object_cache[label] = semantics
+        # Cache only default path
+        if use_cache and geometry is None and custom_config is None:
+            self._object_cache[label] = semantics
         return semantics

     def clear_cache(self) -> None:
@@ -156,168 +171,165 @@ class AtomicActionEngine:

     def __init__(
         self,
-        robot: "Robot",
         motion_generator: "MotionGenerator",
-        device: torch.device = torch.device("cuda"),
+        actions_cfg_list: Optional[List[ActionCfg]] = None,
     ):
-        self.robot = robot
         self.motion_generator = motion_generator
-        self.device = device
-
-        # Registry of action instances
-        self._actions: Dict[str, AtomicAction] = {}
+        self.robot = self.motion_generator.robot
+        self.device = self.motion_generator.device

         # Semantic analyzer for object understanding
         self._semantic_analyzer = SemanticAnalyzer()

         # Initialize default actions
-        self._init_default_actions()
-
-    def _init_default_actions(self):
-        """Initialize default atomic action instances."""
-        from .actions import ReachAction, GraspAction, MoveAction, ReleaseAction
-
-        control_parts = getattr(self.robot, "control_parts", None) or ["default"]
-
-        for part in control_parts:
-            self.register_action(
-                f"reach_{part}",
-                ReachAction(
-                    motion_generator=self.motion_generator,
-                    robot=self.robot,
-                    control_part=part,
-                    device=self.device,
-                ),
-            )
-            self.register_action(
-                f"grasp_{part}",
-                GraspAction(
-                    motion_generator=self.motion_generator,
-                    robot=self.robot,
-                    control_part=part,
-                    device=self.device,
-                ),
-            )
-            self.register_action(
-                f"move_{part}",
-                MoveAction(
-                    motion_generator=self.motion_generator,
-                    robot=self.robot,
-                    control_part=part,
-                    device=self.device,
-                ),
-            )
-            self.register_action(
-                f"release_{part}",
-                ReleaseAction(
-                    motion_generator=self.motion_generator,
-                    robot=self.robot,
-                    control_part=part,
-                    device=self.device,
-                ),
-            )
+        self._actions: List[AtomicAction] = self._init_actions(actions_cfg_list)
+
+    def _init_actions(self, actions_cfg_list: Optional[List[ActionCfg]] = None):
+        actions: List[AtomicAction] = []
+        from .actions import MoveAction, PickUpAction, PlaceAction
+
+        name_action_map = {
+            "move": MoveAction,
+            "pick_up": PickUpAction,
+            "place": PlaceAction,
+        }
+        if actions_cfg_list is not None:
+            for cfg in actions_cfg_list:
+                action_class = name_action_map.get(cfg.name)
+                if action_class is None:
+                    logger.log_error(f"Unknown action name in config: {cfg.name}")
+                instance = action_class(motion_generator=self.motion_generator, cfg=cfg)
+                actions.append(instance)
+        return actions
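+
+    # Actions run in the order their configs are given: e.g.
+    # actions_cfg_list=[PickUpActionCfg(...), PlaceActionCfg(...), MoveActionCfg()]
+    # yields a pick -> place -> move sequence in `execute_static`, which
+    # consumes one target per action in the same order.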
+
+    def execute_static(
+        self,
+        target_list: List[Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]]],
+    ) -> tuple[bool, torch.Tensor]:
+        """Execute the configured actions in sequence and stitch the per-action
+        trajectories into a single full-robot trajectory."""
+        if len(target_list) != len(self._actions):
+            logger.log_error(
+                f"Length of target_list ({len(target_list)}) must match number of actions ({len(self._actions)})."
            )
+        start_qpos = self.motion_generator.robot.get_qpos()
+        n_envs = start_qpos.shape[0]
+        all_dof = self.motion_generator.robot.dof
+        all_trajectory = torch.empty(
+            size=(n_envs, 0, all_dof), dtype=torch.float32, device=self.device
+        )

-        # Register action classes for dynamic instantiation
-        for action_name, action_class in _global_action_registry.items():
-            # Don't override default actions
-            if action_name not in ["reach", "grasp", "move", "release"]:
-                for part in control_parts:
-                    action_key = f"{action_name}_{part}"
-                    if action_key not in self._actions:
-                        # Create instance with default config
-                        try:
-                            instance = action_class(
-                                motion_generator=self.motion_generator,
-                                robot=self.robot,
-                                control_part=part,
-                                device=self.device,
-                            )
-                            self._actions[action_key] = instance
-                        except Exception:
-                            # Skip if instantiation fails
-                            pass
-
-    def register_action(self, name: str, action: AtomicAction):
-        """Register a custom atomic action."""
-        self._actions[name] = action
-
-    def unregister_action(self, name: str) -> bool:
-        """Unregister an action by name.
-
-        Args:
-            name: Name of the action to unregister
-
-        Returns:
-            True if action was found and removed, False otherwise
-        """
-        if name in self._actions:
-            del self._actions[name]
-            return True
-        return False
+        for i, target in enumerate(target_list):
+            atom_action = self._actions[i]
+            target = self._resolve_target(target)
+            control_part = atom_action.control_part
+            part_joint_ids = self.motion_generator.robot.get_joint_ids(
+                name=control_part
+            )
+            start_qpos_part = start_qpos[:, part_joint_ids]
+            is_success, traj, joint_ids = atom_action.execute(
+                target=target, start_qpos=start_qpos_part
+            )
+            if not is_success:
+                # Abort early; a failed action may return an empty trajectory.
+                return False, all_trajectory
+            n_waypoints = traj.shape[1]

-    def get_action_names(self) -> List[str]:
-        """Get list of registered action names."""
-        return list(self._actions.keys())
+            traj_full = torch.zeros(
+                size=(n_envs, n_waypoints, all_dof),
+                dtype=torch.float32,
+                device=self.device,
+            )
+            traj_full[:, :] = start_qpos.unsqueeze(1)
+            traj_full[:, :, joint_ids] = traj
+            all_trajectory = torch.cat((all_trajectory, traj_full), dim=1)
+            # Update start qpos for the next action in the chain
+            start_qpos[:, joint_ids] = traj[:, -1, :]
+        return True, all_trajectory
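+
+    # A minimal `execute_static` round trip (illustrative; mirrors
+    # scripts/tutorials/sim/atom_action.py, where `mug_semantics`, `place_xpos`
+    # and `rest_xpos` are built):
+    #
+    #   ok, traj = engine.execute_static(
+    #       target_list=[mug_semantics, place_xpos, rest_xpos]
+    #   )
+    #   # traj: (n_envs, total_waypoints, robot_dof), ready for robot.set_qpos.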

-    def execute(
+    def validate(
         self,
         action_name: str,
-        target: Union[torch.Tensor, str, ObjectSemantics],
+        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
         control_part: Optional[str] = None,
         **kwargs,
-    ) -> PlanResult:
-        """Execute an atomic action.
-
-        Args:
-            action_name: Name of registered action
-            target: Target pose, object label, or ObjectSemantics
-            control_part: Robot control part to use
-            **kwargs: Additional action parameters
-
-        Returns:
-            PlanResult with trajectory (positions, velocities, accelerations),
-            end-effector poses (xpos_list), and success status.
-        """
-        # Resolve action
+    ) -> bool:
+        """Validate if an action is feasible without executing."""
-        if control_part:
-            action_key = f"{action_name}_{control_part}"
-        else:
-            action_key = action_name
-
-        if action_key not in self._actions:
-            raise ValueError(f"Unknown action: {action_key}")
-
-        action = self._actions[action_key]
+        # self._actions is a list, so look the action up by its configured name
+        # (and control part, when given) rather than by a registry key.
+        action = next(
+            (
+                act
+                for act in self._actions
+                if act.cfg.name == action_name
+                and (control_part is None or act.control_part == control_part)
+            ),
+            None,
+        )
+        if action is None:
+            return False

-        # Resolve target to ObjectSemantics if string label provided
-        if isinstance(target, str):
-            target = self._semantic_analyzer.analyze(target)
+        target = self._resolve_target(target)

-        # Execute action - returns PlanResult directly
-        return action.execute(target, **kwargs)
+        return action.validate(target, **kwargs)

-    def validate(
+    def _resolve_target(
         self,
-        action_name: str,
-        target: Union[torch.Tensor, str, ObjectSemantics],
-        control_part: Optional[str] = None,
-        **kwargs,
-    ) -> bool:
-        """Validate if an action is feasible without executing."""
-        if control_part:
-            action_key = f"{action_name}_{control_part}"
-        else:
-            action_key = action_name
+        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
+    ) -> Union[torch.Tensor, ObjectSemantics]:
+        """Resolve user target input into tensor pose or ObjectSemantics.

-        if action_key not in self._actions:
-            return False
+        Supports the convenience dict format in ``execute`` and ``validate``.
+        """
+        if isinstance(target, torch.Tensor):
+            return target

-        action = self._actions[action_key]
+        if isinstance(target, ObjectSemantics):
+            return target

         if isinstance(target, str):
-            target = self._semantic_analyzer.analyze(target)
+            return self._semantic_analyzer.analyze(target)
+
+        if isinstance(target, dict):
+            if "pose" in target:
+                pose = target["pose"]
+                if not isinstance(pose, torch.Tensor):
+                    raise TypeError("target['pose'] must be a torch.Tensor")
+                return pose
+
+            if "semantics" in target:
+                semantics = target["semantics"]
+                if not isinstance(semantics, ObjectSemantics):
+                    raise TypeError(
+                        "target['semantics'] must be an ObjectSemantics instance"
+                    )
+                return semantics
+
+            label = target.get("label")
+            if label is None:
+                raise ValueError(
+                    "Dict target must provide 'label', or use 'pose'/'semantics'."
+ ) + if not isinstance(label, str): + raise TypeError("target['label'] must be a string") + + geometry = target.get("geometry") + custom_config = target.get("custom_config") + use_cache = target.get("use_cache", True) + + semantics = self._semantic_analyzer.analyze( + label=label, + geometry=geometry, + custom_config=custom_config, + use_cache=use_cache, + ) - return action.validate(target, **kwargs) + properties = target.get("properties") + if properties is not None: + semantics.properties.update(properties) + + uid = target.get("uid") + if uid is not None: + semantics.uid = uid + + return semantics + + raise TypeError( + "target must be torch.Tensor, str, ObjectSemantics, or Dict[str, Any]" + ) def get_semantic_analyzer(self) -> SemanticAnalyzer: """Get the semantic analyzer for object understanding.""" diff --git a/embodichain/lab/sim/planners/motion_generator.py b/embodichain/lab/sim/planners/motion_generator.py index 0682c492..f8dedac7 100644 --- a/embodichain/lab/sim/planners/motion_generator.py +++ b/embodichain/lab/sim/planners/motion_generator.py @@ -508,7 +508,11 @@ def interpolate_trajectory( qpos_seed = options.start_qpos if qpos_seed is None and qpos_list is not None: + # first waypoint as seed qpos_seed = qpos_list[0] + if qpos_seed is None: + # fallback to current robot state as seed + qpos_seed = self.robot.get_qpos(name=control_part)[0] # Generate trajectory interpolate_qpos_list = [] @@ -551,9 +555,14 @@ def interpolate_trajectory( # compute_batch_ik expects (n_envs, n_batch, 7) or (n_envs, n_batch, 4, 4) # Here we assume n_envs = 1 or we want to apply this to all envs if available. # Since MotionGenerator usually works with self.robot.device, we use its batching capabilities. + qpos_seed_repeat = ( + qpos_seed.unsqueeze(0) + .repeat(total_interpolated_poses.shape[0], 1) + .unsqueeze(0) + ) success_batch, qpos_batch = self.robot.compute_batch_ik( pose=total_interpolated_poses.unsqueeze(0), - joint_seed=None, # Or use qpos_seed if properly shaped + joint_seed=qpos_seed_repeat, # Or use qpos_seed if properly shaped name=control_part, ) diff --git a/embodichain/lab/sim/planners/toppra_planner.py b/embodichain/lab/sim/planners/toppra_planner.py index 0c20ccf9..218d17ed 100644 --- a/embodichain/lab/sim/planners/toppra_planner.py +++ b/embodichain/lab/sim/planners/toppra_planner.py @@ -191,11 +191,9 @@ def plan( ) # Build waypoints - waypoints = [] - for target in target_states: - waypoints.append(np.array(target.qpos)) - - waypoints = np.array(waypoints) + waypoints = np.array( + [target.qpos.to("cpu").numpy() for target in target_states] + ) # Create spline interpolation # NOTE: Suitable for dense waypoints ss = np.linspace(0, 1, len(waypoints)) diff --git a/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py b/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py index 658f4f88..9ec009bc 100644 --- a/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py +++ b/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py @@ -73,7 +73,7 @@ class GraspGeneratorCfg: number of sampled surface points, ray perturbation angle, and gripper jaw distance limits. See :class:`AntipodalSamplerCfg` for details.""" - max_deviation_angle: float = np.pi / 12 + max_deviation_angle: float = np.pi / 6 """Maximum allowed angle (in radians) between the specified approach direction and the axis connecting an antipodal point pair. 
Pairs that deviate more than this threshold from perpendicular to the approach are diff --git a/scripts/tutorials/sim/atom_action.py b/scripts/tutorials/sim/atom_action.py new file mode 100644 index 00000000..6b0e8c9c --- /dev/null +++ b/scripts/tutorials/sim/atom_action.py @@ -0,0 +1,291 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +""" +This script demonstrates the atomic action abstraction layer in the SimulationManager. +""" + +import argparse +import numpy as np +import time +import torch + +from embodichain.lab.sim import SimulationManager, SimulationManagerCfg +from embodichain.lab.sim.objects import Robot, RigidObject +from embodichain.lab.sim.shapes import MeshCfg +from embodichain.lab.sim.solvers import PytorchSolverCfg +from embodichain.data import get_data_path +from embodichain.lab.sim.cfg import ( + JointDrivePropertiesCfg, + RobotCfg, + RigidObjectCfg, + RigidBodyAttributesCfg, + LightCfg, + URDFCfg, +) +from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg +from embodichain.lab.sim.shapes import MeshCfg + +from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import ( + GripperCollisionCfg, +) +from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import ( + GraspGenerator, + GraspGeneratorCfg, + AntipodalSamplerCfg, +) +from embodichain.lab.sim.atomic_actions.engine import ( + AtomicActionEngine, + register_action, +) +from embodichain.lab.sim.atomic_actions.core import ObjectSemantics, AntipodalAffordance +from embodichain.lab.sim.atomic_actions.actions import ( + PickUpActionCfg, + PlaceActionCfg, + MoveActionCfg, +) + + +def parse_arguments(): + """ + Parse command-line arguments to configure the simulation. + + Returns: + argparse.Namespace: Parsed arguments including number of environments, device, and rendering options. + """ + parser = argparse.ArgumentParser( + description="Create and simulate a robot in SimulationManager" + ) + parser.add_argument( + "--enable_rt", action="store_true", help="Enable ray tracing rendering" + ) + parser.add_argument( + "--num_envs", type=int, default=1, help="Number of parallel environments" + ) + return parser.parse_args() + + +def initialize_simulation(args): + """ + Initialize the simulation environment based on the provided arguments. + + Args: + args (argparse.Namespace): Parsed command-line arguments. + + Returns: + SimulationManager: Configured simulation manager instance. 
+ """ + config = SimulationManagerCfg( + headless=True, + sim_device="cuda", + enable_rt=args.enable_rt, + physics_dt=1.0 / 100.0, + num_envs=args.num_envs, + ) + sim = SimulationManager(config) + + light = sim.add_light( + cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0)) + ) + + return sim + + +def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]): + """ + Create and configure a robot with an arm and a dexterous hand in the simulation. + + Args: + sim (SimulationManager): The simulation manager instance. + + Returns: + Robot: The configured robot instance added to the simulation. + """ + # Retrieve URDF paths for the robot arm and hand + ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf") + gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf") + # Configure the robot with its components and control properties + cfg = RobotCfg( + uid="UR10", + urdf_cfg=URDFCfg( + components=[ + {"component_type": "arm", "urdf_path": ur10_urdf_path}, + {"component_type": "hand", "urdf_path": gripper_urdf_path}, + ] + ), + drive_pros=JointDrivePropertiesCfg( + stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e2}, + damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e1}, + max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e3}, + drive_type="force", + ), + control_parts={ + "arm": ["JOINT[0-9]"], + "hand": ["FINGER[1-2]"], + }, + solver_cfg={ + "arm": PytorchSolverCfg( + end_link_name="ee_link", + root_link_name="base_link", + tcp=[ + [0.0, 1.0, 0.0, 0.0], + [-1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.12], + [0.0, 0.0, 0.0, 1.0], + ], + ) + }, + init_qpos=[0.0, -np.pi / 2, -np.pi / 2, np.pi / 2, -np.pi / 2, 0.0, 0.0, 0.0], + init_pos=position, + ) + return sim.add_robot(cfg=cfg) + + +def create_mug(sim: SimulationManager) -> RigidObject: + mug_cfg = RigidObjectCfg( + uid="table", + shape=MeshCfg( + fpath=get_data_path("CoffeeCup/cup.ply"), + ), + attrs=RigidBodyAttributesCfg( + mass=0.01, + dynamic_friction=0.97, + static_friction=0.99, + ), + max_convex_hull_num=16, + init_pos=[0.55, 0.0, 0.01], + init_rot=[0.0, 0.0, -90], + body_scale=(4, 4, 4), + ) + mug = sim.add_rigid_object(cfg=mug_cfg) + return mug + + +def main(): + """ + Main function to demonstrate robot simulation. + + This function initializes the simulation, creates the robot and mug objects, + and performs the pickup action task. 
+ """ + args = parse_arguments() + sim: SimulationManager = initialize_simulation(args) + robot = create_robot(sim) + mug = create_mug(sim) + + motion_gen = MotionGenerator( + cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid)) + ) + + pickup_cfg = PickUpActionCfg( + hand_open_qpos=torch.tensor( + [0.00, 0.00], dtype=torch.float32, device=sim.device + ), + hand_close_qpos=torch.tensor( + [0.025, 0.025], dtype=torch.float32, device=sim.device + ), + control_part="arm", + hand_control_part="hand", + approach_direction=torch.tensor( + [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device + ), + pre_grasp_distance=0.15, + lift_height=0.15, + ) + + place_cfg = PlaceActionCfg( + hand_open_qpos=torch.tensor( + [0.00, 0.00], dtype=torch.float32, device=sim.device + ), + hand_close_qpos=torch.tensor( + [0.025, 0.025], dtype=torch.float32, device=sim.device + ), + control_part="arm", + hand_control_part="hand", + lift_height=0.15, + ) + + move_cfg = MoveActionCfg( + control_part="arm", + ) + + atom_engine = AtomicActionEngine( + motion_generator=motion_gen, + actions_cfg_list=(pickup_cfg, place_cfg, move_cfg), + ) + + sim.init_gpu_physics() + sim.open_window() + + # Define object semantics and affordances for the mug + gripper_collision_cfg = GripperCollisionCfg( + max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012 + ) + generator_cfg = GraspGeneratorCfg( + viser_port=11801, + antipodal_sampler_cfg=AntipodalSamplerCfg( + n_sample=20000, max_length=0.088, min_length=0.003 + ), + ) + mug_grasp_affordance = AntipodalAffordance( + object_label="mug", + force_reannotate=False, # set to True if you want to re-annotate affordance even if the object has been seen before, which is useful when you have changed the grasp generator configuration and want to see the effect of new configuration, but it will take more time to annotate. So usually set it to False and only set it to True when you have changed the grasp generator configuration or you want to debug the annotation process. 
+        custom_config={
+            "gripper_collision_cfg": gripper_collision_cfg,
+            "generator_cfg": generator_cfg,
+        },
+    )
+    mug_semantics = ObjectSemantics(
+        label="mug",
+        geometry={
+            "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": mug.get_triangles(env_ids=[0])[0],
+        },
+        affordance=mug_grasp_affordance,
+        entity=mug,  # in order to fetch object pose
+    )
+
+    target_grasp_xpos = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022, 0.4489],
+            [-0.9977, 0.0540, -0.0401, -0.0030],
+            [0.0401, 0.0000, -0.9992, 0.1400],
+            [0.0000, 0.0000, 0.0000, 1.0000],
+        ],
+        dtype=torch.float32,
+        device=sim.device,
+    )
+
+    place_xpos = target_grasp_xpos.clone()
+    place_xpos[:3, 3] += torch.tensor([-0.2, 0.4, 0.1], device=sim.device)
+
+    rest_xpos = target_grasp_xpos.clone()
+    rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device)
+
+    is_success, traj = atom_engine.execute_static(
+        target_list=[mug_semantics, place_xpos, rest_xpos]
+    )
+    n_waypoint = traj.shape[1]
+    for i in range(n_waypoint):
+        robot.set_qpos(traj[:, i])
+        sim.update(step=4)
+        time.sleep(1e-2)
+
+    input("Press Enter to exit...")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/sim/atomic_actions/test_atom_actions.py b/tests/sim/atomic_actions/test_atom_actions.py
new file mode 100644
index 00000000..751130a2
--- /dev/null
+++ b/tests/sim/atomic_actions/test_atom_actions.py
@@ -0,0 +1,257 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+
+import numpy as np
+import torch
+
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.objects import Robot, RigidObject
+from embodichain.lab.sim.shapes import MeshCfg
+from embodichain.lab.sim.solvers import PytorchSolverCfg
+from embodichain.data import get_data_path
+from embodichain.lab.sim.cfg import (
+    JointDrivePropertiesCfg,
+    RobotCfg,
+    RigidObjectCfg,
+    RigidBodyAttributesCfg,
+    LightCfg,
+    URDFCfg,
+)
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
+    GraspGenerator,
+    GraspGeneratorCfg,
+    AntipodalSamplerCfg,
+)
+from embodichain.lab.sim.atomic_actions.engine import (
+    AtomicActionEngine,
+    register_action,
+)
+from embodichain.lab.sim.atomic_actions.core import ObjectSemantics, AntipodalAffordance
+from embodichain.lab.sim.atomic_actions.actions import (
+    PickUpActionCfg,
+    PlaceActionCfg,
+    MoveActionCfg,
+)
+
+
+def initialize_simulation():
+    """
+    Initialize the simulation environment for the test.
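+
+    The test runs a single headless environment on CUDA with a 100 Hz
+    physics step.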
+
+    Returns:
+        SimulationManager: Configured simulation manager instance.
+    """
+    config = SimulationManagerCfg(
+        headless=True,
+        sim_device="cuda",
+        physics_dt=1.0 / 100.0,
+        num_envs=1,
+    )
+    sim = SimulationManager(config)
+
+    light = sim.add_light(
+        cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0))
+    )
+
+    return sim
+
+
+def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]):
+    """
+    Create and configure a robot with an arm and a parallel-jaw gripper in the simulation.
+
+    Args:
+        sim (SimulationManager): The simulation manager instance.
+        position (list): Initial base position of the robot in world coordinates.
+
+    Returns:
+        Robot: The configured robot instance added to the simulation.
+    """
+    # Retrieve URDF paths for the robot arm and hand
+    ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
+    gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf")
+    # Configure the robot with its components and control properties
+    cfg = RobotCfg(
+        uid="UR10",
+        urdf_cfg=URDFCfg(
+            components=[
+                {"component_type": "arm", "urdf_path": ur10_urdf_path},
+                {"component_type": "hand", "urdf_path": gripper_urdf_path},
+            ]
+        ),
+        drive_pros=JointDrivePropertiesCfg(
+            stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e2},
+            damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e1},
+            max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e3},
+            drive_type="force",
+        ),
+        control_parts={
+            "arm": ["JOINT[0-9]"],
+            "hand": ["FINGER[1-2]"],
+        },
+        solver_cfg={
+            "arm": PytorchSolverCfg(
+                end_link_name="ee_link",
+                root_link_name="base_link",
+                tcp=[
+                    [0.0, 1.0, 0.0, 0.0],
+                    [-1.0, 0.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.12],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            )
+        },
+        init_qpos=[0.0, -np.pi / 2, -np.pi / 2, np.pi / 2, -np.pi / 2, 0.0, 0.0, 0.0],
+        init_pos=position,
+    )
+    return sim.add_robot(cfg=cfg)
+
+
+def create_mug(sim: SimulationManager) -> RigidObject:
+    """Create a rigid mug object for the robot to pick up."""
+    mug_cfg = RigidObjectCfg(
+        uid="mug",
+        shape=MeshCfg(
+            fpath=get_data_path("CoffeeCup/cup.ply"),
+        ),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.01,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[0.55, 0.0, 0.01],
+        init_rot=[0.0, 0.0, -90],
+        body_scale=(4, 4, 4),
+    )
+    mug = sim.add_rigid_object(cfg=mug_cfg)
+    return mug
+
+
+def test_atomic_actions():
+    sim: SimulationManager = initialize_simulation()
+    robot = create_robot(sim)
+    mug = create_mug(sim)
+
+    motion_gen = MotionGenerator(
+        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+    )
+
+    pickup_cfg = PickUpActionCfg(
+        hand_open_qpos=torch.tensor(
+            [0.00, 0.00], dtype=torch.float32, device=sim.device
+        ),
+        hand_close_qpos=torch.tensor(
+            [0.025, 0.025], dtype=torch.float32, device=sim.device
+        ),
+        control_part="arm",
+        hand_control_part="hand",
+        approach_direction=torch.tensor(
+            [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device
+        ),
+        pre_grasp_distance=0.15,
+        lift_height=0.15,
+    )
+
+    place_cfg = PlaceActionCfg(
+        hand_open_qpos=torch.tensor(
+            [0.00, 0.00], dtype=torch.float32, device=sim.device
+        ),
+        hand_close_qpos=torch.tensor(
+            [0.025, 0.025], dtype=torch.float32, device=sim.device
+        ),
+        control_part="arm",
+        hand_control_part="hand",
+        lift_height=0.15,
+    )
+
+    move_cfg = MoveActionCfg(
+        control_part="arm",
+    )
+
+    atom_engine = AtomicActionEngine(
+        motion_generator=motion_gen,
+        actions_cfg_list=(pickup_cfg, place_cfg, move_cfg),
+    )
+
+    sim.init_gpu_physics()
+
+    # Define object semantics and affordances for the mug
+    gripper_collision_cfg = GripperCollisionCfg(
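+        # Gripper geometry (DH PGC-140 jaws) used by the grasp collision
+        # checker to filter antipodal grasp candidates.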
+        max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012
+    )
+    generator_cfg = GraspGeneratorCfg(
+        viser_port=11801,
+        antipodal_sampler_cfg=AntipodalSamplerCfg(
+            n_sample=20000, max_length=0.088, min_length=0.003
+        ),
+    )
+    mug_grasp_affordance = AntipodalAffordance(
+        object_label="mug",
+        # force_reannotate=True re-annotates the affordance even if this object
+        # has been seen before. Useful after changing the grasp generator
+        # configuration, at the cost of extra annotation time.
+        force_reannotate=False,
+        custom_config={
+            "gripper_collision_cfg": gripper_collision_cfg,
+            "generator_cfg": generator_cfg,
+        },
+    )
+    mug_semantics = ObjectSemantics(
+        label="mug",
+        geometry={
+            "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": mug.get_triangles(env_ids=[0])[0],
+        },
+        affordance=mug_grasp_affordance,
+        entity=mug,  # needed so the action can read the mug's live pose
+    )
+
+    target_grasp_xpos = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022, 0.4489],
+            [-0.9977, 0.0540, -0.0401, -0.0030],
+            [0.0401, 0.0000, -0.9992, 0.1400],
+            [0.0000, 0.0000, 0.0000, 1.0000],
+        ],
+        dtype=torch.float32,
+        device=sim.device,
+    )
+
+    place_xpos = target_grasp_xpos.clone()
+    place_xpos[:3, 3] += torch.tensor([-0.2, 0.4, 0.1], device=sim.device)
+
+    rest_xpos = target_grasp_xpos.clone()
+    rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device)
+
+    # Pass the fixed grasp pose directly (mug_semantics is built above for
+    # illustration); this keeps the test independent of grasp annotation.
+    is_success, traj = atom_engine.execute_static(
+        target_list=[target_grasp_xpos, place_xpos, rest_xpos]
+    )
+    assert is_success, "Atomic action execution failed."
+    assert traj.shape == torch.Size([1, 210, 8])
+
+
+if __name__ == "__main__":
+    test_atomic_actions()
diff --git a/tests/sim/atomic_actions/test_core.py b/tests/sim/atomic_actions/test_core.py
deleted file mode 100644
index bbb02a36..00000000
--- a/tests/sim/atomic_actions/test_core.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ---------------------------------------------------------------------------- - -import torch -import pytest - -from embodichain.lab.sim.atomic_actions import ( - Affordance, - GraspPose, - InteractionPoints, - ObjectSemantics, - ActionCfg, - register_action, - unregister_action, - get_registered_actions, -) - - -class TestAffordance: - """Test affordance base class and subclasses.""" - - def test_affordance_base(self): - """Test base affordance class.""" - aff = Affordance(object_label="test_object") - assert aff.object_label == "test_object" - assert aff.get_batch_size() == 1 - - def test_grasp_pose_default(self): - """Test GraspPose with default values.""" - grasp = GraspPose(object_label="bottle") - assert grasp.object_label == "bottle" - assert grasp.poses.shape == (1, 4, 4) - assert grasp.grasp_types == ["default"] - assert grasp.get_batch_size() == 1 - - def test_grasp_pose_multiple(self): - """Test GraspPose with multiple poses.""" - poses = torch.stack( - [ - torch.eye(4), - torch.eye(4), - torch.eye(4), - ] - ) - grasp = GraspPose( - object_label="bottle", - poses=poses, - grasp_types=["pinch", "power", "hook"], - ) - assert grasp.get_batch_size() == 3 - - # Test get_grasp_by_type - pinch_pose = grasp.get_grasp_by_type("pinch") - assert pinch_pose is not None - assert torch.allclose(pinch_pose, torch.eye(4)) - - nonexistent = grasp.get_grasp_by_type("nonexistent") - assert nonexistent is None - - def test_grasp_pose_best_grasp(self): - """Test get_best_grasp method.""" - poses = torch.stack( - [ - torch.eye(4), - torch.eye(4) * 2, - ] - ) - confidence = torch.tensor([0.7, 0.9]) - grasp = GraspPose( - poses=poses, - grasp_types=["low_conf", "high_conf"], - confidence_scores=confidence, - ) - - best = grasp.get_best_grasp() - # Should return the second pose (higher confidence) - assert torch.allclose(best, poses[1]) - - def test_interaction_points(self): - """Test InteractionPoints class.""" - points = torch.tensor( - [ - [0.1, 0.0, 0.0], - [0.0, 0.1, 0.0], - [0.0, 0.0, 0.1], - ] - ) - normals = torch.tensor( - [ - [1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0], - ] - ) - interaction = InteractionPoints( - object_label="cube", - points=points, - normals=normals, - point_types=["push", "poke", "touch"], - ) - - assert interaction.get_batch_size() == 3 - - # Test get_points_by_type - push_points = interaction.get_points_by_type("push") - assert push_points is not None - assert torch.allclose(push_points, points[0:1]) - - nonexistent = interaction.get_points_by_type("nonexistent") - assert nonexistent is None - - # Test get_approach_direction - approach = interaction.get_approach_direction(0) - assert torch.allclose(approach, torch.tensor([-1.0, 0.0, 0.0])) - - def test_interaction_points_no_normals(self): - """Test InteractionPoints without normals.""" - points = torch.tensor([[0.1, 0.2, 0.3]]) - interaction = InteractionPoints(points=points) - - # Default approach direction should be +z - approach = interaction.get_approach_direction(0) - assert torch.allclose(approach, torch.tensor([0.0, 0.0, 1.0])) - - -class TestObjectSemantics: - """Test ObjectSemantics dataclass.""" - - def test_basic_creation(self): - """Test basic ObjectSemantics creation.""" - affordance = GraspPose(object_label="bottle") - semantics = ObjectSemantics( - label="bottle", - affordance=affordance, - geometry={"bounding_box": [0.1, 0.2, 0.3]}, - properties={"mass": 0.5, "friction": 0.8}, - uid="bottle_001", - ) - - assert semantics.label == "bottle" - assert semantics.uid == "bottle_001" - assert 
semantics.affordance.object_label == "bottle" - assert semantics.properties["mass"] == 0.5 - - def test_no_uid(self): - """Test ObjectSemantics without UID.""" - affordance = GraspPose() - semantics = ObjectSemantics( - label="apple", - affordance=affordance, - geometry={}, - properties={}, - ) - - assert semantics.uid is None - - -class TestActionCfg: - """Test ActionCfg dataclass.""" - - def test_defaults(self): - """Test ActionCfg default values.""" - cfg = ActionCfg() - assert cfg.control_part == "left_arm" - assert cfg.interpolation_type == "linear" - assert cfg.velocity_limit is None - assert cfg.acceleration_limit is None - - def test_custom_values(self): - """Test ActionCfg with custom values.""" - cfg = ActionCfg( - control_part="right_arm", - interpolation_type="toppra", - velocity_limit=0.5, - acceleration_limit=1.0, - ) - assert cfg.control_part == "right_arm" - assert cfg.interpolation_type == "toppra" - assert cfg.velocity_limit == 0.5 - assert cfg.acceleration_limit == 1.0 - - -class TestActionRegistry: - """Test action registry functions.""" - - def test_register_and_unregister(self): - """Test registering and unregistering actions.""" - from embodichain.lab.sim.atomic_actions import AtomicAction - - class TestAction(AtomicAction): - def execute(self, target, **kwargs): - return PlanResult(success=True) - - def validate(self, target, **kwargs): - return True - - # Register - register_action("test", TestAction) - assert "test" in get_registered_actions() - - # Unregister - unregister_action("test") - assert "test" not in get_registered_actions() - - def test_get_registered_actions_copy(self): - """Test that get_registered_actions returns a copy.""" - from embodichain.lab.sim.atomic_actions import AtomicAction - - initial = get_registered_actions() - - class DummyAction(AtomicAction): - def execute(self, target, **kwargs): - return PlanResult(success=True) - - def validate(self, target, **kwargs): - return True - - register_action("dummy", DummyAction) - - # Original should not contain the new action - assert "dummy" not in initial - - # Cleanup - unregister_action("dummy") From ddb785aecc52902e5adf0113ed4cc14daaf539f8 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Thu, 30 Apr 2026 17:05:50 +0000 Subject: [PATCH 5/9] wip --- .claude/skills/add-atomic-action/SKILL.md | 197 ++++++++++++++ .../embodichain.lab.sim.atomic_actions.rst | 89 +++++++ .../embodichain/embodichain.lab.sim.rst | 11 +- docs/source/introduction.rst | 6 +- docs/source/overview/sim/atomic_actions.md | 241 ++++++++++++++++++ docs/source/overview/sim/index.rst | 1 + docs/source/tutorial/atomic_actions.rst | 170 ++++++++++++ docs/source/tutorial/index.rst | 1 + .../lab/sim/atomic_actions/__init__.py | 1 + embodichain/lab/sim/atomic_actions/actions.py | 69 +++-- embodichain/lab/sim/atomic_actions/core.py | 2 - embodichain/lab/sim/atomic_actions/engine.py | 58 ++--- .../sim/{atom_action.py => atomic_actions.py} | 165 ++++++++---- 13 files changed, 882 insertions(+), 129 deletions(-) create mode 100644 .claude/skills/add-atomic-action/SKILL.md create mode 100644 docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst create mode 100644 docs/source/overview/sim/atomic_actions.md create mode 100644 docs/source/tutorial/atomic_actions.rst rename scripts/tutorials/sim/{atom_action.py => atomic_actions.py} (53%) diff --git a/.claude/skills/add-atomic-action/SKILL.md b/.claude/skills/add-atomic-action/SKILL.md new file mode 100644 index 00000000..9ae574a5 --- /dev/null +++ 
b/.claude/skills/add-atomic-action/SKILL.md
@@ -0,0 +1,197 @@
+---
+name: add-atomic-action
+description: Use when adding a new atomic action (motion primitive) to EmbodiChain's atomic action layer
+---
+
+# Add Atomic Action
+
+Scaffold a new atomic action following EmbodiChain's `ActionCfg` / `AtomicAction` pattern.
+
+## When to Use
+
+- User asks to add a new motion primitive (push, wipe, insert, hand-over, …)
+- User says "add a new atomic action", "create a custom action", "implement a push action"
+- User wants to extend `AtomicActionEngine` with a behaviour not covered by the built-ins
+
+## Key Files
+
+| Purpose | Path |
+|---------|------|
+| Base classes (`ActionCfg`, `AtomicAction`, `ObjectSemantics`) | `embodichain/lab/sim/atomic_actions/core.py` |
+| Built-in actions (reference implementations) | `embodichain/lab/sim/atomic_actions/actions.py` |
+| Engine + global registry (`register_action`) | `embodichain/lab/sim/atomic_actions/engine.py` |
+| Public API exports | `embodichain/lab/sim/atomic_actions/__init__.py` |
+| Reference docs | `docs/source/overview/sim/atomic_actions.md` |
+
+## Steps
+
+### 1. Define the config
+
+Add a `@configclass`-decorated class that extends `ActionCfg` (or `MoveActionCfg` /
+`GraspActionCfg` if the new action reuses arm/gripper fields).
+
+Place it in `embodichain/lab/sim/atomic_actions/actions.py` alongside the existing configs,
+or in a new file if the action is large.
+
+```python
+from embodichain.utils import configclass
+from embodichain.lab.sim.atomic_actions.core import ActionCfg  # or MoveActionCfg
+
+@configclass
+class PushActionCfg(ActionCfg):
+    name: str = "push"            # must match the registry key
+    push_distance: float = 0.05   # metres to push forward
+    push_speed: int = 30          # waypoints for the push phase
+    control_part: str = "arm"     # robot segment to control
+```
+
+**Rules:**
+- `name` must be unique and match the string passed to `register_action`.
+- Inherit from `GraspActionCfg` when the action needs hand open/close fields.
+- All fields must have defaults — configs are instantiated without arguments in tests.
+
+### 2. Implement the action class
+
+Subclass `AtomicAction` and implement the two abstract methods.
+
+```python
+import torch
+from typing import Optional, Union
+from embodichain.lab.sim.atomic_actions.core import AtomicAction, ObjectSemantics
+
+class PushAction(AtomicAction):
+    """Push an object forward by a fixed distance."""
+
+    def __init__(self, motion_generator, cfg: PushActionCfg | None = None):
+        super().__init__(motion_generator, cfg=cfg or PushActionCfg())
+        self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+
+    # ------------------------------------------------------------------
+    def execute(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list]:
+        """Plan the push motion and return a joint trajectory.
+
+        Args:
+            target: EEF pose tensor (n_envs, 4, 4) or ObjectSemantics.
+            start_qpos: Starting joint positions (n_envs, dof). Uses current
+                robot state when None.
+
+        Returns:
+            Tuple of (is_success, trajectory, joint_ids) where
+            trajectory has shape (n_envs, n_waypoints, len(joint_ids)).
+        """
+        # 1. Resolve target pose
+        # 2. Plan trajectory with self.motion_generator
+        # 3. Return result
+        return is_success, trajectory, self.arm_joint_ids
+
+    # ------------------------------------------------------------------
+    def validate(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> bool:
+        """Fast feasibility check — no trajectory generated.
+
+        Returns:
+            True if the action can be attempted.
+        """
+        return True  # add IK reachability check here if needed
+```
+
+**Rules:**
+- `execute()` must always return `(is_success: bool, trajectory: Tensor, joint_ids: list)`.
+- `trajectory` shape: `(n_envs, n_waypoints, len(joint_ids))`.
+- `joint_ids` tells the engine which DOF columns the trajectory covers.
+- `validate()` must be cheap — no motion planning allowed.
+- Call `super().__init__()` — it sets `self.robot`, `self.motion_generator`, and `self.cfg`.
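+
+For example, a cheap reachability screen makes a good `validate()` body (a
+sketch only; the 0.2 m to 1.3 m band is an illustrative number, not a property
+of any particular robot):
+
+```python
+import torch
+
+def validate(self, target, start_qpos=None, **kwargs) -> bool:
+    # Defer ObjectSemantics targets to execute(), where the grasp pose is resolved.
+    if not isinstance(target, torch.Tensor):
+        return True
+    # Keep pose targets whose translation lies in a rough reachability shell
+    # around the robot base; reshape handles (4, 4) and (n_envs, 4, 4) inputs.
+    pos = target.reshape(-1, 4, 4)[:, :3, 3]
+    dist = torch.linalg.norm(pos, dim=-1)
+    return bool(((dist > 0.2) & (dist < 1.3)).all())
+```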
+
+### 3. Register the action
+
+Register the new class so `AtomicActionEngine` can discover it by name.
+
+**Option A — register at module load (built-ins style)**
+
+In `embodichain/lab/sim/atomic_actions/engine.py`, add to the `builtin_action_map`
+dict inside `_init_actions`:
+
+```python
+builtin_action_map: Dict[str, Type[AtomicAction]] = {
+    "move": MoveAction,
+    "pick_up": PickUpAction,
+    "place": PlaceAction,
+    "push": PushAction,  # ← add here
+}
+```
+
+**Option B — register at runtime (custom / plugin style)**
+
+```python
+from embodichain.lab.sim.atomic_actions import register_action
+register_action("push", PushAction)
+```
+
+### 4. Export from the public API
+
+Add config and action class to `embodichain/lab/sim/atomic_actions/__init__.py`:
+
+```python
+from .actions import PushAction, PushActionCfg
+
+__all__ = [
+    ...,
+    "PushAction",
+    "PushActionCfg",
+]
+```
+
+### 5. Update the supported actions table
+
+Add a row to the table in `docs/source/overview/sim/atomic_actions.md` under
+"Supported Actions":
+
+```markdown
+| `PushAction` | `PushActionCfg` | `Tensor (4,4)` — contact pose | Approach → push forward |
+```
+
+### 6.
Write a test + +Add a test in `tests/sim/atomic_actions/` (append to an existing file or create a new one): + +```python +def test_push_action_cfg_defaults(): + cfg = PushActionCfg() + assert cfg.name == "push" + assert cfg.push_distance == 0.05 + +def test_push_action_validate(mock_motion_generator): + action = PushAction(mock_motion_generator) + assert action.validate(target=torch.eye(4)) is True +``` + +## Common Mistakes + +| Mistake | Fix | +|---------|-----| +| `name` in config doesn't match registry key | Keep `cfg.name` identical to the string in `register_action("push", ...)` | +| Returning `trajectory` without `joint_ids` | Always return the 3-tuple `(bool, Tensor, list)` | +| `trajectory` shape `(n_envs, dof, n_waypoints)` | Correct shape is `(n_envs, n_waypoints, dof)` | +| Doing motion planning inside `validate()` | `validate()` must be fast — IK check only | +| Not calling `super().__init__()` | Required to set `self.robot`, `self.motion_generator`, `self.cfg` | +| Inheriting `MoveActionCfg` instead of `ActionCfg` | Use `MoveActionCfg` only when the action reuses arm-control fields; otherwise use `ActionCfg` | +| Forgetting to export from `__init__.py` | Users import from the public API — missing exports cause `ImportError` | + +## Quick Reference + +| Step | Action | +|------|--------| +| 1 | Define `@configclass` config extending `ActionCfg` with `name` field | +| 2 | Subclass `AtomicAction`, implement `execute()` and `validate()` | +| 3 | Register: add to `_builtin_action_map` or call `register_action()` | +| 4 | Export from `__init__.py` | +| 5 | Add row to supported-actions table in overview docs | +| 6 | Write tests for config defaults and `validate()` | diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst new file mode 100644 index 00000000..181086c3 --- /dev/null +++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst @@ -0,0 +1,89 @@ +embodichain.lab.sim.atomic_actions +================================== + +.. automodule:: embodichain.lab.sim.atomic_actions + + .. rubric:: Classes + + .. autosummary:: + + Affordance + InteractionPoints + ObjectSemantics + ActionCfg + AtomicAction + MoveActionCfg + MoveAction + PickUpActionCfg + PickUpAction + PlaceActionCfg + PlaceAction + AtomicActionEngine + +.. currentmodule:: embodichain.lab.sim.atomic_actions + +Core +---- + +.. autoclass:: Affordance + :members: + :show-inheritance: + +.. autoclass:: InteractionPoints + :members: + :show-inheritance: + +.. autoclass:: ObjectSemantics + :members: + :show-inheritance: + +.. autoclass:: ActionCfg + :members: + :exclude-members: __init__, copy, replace, to_dict, validate + +.. autoclass:: AtomicAction + :members: + :show-inheritance: + +Actions +------- + +.. autoclass:: MoveActionCfg + :members: + :exclude-members: __init__, copy, replace, to_dict, validate + :show-inheritance: + +.. autoclass:: MoveAction + :members: + :show-inheritance: + +.. autoclass:: PickUpActionCfg + :members: + :exclude-members: __init__, copy, replace, to_dict, validate + :show-inheritance: + +.. autoclass:: PickUpAction + :members: + :show-inheritance: + +.. autoclass:: PlaceActionCfg + :members: + :exclude-members: __init__, copy, replace, to_dict, validate + :show-inheritance: + +.. autoclass:: PlaceAction + :members: + :show-inheritance: + +Engine & Registry +----------------- + +.. autoclass:: AtomicActionEngine + :members: + :show-inheritance: + +.. 
autofunction:: register_action + +.. autofunction:: unregister_action + +.. autofunction:: get_registered_actions diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.rst index 2a21fcf0..942f0c7d 100644 --- a/docs/source/api_reference/embodichain/embodichain.lab.sim.rst +++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.rst @@ -15,8 +15,9 @@ shapes objects sensors - planners solvers + planners + atomic_actions utility .. currentmodule:: embodichain.lab.sim @@ -101,6 +102,14 @@ Planners embodichain.lab.sim.planners +Atomic Actions +-------------- + +.. toctree:: + :maxdepth: 1 + + embodichain.lab.sim.atomic_actions + Utility ------- diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index eae35d96..d437b4fb 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -44,11 +44,11 @@ Getting Started To get started with EmbodiChain, follow these steps: - `Installation - Guide `__ + Guide `__ - `Quick Start - Tutorial `__ + Tutorial `__ - `API - Reference `__ + Reference `__ Contribution Guide ------------------ diff --git a/docs/source/overview/sim/atomic_actions.md b/docs/source/overview/sim/atomic_actions.md new file mode 100644 index 00000000..979df571 --- /dev/null +++ b/docs/source/overview/sim/atomic_actions.md @@ -0,0 +1,241 @@ +# Atomic Actions + +```{currentmodule} embodichain.lab.sim.atomic_actions +``` + +Atomic actions are the building blocks for automated robot motion generation. Each action encapsulates a complete, self-contained motion primitive — such as picking up an object or moving to a pose — that can be chained together to form complex manipulation workflows. + +## Design Overview + +The module is organized into three layers: + +``` +AtomicActionEngine ← orchestrates a sequence of actions + │ + ├── AtomicAction(s) ← each action plans one motion primitive + │ │ + │ └── MotionGenerator ← low-level trajectory planner (IK + trajectory optimization) + │ + └── SemanticAnalyzer ← resolves object labels → ObjectSemantics +``` + +Each action receives a target (object semantics or a pose tensor), runs its planning pipeline, +and returns a joint trajectory. The engine threads the end state of each action as the start +state of the next, then concatenates all trajectories into one contiguous sequence: + +``` +ObjectSemantics ──► AffordanceEstimation ──► AtomicAction.execute() +(label + geometry │ + + affordance ├─ IK solve + + entity) ├─ Motion plan + └─ Gripper interpolation + │ +AtomicActionEngine ◄─────────────── PlanResult ───────┘ +(sequences actions, accumulates + full-robot trajectory) +``` + +### Core Concepts + +**`ObjectSemantics`** describes an interaction target. It bundles: +- `geometry` — mesh data (vertices, triangles) used for grasp annotation +- `affordance` — *how* to interact with the object (e.g. antipodal grasp poses) +- `entity` — a live reference to the simulation object, so actions can read its current pose + +**`Affordance`** is a data class that encodes a specific interaction capability. The built-in affordance types are: + +| Class | Use case | +|---|---| +| `AntipodalAffordance` | Parallel-jaw grasping via antipodal point pairs | +| `InteractionPoints` | Contact-based interactions (push, poke, touch) | + +**`AtomicAction`** is the abstract base class for all motion primitives. 
Every action must implement: +- `execute(target, start_qpos)` — plan and return a joint trajectory +- `validate(target, start_qpos)` — fast feasibility check without full planning + +**`AtomicActionEngine`** manages a named registry of actions and runs them in sequence via `execute_static()`, threading the end state of each action as the start state of the next. + +--- + +## Built-in Actions + +(supported_atomic_actions)= + +The following actions are available out of the box: + +| Action | Config class | Target type | Motion phases | +|---|---|---|---| +| `MoveAction` | `MoveActionCfg` | `Tensor (4,4)` — EEF pose | Move arm to pose | +| `PickUpAction` | `PickUpActionCfg` | `ObjectSemantics` or `Tensor (4,4)` | Approach → close gripper → lift | +| `PlaceAction` | `PlaceActionCfg` | `Tensor (4,4)` — EEF release pose | Lower → open gripper → retract | + +### `MoveAction` + +Moves the end-effector to a target pose in free space. + +| Config field | Default | Description | +|---|---|---| +| `control_part` | `"arm"` | Robot control part to move | +| `sample_interval` | `50` | Number of waypoints in the trajectory | + +**Target:** `torch.Tensor` of shape `(4, 4)` or `(n_envs, 4, 4)` — a homogeneous EEF pose. + +--- + +### `PickUpAction` + +Three-phase grasp motion: *approach → close gripper → lift*. + +| Config field | Default | Description | +|---|---|---| +| `approach_direction` | `[0, 0, -1]` | Gripper approach direction in object frame | +| `pre_grasp_distance` | `0.15` | Hover distance before descending (m) | +| `lift_height` | `0.10` | Lift height after grasping (m) | +| `hand_open_qpos` | `None` | **Required.** Gripper open joint positions | +| `hand_close_qpos` | `None` | **Required.** Gripper closed joint positions | +| `hand_control_part` | `"hand"` | Robot control part for the gripper | +| `hand_interp_steps` | `5` | Waypoints for the gripper close phase | +| `sample_interval` | `80` | Total waypoints across all three phases | + +**Target:** `ObjectSemantics` (grasp pose computed automatically) **or** a `torch.Tensor` EEF pose. + +--- + +### `PlaceAction` + +Three-phase release motion: *lower → open gripper → retract*. Mirrors `PickUpAction`. + +Inherits all gripper config fields from `GraspActionCfg`. The `approach_direction` field is not used — the arm moves straight down to the target pose. + +**Target:** `torch.Tensor` of shape `(4, 4)` or `(n_envs, 4, 4)` — the EEF pose at release. + +--- + +## Typical Workflow + +```python +from embodichain.lab.sim.atomic_actions import ( + AtomicActionEngine, + ObjectSemantics, + AntipodalAffordance, + PickUpActionCfg, + PlaceActionCfg, + MoveActionCfg, +) + +# 1. Configure each action +pickup_cfg = PickUpActionCfg( + control_part="arm", + hand_control_part="hand", + hand_open_qpos=torch.tensor([0.0, 0.0]), + hand_close_qpos=torch.tensor([0.025, 0.025]), +) +place_cfg = PlaceActionCfg(...) +move_cfg = MoveActionCfg(control_part="arm") + +# 2. Build the engine — action order matches target_list order +engine = AtomicActionEngine( + motion_generator=motion_gen, + actions_cfg_list=[pickup_cfg, place_cfg, move_cfg], +) + +# 3. Describe the object to pick +semantics = ObjectSemantics( + label="mug", + geometry={"mesh_vertices": ..., "mesh_triangles": ...}, + affordance=AntipodalAffordance(object_label="mug", ...), + entity=mug, +) + +# 4. 
Plan the full sequence and replay
+is_success, traj = engine.execute_static(
+    target_list=[semantics, place_pose, rest_pose]
+)
+# traj: (n_envs, n_waypoints, dof)
+```
+
+---
+
+## How to Extend: Adding a Custom Action
+
+You can add any motion primitive by subclassing `AtomicAction` and registering it with the engine.
+
+### Step 1 — Define the config
+
+```python
+from embodichain.utils import configclass
+from embodichain.lab.sim.atomic_actions import ActionCfg
+
+@configclass
+class PushActionCfg(ActionCfg):
+    name: str = "push"
+    push_distance: float = 0.05   # metres to push forward
+    push_speed: int = 30          # waypoints for the push phase
+```
+
+### Step 2 — Implement the action
+
+```python
+import torch
+from typing import Optional, Union
+from embodichain.lab.sim.atomic_actions import AtomicAction, ObjectSemantics
+from embodichain.lab.sim.planners import PlanState, MoveType
+
+class PushAction(AtomicAction):
+    def __init__(self, motion_generator, cfg: PushActionCfg | None = None):
+        super().__init__(motion_generator, cfg=cfg or PushActionCfg())
+        self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+
+    def execute(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list]:
+        # Resolve target to a batched [n_envs, 4, 4] EEF pose
+        # ... your planning logic here ...
+        return is_success, trajectory, self.arm_joint_ids
+
+    def validate(self, target, start_qpos=None, **kwargs) -> bool:
+        return True  # add IK check here if needed
+```
+
+### Step 3 — Register and use
+
+```python
+from embodichain.lab.sim.atomic_actions import register_action
+
+register_action("push", PushAction)
+
+engine = AtomicActionEngine(
+    motion_generator=motion_gen,
+    actions_cfg_list=[PushActionCfg(push_distance=0.08)],
+)
+is_success, traj = engine.execute_static(target_list=[target_pose])
+```
+
+> **Tip:** The `execute()` return signature is always `(is_success, trajectory, joint_ids)`.
+> `trajectory` has shape `(n_envs, n_waypoints, len(joint_ids))`.
+> `joint_ids` tells the engine which columns of the full robot DOF vector the trajectory covers.
+
+---
+
+## Target Resolution
+
+`AtomicActionEngine` accepts several target formats in `target_list`, giving you flexibility without boilerplate:
+
+| Input type | Resolved to |
+|---|---|
+| `torch.Tensor (4,4)` or `(n_envs,4,4)` | EEF pose, broadcast across envs |
+| `ObjectSemantics` | Passed directly to the action |
+| `str` (object label) | Looked up in `SemanticAnalyzer` cache |
+| `dict` with `"pose"` key | Unwrapped to tensor |
+| `dict` with `"label"` key | Analyzed via `SemanticAnalyzer` |
+
+---
+
+## Further Reading
+
+- {doc}`planners/motion_generator` — the trajectory planner used by every action
+- {doc}`sim_robot` — how control parts and IK solvers are configured
+- Tutorial: `scripts/tutorials/sim/atomic_actions.py`
diff --git a/docs/source/overview/sim/index.rst b/docs/source/overview/sim/index.rst
index 56f98ef2..60cdfd56 100644
--- a/docs/source/overview/sim/index.rst
+++ b/docs/source/overview/sim/index.rst
@@ -22,3 +22,4 @@ Overview of the Simulation Framework:
     sim_sensor.md
     solvers/index
     planners/index
+    atomic_actions.md
diff --git a/docs/source/tutorial/atomic_actions.rst b/docs/source/tutorial/atomic_actions.rst
new file mode 100644
index 00000000..10b8e97c
--- /dev/null
+++ b/docs/source/tutorial/atomic_actions.rst
@@ -0,0 +1,170 @@
+..
_tutorial_atomic_actions: + +Atomic Actions +============== + +EmbodiChain's **atomic action** layer provides a high-level, composable interface for common +manipulation primitives such as *move*, *pick up*, and *place*. Each action encapsulates the +full planning pipeline — grasp-pose estimation, IK, trajectory generation, and gripper +interpolation — behind a single ``execute()`` call, making it straightforward to chain +multiple actions together into complex robot behaviours. + +Key Features +------------ + +- **Semantic-aware execution** — actions accept either a raw pose tensor or an + ``ObjectSemantics`` descriptor that bundles affordance data (grasp poses, interaction + points) with the simulation entity. +- **Three built-in primitives** — ``MoveAction``, ``PickUpAction``, and ``PlaceAction`` + cover the most common tabletop manipulation workflows out of the box. + See the :ref:`supported_atomic_actions` table for configs and target types. +- **Extensible registry** — custom actions can be registered globally with + ``register_action`` and discovered by the engine at runtime. +- **Engine orchestration** — ``AtomicActionEngine`` sequences multiple actions, + threads ``start_qpos`` from one action to the next, and returns a single concatenated + trajectory ready to replay in the simulator. + +For the full design overview, architecture diagram, and extension guide see +:doc:`/overview/sim/atomic_actions`. + +The Code +-------- + +The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/sim`` +directory. + +.. dropdown:: Code for atomic_actions.py + :icon: code + + .. literalinclude:: ../../../scripts/tutorials/sim/atomic_actions.py + :language: python + :linenos: + +Typical Usage +------------- + +Setting up the engine +~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import torch + from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg + from embodichain.lab.sim.atomic_actions import ( + AtomicActionEngine, + PickUpActionCfg, + PlaceActionCfg, + MoveActionCfg, + ) + + motion_gen = MotionGenerator(cfg=MotionGenCfg(...)) + + hand_open = torch.tensor([0.00, 0.00], dtype=torch.float32, device=device) + hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=device) + + pickup_cfg = PickUpActionCfg( + hand_open_qpos=hand_open, + hand_close_qpos=hand_close, + control_part="arm", + hand_control_part="hand", + approach_direction=torch.tensor([0.0, 0.0, -1.0], dtype=torch.float32, device=device), + pre_grasp_distance=0.15, + lift_height=0.15, + ) + place_cfg = PlaceActionCfg( + hand_open_qpos=hand_open, + hand_close_qpos=hand_close, + control_part="arm", + hand_control_part="hand", + lift_height=0.15, + ) + move_cfg = MoveActionCfg(control_part="arm") + + engine = AtomicActionEngine( + motion_generator=motion_gen, + actions_cfg_list=[pickup_cfg, place_cfg, move_cfg], + ) + +Defining object semantics +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python + + from embodichain.lab.sim.atomic_actions import ( + ObjectSemantics, + AntipodalAffordance, + ) + from embodichain.toolkits.graspkit.pg_grasp import GraspGeneratorCfg, AntipodalSamplerCfg + from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import GripperCollisionCfg + + affordance = AntipodalAffordance( + object_label="mug", + force_reannotate=False, + custom_config={ + "gripper_collision_cfg": GripperCollisionCfg( + max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012 + ), + "generator_cfg": GraspGeneratorCfg( + antipodal_sampler_cfg=AntipodalSamplerCfg( + n_sample=20000, max_length=0.088, min_length=0.003 + ) + ), + }, + ) + + semantics = ObjectSemantics( + label="mug", + geometry={ + "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0], + "mesh_triangles": mug.get_triangles(env_ids=[0])[0], + }, + affordance=affordance, + entity=mug, # required so the action can query the live object pose + ) + +Executing a pick-place-move sequence +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + place_xpos = ... # torch.Tensor [4, 4] — target placement pose + rest_xpos = ... # torch.Tensor [4, 4] — resting pose after placing + + is_success, trajectory = engine.execute_static( + target_list=[semantics, place_xpos, rest_xpos] + ) + # trajectory: [n_envs, n_waypoints, robot_dof] + + for i in range(trajectory.shape[1]): + robot.set_qpos(trajectory[:, i]) + sim.update(step=4) + +Registering custom actions +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from embodichain.lab.sim.atomic_actions import AtomicAction, ActionCfg, register_action + + class PushAction(AtomicAction): + def execute(self, target, start_qpos=None, **kwargs): + # ... your planning logic ... + return is_success, trajectory, joint_ids + + def validate(self, target, start_qpos=None, **kwargs): + return True # quick feasibility check + + register_action("push", PushAction) + +Notes & Best Practices +---------------------- + +- ``PickUpAction`` expects an ``AntipodalAffordance`` with valid mesh data + (``mesh_vertices`` / ``mesh_triangles``) so the grasp generator can annotate the object. + Set ``force_reannotate=False`` (the default) to reuse cached annotations across episodes. +- ``ObjectSemantics.entity`` must be set when using semantic targets so the action can read + the object's current world pose at planning time. +- For static (non-physics) playback, iterate over ``trajectory[:, i]`` and call + ``robot.set_qpos`` directly; for physics-enabled playback, feed waypoints through your + controller or gym wrapper instead. +- To add a new action type, see :doc:`/overview/sim/atomic_actions`. 
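+
+Checking feasibility first
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``AtomicActionEngine.validate`` runs a single action's fast feasibility check
+without planning a trajectory. A minimal sketch, reusing the ``engine`` and
+``semantics`` objects built above (action names match each config's ``name``
+field, e.g. ``"pick_up"``):
+
+.. code-block:: python
+
+    if engine.validate("pick_up", semantics):
+        is_success, trajectory = engine.execute_static(
+            target_list=[semantics, place_xpos, rest_xpos]
+        )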
diff --git a/docs/source/tutorial/index.rst b/docs/source/tutorial/index.rst index ef6efe79..c73b3d04 100644 --- a/docs/source/tutorial/index.rst +++ b/docs/source/tutorial/index.rst @@ -14,6 +14,7 @@ Tutorials solver sensor motion_gen + atomic_actions gizmo basic_env modular_env diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py index a154cf89..cf1e60ce 100644 --- a/embodichain/lab/sim/atomic_actions/__init__.py +++ b/embodichain/lab/sim/atomic_actions/__init__.py @@ -23,6 +23,7 @@ from .core import ( Affordance, + AntipodalAffordance, InteractionPoints, ObjectSemantics, ActionCfg, diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py index ae8b1212..4f2698de 100644 --- a/embodichain/lab/sim/atomic_actions/actions.py +++ b/embodichain/lab/sim/atomic_actions/actions.py @@ -42,6 +42,29 @@ class MoveActionCfg(ActionCfg): """Number of waypoints to sample for the motion trajectory. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead.""" +@configclass +class GraspActionCfg(MoveActionCfg): + """Shared configuration for actions that involve gripper open/close motions.""" + + hand_open_qpos: torch.Tensor | None = None + """[hand_dof,] of float. Joint positions for open hand state.""" + + hand_close_qpos: torch.Tensor | None = None + """[hand_dof,] of float. Joint positions for closed hand state.""" + + hand_control_part: str = "hand" + """Name of the robot part that controls the hand joints.""" + + lift_height: float = 0.1 + """Height (m) to lift the end-effector after the gripper phase.""" + + sample_interval: int = 80 + """Number of waypoints for the full trajectory (approach + hand + lift/back).""" + + hand_interp_steps: int = 5 + """Number of waypoints for the gripper open/close interpolation phase.""" + + class MoveAction(AtomicAction): def __init__( self, @@ -261,33 +284,17 @@ def validate(self, target, start_qpos=None, **kwargs): @configclass -class PickUpActionCfg(MoveActionCfg): +class PickUpActionCfg(GraspActionCfg): name: str = "pick_up" """Name of the action, used for identification and logging.""" - hand_open_qpos: torch.Tensor | None = None - """[hand_dof,] of float. Joint positions for open hand state. Must be specified for PickUpAction.""" - - hand_close_qpos: torch.Tensor | None = None - """[hand_dof,] of float. Joint positions for closed hand state. Must be specified for PickUpAction.""" - - hand_control_part: str = "hand" - """Name of the robot part that controls the hand joints. Must correspond to a valid control part in the robot definition.""" - pre_grasp_distance: float = 0.15 - """Distance to offset back from the grasp pose along the approach direction to get the pre-grasp pose. Should be large enough to avoid collision during approach, but not too large to cause unnecessary detour.""" + """Distance to offset back from the grasp pose along the approach direction to get + the pre-grasp pose. Should be large enough to avoid collision during approach.""" approach_direction: torch.Tensor = torch.tensor([0, 0, -1], dtype=torch.float32) - """Direction from which the gripper approaches the object for grasping, expressed in the object local frame. Should be a unit vector. Default is [0, 0, -1], which means approaching from above along the negative z-axis.""" - - lift_height: float = 0.1 - """Height to lift the object after grasping, expressed in meters. 
Should be large enough to avoid collision with the environment, but not too large to cause unnecessary motion.""" - - sample_interval: int = 80 - """Number of waypoints to sample for the entire pick up motion trajectory, including approach, hand closing, and lifting. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead.""" - - hand_interp_steps: int = 5 - """Number of waypoints to interpolate for the hand closing motion. Should be at least 2 to ensure smooth interpolation between open and closed hand states, but not too large to cause unnecessary computation overhead.""" + """Direction from which the gripper approaches the object for grasping, expressed + in the object local frame. Default [0, 0, -1] means approaching from above.""" class PickUpAction(MoveAction): @@ -474,28 +481,10 @@ def validate(self, target, start_qpos=None, **kwargs): @configclass -class PlaceActionCfg(MoveActionCfg): +class PlaceActionCfg(GraspActionCfg): name: str = "place" """Name of the action, used for identification and logging.""" - hand_open_qpos: torch.Tensor | None = None - """[hand_dof,] of float. Joint positions for open hand state. Must be specified for PickUpAction.""" - - hand_close_qpos: torch.Tensor | None = None - """[hand_dof,] of float. Joint positions for closed hand state. Must be specified for PickUpAction.""" - - hand_control_part: str = "hand" - """Name of the robot part that controls the hand joints. Must correspond to a valid control part in the robot definition.""" - - lift_height: float = 0.1 - """Height to lift the object after grasping, expressed in meters. Should be large enough to avoid collision with the environment, but not too large to cause unnecessary motion.""" - - sample_interval: int = 80 - """Number of waypoints to sample for the entire pick up motion trajectory, including approach, hand closing, and lifting. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead.""" - - hand_interp_steps: int = 5 - """Number of waypoints to interpolate for the hand closing motion. 
Should be at least 2 to ensure smooth interpolation between open and closed hand states, but not too large to cause unnecessary computation overhead.""" - class PlaceAction(MoveAction): def __init__( diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py index c16cebb0..08a22fc5 100644 --- a/embodichain/lab/sim/atomic_actions/core.py +++ b/embodichain/lab/sim/atomic_actions/core.py @@ -33,8 +33,6 @@ ) from embodichain.lab.sim.common import BatchEntity from embodichain.utils import logger -from embodichain.lab.sim.sim_manager import SimulationManager -from embodichain.lab.sim.cfg import MarkerCfg if TYPE_CHECKING: from embodichain.lab.sim.planners import MotionGenerator, MotionGenOptions diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py index 72ff7ea3..15b868a8 100644 --- a/embodichain/lab/sim/atomic_actions/engine.py +++ b/embodichain/lab/sim/atomic_actions/engine.py @@ -139,8 +139,6 @@ def analyze( grasp_affordance = AntipodalAffordance( object_label=label, - poses=default_poses, - grasp_types=["default"], custom_config=custom_config or {}, ) @@ -182,34 +180,44 @@ def __init__( self._semantic_analyzer = SemanticAnalyzer() # Initialize default actions - self._actions: List[AtomicAction] = self._init_actions(actions_cfg_list) + self._actions: Dict[str, AtomicAction] = self._init_actions(actions_cfg_list) - def _init_actions(self, actions_cfg_list: Optional[List[ActionCfg]] = None): - actions: List[AtomicAction] = [] + def _init_actions( + self, actions_cfg_list: Optional[List[ActionCfg]] = None + ) -> Dict[str, "AtomicAction"]: + actions: Dict[str, AtomicAction] = {} from .actions import MoveAction, PickUpAction, PlaceAction - name_action_map = { + builtin_action_map: Dict[str, Type[AtomicAction]] = { "move": MoveAction, "pick_up": PickUpAction, "place": PlaceAction, } if actions_cfg_list is not None: for cfg in actions_cfg_list: - action_class = name_action_map.get(cfg.name) + action_class = builtin_action_map.get( + cfg.name + ) or _global_action_registry.get(cfg.name) if action_class is None: logger.log_error(f"Unknown action name in config: {cfg.name}") + continue instance = action_class(motion_generator=self.motion_generator, cfg=cfg) - actions.append(instance) + actions[cfg.name] = instance return actions def execute_static( self, target_list: List[Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]]], ) -> tuple[bool, torch.Tensor]: - """Execute a static move action to target poses without motion planning.""" - if len(target_list) != len(self._actions): + """Execute a sequence of actions to target poses. + + Each element in ``target_list`` corresponds to an action in the order they + were registered via ``actions_cfg_list``. + """ + action_names = list(self._actions.keys()) + if len(target_list) != len(action_names): logger.log_error( - f"Length of target_list ({len(target_list)}) must match number of actions ({len(self._actions)})." + f"Length of target_list ({len(target_list)}) must match number of actions ({len(action_names)})." 
) start_qpos = self.motion_generator.robot.get_qpos() n_envs = start_qpos.shape[0] @@ -218,8 +226,8 @@ def execute_static( size=(n_envs, 0, all_dof), dtype=torch.float32, device=self.device ) - for i, target in enumerate(target_list): - atom_action = self._actions[i] + for action_name, target in zip(action_names, target_list): + atom_action = self._actions[action_name] target = self._resolve_target(target) control_part = atom_action.control_part arm_joint_ids = self.motion_generator.robot.get_joint_ids(name=control_part) @@ -227,6 +235,8 @@ def execute_static( is_success, traj, joint_ids = atom_action.execute( target=target, start_qpos=start_qpos_part ) + if not is_success: + return False, all_trajectory n_waypoints = traj.shape[1] traj_full = torch.zeros( @@ -237,33 +247,23 @@ def execute_static( traj_full[:, :] = start_qpos traj_full[:, :, joint_ids] = traj all_trajectory = torch.cat((all_trajectory, traj_full), dim=1) - if is_success: - # update start qpos - start_qpos[:, joint_ids] = traj[:, -1, :] - else: - return False, all_trajectory + # update start qpos for the next action + start_qpos[:, joint_ids] = traj[:, -1, :] return True, all_trajectory def validate( self, action_name: str, target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]], - control_part: Optional[str] = None, **kwargs, ) -> bool: - """Validate if an action is feasible without executing.""" - if control_part: - action_key = f"{action_name}_{control_part}" - else: - action_key = action_name - - if action_key not in self._actions: + """Validate if a named action is feasible without executing.""" + if action_name not in self._actions: + logger.log_warning(f"Action '{action_name}' is not registered.") return False - action = self._actions[action_key] - + action = self._actions[action_name] target = self._resolve_target(target) - return action.validate(target, **kwargs) def _resolve_target( diff --git a/scripts/tutorials/sim/atom_action.py b/scripts/tutorials/sim/atomic_actions.py similarity index 53% rename from scripts/tutorials/sim/atom_action.py rename to scripts/tutorials/sim/atomic_actions.py index 6b0e8c9c..3dace43e 100644 --- a/scripts/tutorials/sim/atom_action.py +++ b/scripts/tutorials/sim/atomic_actions.py @@ -15,7 +15,19 @@ # ---------------------------------------------------------------------------- """ -This script demonstrates the atomic action abstraction layer in the SimulationManager. +Tutorial: Atomic Actions for Robot Motion Generation +===================================================== + +This script shows how to use the atomic action system to plan and execute +a pick-and-place task with a robot arm. + +Key concepts covered: + 1. Setting up a MotionGenerator and AtomicActionEngine + 2. Describing what to pick using ObjectSemantics and AntipodalAffordance + 3. 
Running a pick → place → move sequence with execute_static() + +Run with: + python atomic_actions.py [--num_envs N] [--enable_rt] """ import argparse @@ -37,8 +49,6 @@ URDFCfg, ) from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg -from embodichain.lab.sim.shapes import MeshCfg - from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import ( GripperCollisionCfg, ) @@ -47,12 +57,12 @@ GraspGeneratorCfg, AntipodalSamplerCfg, ) -from embodichain.lab.sim.atomic_actions.engine import ( + +# Import everything from the public atomic_actions API +from embodichain.lab.sim.atomic_actions import ( AtomicActionEngine, - register_action, -) -from embodichain.lab.sim.atomic_actions.core import ObjectSemantics, AntipodalAffordance -from embodichain.lab.sim.atomic_actions.actions import ( + ObjectSemantics, + AntipodalAffordance, PickUpActionCfg, PlaceActionCfg, MoveActionCfg, @@ -156,7 +166,7 @@ def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]): def create_mug(sim: SimulationManager) -> RigidObject: mug_cfg = RigidObjectCfg( - uid="table", + uid="mug", shape=MeshCfg( fpath=get_data_path("CoffeeCup/cup.ply"), ), @@ -175,46 +185,55 @@ def create_mug(sim: SimulationManager) -> RigidObject: def main(): - """ - Main function to demonstrate robot simulation. - - This function initializes the simulation, creates the robot and mug objects, - and performs the pickup action task. - """ + """Pick up a mug and place it at a new location using atomic actions.""" args = parse_arguments() + + # ------------------------------------------------------------------ # + # Step 1: Set up simulation, robot, and object # + # ------------------------------------------------------------------ # sim: SimulationManager = initialize_simulation(args) robot = create_robot(sim) mug = create_mug(sim) + # ------------------------------------------------------------------ # + # Step 2: Create a MotionGenerator for the robot # + # MotionGenerator handles trajectory planning (IK + TOPPRA smoothing) # + # ------------------------------------------------------------------ # motion_gen = MotionGenerator( cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid)) ) + # ------------------------------------------------------------------ # + # Step 3: Configure the three atomic actions # + # # + # PickUpAction — approach → close gripper → lift # + # PlaceAction — lower → open gripper → retract # + # MoveAction — free-space move to a target EEF pose # + # ------------------------------------------------------------------ # + # Gripper joint values for this robot (DH_PGC_140): + # open = [0.00, 0.00] (fully open) + # close = [0.025, 0.025] (grasping width) + hand_open = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device) + hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=sim.device) + pickup_cfg = PickUpActionCfg( - hand_open_qpos=torch.tensor( - [0.00, 0.00], dtype=torch.float32, device=sim.device - ), - hand_close_qpos=torch.tensor( - [0.025, 0.025], dtype=torch.float32, device=sim.device - ), control_part="arm", hand_control_part="hand", + hand_open_qpos=hand_open, + hand_close_qpos=hand_close, + # Approach the object from directly above (negative world-Z) approach_direction=torch.tensor( [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device ), - pre_grasp_distance=0.15, - lift_height=0.15, + pre_grasp_distance=0.15, # hover 15 cm above before descending + lift_height=0.15, # lift 15 cm after grasping ) place_cfg = PlaceActionCfg( 
- hand_open_qpos=torch.tensor( - [0.00, 0.00], dtype=torch.float32, device=sim.device - ), - hand_close_qpos=torch.tensor( - [0.025, 0.025], dtype=torch.float32, device=sim.device - ), control_part="arm", hand_control_part="hand", + hand_open_qpos=hand_open, + hand_close_qpos=hand_close, lift_height=0.15, ) @@ -222,30 +241,41 @@ def main(): control_part="arm", ) + # ------------------------------------------------------------------ # + # Step 4: Build the AtomicActionEngine # + # # + # actions_cfg_list defines the ORDER of actions that execute_static() # + # will run. Each entry is matched positionally to target_list. # + # ------------------------------------------------------------------ # atom_engine = AtomicActionEngine( motion_generator=motion_gen, - actions_cfg_list=(pickup_cfg, place_cfg, move_cfg), + actions_cfg_list=[pickup_cfg, place_cfg, move_cfg], ) sim.init_gpu_physics() sim.open_window() - # Define object semantics and affordances for the mug - gripper_collision_cfg = GripperCollisionCfg( - max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012 - ) - generator_cfg = GraspGeneratorCfg( - viser_port=11801, - antipodal_sampler_cfg=AntipodalSamplerCfg( - n_sample=20000, max_length=0.088, min_length=0.003 - ), - ) + # ------------------------------------------------------------------ # + # Step 5: Describe the mug with ObjectSemantics # + # # + # ObjectSemantics bundles together: # + # - geometry (mesh vertices/triangles for grasp annotation) # + # - affordance (how to grasp the object — here antipodal grasps) # + # - entity reference (so the action can read the live object pose) # + # ------------------------------------------------------------------ # mug_grasp_affordance = AntipodalAffordance( object_label="mug", - force_reannotate=False, # set to True if you want to re-annotate affordance even if the object has been seen before, which is useful when you have changed the grasp generator configuration and want to see the effect of new configuration, but it will take more time to annotate. So usually set it to False and only set it to True when you have changed the grasp generator configuration or you want to debug the annotation process. + force_reannotate=False, custom_config={ - "gripper_collision_cfg": gripper_collision_cfg, - "generator_cfg": generator_cfg, + "gripper_collision_cfg": GripperCollisionCfg( + max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012 + ), + "generator_cfg": GraspGeneratorCfg( + viser_port=11801, + antipodal_sampler_cfg=AntipodalSamplerCfg( + n_sample=20000, max_length=0.088, min_length=0.003 + ), + ), }, ) mug_semantics = ObjectSemantics( @@ -255,13 +285,21 @@ def main(): "mesh_triangles": mug.get_triangles(env_ids=[0])[0], }, affordance=mug_grasp_affordance, - entity=mug, # in order to fetch object pose + entity=mug, # needed so PickUpAction can read the mug's live pose ) - target_grasp_xpos = torch.tensor( + # ------------------------------------------------------------------ # + # Step 6: Define target poses for place and final rest # + # # + # Poses are 4×4 homogeneous transforms (rotation | translation). # + # For PickUpAction the target is mug_semantics — the action computes # + # the grasp pose automatically from the affordance. 
#
+    # ------------------------------------------------------------------ #
+    # Place the mug 0.2 m along -X and 0.4 m along +Y from its pickup pose
+    place_xpos = torch.tensor(
         [
-            [-0.0539, -0.9985, -0.0022, 0.4489],
-            [-0.9977, 0.0540, -0.0401, -0.0030],
+            [-0.0539, -0.9985, -0.0022, 0.2489],
+            [-0.9977, 0.0540, -0.0401, 0.3970],
             [0.0401, 0.0000, -0.9992, 0.1400],
             [0.0000, 0.0000, 0.0000, 1.0000],
         ],
@@ -269,17 +307,36 @@ def main():
         device=sim.device,
     )
 
-    place_xpos = target_grasp_xpos.clone()
-    place_xpos[:3, 3] += torch.tensor([-0.2, 0.4, 0.1], device=sim.device)
-
-    rest_xpos = target_grasp_xpos.clone()
-    rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device)
+    # Move the arm to a safe resting pose after placing
+    rest_xpos = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022, 0.5000],
+            [-0.9977, 0.0540, -0.0401, 0.0000],
+            [0.0401, 0.0000, -0.9992, 0.5000],
+            [0.0000, 0.0000, 0.0000, 1.0000],
+        ],
+        dtype=torch.float32,
+        device=sim.device,
+    )
 
+    # ------------------------------------------------------------------ #
+    # Step 7: Plan and execute the full sequence                         #
+    #                                                                    #
+    # execute_static() plans all three actions in order and returns a   #
+    # single concatenated joint trajectory (n_envs, n_waypoints, dof).  #
+    # We then replay it frame-by-frame in the simulator.                #
+    # ------------------------------------------------------------------ #
+    print("Planning pick → place → move trajectory...")
     is_success, traj = atom_engine.execute_static(
         target_list=[mug_semantics, place_xpos, rest_xpos]
     )
-    n_waypoint = traj.shape[1]
-    for i in range(n_waypoint):
+
+    if not is_success:
+        print("Planning failed. Check that the target poses are reachable.")
+        return
+
+    print(f"Success! Replaying {traj.shape[1]} waypoints...")
+    for i in range(traj.shape[1]):
         robot.set_qpos(traj[:, i])
         sim.update(step=4)
         time.sleep(1e-2)

From 861734c5e4c55ba6541a8eabf821065586d530c0 Mon Sep 17 00:00:00 2001
From: yuecideng
Date: Thu, 30 Apr 2026 17:20:19 +0000
Subject: [PATCH 6/9] wip

---
 ...-04-17-atomic-action-abstraction-design.md | 1367 -----------------
 1 file changed, 1367 deletions(-)
 delete mode 100644 docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md

diff --git a/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md b/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md
deleted file mode 100644
index 97230290..00000000
--- a/docs/superpowers/specs/2026-04-17-atomic-action-abstraction-design.md
+++ /dev/null
@@ -1,1367 +0,0 @@
-# Atomic Action Abstraction Design for Embodied AI Motion Generation
-
-**Date:** 2026-04-17
-**Status:** Design Draft
-
----
-
-## 1. Overview
-
-This document describes the design of an **atomic action abstraction layer** for embodied AI motion generation. The design supports both demo simulation and data generation within gym environments, providing a consistent interface for agentic workflows while maintaining extensibility for new motion primitives.
-
----
-
-## 2. Design Principles
-
-### 2.1 Leverage Existing Infrastructure
-- **MotionGenerator**: Use existing motion planning with TOPPRA and interpolation
-- **Solvers**: Integrate IK/FK solvers (BaseSolver, PinkSolver, etc.)
-- **Warp**: Utilize GPU-accelerated trajectory interpolation from `warp` module - -### 2.2 Object Semantics -Atomic actions should consider: -- **Semantic labels**: Object categories (e.g., "apple", "bottle") -- **Affordance data**: Grasp poses, interaction points -- **Geometry**: Shape, dimensions, collision boundaries - -### 2.3 Consistent Interface -- Unified API across all motion primitives -- Standard parameter types (torch.Tensor, np.ndarray) -- Predictable return types (PlanResult from motion planning) - -### 2.4 Extensibility -- Registry-based motion primitive registration -- Plugin architecture for custom actions -- Strategy pattern for interaction behaviors - ---- - -## 3. Architecture - -### 3.1 Core Components - -``` -┌─────────────────────────────────────────────────────────────┐ -│ AtomicActionEngine │ -├─────────────────────────────────────────────────────────────┤ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────┐ │ -│ │ Semantic │ │ Motion │ │ Affordance │ │ -│ │ Analyzer │ │ Planner │ │ Provider │ │ -│ └──────────────┘ └──────────────┘ └──────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - │ - ┌─────────────────────┼─────────────────────┐ - ▼ ▼ ▼ -┌───────────────┐ ┌────────────────┐ ┌──────────────────┐ -│ ReachAction │ │ GraspAction │ │ MoveAction │ -└───────────────┘ └────────────────┘ └──────────────────┘ -``` - -### 3.2 Class Hierarchy - -```python -# Base class for all atomic actions -AtomicAction(ABC) - ├── ReachAction - ├── GraspAction - ├── ReleaseAction - ├── MoveAction - ├── RotateAction - ├── PushAction - └── CustomAction - -# Configuration classes -ActionConfig(ABC) - ├── ReachConfig - ├── GraspConfig - └── ... - -# Uses existing PlanResult from embodichain.lab.sim.planners -PlanResult (existing) - ├── success: bool - ├── positions: torch.Tensor # [T, DOF] - ├── xpos_list: torch.Tensor # [T, 4, 4] - ├── final_pose: torch.Tensor - └── execution_time: float -``` - ---- - -## 4. Core API Design - -### 4.1 Base AtomicAction Interface - -```python -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Optional, Dict, Any, List -import torch -import numpy as np - -from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType -from embodichain.utils import configclass - - -# ============================================================================= -# Affordance Classes -# ============================================================================= - -@dataclass -class Affordance: - """Base class for affordance data. - - Affordance represents interaction possibilities for an object. - This is the base class for specific affordance types. - """ - object_label: str = "" - """Label of the object this affordance belongs to.""" - - def get_batch_size(self) -> int: - """Return the batch size of this affordance data.""" - return 1 - - -@dataclass -class GraspPose(Affordance): - """Grasp pose affordance containing a batch of 4x4 transformation matrices. - - Each grasp pose represents a valid end-effector pose for grasping the object. - Multiple poses may be available for different grasp types (pinch, power, etc.) - or approach directions. - """ - poses: torch.Tensor = field(default_factory=lambda: torch.eye(4).unsqueeze(0)) - """Batch of grasp poses with shape [B, 4, 4]. - - Each pose is a 4x4 homogeneous transformation matrix representing - the end-effector pose in the object's local coordinate frame. 
- """ - - grasp_types: List[str] = field(default_factory=lambda: ["default"]) - """List of grasp type labels for each pose in the batch. - - Examples: "pinch", "power", "hook", "spherical", etc. - Length must match the batch dimension of `poses`. - """ - - confidence_scores: Optional[torch.Tensor] = None - """Optional confidence scores for each grasp pose with shape [B]. - - Higher values indicate more reliable/ stable grasps. - Used for grasp selection when multiple options exist. - """ - - def get_batch_size(self) -> int: - """Return the number of grasp poses in this affordance.""" - return self.poses.shape[0] - - def get_grasp_by_type(self, grasp_type: str) -> Optional[torch.Tensor]: - """Get grasp pose by type label. - - Args: - grasp_type: Type of grasp (e.g., "pinch", "power") - - Returns: - 4x4 pose tensor if found, None otherwise - """ - if grasp_type in self.grasp_types: - idx = self.grasp_types.index(grasp_type) - return self.poses[idx] - return None - - def get_best_grasp(self) -> torch.Tensor: - """Get the best grasp pose based on confidence scores. - - Returns: - 4x4 pose tensor with highest confidence - """ - if self.confidence_scores is not None: - best_idx = torch.argmax(self.confidence_scores) - return self.poses[best_idx] - return self.poses[0] # Default to first if no scores available - - -@dataclass -class InteractionPoints(Affordance): - """Interaction points affordance containing a batch of 3D positions. - - Interaction points define specific locations on an object surface - that can be used for contact-based interactions (pushing, poking, - touching) rather than full grasping. - """ - points: torch.Tensor = field(default_factory=lambda: torch.zeros(1, 3)) - """Batch of 3D interaction points with shape [B, 3]. - - Each point is a 3D coordinate in the object's local coordinate frame. - """ - - normals: Optional[torch.Tensor] = None - """Optional surface normals at each interaction point with shape [B, 3]. - - Normals indicate the surface orientation at each point, - useful for determining approach directions. - """ - - point_types: List[str] = field(default_factory=lambda: ["contact"]) - """List of interaction types for each point. - - Examples: "push", "poke", "touch", "support", "handle", etc. - Length must match the batch dimension of `points`. - """ - - contact_regions: Optional[List[str]] = None - """Optional labels for object regions each point belongs to. - - Examples: "handle", "face", "edge", "corner", "center" - """ - - def get_batch_size(self) -> int: - """Return the number of interaction points in this affordance.""" - return self.points.shape[0] - - def get_points_by_type(self, point_type: str) -> Optional[torch.Tensor]: - """Get all points of a specific interaction type. - - Args: - point_type: Type of interaction (e.g., "push", "handle") - - Returns: - Tensor of points with shape [N, 3] if found, None otherwise - """ - indices = [i for i, t in enumerate(self.point_types) if t == point_type] - if indices: - return self.points[indices] - return None - - def get_approach_direction(self, point_idx: int) -> torch.Tensor: - """Get recommended approach direction for a given point. 
- - Args: - point_idx: Index of the point - - Returns: - 3D approach direction vector (normalized) - """ - if self.normals is not None: - # Approach from the opposite direction of the surface normal - return -self.normals[point_idx] - # Default: approach from positive z - return torch.tensor([0, 0, 1], dtype=self.points.dtype, device=self.points.device) - - -# ============================================================================= -# ObjectSemantics -# ============================================================================= - -@dataclass -class ObjectSemantics: - """Semantic information about interaction target. - - This class encapsulates all semantic and geometric information about - an object needed for intelligent interaction planning. - """ - label: str - """Object category label (e.g., 'apple', 'bottle').""" - - affordance: Affordance - """Affordance data (GraspPose, InteractionPoints, etc.).""" - - geometry: Dict[str, Any] - """Geometric information including bounding box, mesh data.""" - - properties: Dict[str, Any] - """Physical properties: mass, friction, etc.""" - - uid: Optional[str] = None - """Optional unique identifier for the object instance.""" - - -# ============================================================================= -# ActionCfg and AtomicAction -# ============================================================================= - -@configclass -class ActionCfg: - """Configuration for atomic actions.""" - control_part: str = "left_arm" - """Control part name for the action.""" - - interpolation_type: str = "linear" - """Interpolation type: 'linear', 'cubic', or 'toppra'.""" - - velocity_limit: Optional[float] = None - """Maximum velocity for trajectory.""" - - acceleration_limit: Optional[float] = None - """Maximum acceleration for trajectory.""" - - -class AtomicAction(ABC): - """Abstract base class for atomic actions. - - All atomic actions use PlanResult from embodichain.lab.sim.planners - as the return type for execute() method, ensuring consistency with - the existing motion planning infrastructure. - """ - - def __init__( - self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - ): - self.motion_generator = motion_generator - self.robot = robot - self.control_part = control_part - self.device = device - - @abstractmethod - def execute( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - **kwargs - ) -> PlanResult: - """Execute the atomic action. - - Args: - target: Target pose [4, 4] or ObjectSemantics - start_qpos: Starting joint configuration [DOF] - **kwargs: Additional action-specific parameters - - Returns: - PlanResult with trajectory (positions, velocities, accelerations), - end-effector poses (xpos_list), and success status. - Use result.positions for joint trajectory [T, DOF]. - Use result.xpos_list for EE poses [T, 4, 4]. - """ - pass - - @abstractmethod - def validate( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - **kwargs - ) -> bool: - """Validate if the action is feasible without executing. - - This method performs a quick feasibility check (e.g., IK solvability) - without generating a full trajectory. 
- - Returns: - True if action appears feasible, False otherwise - """ - pass - - def _get_current_qpos(self) -> torch.Tensor: - """Get current joint configuration from robot.""" - return self.robot.get_qpos()[0] # Assuming single environment - - def _ik_solve( - self, - target_pose: torch.Tensor, - qpos_seed: Optional[torch.Tensor] = None - ) -> torch.Tensor: - """Solve IK for target pose. - - Args: - target_pose: Target pose [4, 4] - qpos_seed: Seed configuration [DOF] - - Returns: - Joint configuration [DOF] - - Raises: - RuntimeError: If IK fails to find a solution - """ - if qpos_seed is None: - qpos_seed = self._get_current_qpos() - - success, qpos = self.robot.compute_ik( - pose=target_pose.unsqueeze(0), - qpos_seed=qpos_seed.unsqueeze(0), - name=self.control_part, - ) - - if not success.all(): - raise RuntimeError(f"IK failed for target pose: {target_pose}") - - return qpos.squeeze(0) - - def _fk_compute(self, qpos: torch.Tensor) -> torch.Tensor: - """Compute forward kinematics. - - Args: - qpos: Joint configuration [DOF] or [B, DOF] - - Returns: - End-effector pose [4, 4] or [B, 4, 4] - """ - if qpos.dim() == 1: - qpos = qpos.unsqueeze(0) - - xpos = self.robot.compute_fk( - qpos=qpos, - name=self.control_part, - to_matrix=True, - ) - - return xpos.squeeze(0) if xpos.shape[0] == 1 else xpos - - def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tensor: - """Apply offset to pose in local frame. - - Args: - pose: Base pose [4, 4] - offset: Offset in local frame [3] - - Returns: - Pose with offset applied [4, 4] - """ - result = pose.clone() - result[:3, 3] += pose[:3, :3] @ offset - return result - - def plan_trajectory( - self, - target_states: List["PlanState"], - options: Optional["MotionGenOptions"] = None, - ) -> "PlanResult": - """Plan trajectory using motion generator.""" - if options is None: - options = MotionGenOptions(control_part=self.control_part) - return self.motion_generator.generate(target_states, options) -``` - -### 4.2 ReachAction Implementation - -```python -class ReachAction(AtomicAction): - """Atomic action for reaching a target pose or object.""" - - def __init__( - self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - interpolation_type: str = "linear", # "linear", "cubic", "toppra" - ): - super().__init__(motion_generator, robot, control_part, device) - self.interpolation_type = interpolation_type - - def execute( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - approach_offset: Optional[torch.Tensor] = None, - use_affordance: bool = True, - **kwargs - ) -> PlanResult: - """Execute reach action. 
- - Args: - target: Target pose [4, 4] or ObjectSemantics - start_qpos: Starting joint configuration - approach_offset: Offset for pre-grasp approach [x, y, z] - use_affordance: Whether to use object's affordance data - - Returns: - PlanResult with trajectory and execution status - """ - # Resolve target pose from ObjectSemantics if needed - if isinstance(target, ObjectSemantics): - target_pose = self._resolve_target_pose(target, use_affordance) - else: - target_pose = target - - # Apply approach offset if specified - if approach_offset is not None: - approach_pose = self._apply_offset(target_pose, approach_offset) - else: - approach_pose = target_pose - - # Get current state if not provided - if start_qpos is None: - start_qpos = self._get_current_qpos() - - # Create plan states - target_states = [ - PlanState( - qpos=start_qpos, - move_type=MoveType.JOINT_MOVE - ), - PlanState( - xpos=approach_pose, - move_type=MoveType.EEF_MOVE - ), - ] - - # Plan trajectory - options = MotionGenOptions( - control_part=self.control_part, - is_interpolate=True, - is_linear=self.interpolation_type == "linear", - interpolate_position_step=0.002, - plan_opts=ToppraPlanOptions( - sample_interval=kwargs.get("sample_interval", 30), - ), - ) - - result = self.plan_trajectory(target_states, options) - - # Return PlanResult directly - return result - - def validate( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - **kwargs - ) -> bool: - """Check if the reach action is feasible.""" - try: - # Quick IK feasibility check - if isinstance(target, ObjectSemantics): - target_pose = self._resolve_target_pose(target, use_affordance=True) - else: - target_pose = target - - # Attempt IK - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() - success, _ = self.robot.compute_ik( - pose=target_pose.unsqueeze(0), - qpos_seed=qpos_seed.unsqueeze(0), - name=self.control_part, - ) - return success.all().item() - except Exception: - return False - - def _resolve_target_pose( - self, - semantics: ObjectSemantics, - use_affordance: bool - ) -> torch.Tensor: - """Resolve target pose from object semantics.""" - if use_affordance and "grasp_pose" in semantics.affordance_data: - # Use precomputed grasp pose from affordance data - grasp_pose = semantics.affordance_data["grasp_pose"] - object_pose = self._get_object_pose(semantics.label) - target_pose = object_pose @ grasp_pose - else: - # Default to object center with approach direction - object_pose = self._get_object_pose(semantics.label) - approach_offset = torch.tensor([0, 0, 0.05], device=self.device) - target_pose = object_pose.clone() - target_pose[:3, 3] += approach_offset - - return target_pose - - def _get_object_pose(self, label: str) -> torch.Tensor: - """Get current pose of object by label.""" - # Implementation depends on environment's object management - pass - - def _get_current_qpos(self) -> torch.Tensor: - """Get current joint configuration.""" - return self.robot.get_qpos()[0] # Assuming single environment - - def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tensor: - """Apply offset to pose in local frame.""" - result = pose.clone() - result[:3, 3] += pose[:3, :3] @ offset - return result -``` - -### 4.3 GraspAction Implementation - -```python -class GraspAction(AtomicAction): - """Atomic action for grasping objects.""" - - def __init__( - self, - motion_generator: "MotionGenerator", - robot: "Robot", - control_part: str, - device: torch.device = 
torch.device("cuda"), - pre_grasp_distance: float = 0.05, - approach_direction: str = "z", # "x", "y", "z", or "custom" - ): - super().__init__(motion_generator, robot, control_part, device) - self.pre_grasp_distance = pre_grasp_distance - self.approach_direction = approach_direction - - def execute( - self, - target: ObjectSemantics, - start_qpos: Optional[torch.Tensor] = None, - use_affordance: bool = True, - grasp_type: str = "default", # "default", "pinch", "power" - **kwargs - ) -> PlanResult: - """Execute grasp action. - - Args: - target: ObjectSemantics with grasp affordances - start_qpos: Starting joint configuration - use_affordance: Whether to use precomputed grasp poses - grasp_type: Type of grasp to execute - """ - # Resolve grasp pose - grasp_pose = self._resolve_grasp_pose(target, use_affordance, grasp_type) - - # Compute pre-grasp pose (approach position) - pre_grasp_pose = self._compute_pre_grasp_pose(grasp_pose) - - # Get current state - if start_qpos is None: - start_qpos = self._get_current_qpos() - - # Build trajectory plan states - target_states = [ - PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), - PlanState(xpos=pre_grasp_pose, move_type=MoveType.EEF_MOVE), - PlanState(xpos=grasp_pose, move_type=MoveType.EEF_MOVE), - ] - - # Plan trajectory - options = MotionGenOptions( - control_part=self.control_part, - is_interpolate=True, - is_linear=False, - interpolate_position_step=0.001, - plan_opts=ToppraPlanOptions( - sample_interval=kwargs.get("sample_interval", 30), - ), - ) - - result = self.plan_trajectory(target_states, options) - - # Return PlanResult directly - it contains all trajectory data - return result - - def validate( - self, - target: ObjectSemantics, - start_qpos: Optional[torch.Tensor] = None, - **kwargs - ) -> bool: - """Validate if grasp is feasible.""" - try: - grasp_pose = self._resolve_grasp_pose(target, use_affordance=True, grasp_type="default") - qpos_seed = start_qpos if start_qpos is not None else self._get_current_qpos() - success, _ = self.robot.compute_ik( - pose=grasp_pose.unsqueeze(0), - qpos_seed=qpos_seed.unsqueeze(0), - name=self.control_part, - ) - return success.all().item() - except Exception: - return False - - def _resolve_grasp_pose( - self, - semantics: ObjectSemantics, - use_affordance: bool, - grasp_type: str - ) -> torch.Tensor: - """Resolve grasp pose from object semantics.""" - if use_affordance and "grasp_poses" in semantics.affordance_data: - grasp_poses = semantics.affordance_data["grasp_poses"] - if grasp_type in grasp_poses: - grasp_offset = grasp_poses[grasp_type] - else: - grasp_offset = grasp_poses["default"] - - object_pose = self._get_object_pose(semantics.label) - return object_pose @ grasp_offset - - # Fallback: compute grasp pose from geometry - return self._compute_grasp_from_geometry(semantics) - - def _compute_pre_grasp_pose(self, grasp_pose: torch.Tensor) -> torch.Tensor: - """Compute pre-grasp pose with offset.""" - offset = torch.zeros(3, device=self.device) - if self.approach_direction == "z": - offset[2] = -self.pre_grasp_distance - elif self.approach_direction == "x": - offset[0] = -self.pre_grasp_distance - elif self.approach_direction == "y": - offset[1] = -self.pre_grasp_distance - - pre_grasp = grasp_pose.clone() - pre_grasp[:3, 3] += grasp_pose[:3, :3] @ offset - return pre_grasp -``` - -### 4.4 MoveAction Implementation - -```python -class MoveAction(AtomicAction): - """Atomic action for moving to a target position.""" - - def __init__( - self, - motion_generator: "MotionGenerator", - 
robot: "Robot", - control_part: str, - device: torch.device = torch.device("cuda"), - move_type: str = "cartesian", # "cartesian", "joint" - interpolation: str = "linear", # "linear", "cubic", "toppra" - ): - super().__init__(motion_generator, robot, control_part, device) - self.move_type = move_type - self.interpolation = interpolation - - def execute( - self, - target: Union[torch.Tensor, ObjectSemantics], - start_qpos: Optional[torch.Tensor] = None, - offset: Optional[torch.Tensor] = None, - velocity_limit: Optional[float] = None, - acceleration_limit: Optional[float] = None, - **kwargs - ) -> PlanResult: - """Execute move action. - - Args: - target: Target pose [4, 4] or ObjectSemantics - start_qpos: Starting joint configuration - offset: Optional offset from target - velocity_limit: Max velocity for trajectory - acceleration_limit: Max acceleration for trajectory - """ - # Resolve target - if isinstance(target, ObjectSemantics): - target_pose = self._get_object_pose(target.label) - else: - target_pose = target - - # Apply offset if specified - if offset is not None: - target_pose = self._apply_offset(target_pose, offset) - - # Get start state - if start_qpos is None: - start_qpos = self._get_current_qpos() - - # Create plan states based on move type - if self.move_type == "cartesian": - target_states = [ - PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), - PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), - ] - is_linear = self.interpolation == "linear" - else: # joint space - target_qpos = self._ik_solve(target_pose, start_qpos) - target_states = [ - PlanState(qpos=start_qpos, move_type=MoveType.JOINT_MOVE), - PlanState(qpos=target_qpos, move_type=MoveType.JOINT_MOVE), - ] - is_linear = False - - # Configure motion generation - options = MotionGenOptions( - control_part=self.control_part, - is_interpolate=True, - is_linear=is_linear, - interpolate_position_step=0.002, - plan_opts=ToppraPlanOptions( - sample_interval=kwargs.get("sample_interval", 30), - velocity_limit=velocity_limit, - acceleration_limit=acceleration_limit, - ), - ) - - result = self.plan_trajectory(target_states, options) - - # Return PlanResult directly - it contains all trajectory data - return result -``` - -### 4.5 RigidObject Extension - -```python -class RigidObject(BatchEntity): - """RigidObject with ObjectSemantics support.""" - - def get_object_semantics(self, env_id: int = 0) -> ObjectSemantics: - """Return ObjectSemantics for this rigid object. - - This method aggregates object label, affordance data, geometry, - and physical properties into a unified semantic representation - for intelligent interaction planning. 
- - Args: - env_id: Environment index for batched objects (default: 0) - - Returns: - ObjectSemantics containing all semantic information - - Example: - >>> obj = sim.get_rigid_object("apple_01") - >>> semantics = obj.get_object_semantics() - >>> print(semantics.label) # "apple" - >>> print(semantics.affordance.poses.shape) # [B, 4, 4] - """ - # Get object label from configuration or user data - label = self._get_object_label(env_id) - - # Get or compute affordance data - affordance = self._get_affordance_data(env_id) - - # Get geometry information - geometry = self._get_geometry_data(env_id) - - # Get physical properties - properties = self._get_physical_properties(env_id) - - return ObjectSemantics( - label=label, - affordance=affordance, - geometry=geometry, - properties=properties, - uid=self._entities[env_id].get_user_id() if env_id < len(self._entities) else None, - ) - - def _get_object_label(self, env_id: int) -> str: - """Extract object label from configuration or metadata.""" - # Try to get from object user data - entity = self._entities[env_id] if env_id < len(self._entities) else None - if entity: - # Check for label in user data - label = entity.get_user_data().get("label", None) if hasattr(entity, "get_user_data") else None - if label: - return label - - # Fallback: derive from object name or class - class_name = self.__class__.__name__.lower() - if "cube" in class_name: - return "cube" - elif "sphere" in class_name: - return "sphere" - elif "cylinder" in class_name: - return "cylinder" - - return "unknown" - - def _get_affordance_data(self, env_id: int) -> Affordance: - """Get or compute affordance data for the object.""" - # Check if precomputed affordance data exists - entity = self._entities[env_id] if env_id < len(self._entities) else None - if entity and hasattr(entity, "get_user_data"): - user_data = entity.get_user_data() - if "affordance" in user_data: - return user_data["affordance"] - - # Compute default affordance based on geometry - geometry = self._get_geometry_data(env_id) - bbox = geometry.get("bounding_box", [0.1, 0.1, 0.1]) - - # Create default grasp poses based on bounding box - # Generate poses for top, front, and side grasps - poses = [] - # Top grasp - pose_top = torch.eye(4) - pose_top[2, 3] = bbox[2] / 2 + 0.05 # Offset above object - poses.append(pose_top) - - # Front grasp - pose_front = torch.eye(4) - pose_front[0, 3] = bbox[0] / 2 + 0.05 - poses.append(pose_front) - - poses_tensor = torch.stack(poses) - - return GraspPose( - poses=poses_tensor, - grasp_types=["top", "front"], - object_label=self._get_object_label(env_id), - ) - - def _get_geometry_data(self, env_id: int) -> Dict[str, Any]: - """Get geometric information about the object.""" - # Get bounding box from entity - entity = self._entities[env_id] if env_id < len(self._entities) else None - bbox = [0.1, 0.1, 0.1] # Default - - if entity: - try: - # Try to get bounding box from entity - if hasattr(entity, "get_bounding_box"): - bbox = entity.get_bounding_box() - except Exception: - pass - - # Get mesh information if available - mesh_info = {} - if entity and hasattr(entity, "get_mesh_count"): - mesh_info["mesh_count"] = entity.get_mesh_count() - - return { - "bounding_box": bbox, - "volume": bbox[0] * bbox[1] * bbox[2], - "mesh_info": mesh_info, - } - - def _get_physical_properties(self, env_id: int) -> Dict[str, Any]: - """Get physical properties of the object.""" - entity = self._entities[env_id] if env_id < len(self._entities) else None - - properties = { - "mass": 1.0, - "friction": 
0.5, - "restitution": 0.1, - "body_type": self.body_type, - } - - if entity and hasattr(entity, "get_physical_body"): - try: - phys_body = entity.get_physical_body() - properties["mass"] = phys_body.get_mass() - properties["friction"] = phys_body.get_dynamic_friction() - except Exception: - pass - - return properties - - -# ============================================================================= -# Action Engine -# ============================================================================= - -class AtomicActionEngine: - """Central engine for managing and executing atomic actions.""" - - def __init__( - self, - robot: "Robot", - motion_generator: "MotionGenerator", - device: torch.device = torch.device("cuda"), - ): - self.robot = robot - self.motion_generator = motion_generator - self.device = device - - # Registry of action instances - self._actions: Dict[str, AtomicAction] = {} - - # Semantic analyzer for object understanding - self._semantic_analyzer = SemanticAnalyzer() - - # Initialize default actions - self._init_default_actions() - - def _init_default_actions(self): - """Initialize default atomic action instances.""" - control_parts = self.robot.control_parts or ["default"] - - for part in control_parts: - self.register_action( - f"reach_{part}", - ReachAction( - motion_generator=self.motion_generator, - robot=self.robot, - control_part=part, - device=self.device, - ) - ) - self.register_action( - f"grasp_{part}", - GraspAction( - motion_generator=self.motion_generator, - robot=self.robot, - control_part=part, - device=self.device, - ) - ) - self.register_action( - f"move_{part}", - MoveAction( - motion_generator=self.motion_generator, - robot=self.robot, - control_part=part, - device=self.device, - ) - ) - - def register_action(self, name: str, action: AtomicAction): - """Register a custom atomic action.""" - self._actions[name] = action - - def execute( - self, - action_name: str, - target: Union[torch.Tensor, str, ObjectSemantics], - control_part: Optional[str] = None, - **kwargs - ) -> PlanResult: - """Execute an atomic action. - - Args: - action_name: Name of registered action - target: Target pose, object label, or ObjectSemantics - control_part: Robot control part to use - **kwargs: Additional action parameters - - Returns: - PlanResult with trajectory (positions, velocities, accelerations), - end-effector poses (xpos_list), and success status. - """ - # Resolve action - if control_part: - action_key = f"{action_name}_{control_part}" - else: - action_key = action_name - - if action_key not in self._actions: - raise ValueError(f"Unknown action: {action_key}") - - action = self._actions[action_key] - - # Resolve target to ObjectSemantics if string label provided - if isinstance(target, str): - target = self._semantic_analyzer.analyze(target) - - # Execute action - returns PlanResult directly - return action.execute(target, **kwargs) - - def validate( - self, - action_name: str, - target: Union[torch.Tensor, str, ObjectSemantics], - control_part: Optional[str] = None, - **kwargs - ) -> bool: - """Validate if an action is feasible without executing.""" - if control_part: - action_key = f"{action_name}_{control_part}" - else: - action_key = action_name - - if action_key not in self._actions: - return False - - action = self._actions[action_key] - - if isinstance(target, str): - target = self._semantic_analyzer.analyze(target) - - return action.validate(target, **kwargs) -``` - ---- - -## 5. 
Usage Examples - -### 5.1 Basic Usage - -```python -from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg -from embodichain.lab.sim.objects import Robot -from embodichain.lab.gym.envs import BaseEnv - -# Initialize components -env: BaseEnv = ... # Your environment -robot: Robot = env.robot -motion_gen = MotionGenerator( - cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid)) -) - -# Create action engine -engine = AtomicActionEngine( - robot=robot, - motion_generator=motion_gen, - device=env.device, -) - -# Execute reach action -result = engine.execute( - action_name="reach", - target=target_pose, # [4, 4] tensor - control_part="left_arm", - approach_offset=torch.tensor([0, 0, -0.05]), -) - -if result.success: - trajectory = result.trajectory # [T, DOF] - env.execute_trajectory(trajectory) -``` - -### 5.2 Using Object Semantics - -```python -# Define object semantics -apple_semantics = ObjectSemantics( - label="apple", - affordance_data={ - "grasp_poses": { - "default": grasp_pose_1, # [4, 4] - "pinch": grasp_pose_2, - }, - "interaction_points": [point_1, point_2], - }, - geometry={ - "bounding_box": [0.08, 0.08, 0.08], - "mesh": "apple_mesh.obj", - }, - properties={ - "mass": 0.15, - "friction": 0.5, - }, -) - -# Execute grasp with semantics -result = engine.execute( - action_name="grasp", - target=apple_semantics, - control_part="right_arm", - grasp_type="pinch", -) -``` - -### 5.3 Custom Atomic Action - -```python -class RotateEEFAction(AtomicAction): - """Custom action to rotate end-effector.""" - - def execute( - self, - target: torch.Tensor, # Rotation angle in degrees - start_qpos: Optional[torch.Tensor] = None, - axis: str = "z", - **kwargs - ) -> PlanResult: - # Get current pose - current_qpos = start_qpos or self._get_current_qpos() - current_pose = self.robot.compute_fk( - qpos=current_qpos.unsqueeze(0), - name=self.control_part, - to_matrix=True, - ).squeeze(0) - - # Apply rotation - angle_rad = torch.deg2rad(target) - rotation = self._create_rotation_matrix(angle_rad, axis) - target_pose = current_pose.clone() - target_pose[:3, :3] = rotation @ current_pose[:3, :3] - - # Plan motion - target_states = [ - PlanState(qpos=current_qpos, move_type=MoveType.JOINT_MOVE), - PlanState(xpos=target_pose, move_type=MoveType.EEF_MOVE), - ] - - result = self.plan_trajectory(target_states) - - # Return PlanResult directly - it contains all trajectory data - return result - -# Register custom action -engine.register_action( - "rotate_eef", - RotateEEFAction( - motion_generator=motion_gen, - robot=robot, - control_part="left_arm", - device=device, - ) -) -``` - ---- - -## 6. 
Integration with Agentic Workflows - -### 6.1 LLM Action Selection - -```python -class ActionSelector: - """Select actions based on LLM reasoning.""" - - def __init__(self, action_engine: AtomicActionEngine): - self.engine = action_engine - - def select_action( - self, - task_description: str, - scene_observation: Dict, - available_objects: List[ObjectSemantics], - ) -> Tuple[str, Dict]: - """Select appropriate action based on task and scene.""" - # Parse task to determine required action - if "pick" in task_description.lower(): - action_name = "grasp" - target = self._resolve_object(task_description, available_objects) - params = {"grasp_type": "default"} - elif "reach" in task_description.lower(): - action_name = "reach" - target = self._resolve_target_pose(task_description, scene_observation) - params = {} - elif "place" in task_description.lower(): - action_name = "move" - target = self._resolve_placement(task_description, scene_observation) - params = {} - else: - raise ValueError(f"Cannot determine action for task: {task_description}") - - return action_name, {"target": target, **params} - - def _resolve_object( - self, - task_description: str, - available_objects: List[ObjectSemantics], - ) -> ObjectSemantics: - """Resolve object reference from task description.""" - # Use LLM or keyword matching to identify object - pass -``` - -### 6.2 Action Composition - -```python -class ActionComposer: - """Compose multiple atomic actions into complex behaviors.""" - - def __init__(self, engine: AtomicActionEngine): - self.engine = engine - - def pick_and_place( - self, - pick_object: ObjectSemantics, - place_pose: torch.Tensor, - control_part: str = "right_arm", - ) -> List[PlanResult]: - """Compose pick and place behavior.""" - results = [] - - # 1. Reach pre-grasp - result = self.engine.execute( - "reach", - target=pick_object, - control_part=control_part, - approach_offset=torch.tensor([0, 0, -0.08]), - ) - results.append(result) - if not result.success: - return results - - # 2. Grasp - result = self.engine.execute( - "grasp", - target=pick_object, - control_part=control_part, - ) - results.append(result) - if not result.success: - return results - - # 3. Lift - current_qpos = result.trajectory[-1] if result.trajectory is not None else None - lift_pose = self._get_lift_pose(pick_object) - result = self.engine.execute( - "move", - target=lift_pose, - control_part=control_part, - start_qpos=current_qpos, - ) - results.append(result) - if not result.success: - return results - - # 4. Move to place - result = self.engine.execute( - "move", - target=place_pose, - control_part=control_part, - ) - results.append(result) - if not result.success: - return results - - # 5. Release - result = self.engine.execute( - "release", - target=place_pose, - control_part=control_part, - ) - results.append(result) - - return results -``` - ---- - -## 7. 
Extensibility - -### 7.1 Custom Action Registration - -```python -# Define custom action -class PushAction(AtomicAction): - def execute(self, target, **kwargs): - # Implementation - pass - -# Register at runtime -from embodichain.lab.sim.atomic_actions import register_action - -register_action( - name="push", - action_class=PushAction, - motion_generator=motion_gen, - robot=robot, - control_part="right_arm", -) - -# Use in engine -engine.execute("push", target=object_semantics) -``` - -### 7.2 Plugin Architecture - -```python -class ActionPlugin(ABC): - """Base class for action plugins.""" - - @abstractmethod - def get_actions(self) -> Dict[str, Type[AtomicAction]]: - """Return mapping of action names to classes.""" - pass - - @abstractmethod - def get_configurators(self) -> Dict[str, Any]: - """Return configuration helpers.""" - pass - -# Example plugin -class ManipulationActionsPlugin(ActionPlugin): - def get_actions(self): - return { - "twist": TwistAction, - "slide": SlideAction, - "insert": InsertAction, - } -``` - ---- - -## 8. Summary - -This atomic action abstraction design provides: - -1. **Unified Interface**: All atomic actions inherit from `AtomicAction` with consistent `execute()` and `validate()` methods - -2. **Semantic Awareness**: Object semantics (label, affordance, geometry) are first-class citizens - -3. **Motion Planning Integration**: Leverages existing `MotionGenerator`, solvers, and warp interpolation - -4. **Agentic Workflow Support**: Easy composition into complex behaviors and LLM integration - -5. **Extensibility**: Registry-based action registration and plugin architecture - -The design bridges low-level motion planning with high-level agent reasoning, enabling both precise control and semantic task execution. From 26b7e60359a90e6d787528ffdd9bd27cc040e3de Mon Sep 17 00:00:00 2001 From: yuecideng Date: Fri, 1 May 2026 07:58:07 +0000 Subject: [PATCH 7/9] wip --- tests/sim/atomic_actions/test_actions.py | 304 ++++++++++++++++++ tests/sim/atomic_actions/test_atom_actions.py | 257 --------------- tests/sim/atomic_actions/test_core.py | 171 ++++++++++ tests/sim/atomic_actions/test_engine.py | 191 +++++++++++ 4 files changed, 666 insertions(+), 257 deletions(-) create mode 100644 tests/sim/atomic_actions/test_actions.py delete mode 100644 tests/sim/atomic_actions/test_atom_actions.py create mode 100644 tests/sim/atomic_actions/test_core.py create mode 100644 tests/sim/atomic_actions/test_engine.py diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py new file mode 100644 index 00000000..ba7324cc --- /dev/null +++ b/tests/sim/atomic_actions/test_actions.py @@ -0,0 +1,304 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
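+# ----------------------------------------------------------------------------
+
+# NOTE: The tests in this file exercise the pure-Python helper methods of the
+# atomic actions against mocked Robot / MotionGenerator objects (see
+# _make_mock_robot below), so they run on CPU without launching a simulator.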
+# ---------------------------------------------------------------------------- + +"""Tests for atomic action implementations (MoveAction, PickUpAction, PlaceAction).""" + +from __future__ import annotations + +import pytest +import torch +from unittest.mock import MagicMock, Mock + +from embodichain.lab.sim.atomic_actions.core import ( + ActionCfg, + Affordance, + ObjectSemantics, +) +from embodichain.lab.sim.atomic_actions.actions import ( + MoveAction, + MoveActionCfg, + PickUpAction, + PickUpActionCfg, + PlaceAction, + PlaceActionCfg, +) + +# --------------------------------------------------------------------------- +# Mock Helpers +# --------------------------------------------------------------------------- + +NUM_ENVS = 2 # number of parallel environments used in tests +ARM_DOF = 6 # typical arm joint count +HAND_DOF = 2 # typical hand joint count +TOTAL_DOF = ARM_DOF + HAND_DOF + + +def _make_mock_robot( + num_envs: int = NUM_ENVS, + arm_dof: int = ARM_DOF, + hand_dof: int = HAND_DOF, +) -> Mock: + """Create a mock Robot with arm and hand control parts.""" + robot = Mock() + robot.device = torch.device("cpu") + robot.dof = arm_dof + hand_dof + + def get_qpos(name=None): + if name == "arm": + return torch.zeros(num_envs, arm_dof) + if name == "hand": + return torch.zeros(num_envs, hand_dof) + # Full qpos + return torch.zeros(num_envs, arm_dof + hand_dof) + + robot.get_qpos = get_qpos + + def get_joint_ids(name=None): + if name == "arm": + return list(range(arm_dof)) + if name == "hand": + return list(range(arm_dof, arm_dof + hand_dof)) + return list(range(arm_dof + hand_dof)) + + robot.get_joint_ids = get_joint_ids + + # compute_ik: return success and identity-like qpos + def compute_ik(pose=None, qpos_seed=None, name=None, joint_seed=None): + seed = joint_seed if joint_seed is not None else qpos_seed + if seed is None: + seed = torch.zeros(num_envs, arm_dof) + success = torch.ones(num_envs, dtype=torch.bool) + return success, seed.clone() + + robot.compute_ik = compute_ik + + # compute_fk: return identity-like poses + def compute_fk(qpos=None, name=None, to_matrix=True): + n = qpos.shape[0] if qpos is not None else num_envs + poses = torch.eye(4).unsqueeze(0).repeat(n, 1, 1) + return poses + + robot.compute_fk = compute_fk + + return robot + + +def _make_mock_motion_generator(robot: Mock | None = None) -> Mock: + """Create a mock MotionGenerator.""" + mg = Mock() + mg.robot = robot or _make_mock_robot() + mg.device = mg.robot.device + return mg + + +# --------------------------------------------------------------------------- +# MoveAction +# --------------------------------------------------------------------------- + + +class TestMoveActionHelpers: + """Tests for MoveAction helper methods that don't need simulation.""" + + def setup_method(self): + self.robot = _make_mock_robot() + self.mg = _make_mock_motion_generator(self.robot) + self.cfg = MoveActionCfg(sample_interval=50) + self.action = MoveAction(self.mg, cfg=self.cfg) + + def test_init_sets_attributes(self): + assert self.action.n_envs == NUM_ENVS + assert self.action.dof == ARM_DOF + assert self.action.device == torch.device("cpu") + + def test_resolve_pose_target_from_4x4(self): + target = torch.eye(4) + is_success, result = self.action._resolve_pose_target( + target, action_name="TestAction" + ) + assert is_success is True + assert result.shape == (NUM_ENVS, 4, 4) + # Single pose should be repeated for all envs + for i in range(NUM_ENVS): + assert torch.equal(result[i], torch.eye(4)) + + def 
test_resolve_pose_target_from_batched(self): + target = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1) + target[:, 2, 3] = 0.5 # offset z for each env + is_success, result = self.action._resolve_pose_target( + target, action_name="TestAction" + ) + assert is_success is True + assert result.shape == (NUM_ENVS, 4, 4) + for i in range(NUM_ENVS): + assert result[i, 2, 3].item() == pytest.approx(0.5) + + def test_resolve_start_qpos_defaults_to_current(self): + result = self.action._resolve_start_qpos(None) + assert result.shape == (NUM_ENVS, ARM_DOF) + + def test_resolve_start_qpos_broadcasts_single(self): + single = torch.ones(ARM_DOF) + result = self.action._resolve_start_qpos(single) + assert result.shape == (NUM_ENVS, ARM_DOF) + for i in range(NUM_ENVS): + assert torch.equal(result[i], single) + + def test_compute_three_phase_waypoints_sums_to_sample_interval(self): + hand_interp_steps = 5 + first, second, third = self.action._compute_three_phase_waypoints( + hand_interp_steps, + first_phase_name="approach", + third_phase_name="lift", + ) + assert first + second + third == self.cfg.sample_interval + assert first >= 2 + assert third >= 2 + + def test_interpolate_hand_qpos_shape(self): + n_waypoints = 10 + start = torch.zeros(HAND_DOF) + end = torch.ones(HAND_DOF) + result = self.action._interpolate_hand_qpos(start, end, n_waypoints) + assert result.shape == (n_waypoints, HAND_DOF) + # First and last should match endpoints + assert torch.allclose(result[0], start) + assert torch.allclose(result[-1], end) + + def test_interpolate_hand_qpos_linear(self): + """Verify linear interpolation between two hand configs.""" + n_waypoints = 3 + start = torch.tensor([0.0, 0.0]) + end = torch.tensor([1.0, 1.0]) + result = self.action._interpolate_hand_qpos(start, end, n_waypoints) + expected_mid = torch.tensor([0.5, 0.5]) + assert torch.allclose(result[1], expected_mid, atol=1e-6) + + +# --------------------------------------------------------------------------- +# PickUpAction +# --------------------------------------------------------------------------- + + +class TestPickUpActionInit: + """Tests for PickUpAction initialization and config validation.""" + + def setup_method(self): + self.robot = _make_mock_robot() + self.mg = _make_mock_motion_generator(self.robot) + + def _make_cfg(self, **overrides): + defaults = dict( + hand_open_qpos=torch.tensor([0.0, 0.0]), + hand_close_qpos=torch.tensor([0.025, 0.025]), + control_part="arm", + hand_control_part="hand", + pre_grasp_distance=0.15, + lift_height=0.15, + approach_direction=torch.tensor([0.0, 0.0, -1.0]), + ) + defaults.update(overrides) + return PickUpActionCfg(**defaults) + + def test_init_sets_hand_joint_ids(self): + cfg = self._make_cfg() + action = PickUpAction(self.mg, cfg=cfg) + assert action.hand_joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF)) + assert action.joint_ids == list(range(ARM_DOF)) + list( + range(ARM_DOF, ARM_DOF + HAND_DOF) + ) + assert action.dof == TOTAL_DOF + + +# --------------------------------------------------------------------------- +# PlaceAction +# --------------------------------------------------------------------------- + + +class TestPlaceActionInit: + """Tests for PlaceAction initialization.""" + + def setup_method(self): + self.robot = _make_mock_robot() + self.mg = _make_mock_motion_generator(self.robot) + + def _make_cfg(self, **overrides): + defaults = dict( + hand_open_qpos=torch.tensor([0.0, 0.0]), + hand_close_qpos=torch.tensor([0.025, 0.025]), + control_part="arm", + hand_control_part="hand", + 
lift_height=0.15, + ) + defaults.update(overrides) + return PlaceActionCfg(**defaults) + + def test_init_sets_hand_joint_ids(self): + cfg = self._make_cfg() + action = PlaceAction(self.mg, cfg=cfg) + assert action.hand_joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF)) + assert action.dof == TOTAL_DOF + + +# --------------------------------------------------------------------------- +# AtomicAction._apply_offset +# --------------------------------------------------------------------------- + + +class TestAtomicActionApplyOffset: + """Tests for the shared _apply_offset method inherited from AtomicAction.""" + + def setup_method(self): + self.robot = _make_mock_robot() + self.mg = _make_mock_motion_generator(self.robot) + self.cfg = MoveActionCfg() + self.action = MoveAction(self.mg, cfg=self.cfg) + + def test_apply_offset_batched(self): + # [N, 4, 4] poses, [N, 3] offsets + poses = torch.eye(4).unsqueeze(0).repeat(3, 1, 1) + offsets = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + result = self.action._apply_offset(poses, offsets) + assert result.shape == (3, 4, 4) + assert result[0, :3, 3].tolist() == pytest.approx([1.0, 0.0, 0.0]) + assert result[1, :3, 3].tolist() == pytest.approx([0.0, 1.0, 0.0]) + assert result[2, :3, 3].tolist() == pytest.approx([0.0, 0.0, 1.0]) + + def test_apply_offset_broadcasts_single_offset(self): + # [N, 4, 4] poses, [3] single offset broadcast to all + poses = torch.eye(4).unsqueeze(0).repeat(2, 1, 1) + offset = torch.tensor([0.1, 0.2, 0.3]) + result = self.action._apply_offset(poses, offset) + assert result.shape == (2, 4, 4) + for i in range(2): + assert result[i, :3, 3].tolist() == pytest.approx([0.1, 0.2, 0.3]) + + def test_apply_offset_preserves_rotation(self): + """Offset only affects translation; rotation part stays unchanged.""" + poses = torch.eye(4).unsqueeze(0).repeat(1, 1, 1) + # Set a non-trivial rotation + poses[0, 0, 1] = -1.0 + poses[0, 1, 0] = 1.0 + offset = torch.tensor([1.0, 2.0, 3.0]) + result = self.action._apply_offset(poses, offset) + # Rotation block should be unchanged + assert torch.equal(result[0, :3, :3], poses[0, :3, :3]) + + +if __name__ == "__main__": + # For visual debugging + test = TestMoveActionHelpers() + test.setup_method() + test.test_compute_three_phase_waypoints_sums_to_sample_interval() diff --git a/tests/sim/atomic_actions/test_atom_actions.py b/tests/sim/atomic_actions/test_atom_actions.py deleted file mode 100644 index 751130a2..00000000 --- a/tests/sim/atomic_actions/test_atom_actions.py +++ /dev/null @@ -1,257 +0,0 @@ -# ---------------------------------------------------------------------------- -# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ----------------------------------------------------------------------------, - - -import argparse -import numpy as np -import time -import open3d as o3d -import torch - -from embodichain.lab.sim import SimulationManager, SimulationManagerCfg -from embodichain.lab.sim.objects import Robot, RigidObject -from embodichain.lab.sim.shapes import MeshCfg -from embodichain.lab.sim.solvers import PytorchSolverCfg -from embodichain.data import get_data_path -from embodichain.utils import logger -from embodichain.lab.sim.cfg import ( - JointDrivePropertiesCfg, - RobotCfg, - RigidObjectCfg, - RigidBodyAttributesCfg, - LightCfg, - URDFCfg, -) -from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg -from embodichain.lab.sim.shapes import MeshCfg - -from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import ( - GripperCollisionCfg, -) -from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import ( - GraspGenerator, - GraspGeneratorCfg, - AntipodalSamplerCfg, -) -from embodichain.lab.sim.atomic_actions.engine import ( - AtomicActionEngine, - register_action, -) -from embodichain.lab.sim.atomic_actions.core import ObjectSemantics, AntipodalAffordance -from embodichain.lab.sim.atomic_actions.actions import ( - PickUpActionCfg, - PlaceActionCfg, - MoveActionCfg, -) - - -def initialize_simulation(): - """ - Initialize the simulation environment based on the provided arguments. - - Args: - args (argparse.Namespace): Parsed command-line arguments. - - Returns: - SimulationManager: Configured simulation manager instance. - """ - config = SimulationManagerCfg( - headless=True, - sim_device="cuda", - physics_dt=1.0 / 100.0, - num_envs=1, - ) - sim = SimulationManager(config) - - light = sim.add_light( - cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0)) - ) - - return sim - - -def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]): - """ - Create and configure a robot with an arm and a dexterous hand in the simulation. - - Args: - sim (SimulationManager): The simulation manager instance. - - Returns: - Robot: The configured robot instance added to the simulation. 
- """ - # Retrieve URDF paths for the robot arm and hand - ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf") - gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf") - # Configure the robot with its components and control properties - cfg = RobotCfg( - uid="UR10", - urdf_cfg=URDFCfg( - components=[ - {"component_type": "arm", "urdf_path": ur10_urdf_path}, - {"component_type": "hand", "urdf_path": gripper_urdf_path}, - ] - ), - drive_pros=JointDrivePropertiesCfg( - stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e2}, - damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e1}, - max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e3}, - drive_type="force", - ), - control_parts={ - "arm": ["JOINT[0-9]"], - "hand": ["FINGER[1-2]"], - }, - solver_cfg={ - "arm": PytorchSolverCfg( - end_link_name="ee_link", - root_link_name="base_link", - tcp=[ - [0.0, 1.0, 0.0, 0.0], - [-1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.12], - [0.0, 0.0, 0.0, 1.0], - ], - ) - }, - init_qpos=[0.0, -np.pi / 2, -np.pi / 2, np.pi / 2, -np.pi / 2, 0.0, 0.0, 0.0], - init_pos=position, - ) - return sim.add_robot(cfg=cfg) - - -def create_mug(sim: SimulationManager) -> RigidObject: - mug_cfg = RigidObjectCfg( - uid="table", - shape=MeshCfg( - fpath=get_data_path("CoffeeCup/cup.ply"), - ), - attrs=RigidBodyAttributesCfg( - mass=0.01, - dynamic_friction=0.97, - static_friction=0.99, - ), - max_convex_hull_num=16, - init_pos=[0.55, 0.0, 0.01], - init_rot=[0.0, 0.0, -90], - body_scale=(4, 4, 4), - ) - mug = sim.add_rigid_object(cfg=mug_cfg) - return mug - - -def test_atomic_actions(): - sim: SimulationManager = initialize_simulation() - robot = create_robot(sim) - mug = create_mug(sim) - - motion_gen = MotionGenerator( - cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid)) - ) - - pickup_cfg = PickUpActionCfg( - hand_open_qpos=torch.tensor( - [0.00, 0.00], dtype=torch.float32, device=sim.device - ), - hand_close_qpos=torch.tensor( - [0.025, 0.025], dtype=torch.float32, device=sim.device - ), - control_part="arm", - hand_control_part="hand", - approach_direction=torch.tensor( - [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device - ), - pre_grasp_distance=0.15, - lift_height=0.15, - ) - - place_cfg = PlaceActionCfg( - hand_open_qpos=torch.tensor( - [0.00, 0.00], dtype=torch.float32, device=sim.device - ), - hand_close_qpos=torch.tensor( - [0.025, 0.025], dtype=torch.float32, device=sim.device - ), - control_part="arm", - hand_control_part="hand", - lift_height=0.15, - ) - - move_cfg = MoveActionCfg( - control_part="arm", - ) - - atom_engine = AtomicActionEngine( - motion_generator=motion_gen, - actions_cfg_list=(pickup_cfg, place_cfg, move_cfg), - ) - - sim.init_gpu_physics() - - # Define object semantics and affordances for the mug - gripper_collision_cfg = GripperCollisionCfg( - max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012 - ) - generator_cfg = GraspGeneratorCfg( - viser_port=11801, - antipodal_sampler_cfg=AntipodalSamplerCfg( - n_sample=20000, max_length=0.088, min_length=0.003 - ), - ) - mug_grasp_affordance = AntipodalAffordance( - object_label="mug", - force_reannotate=False, # set to True if you want to re-annotate affordance even if the object has been seen before, which is useful when you have changed the grasp generator configuration and want to see the effect of new configuration, but it will take more time to annotate. 
So usually set it to False and only set it to True when you have changed the grasp generator configuration or you want to debug the annotation process. - custom_config={ - "gripper_collision_cfg": gripper_collision_cfg, - "generator_cfg": generator_cfg, - }, - ) - mug_semantics = ObjectSemantics( - label="mug", - geometry={ - "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0], - "mesh_triangles": mug.get_triangles(env_ids=[0])[0], - }, - affordance=mug_grasp_affordance, - entity=mug, # in order to fetch object pose - ) - - target_grasp_xpos = torch.tensor( - [ - [-0.0539, -0.9985, -0.0022, 0.4489], - [-0.9977, 0.0540, -0.0401, -0.0030], - [0.0401, 0.0000, -0.9992, 0.1400], - [0.0000, 0.0000, 0.0000, 1.0000], - ], - dtype=torch.float32, - device=sim.device, - ) - - place_xpos = target_grasp_xpos.clone() - place_xpos[:3, 3] += torch.tensor([-0.2, 0.4, 0.1], device=sim.device) - - rest_xpos = target_grasp_xpos.clone() - rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device) - - is_success, traj = atom_engine.execute_static( - target_list=[target_grasp_xpos, place_xpos, rest_xpos] - ) - assert is_success, "Atomic action execution failed." - assert traj.shape == torch.Size([1, 210, 8]) - - -if __name__ == "__main__": - test_atomic_actions() diff --git a/tests/sim/atomic_actions/test_core.py b/tests/sim/atomic_actions/test_core.py new file mode 100644 index 00000000..7cebaa7b --- /dev/null +++ b/tests/sim/atomic_actions/test_core.py @@ -0,0 +1,171 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +"""Tests for atomic action core module (Affordance, InteractionPoints, ObjectSemantics, ActionCfg).""" + +from __future__ import annotations + +import pytest +import torch + +from embodichain.lab.sim.atomic_actions.core import ( + ActionCfg, + Affordance, + InteractionPoints, + ObjectSemantics, +) + +# --------------------------------------------------------------------------- +# Affordance +# --------------------------------------------------------------------------- + + +class TestAffordance: + """Tests for the Affordance base dataclass.""" + + def test_default_values(self): + aff = Affordance() + assert aff.object_label == "" + assert aff.geometry == {} + assert aff.custom_config == {} + + def test_mesh_vertices_returns_tensor(self): + vertices = torch.randn(10, 3) + aff = Affordance(geometry={"mesh_vertices": vertices}) + assert torch.equal(aff.mesh_vertices, vertices) + + def test_mesh_vertices_returns_none_when_missing(self): + aff = Affordance() + assert aff.mesh_vertices is None + + def test_mesh_vertices_raises_on_wrong_type(self): + aff = Affordance(geometry={"mesh_vertices": [1, 2, 3]}) + with pytest.raises(TypeError, match="must be a torch.Tensor"): + _ = aff.mesh_vertices + + def test_mesh_triangles_returns_tensor(self): + triangles = torch.randint(0, 10, (5, 3)) + aff = Affordance(geometry={"mesh_triangles": triangles}) + assert torch.equal(aff.mesh_triangles, triangles) + + def test_mesh_triangles_returns_none_when_missing(self): + aff = Affordance() + assert aff.mesh_triangles is None + + def test_mesh_triangles_raises_on_wrong_type(self): + aff = Affordance(geometry={"mesh_triangles": "bad"}) + with pytest.raises(TypeError, match="must be a torch.Tensor"): + _ = aff.mesh_triangles + + def test_custom_config_get_set(self): + aff = Affordance() + aff.set_custom_config("key_a", 42) + assert aff.get_custom_config("key_a") == 42 + assert aff.get_custom_config("missing") is None + assert aff.get_custom_config("missing", "default") == "default" + + def test_get_batch_size_returns_one(self): + # Base Affordance always returns 1 + assert Affordance().get_batch_size() == 1 + + +# --------------------------------------------------------------------------- +# InteractionPoints +# --------------------------------------------------------------------------- + + +class TestInteractionPoints: + """Tests for InteractionPoints affordance.""" + + def test_default_points_shape(self): + ip = InteractionPoints() + assert ip.points.shape == (1, 3) + + def test_get_batch_size_matches_points(self): + points = torch.randn(5, 3) + ip = InteractionPoints(points=points) + assert ip.get_batch_size() == 5 + + def test_get_points_by_type_returns_matching_subset(self): + points = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) + ip = InteractionPoints(points=points, point_types=["push", "poke", "push"]) + result = ip.get_points_by_type("push") + assert result is not None + assert result.shape == (2, 3) + assert torch.equal(result[0], points[0]) + assert torch.equal(result[1], points[2]) + + def test_get_points_by_type_returns_none_for_missing_type(self): + ip = InteractionPoints(points=torch.zeros(2, 3), point_types=["push", "push"]) + assert ip.get_points_by_type("poke") is None + + def test_get_approach_direction_from_normals(self): + normals = torch.tensor([[0.0, 0.0, 1.0], [1.0, 0.0, 0.0]]) + ip = InteractionPoints(points=torch.zeros(2, 3), normals=normals) + # Approach is opposite of normal + assert 
torch.equal(ip.get_approach_direction(0), torch.tensor([0.0, 0.0, -1.0])) + assert torch.equal(ip.get_approach_direction(1), torch.tensor([-1.0, 0.0, 0.0])) + + def test_get_approach_direction_default_without_normals(self): + ip = InteractionPoints(points=torch.zeros(1, 3)) + direction = ip.get_approach_direction(0) + assert torch.equal(direction, torch.tensor([0.0, 0.0, 1.0])) + + +# --------------------------------------------------------------------------- +# ObjectSemantics +# --------------------------------------------------------------------------- + + +class TestObjectSemantics: + """Tests for ObjectSemantics dataclass.""" + + def test_post_init_binds_label_and_geometry(self): + geometry = {"bounding_box": [0.1, 0.2, 0.3]} + aff = Affordance() + sem = ObjectSemantics( + affordance=aff, + geometry=geometry, + label="mug", + ) + assert sem.affordance.object_label == "mug" + assert sem.affordance.geometry is geometry + + def test_default_optional_fields(self): + sem = ObjectSemantics( + affordance=Affordance(), + geometry={}, + ) + assert sem.label == "none" + assert sem.properties == {} + assert sem.entity is None + + +# --------------------------------------------------------------------------- +# ActionCfg +# --------------------------------------------------------------------------- + + +class TestActionCfg: + """Tests for ActionCfg defaults.""" + + def test_default_values(self): + cfg = ActionCfg() + assert cfg.name == "default" + assert cfg.control_part == "arm" + assert cfg.interpolation_type == "linear" + assert cfg.velocity_limit is None + assert cfg.acceleration_limit is None diff --git a/tests/sim/atomic_actions/test_engine.py b/tests/sim/atomic_actions/test_engine.py new file mode 100644 index 00000000..52dc034d --- /dev/null +++ b/tests/sim/atomic_actions/test_engine.py @@ -0,0 +1,191 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2026 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +"""Tests for atomic action engine (registry, SemanticAnalyzer, AtomicActionEngine).""" + +from __future__ import annotations + +import pytest +import torch +from unittest.mock import MagicMock, Mock + +from embodichain.lab.sim.atomic_actions.core import ( + ActionCfg, + Affordance, + ObjectSemantics, +) +from embodichain.lab.sim.atomic_actions.engine import ( + AtomicActionEngine, + SemanticAnalyzer, + get_registered_actions, + register_action, + unregister_action, +) + +# --------------------------------------------------------------------------- +# Global Action Registry +# --------------------------------------------------------------------------- + + +class TestGlobalRegistry: + """Tests for register_action / unregister_action / get_registered_actions.""" + + def teardown_method(self): + # Clean up any test registrations + unregister_action("_test_dummy") + + def test_register_and_retrieve(self): + mock_cls = Mock() + register_action("_test_dummy", mock_cls) + registry = get_registered_actions() + assert "_test_dummy" in registry + assert registry["_test_dummy"] is mock_cls + + def test_unregister_removes_entry(self): + register_action("_test_dummy", Mock()) + unregister_action("_test_dummy") + assert "_test_dummy" not in get_registered_actions() + + def test_unregister_nonexistent_is_noop(self): + # Should not raise + unregister_action("_nonexistent_action") + + def test_get_registered_actions_returns_copy(self): + """Mutating the returned dict should not affect the global registry.""" + result = get_registered_actions() + result["_should_not_persist"] = Mock() + assert "_should_not_persist" not in get_registered_actions() + + +# --------------------------------------------------------------------------- +# SemanticAnalyzer +# --------------------------------------------------------------------------- + + +class TestSemanticAnalyzer: + """Tests for SemanticAnalyzer.""" + + def setup_method(self): + self.analyzer = SemanticAnalyzer() + + def test_analyze_returns_object_semantics(self): + sem = self.analyzer.analyze("mug") + assert isinstance(sem, ObjectSemantics) + assert sem.label == "mug" + assert isinstance(sem.affordance, Affordance) + + def test_analyze_caches_by_default(self): + sem1 = self.analyzer.analyze("bottle") + sem2 = self.analyzer.analyze("bottle") + assert sem1 is sem2 + + def test_analyze_bypasses_cache_with_geometry(self): + sem1 = self.analyzer.analyze("bottle") + sem2 = self.analyzer.analyze( + "bottle", geometry={"bounding_box": [0.2, 0.2, 0.2]} + ) + assert sem1 is not sem2 + + def test_analyze_no_cache(self): + sem1 = self.analyzer.analyze("cup", use_cache=False) + sem2 = self.analyzer.analyze("cup", use_cache=False) + assert sem1 is not sem2 + + def test_clear_cache(self): + self.analyzer.analyze("can") + self.analyzer.clear_cache() + # After clearing, a new object should be created + sem1 = self.analyzer.analyze("can") + sem2 = self.analyzer.analyze("can") + assert sem1 is sem2 # re-cached after clear + + +# --------------------------------------------------------------------------- +# AtomicActionEngine._resolve_target +# --------------------------------------------------------------------------- + + +class TestResolveTarget: + """Tests for AtomicActionEngine._resolve_target with various input types.""" + + def setup_method(self): + self.robot = Mock() + self.robot.device = torch.device("cpu") + self.robot.dof = 6 + self.robot.get_qpos.return_value = torch.zeros(1, 6) + 
self.robot.get_joint_ids.return_value = list(range(6)) + + self.mg = Mock() + self.mg.robot = self.robot + self.mg.device = torch.device("cpu") + + self.engine = AtomicActionEngine(self.mg, actions_cfg_list=[]) + + def test_tensor_passthrough(self): + tensor = torch.eye(4) + result = self.engine._resolve_target(tensor) + assert result is tensor + + def test_object_semantics_passthrough(self): + sem = ObjectSemantics(affordance=Affordance(), geometry={}) + result = self.engine._resolve_target(sem) + assert result is sem + + def test_string_resolved_via_semantic_analyzer(self): + result = self.engine._resolve_target("mug") + assert isinstance(result, ObjectSemantics) + assert result.label == "mug" + + def test_dict_with_pose_key(self): + pose = torch.eye(4) + result = self.engine._resolve_target({"pose": pose}) + assert result is pose + + def test_dict_with_pose_raises_on_non_tensor(self): + with pytest.raises(TypeError, match="must be a torch.Tensor"): + self.engine._resolve_target({"pose": "not_a_tensor"}) + + def test_dict_with_semantics_key(self): + sem = ObjectSemantics(affordance=Affordance(), geometry={}, label="bottle") + result = self.engine._resolve_target({"semantics": sem}) + assert result is sem + + def test_dict_with_semantics_raises_on_wrong_type(self): + with pytest.raises(TypeError, match="must be an ObjectSemantics"): + self.engine._resolve_target({"semantics": "wrong"}) + + def test_dict_with_label_uses_analyzer(self): + result = self.engine._resolve_target({"label": "apple"}) + assert isinstance(result, ObjectSemantics) + assert result.label == "apple" + + def test_dict_without_label_raises(self): + with pytest.raises(ValueError, match="must provide 'label'"): + self.engine._resolve_target({"geometry": {}}) + + def test_dict_with_non_string_label_raises(self): + with pytest.raises(TypeError, match="must be a string"): + self.engine._resolve_target({"label": 123}) + + def test_unsupported_type_raises(self): + with pytest.raises(TypeError, match="target must be"): + self.engine._resolve_target(42) + + +if __name__ == "__main__": + test = TestSemanticAnalyzer() + test.setup_method() + test.test_analyze_returns_object_semantics() From 8c2a8671ee8b169ad232c288d697c68037060989 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Fri, 1 May 2026 09:32:25 +0000 Subject: [PATCH 8/9] wip --- scripts/tutorials/sim/atomic_actions.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/scripts/tutorials/sim/atomic_actions.py b/scripts/tutorials/sim/atomic_actions.py index 3dace43e..41ab92c0 100644 --- a/scripts/tutorials/sim/atomic_actions.py +++ b/scripts/tutorials/sim/atomic_actions.py @@ -298,8 +298,8 @@ def main(): # Place the mug 20 cm to the left and 40 cm forward from its pickup pose place_xpos = torch.tensor( [ - [-0.0539, -0.9985, -0.0022, 0.2489], - [-0.9977, 0.0540, -0.0401, 0.3970], + [-0.0539, -0.9985, -0.0022, 0.4489], + [-0.9977, 0.0540, -0.0401, -0.0030], [0.0401, 0.0000, -0.9992, 0.1400], [0.0000, 0.0000, 0.0000, 1.0000], ], @@ -308,16 +308,8 @@ def main(): ) # Move the arm to a safe resting pose after placing - rest_xpos = torch.tensor( - [ - [-0.0539, -0.9985, -0.0022, 0.5000], - [-0.9977, 0.0540, -0.0401, 0.0000], - [0.0401, 0.0000, -0.9992, 0.5000], - [0.0000, 0.0000, 0.0000, 1.0000], - ], - dtype=torch.float32, - device=sim.device, - ) + rest_xpos = place_xpos.clone() + rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device) # ------------------------------------------------------------------ # # Step 7: Plan and execute the 
full sequence # From bec694110b3c3b6d5926862d6f0e61b9278b9376 Mon Sep 17 00:00:00 2001 From: yuecideng Date: Fri, 1 May 2026 09:41:05 +0000 Subject: [PATCH 9/9] wip Co-authored-by: Copilot --- scripts/tutorials/sim/atomic_actions.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/scripts/tutorials/sim/atomic_actions.py b/scripts/tutorials/sim/atomic_actions.py index 41ab92c0..1f4de8d5 100644 --- a/scripts/tutorials/sim/atomic_actions.py +++ b/scripts/tutorials/sim/atomic_actions.py @@ -247,7 +247,7 @@ def main(): # actions_cfg_list defines the ORDER of actions that execute_static() # # will run. Each entry is matched positionally to target_list. # # ------------------------------------------------------------------ # - atom_engine = AtomicActionEngine( + atomic_engine = AtomicActionEngine( motion_generator=motion_gen, actions_cfg_list=[pickup_cfg, place_cfg, move_cfg], ) @@ -298,9 +298,9 @@ def main(): # Place the mug 20 cm to the left and 40 cm forward from its pickup pose place_xpos = torch.tensor( [ - [-0.0539, -0.9985, -0.0022, 0.4489], - [-0.9977, 0.0540, -0.0401, -0.0030], - [0.0401, 0.0000, -0.9992, 0.1400], + [-0.0539, -0.9985, -0.0022, 0.2489], + [-0.9977, 0.0540, -0.0401, 0.3970], + [0.0401, 0.0000, -0.9992, 0.2400], [0.0000, 0.0000, 0.0000, 1.0000], ], dtype=torch.float32, @@ -308,8 +308,16 @@ def main(): ) # Move the arm to a safe resting pose after placing - rest_xpos = place_xpos.clone() - rest_xpos[:3, 3] = torch.tensor([0.5, 0.0, 0.5], device=sim.device) + rest_xpos = torch.tensor( + [ + [-0.0539, -0.9985, -0.0022, 0.5000], + [-0.9977, 0.0540, -0.0401, 0.0000], + [0.0401, 0.0000, -0.9992, 0.5000], + [0.0000, 0.0000, 0.0000, 1.0000], + ], + dtype=torch.float32, + device=sim.device, + ) # ------------------------------------------------------------------ # # Step 7: Plan and execute the full sequence # @@ -319,7 +327,7 @@ def main(): # We then replay it frame-by-frame in the simulator. # # ------------------------------------------------------------------ # print("Planning pick → place → move trajectory...") - is_success, traj = atom_engine.execute_static( + is_success, traj = atomic_engine.execute_static( target_list=[mug_semantics, place_xpos, rest_xpos] )
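
A closing note on the contracts these patches pin down. `TestAtomicActionApplyOffset` fully determines the offset helper's behavior: offsets touch only the translation column of each `[4, 4]` pose, and a bare `[3]` offset broadcasts across the batch. A minimal standalone sketch of that contract (a free function for illustration only; the real implementation is the `AtomicAction._apply_offset` method the tests exercise):

```python
import torch


def apply_offset(poses: torch.Tensor, offsets: torch.Tensor) -> torch.Tensor:
    """Translate a batch of [N, 4, 4] poses by [N, 3] (or broadcast [3]) offsets.

    Only the translation column changes; the rotation block is left untouched,
    matching test_apply_offset_preserves_rotation.
    """
    result = poses.clone()
    result[:, :3, 3] = result[:, :3, 3] + offsets  # [3] broadcasts to [N, 3]
    return result


poses = torch.eye(4).unsqueeze(0).repeat(2, 1, 1)
out = apply_offset(poses, torch.tensor([0.1, 0.2, 0.3]))
assert out.shape == (2, 4, 4)
assert torch.equal(out[0, :3, :3], poses[0, :3, :3])  # rotation preserved
assert torch.allclose(out[:, :3, 3], torch.tensor([0.1, 0.2, 0.3]))
```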
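The `InteractionPoints` tests encode an equally small convention worth stating outright: the approach vector is the negated per-point surface normal (the end effector moves into the surface), with `+Z` as the fallback when no normals are annotated. A sketch under that assumption, using the same tensor layout as the tests:

```python
from typing import Optional

import torch


def approach_direction(normals: Optional[torch.Tensor], index: int) -> torch.Tensor:
    """Approach is the negated per-point normal; default to +Z without normals."""
    if normals is None:
        return torch.tensor([0.0, 0.0, 1.0])
    return -normals[index]


normals = torch.tensor([[0.0, 0.0, 1.0]])
assert torch.equal(approach_direction(normals, 0), torch.tensor([0.0, 0.0, -1.0]))
assert torch.equal(approach_direction(None, 0), torch.tensor([0.0, 0.0, 1.0]))
```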
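`TestGlobalRegistry` pins down two easy-to-miss guarantees of the action registry: `get_registered_actions()` returns a defensive copy, and `unregister_action` is a silent no-op for unknown names. A short usage sketch, reusing `MoveAction` purely as a convenient stand-in for a user-defined action class:

```python
from embodichain.lab.sim.atomic_actions.actions import MoveAction
from embodichain.lab.sim.atomic_actions.engine import (
    get_registered_actions,
    register_action,
    unregister_action,
)

register_action("my_custom_move", MoveAction)
assert get_registered_actions()["my_custom_move"] is MoveAction

snapshot = get_registered_actions()
snapshot["rogue"] = object()  # mutating the copy...
assert "rogue" not in get_registered_actions()  # ...never leaks into the registry

unregister_action("my_custom_move")
unregister_action("my_custom_move")  # unknown names are a silent no-op
```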
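The `SemanticAnalyzer` tests likewise fix its caching policy: results are cached per label, but passing per-instance `geometry` (or `use_cache=False`) must force a fresh `ObjectSemantics` rather than serve the cached one. In sketch form:

```python
from embodichain.lab.sim.atomic_actions.engine import SemanticAnalyzer

analyzer = SemanticAnalyzer()
a = analyzer.analyze("bottle")          # analyzed once, then cached by label
assert analyzer.analyze("bottle") is a  # cache hit

# Per-instance geometry must not be served from the label cache.
b = analyzer.analyze("bottle", geometry={"bounding_box": [0.2, 0.2, 0.2]})
assert b is not a

analyzer.clear_cache()                  # the next analyze() re-populates the cache
```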
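Finally, `TestResolveTarget` reads as a specification of the engine's target-normalization rules. Restated as a sketch of the dispatch those tests imply (not the engine's actual body, which may differ in detail):

```python
from typing import Any, Union

import torch

from embodichain.lab.sim.atomic_actions.core import ObjectSemantics


def resolve_target(target: Any, analyzer) -> Union[torch.Tensor, ObjectSemantics]:
    # Poses and pre-built semantics pass through untouched.
    if isinstance(target, (torch.Tensor, ObjectSemantics)):
        return target
    # A bare label string goes through the semantic analyzer.
    if isinstance(target, str):
        return analyzer.analyze(target)
    if isinstance(target, dict):
        if "pose" in target:
            if not isinstance(target["pose"], torch.Tensor):
                raise TypeError("'pose' must be a torch.Tensor")
            return target["pose"]
        if "semantics" in target:
            if not isinstance(target["semantics"], ObjectSemantics):
                raise TypeError("'semantics' must be an ObjectSemantics instance")
            return target["semantics"]
        if "label" not in target:
            raise ValueError("dict target must provide 'label', 'pose', or 'semantics'")
        if not isinstance(target["label"], str):
            raise TypeError("'label' must be a string")
        return analyzer.analyze(target["label"])
    raise TypeError("target must be a Tensor, ObjectSemantics, str, or dict")
```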