From 82eb03a961174d448f8ffb7429270a3b7bb043e5 Mon Sep 17 00:00:00 2001 From: Adrian Riobo Date: Wed, 25 Mar 2026 15:11:16 +0100 Subject: [PATCH 1/2] feat: Added initial version for spec driven This commit creates the first struct for spec driven based on current content offered by mapt Signed-off-by: Adrian Riobo --- CLAUDE.md | 1 + specs/api/aws/allocation.md | 98 ++++++ specs/api/aws/bastion.md | 113 +++++++ specs/api/aws/compute.md | 170 ++++++++++ specs/api/aws/network.md | 115 +++++++ specs/api/aws/security-group.md | 103 ++++++ specs/api/azure/allocation.md | 122 +++++++ specs/api/azure/network.md | 107 +++++++ specs/api/azure/security-group.md | 104 ++++++ specs/api/azure/virtual-machine.md | 113 +++++++ specs/api/concepts/allocation.md | 70 ++++ specs/api/concepts/compute.md | 64 ++++ specs/api/concepts/network.md | 46 +++ specs/api/concepts/security-group.md | 57 ++++ specs/api/output-contract.md | 101 ++++++ specs/api/provider-interfaces.md | 200 ++++++++++++ specs/cmd/azure-params.md | 42 +++ specs/cmd/params.md | 273 ++++++++++++++++ specs/features/000-template.md | 58 ++++ specs/features/aws/airgap-network.md | 61 ++++ specs/features/aws/eks.md | 75 +++++ specs/features/aws/fedora-host.md | 70 ++++ specs/features/aws/kind.md | 73 +++++ specs/features/aws/mac-host.md | 82 +++++ specs/features/aws/mac-pool-service.md | 90 ++++++ specs/features/aws/openshift-snc.md | 95 ++++++ specs/features/aws/rhel-ai.md | 73 +++++ specs/features/aws/rhel-host.md | 119 +++++++ .../features/aws/serverless-self-destruct.md | 66 ++++ specs/features/aws/vpc-endpoints.md | 182 +++++++++++ specs/features/aws/windows-server-host.md | 123 +++++++ specs/features/azure/aks.md | 68 ++++ specs/features/azure/kind.md | 69 ++++ specs/features/azure/linux-host.md | 70 ++++ specs/features/azure/rhel-ai.md | 70 ++++ specs/features/azure/rhel-host.md | 75 +++++ specs/features/azure/windows-desktop.md | 73 +++++ specs/integrations/cirrus-ci.md | 109 +++++++ 
specs/integrations/github-actions.md | 98 ++++++ specs/integrations/gitlab.md | 146 +++++++++ specs/integrations/overview.md | 129 ++++++++ specs/integrations/tekton-tasks.md | 61 ++++ specs/project-context.md | 299 ++++++++++++++++++ 43 files changed, 4333 insertions(+) create mode 100644 CLAUDE.md create mode 100644 specs/api/aws/allocation.md create mode 100644 specs/api/aws/bastion.md create mode 100644 specs/api/aws/compute.md create mode 100644 specs/api/aws/network.md create mode 100644 specs/api/aws/security-group.md create mode 100644 specs/api/azure/allocation.md create mode 100644 specs/api/azure/network.md create mode 100644 specs/api/azure/security-group.md create mode 100644 specs/api/azure/virtual-machine.md create mode 100644 specs/api/concepts/allocation.md create mode 100644 specs/api/concepts/compute.md create mode 100644 specs/api/concepts/network.md create mode 100644 specs/api/concepts/security-group.md create mode 100644 specs/api/output-contract.md create mode 100644 specs/api/provider-interfaces.md create mode 100644 specs/cmd/azure-params.md create mode 100644 specs/cmd/params.md create mode 100644 specs/features/000-template.md create mode 100644 specs/features/aws/airgap-network.md create mode 100644 specs/features/aws/eks.md create mode 100644 specs/features/aws/fedora-host.md create mode 100644 specs/features/aws/kind.md create mode 100644 specs/features/aws/mac-host.md create mode 100644 specs/features/aws/mac-pool-service.md create mode 100644 specs/features/aws/openshift-snc.md create mode 100644 specs/features/aws/rhel-ai.md create mode 100644 specs/features/aws/rhel-host.md create mode 100644 specs/features/aws/serverless-self-destruct.md create mode 100644 specs/features/aws/vpc-endpoints.md create mode 100644 specs/features/aws/windows-server-host.md create mode 100644 specs/features/azure/aks.md create mode 100644 specs/features/azure/kind.md create mode 100644 specs/features/azure/linux-host.md create mode 100644 
specs/features/azure/rhel-ai.md create mode 100644 specs/features/azure/rhel-host.md create mode 100644 specs/features/azure/windows-desktop.md create mode 100644 specs/integrations/cirrus-ci.md create mode 100644 specs/integrations/github-actions.md create mode 100644 specs/integrations/gitlab.md create mode 100644 specs/integrations/overview.md create mode 100644 specs/integrations/tekton-tasks.md create mode 100644 specs/project-context.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..f3221d786 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +@specs/project-context.md diff --git a/specs/api/aws/allocation.md b/specs/api/aws/allocation.md new file mode 100644 index 000000000..6fc020fbc --- /dev/null +++ b/specs/api/aws/allocation.md @@ -0,0 +1,98 @@ +# API: Allocation (AWS) + +> Concept: [specs/api/concepts/allocation.md](../concepts/allocation.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/aws/modules/allocation` + +Single entry point for resolving where and on what instance type a target will run. +All AWS EC2 action `Create()` functions call this before any Pulumi stack is touched. + +--- + +## Types + +### `AllocationArgs` + +> `ComputeRequestArgs` and `SpotArgs` are cross-provider types — see `specs/api/provider-interfaces.md`. + +```go +type AllocationArgs struct { + ComputeRequest *cr.ComputeRequestArgs // required: hardware constraints + Prefix *string // required: used to name the spot stack + AMIProductDescription *string // optional: e.g. 
"Linux/UNIX" — used for spot price queries + AMIName *string // optional: scopes spot search to AMI availability + Spot *spotTypes.SpotArgs // nil = on-demand; non-nil = spot evaluation +} +``` + +### `AllocationResult` + +```go +type AllocationResult struct { + Region *string // AWS region to deploy into + AZ *string // availability zone within that region + SpotPrice *float64 // nil when on-demand; set when spot was selected + InstanceTypes []string // one or more compatible instance type strings +} +``` + +--- + +## Functions + +### `Allocation` + +```go +func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, error) +``` + +**Spot path** (`args.Spot != nil && args.Spot.Spot == true`): +- Creates or reuses a `spotOption-` Pulumi stack +- Queries spot prices across eligible regions; selects best region/AZ/price +- Idempotent: if the stack already exists, returns its saved outputs without re-querying +- Returns `AllocationResult` with all four fields set + +**On-demand path** (`args.Spot == nil` or `args.Spot.Spot == false`): +- Uses `mCtx.TargetHostingPlace()` as the region (set from provider default) +- Iterates AZs until one supports the required instance types +- Returns `AllocationResult` with `SpotPrice == nil` + +**Error:** returns `ErrNoSupportedInstanceTypes` if no AZ in the region supports the requested types. 
+ +--- + +## Usage Pattern + +```go +// In every AWS action Create(): +r.allocationData, err = allocation.Allocation(mCtx, &allocation.AllocationArgs{ + Prefix: &args.Prefix, + ComputeRequest: args.ComputeRequest, + AMIProductDescription: &amiProduct, // constant in the action's constants.go + Spot: args.Spot, +}) + +// Then pass results into the deploy function: +// r.allocationData.Region → NetworkArgs.Region, ComputeRequest credential region +// r.allocationData.AZ → NetworkArgs.AZ +// r.allocationData.InstanceTypes → ComputeRequest.InstaceTypes +// r.allocationData.SpotPrice → ComputeRequest.SpotPrice (when non-nil) +``` + +--- + +## Known Gaps + +- `spot.Destroy()` uses `aws.DefaultCredentials` (not region-scoped); verify this is correct + when the selected spot region differs from the default AWS region +- No re-evaluation of spot selection when the persisted region becomes significantly more expensive + between runs (by design — idempotency wins; worth documenting in user docs) + +--- + +## When to Extend This API + +Open a spec under `specs/features/aws/` and update this file when: +- Adding a new allocation strategy (e.g. reserved instances, on-demand with fallback to spot) +- Adding a new field to `AllocationArgs` that all targets would benefit from +- Changing the idempotency behaviour of the spot stack diff --git a/specs/api/aws/bastion.md b/specs/api/aws/bastion.md new file mode 100644 index 000000000..20573fa32 --- /dev/null +++ b/specs/api/aws/bastion.md @@ -0,0 +1,113 @@ +# API: Bastion + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/aws/modules/bastion` + +Creates a bastion host in the public subnet of an airgap network. Called automatically by +`network.Create()` when `Airgap=true` — action code never calls bastion directly during deploy. + +Action code calls `bastion.WriteOutputs()` in `manageResults()` when airgap is enabled. 
+ +--- + +## Types + +### `BastionArgs` + +```go +type BastionArgs struct { + Prefix string + VPC *ec2.Vpc + Subnet *ec2.Subnet // must be the PUBLIC subnet, not the target subnet +} +``` + +### `BastionResult` + +```go +type BastionResult struct { + Instance *ec2.Instance + PrivateKey *tls.PrivateKey + Usarname string // note: typo in source — "Usarname" not "Username" + Port int // always 22 +} +``` + +--- + +## Functions + +### `Create` + +```go +func Create(ctx *pulumi.Context, mCtx *mc.Context, args *BastionArgs) (*BastionResult, error) +``` + +Called internally by `network.Create()`. Not called directly from action code. + +Creates: +- Amazon Linux 2 `t2.small` instance in the public subnet +- Keypair for SSH access +- Security group allowing SSH ingress from `0.0.0.0/0` + +Exports to Pulumi stack: +- `-bastion_id_rsa` +- `-bastion_username` +- `-bastion_host` + +### `WriteOutputs` + +```go +func WriteOutputs(stackResult auto.UpResult, prefix string, destinationFolder string) error +``` + +Writes the three bastion stack outputs to files in `destinationFolder`: + +| Stack output key | Output filename | +|---|---| +| `-bastion_id_rsa` | `bastion_id_rsa` | +| `-bastion_username` | `bastion_username` | +| `-bastion_host` | `bastion_host` | + +--- + +## Usage Pattern + +```go +// In deploy(): bastion is returned as part of NetworkResult — no direct call needed +nw, err := network.Create(ctx, mCtx, &network.NetworkArgs{Airgap: true, ...}) +// nw.Bastion is populated automatically + +// Pass to Readiness() so SSH goes through the bastion: +c.Readiness(ctx, cmd, prefix, id, privateKey, username, nw.Bastion, deps) + +// In manageResults(): write bastion files alongside target files +func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string, airgap *bool) error { + if *airgap { + if err := bastion.WriteOutputs(stackResult, *prefix, mCtx.GetResultsOutputPath()); err != nil { + return err + } + } + return output.Write(stackResult, 
mCtx.GetResultsOutputPath(), results) +} +``` + +--- + +## Bastion Instance Spec (fixed, not configurable) + +| Property | Value | +|---|---| +| AMI | Amazon Linux 2 (`amzn2-ami-hvm-*-x86_64-ebs`) | +| Instance type | `t2.small` | +| Disk | 100 GiB | +| SSH user | `ec2-user` | +| SSH port | 22 | + +--- + +## When to Extend This API + +Open a spec under `specs/features/aws/` and update this file when: +- Making bastion instance type or disk size configurable +- Adding bastion support to Azure targets +- Adding support for Session Manager as an alternative to bastion SSH diff --git a/specs/api/aws/compute.md b/specs/api/aws/compute.md new file mode 100644 index 000000000..50f5adee8 --- /dev/null +++ b/specs/api/aws/compute.md @@ -0,0 +1,170 @@ +# API: Compute (AWS EC2) + +> Concept: [specs/api/concepts/compute.md](../concepts/compute.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/aws/modules/ec2/compute` + +Creates the EC2 instance (on-demand) or Auto Scaling Group (spot). Always the last Pulumi +resource created in a `deploy()` function, after networking, keypair, and security groups. + +--- + +## Types + +### `ComputeRequest` + +```go +type ComputeRequest struct { + MCtx *mc.Context + Prefix string + ID string // component ID — used in resource naming + VPC *ec2.Vpc // from network.NetworkResult.Vpc + Subnet *ec2.Subnet // from network.NetworkResult.Subnet + Eip *ec2.Eip // from network.NetworkResult.Eip + LB *lb.LoadBalancer // from network.NetworkResult.LoadBalancer; nil = on-demand + LBTargetGroups []int // TCP ports to register as LB target groups (e.g. 
[]int{22, 3389}) + AMI *ec2.LookupAmiResult + KeyResources *keypair.KeyPairResources + SecurityGroups pulumi.StringArray + InstaceTypes []string // from AllocationResult.InstanceTypes + InstanceProfile *iam.InstanceProfile // optional — required by SNC for SSM access + DiskSize *int // nil uses the module default (200 GiB) + Airgap bool + Spot bool // true when AllocationResult.SpotPrice != nil + SpotPrice float64 // only read when Spot=true + UserDataAsBase64 pulumi.StringPtrInput // cloud-init or PowerShell userdata + DependsOn []pulumi.Resource // explicit Pulumi dependencies +} +``` + +### `Compute` + +```go +type Compute struct { + Instance *ec2.Instance // set when Spot=false + AutoscalingGroup *autoscaling.Group // set when Spot=true + Eip *ec2.Eip + LB *lb.LoadBalancer + Dependencies []pulumi.Resource // pass to Readiness() and RunCommand() +} +``` + +--- + +## Functions + +### `NewCompute` + +```go +func (r *ComputeRequest) NewCompute(ctx *pulumi.Context) (*Compute, error) +``` + +- `Spot=false`: creates `ec2.Instance` with direct EIP association +- `Spot=true`: creates `ec2.LaunchTemplate` + `autoscaling.Group` with mixed instances policy, forced spot, capacity-optimized allocation strategy; registers LB target groups + +### `Readiness` + +```go +func (c *Compute) Readiness( + ctx *pulumi.Context, + cmd string, // command.CommandCloudInitWait or command.CommandPing + prefix, id string, + mk *tls.PrivateKey, + username string, + b *bastion.BastionResult, // nil when not airgap + dependencies []pulumi.Resource, +) error +``` + +Runs `cmd` over SSH on the instance. Blocks Pulumi until it succeeds (timeout: 40 minutes). +Pass `c.Dependencies` as `dependencies`. 
+ +### `RunCommand` + +```go +func (c *Compute) RunCommand( + ctx *pulumi.Context, + cmd string, + loggingCmdStd bool, // compute.LoggingCmdStd or compute.NoLoggingCmdStd + prefix, id string, + mk *tls.PrivateKey, + username string, + b *bastion.BastionResult, + dependencies []pulumi.Resource, +) (*remote.Command, error) +``` + +Like `Readiness` but returns the command resource for use as a dependency in subsequent steps. +Used by SNC to chain SSH → cluster ready → CA rotated → fetch kubeconfig. + +### `GetHostDnsName` + +```go +func (c *Compute) GetHostDnsName(public bool) pulumi.StringInput +``` + +Returns `LB.DnsName` when LB is set, otherwise `Eip.PublicDns` (public=true) or `Eip.PrivateDns` (public=false). +Export this as `-host`. + +### `GetHostIP` + +```go +func (c *Compute) GetHostIP(public bool) pulumi.StringOutput +``` + +Returns `Eip.PublicIp` or `Eip.PrivateIp`. Used by SNC (needs IP not DNS for kubeconfig replacement). + +--- + +## Readiness Commands + +| Constant | Value | When to use | +|---|---|---| +| `command.CommandCloudInitWait` | `sudo cloud-init status --long --wait \|\| [[ $? -eq 2 \|\| $? 
-eq 0 ]]` | Linux targets with cloud-init | +| `command.CommandPing` | `echo ping` | Windows targets (no cloud-init) | + +--- + +## Usage Pattern + +```go +cr := compute.ComputeRequest{ + MCtx: r.mCtx, + Prefix: *r.prefix, + ID: awsTargetID, + VPC: nw.Vpc, + Subnet: nw.Subnet, + Eip: nw.Eip, + LB: nw.LoadBalancer, + LBTargetGroups: []int{22}, // add 3389 for Windows + AMI: ami, + KeyResources: keyResources, + SecurityGroups: securityGroups, + InstaceTypes: r.allocationData.InstanceTypes, + DiskSize: &diskSize, // constant in constants.go + Airgap: *r.airgap, + UserDataAsBase64: udB64, +} +if r.allocationData.SpotPrice != nil { + cr.Spot = true + cr.SpotPrice = *r.allocationData.SpotPrice +} +c, err := cr.NewCompute(ctx) + +ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputHost), c.GetHostDnsName(!*r.airgap)) + +return c.Readiness(ctx, command.CommandCloudInitWait, + *r.prefix, awsTargetID, + keyResources.PrivateKey, amiUserDefault, + nw.Bastion, c.Dependencies) +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/aws/` and update this file when: +- Adding support for additional storage volumes +- Adding support for instance store (NVMe) configuration +- Exposing health check grace period as configurable (currently hardcoded at 1200s) +- Adding on-demand with spot fallback (noted as TODO in source) diff --git a/specs/api/aws/network.md b/specs/api/aws/network.md new file mode 100644 index 000000000..4fadf3d20 --- /dev/null +++ b/specs/api/aws/network.md @@ -0,0 +1,115 @@ +# API: Network (AWS) + +> Concept: [specs/api/concepts/network.md](../concepts/network.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/aws/modules/network` + +Creates the VPC, subnet, internet gateway, optional load balancer, and optional airgap bastion +for any AWS EC2 target. Always the first Pulumi resource created in a `deploy()` function. 
+ +--- + +## Types + +### `NetworkArgs` + +```go +type NetworkArgs struct { + Prefix string // resource name prefix + ID string // component ID (e.g. "aws-rhel") — used in resource naming + Region string // from AllocationResult.Region + AZ string // from AllocationResult.AZ + CreateLoadBalancer bool // true when spot is used (LB fronts the ASG) + Airgap bool // true for airgap topology + AirgapPhaseConnectivity Connectivity // ON (with NAT) or OFF (without NAT) + // Optional VPC endpoints to create in the public subnet. + // Empty (default) = no endpoints. Accepted: "s3", "ecr", "ssm". + // Interface endpoints ("ecr", "ssm") share a security group (TCP 443 from VPC CIDR). + // See specs/features/aws/vpc-endpoints.md + Endpoints []string +} + +type Connectivity int +const ( + ON Connectivity = iota // NAT gateway present — machine has internet egress + OFF // NAT gateway absent — machine is isolated +) +``` + +### `NetworkResult` + +```go +type NetworkResult struct { + Vpc *ec2.Vpc + Subnet *ec2.Subnet // target subnet (public or private) + SubnetRouteTableAssociation *ec2.RouteTableAssociation // only set in airgap + Eip *ec2.Eip // always created; used for LB or direct instance + LoadBalancer *lb.LoadBalancer // nil when CreateLoadBalancer=false + Bastion *bastion.BastionResult // nil when Airgap=false +} +``` + +--- + +## Functions + +### `Create` + +```go +func Create(ctx *pulumi.Context, mCtx *mc.Context, args *NetworkArgs) (*NetworkResult, error) +``` + +**Standard path** (`Airgap=false`): +- VPC (`10.0.0.0/16`) with one public subnet (`10.0.2.0/24`) and internet gateway +- No NAT gateway +- EIP always created +- Load balancer created if `CreateLoadBalancer=true`, attached to EIP + +**Airgap path** (`Airgap=true`): +- VPC with public subnet (`10.0.2.0/24`) and private (target) subnet (`10.0.101.0/24`) +- Phase ON: public subnet gets NAT gateway → private subnet has internet egress +- Phase OFF: NAT gateway removed → private subnet is isolated +- Bastion host 
created in public subnet (see `specs/api/aws/bastion.md`) +- Load balancer when `CreateLoadBalancer=true` is internal-facing (private IP) + +--- + +## CIDRs (fixed, not configurable) + +| Range | Value | +|---|---| +| VPC | `10.0.0.0/16` | +| Public subnet | `10.0.2.0/24` | +| Private (airgap target) subnet | `10.0.101.0/24` | + +--- + +## Usage Pattern + +```go +nw, err := network.Create(ctx, r.mCtx, &network.NetworkArgs{ + Prefix: *r.prefix, + ID: awsTargetID, // constant from constants.go + Region: *r.allocationData.Region, + AZ: *r.allocationData.AZ, + CreateLoadBalancer: r.allocationData.SpotPrice != nil, + Airgap: *r.airgap, + AirgapPhaseConnectivity: r.airgapPhaseConnectivity, +}) + +// Pass results to compute: +// nw.Vpc → ComputeRequest.VPC, securityGroup.SGRequest.VPC +// nw.Subnet → ComputeRequest.Subnet +// nw.Eip → ComputeRequest.Eip +// nw.LoadBalancer → ComputeRequest.LB +// nw.Bastion → ComputeRequest.Readiness() bastion arg +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/aws/` and update this file when: +- Adding support for IPv6 +- Making CIDRs configurable +- Adding a new topology (e.g. multi-AZ, private-only without bastion) diff --git a/specs/api/aws/security-group.md b/specs/api/aws/security-group.md new file mode 100644 index 000000000..10a44e189 --- /dev/null +++ b/specs/api/aws/security-group.md @@ -0,0 +1,103 @@ +# API: Security Group (AWS) + +> Concept: [specs/api/concepts/security-group.md](../concepts/security-group.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/aws/services/ec2/security-group` + +Creates an EC2 security group with ingress rules. Called from every AWS action `deploy()` +and from the bastion module internally. 
+ +--- + +## Types + +### `SGRequest` + +```go +type SGRequest struct { + Name string // resourcesUtil.GetResourceName(prefix, id, "sg") + Description string + IngressRules []IngressRules + VPC *ec2.Vpc // from network.NetworkResult.Vpc +} +``` + +### `IngressRules` + +```go +type IngressRules struct { + Description string + FromPort int + ToPort int + Protocol string // "tcp", "udp", "icmp", "-1" (all) + CidrBlocks string // CIDR string; empty = 0.0.0.0/0; mutually exclusive with SG + SG *ec2.SecurityGroup // source SG; mutually exclusive with CidrBlocks +} +``` + +### `SGResources` + +```go +type SGResources struct { + SG *ec2.SecurityGroup +} +``` + +--- + +## Functions + +### `Create` + +```go +func (r SGRequest) Create(ctx *pulumi.Context, mCtx *mc.Context) (*SGResources, error) +``` + +Creates the security group with all ingress rules and a permissive egress (all traffic allowed). + +--- + +## Pre-defined Rules + +```go +// Defined in security-group/defaults.go — copy and set CidrBlocks before use +var SSH_TCP = IngressRules{Description: "SSH", FromPort: 22, ToPort: 22, Protocol: "tcp"} +var RDP_TCP = IngressRules{Description: "RDP", FromPort: 3389, ToPort: 3389, Protocol: "tcp"} + +// Port constants +const SSH_PORT = 22 +const HTTPS_PORT = 443 +``` + +**Important:** `SSH_TCP` and `RDP_TCP` are value types — copy them before setting `CidrBlocks`: +```go +sshRule := securityGroup.SSH_TCP +sshRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4 // "0.0.0.0/0" +``` + +--- + +## Usage Pattern + +```go +sg, err := securityGroup.SGRequest{ + Name: resourcesUtil.GetResourceName(*prefix, awsTargetID, "sg"), + VPC: nw.Vpc, + Description: fmt.Sprintf("sg for %s", awsTargetID), + IngressRules: []securityGroup.IngressRules{sshRule}, +}.Create(ctx, mCtx) + +// Convert to StringArray for ComputeRequest: +sgs := util.ArrayConvert([]*ec2.SecurityGroup{sg.SG}, + func(sg *ec2.SecurityGroup) pulumi.StringInput { return sg.ID() }) +return pulumi.StringArray(sgs[:]), nil +``` + +--- 
+ +## When to Extend This API + +Open a spec under `specs/features/aws/` and update this file when: +- Adding new pre-defined rule constants (e.g. WinRM, HTTPS) +- Adding IPv6 CIDR support +- Adding support for egress rule customisation (currently always allow-all egress) diff --git a/specs/api/azure/allocation.md b/specs/api/azure/allocation.md new file mode 100644 index 000000000..bf74319ec --- /dev/null +++ b/specs/api/azure/allocation.md @@ -0,0 +1,122 @@ +# API: Allocation (Azure) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/azure/modules/allocation` + +Single entry point for resolving which Azure location, VM size, and image to use. +All Azure action `Create()` functions call this before any Pulumi stack is touched. + +> Concept: [specs/api/concepts/allocation.md](../concepts/allocation.md) + +--- + +## Types + +### `AllocationArgs` + +> `ComputeRequestArgs` and `SpotArgs` are cross-provider types — see `specs/api/provider-interfaces.md`. + +```go +type AllocationArgs struct { + ComputeRequest *cr.ComputeRequestArgs // required: hardware constraints + OSType string // e.g. "Linux", "Windows" — used for spot queries + ImageRef *data.ImageReference // optional: scopes spot search to image availability + Location *string // required for on-demand; ignored when spot selects location + Spot *spotTypes.SpotArgs // nil = on-demand; non-nil = spot evaluation +} +``` + +### `AllocationResult` + +```go +type AllocationResult struct { + Location *string // Azure region (e.g. 
"eastus") + Price *float64 // nil when on-demand; set when spot was selected + ComputeSizes []string // one or more compatible VM size strings + ImageRef *data.ImageReference // passed through from args +} +``` + +--- + +## Functions + +### `Allocation` + +```go +func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, error) +``` + +**Spot path** (`args.Spot != nil && args.Spot.Spot == true`): +- Queries spot prices across eligible Azure locations +- Scores by price × availability; selects best location/VM size +- No separate Pulumi stack (unlike AWS) — result is not persisted between runs +- Returns `AllocationResult` with all fields set + +**On-demand path** (`args.Spot == nil` or `args.Spot.Spot == false`): +- Uses `args.Location` as the target location +- Filters `ComputeRequest.ComputeSizes` to those available in the location +- Returns `AllocationResult` with `Price == nil` + +--- + +## Related Types + +### `ImageReference` +**Package:** `github.com/redhat-developer/mapt/pkg/provider/azure/data` + +```go +type ImageReference struct { + // Marketplace image + Publisher string + Offer string + Sku string + // Azure Community Gallery + CommunityImageID string + // Azure Shared Gallery (private or cross-tenant) + SharedImageID string +} +``` + +Exactly one of the three variants should be populated. Use `data.GetImageRef()` to build +a reference from OS type, arch, and version: + +```go +func GetImageRef(osTarget OSType, arch string, version string) (*ImageReference, error) +``` + +Supported `OSType` values: `data.Ubuntu`, `data.RHEL`, `data.Fedora` + +### `SpotArgs` +**Package:** `github.com/redhat-developer/mapt/pkg/provider/api/spot` + +Cross-provider type — see `specs/api/concepts/allocation.md` for field descriptions. 
+ +--- + +## Usage Pattern + +```go +// In every Azure action Create(): +r.allocationData, err = allocation.Allocation(mCtx, &allocation.AllocationArgs{ + ComputeRequest: args.ComputeRequest, + OSType: "Linux", // or "Windows" + ImageRef: imageRef, // from data.GetImageRef() + Location: &defaultLocation, // provider default, ignored if spot + Spot: args.Spot, +}) + +// Then pass results into the deploy function: +// r.allocationData.Location → NetworkArgs.Location, VM location +// r.allocationData.ComputeSizes → pick one for VirtualMachineArgs.VMSize +// r.allocationData.Price → VirtualMachineArgs.SpotPrice (when non-nil) +// r.allocationData.ImageRef → VirtualMachineArgs.Image +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/azure/` and update this file when: +- Persisting Azure spot allocation to a Pulumi stack (for idempotency, matching AWS behaviour) +- Adding new `OSType` values to `data.GetImageRef()` +- Adding `ExcludedLocations` filtering to on-demand path diff --git a/specs/api/azure/network.md b/specs/api/azure/network.md new file mode 100644 index 000000000..2e2f652c4 --- /dev/null +++ b/specs/api/azure/network.md @@ -0,0 +1,107 @@ +# API: Network (Azure) + +> Concept: [specs/api/concepts/network.md](../concepts/network.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/azure/modules/network` + +Creates the VNet, subnet, public IP, and network interface for any Azure VM target. +Called after the resource group and security group are created in a `deploy()` function. 
+ +--- + +## Types + +### `NetworkArgs` + +```go +type NetworkArgs struct { + Prefix string + ComponentID string + ResourceGroup *resources.ResourceGroup // must be created before calling network.Create() + Location *string // from AllocationResult.Location + SecurityGroup securityGroup.SecurityGroup // must be created before calling network.Create() +} +``` + +Note: unlike AWS, the security group is passed **in** to `network.Create()` rather than +being created after. Creation order in `deploy()` is therefore: +**resource group → security group → network → VM** + +### `Network` + +```go +type Network struct { + Network *network.VirtualNetwork + PublicSubnet *network.Subnet + NetworkInterface *network.NetworkInterface // pass to VirtualMachineArgs.NetworkInterface + PublicIP *network.PublicIPAddress // export as -host +} +``` + +--- + +## Functions + +### `Create` + +```go +func Create(ctx *pulumi.Context, mCtx *mc.Context, args *NetworkArgs) (*Network, error) +``` + +Creates in sequence: +1. VNet (`10.0.0.0/16`) with RunID as name +2. Subnet (`10.0.2.0/24`) +3. Static Standard-SKU public IP +4. NIC attached to subnet + public IP + security group + +All resources are tagged via `mCtx.ResourceTags()`. + +--- + +## CIDRs (fixed, not configurable) + +| Range | Value | +|---|---| +| VNet | `10.0.0.0/16` | +| Subnet | `10.0.2.0/24` | + +--- + +## Usage Pattern + +```go +// 1. Create resource group (outside network module) +rg, err := resources.NewResourceGroup(ctx, ..., &resources.ResourceGroupArgs{ + Location: pulumi.String(*r.allocationData.Location), +}) + +// 2. Create security group (before network) +sg, err := securityGroup.Create(ctx, mCtx, &securityGroup.SecurityGroupArgs{ + Name: resourcesUtil.GetResourceName(*r.prefix, azureTargetID, "sg"), + RG: rg, + Location: r.allocationData.Location, + IngressRules: []securityGroup.IngressRules{securityGroup.SSH_TCP}, +}) + +// 3. 
Create network (takes sg as input) +n, err := network.Create(ctx, mCtx, &network.NetworkArgs{ + Prefix: *r.prefix, + ComponentID: azureTargetID, + ResourceGroup: rg, + Location: r.allocationData.Location, + SecurityGroup: sg, +}) + +// 4. Pass to VM: +// n.NetworkInterface → VirtualMachineArgs.NetworkInteface +// n.PublicIP.IpAddress → export as -host +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/azure/` and update this file when: +- Adding airgap support for Azure (bastion + private subnet pattern) +- Adding load balancer support for spot VM scenarios +- Making CIDRs configurable diff --git a/specs/api/azure/security-group.md b/specs/api/azure/security-group.md new file mode 100644 index 000000000..9995af4bd --- /dev/null +++ b/specs/api/azure/security-group.md @@ -0,0 +1,104 @@ +# API: Security Group (Azure) + +> Concept: [specs/api/concepts/security-group.md](../concepts/security-group.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/azure/services/network/security-group` + +Creates an Azure Network Security Group (NSG). The NSG is created **before** the network +module is called, because `network.Create()` takes the NSG as an input argument. +See `specs/api/azure/network.md`. + +--- + +## Types + +### `SecurityGroupArgs` + +```go +type SecurityGroupArgs struct { + Name string // resourcesUtil.GetResourceName(prefix, id, "sg") + RG *resources.ResourceGroup // resource group the NSG belongs to + Location *string // from AllocationResult.Location + IngressRules []IngressRules +} +``` + +### `IngressRules` + +```go +type IngressRules struct { + Description string + FromPort int + ToPort int + Protocol string // "tcp", "udp", "*" (all) + CidrBlocks string // source CIDR; empty = allow any source ("*") +} +``` + +### `SecurityGroup` + +```go +type SecurityGroup = *network.NetworkSecurityGroup +``` + +A type alias — the raw Pulumi Azure NSG resource. Passed directly into `NetworkArgs.SecurityGroup`. 
+ +--- + +## Functions + +### `Create` + +```go +func Create(ctx *pulumi.Context, mCtx *mc.Context, args *SecurityGroupArgs) (SecurityGroup, error) +``` + +Creates the NSG with inbound allow rules. Priorities are auto-assigned starting at 1001, +incrementing by 1 per rule. Egress is unrestricted (Azure default). + +--- + +## Pre-defined Rules + +```go +// Defined in security-group/defaults.go — safe to use directly (not value copies like AWS) +var SSH_TCP = IngressRules{Description: "SSH", FromPort: 22, ToPort: 22, Protocol: "tcp"} +var RDP_TCP = IngressRules{Description: "RDP", FromPort: 3389, ToPort: 3389, Protocol: "tcp"} + +var SSH_PORT int = 22 +var RDP_PORT int = 3389 +``` + +Unlike the AWS equivalent, Azure `IngressRules` do not have a source SG field — only CIDR. +Empty `CidrBlocks` allows from any source (`*`), which is the default for SSH and RDP rules. + +--- + +## Usage Pattern + +```go +sg, err := securityGroup.Create(ctx, mCtx, &securityGroup.SecurityGroupArgs{ + Name: resourcesUtil.GetResourceName(*r.prefix, azureTargetID, "sg"), + RG: rg, + Location: r.allocationData.Location, + IngressRules: []securityGroup.IngressRules{ + securityGroup.SSH_TCP, + // securityGroup.RDP_TCP, // add for Windows targets + }, +}) + +// Pass directly into network: +n, err := network.Create(ctx, mCtx, &network.NetworkArgs{ + SecurityGroup: sg, + ... 
+}) +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/azure/` and update this file when: +- Adding source NSG reference support (intra-VNet rules) +- Adding egress rule customisation +- Adding new pre-defined rule constants diff --git a/specs/api/azure/virtual-machine.md b/specs/api/azure/virtual-machine.md new file mode 100644 index 000000000..8e8e83498 --- /dev/null +++ b/specs/api/azure/virtual-machine.md @@ -0,0 +1,113 @@ +# API: Virtual Machine (Azure) + +> Concept: [specs/api/concepts/compute.md](../concepts/compute.md) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/azure/modules/virtual-machine` + +Creates an Azure VM. The Azure equivalent of `specs/api/aws/compute.md`. +Always the last Pulumi resource created in an Azure `deploy()` function. + +--- + +## Types + +### `VirtualMachineArgs` + +```go +type VirtualMachineArgs struct { + Prefix string + ComponentID string + ResourceGroup *resources.ResourceGroup + NetworkInteface *network.NetworkInterface // note: typo in source — "Inteface" not "Interface" + VMSize string // pick one from AllocationResult.ComputeSizes + + SpotPrice *float64 // nil = on-demand; non-nil = spot (sets Priority="Spot") + + Image *data.ImageReference // from AllocationResult.ImageRef + + // Linux: provide PrivateKey (password auth disabled) + PrivateKey *tls.PrivateKey + // Windows: provide AdminPasswd (password auth) + AdminPasswd *random.RandomPassword + + AdminUsername string + UserDataAsBase64 pulumi.StringPtrInput // cloud-init or custom script (base64) + Location string // from AllocationResult.Location +} +``` + +### `VirtualMachine` + +```go +type VirtualMachine = *compute.VirtualMachine +``` + +The returned value is the raw Pulumi Azure VM resource. +Access the public IP via `Network.PublicIP.IpAddress` (not from the VM itself). 
+ +--- + +## Functions + +### `Create` + +```go +func Create(ctx *pulumi.Context, mCtx *mc.Context, args *VirtualMachineArgs) (VirtualMachine, error) +``` + +- **Linux VMs**: sets `LinuxConfiguration` with SSH public key; disables password authentication +- **Windows VMs**: sets `AdminPassword`; no SSH configuration +- **Spot**: sets `Priority = "Spot"` and `BillingProfile.MaxPrice = *SpotPrice` +- **On-demand**: no priority or billing profile set +- Disk: 200 GiB Standard_LRS, created from image +- Boot diagnostics disabled (improves provisioning time) +- Image resolution: handles Marketplace, Community Gallery, and Shared Gallery variants automatically + +--- + +## Image Resolution (internal) + +`convertImageRef()` resolves the `ImageReference` to a Pulumi `ImageReferenceArgs`: + +| ImageReference field set | Azure resource used | +|---|---| +| `CommunityImageID` | Community Gallery (`communityGalleryImageId`) | +| `SharedImageID` (own subscription) | Direct resource ID | +| `SharedImageID` (other subscription) | Shared Gallery (`sharedGalleryImageId`) | +| `Publisher` + `Offer` + `Sku` | Marketplace image; SKU upgraded to Gen2 if available | + +Self-owned detection uses `AZURE_SUBSCRIPTION_ID` env var against the image resource path. 
+ +--- + +## Usage Pattern + +```go +vm, err := virtualmachine.Create(ctx, mCtx, &virtualmachine.VirtualMachineArgs{ + Prefix: *r.prefix, + ComponentID: azureTargetID, + ResourceGroup: rg, + NetworkInteface: n.NetworkInterface, + VMSize: r.allocationData.ComputeSizes[0], + SpotPrice: r.allocationData.Price, // nil if on-demand + Image: r.allocationData.ImageRef, + AdminUsername: amiUserDefault, + PrivateKey: privateKey, // Linux + // AdminPasswd: password, // Windows instead + UserDataAsBase64: udB64, + Location: *r.allocationData.Location, +}) + +// Export host from the network public IP (not from the VM): +ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputHost), n.PublicIP.IpAddress) +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/azure/` and update this file when: +- Making disk size configurable +- Adding data disk support +- Adding support for VM extensions (currently Windows uses custom script extension directly in some actions) +- Adding `RunCommand` / `Readiness` methods equivalent to `specs/api/aws/compute.md` diff --git a/specs/api/concepts/allocation.md b/specs/api/concepts/allocation.md new file mode 100644 index 000000000..4572e444e --- /dev/null +++ b/specs/api/concepts/allocation.md @@ -0,0 +1,70 @@ +# Concept: Allocation + +Allocation is the pre-stack step that resolves **where** a target will run and **on what hardware**, +before any Pulumi resource is created. Every provider action `Create()` calls its allocation +function first and stores the result on the action struct. + +--- + +## Provider-Agnostic Contract + +1. Accept hardware constraints (`ComputeRequestArgs`) and an optional spot preference (`SpotArgs`). +2. On the **spot path**: query cloud pricing across eligible regions/locations; select best price. +3. On the **on-demand path**: use the provider default region/location; filter to available sizes. +4. Return a result struct that downstream modules consume directly — no re-querying. 
+ +--- + +## Cross-Provider Types + +These types are defined in the shared provider API and used by both AWS and Azure allocation. + +### `ComputeRequestArgs` +**Package:** `github.com/redhat-developer/mapt/pkg/provider/api/compute-request` + +```go +type ComputeRequestArgs struct { + CPUs int32 + GPUs int32 + GPUManufacturer string + GPUModel string + MemoryGib int32 + Arch Arch // Amd64 | Arm64 + NestedVirt bool // true when a profile requires nested virtualisation + ComputeSizes []string // skip selector — use these exact instance types/sizes +} +``` + +When `ComputeSizes` is set, the instance selector is skipped entirely. + +### `SpotArgs` +**Package:** `github.com/redhat-developer/mapt/pkg/provider/api/spot` + +```go +type SpotArgs struct { + Spot bool + Tolerance Tolerance // Lowest | Low | Medium | High | Highest + IncreaseRate int // % above current price for bid (default 30) + ExcludedHostingPlaces []string // regions/locations to skip +} +``` + +--- + +## Provider Comparison + +| | AWS (`specs/api/aws/allocation.md`) | Azure (`specs/api/azure/allocation.md`) | +|---|---|---| +| Location key | Region + AZ (two fields in result) | Location (one field in result) | +| Spot persistence | Separate `spotOption` Pulumi stack — idempotent across runs | No stack — re-evaluated each run | +| Instance selector | `aws/data.NewComputeSelector()` | `azure/data.NewComputeSelector()` | +| Extra input | `AMIName`, `AMIProductDescription` | `OSType`, `ImageRef` | +| Extra output | `AZ *string` | `ImageRef *data.ImageReference` | + +--- + +## Implementation References + +- AWS: `specs/api/aws/allocation.md` +- Azure: `specs/api/azure/allocation.md` +- Shared types: `specs/api/provider-interfaces.md` diff --git a/specs/api/concepts/compute.md b/specs/api/concepts/compute.md new file mode 100644 index 000000000..a4c1ae052 --- /dev/null +++ b/specs/api/concepts/compute.md @@ -0,0 +1,64 @@ +# Concept: Compute + +The compute module is always the **last Pulumi resource created** 
in a `deploy()` function. +It creates the VM or instance, wires it to the network and security group, and runs a +readiness check before the stack is considered complete. + +--- + +## Provider-Agnostic Contract + +1. Accept network outputs (subnet, public IP), credentials (keypair or password), security groups, + instance types/sizes, and userdata from the action. +2. On the **spot path**: use a spot-aware resource (AWS ASG / Azure VM priority). +3. On the **on-demand path**: use a standard instance/VM with direct IP assignment. +4. Run a **readiness check** — a remote command that blocks Pulumi until the machine is ready. +5. Export the host address as `-host`. + +--- + +## Spot Mechanism + +| | AWS (`specs/api/aws/compute.md`) | Azure (`specs/api/azure/virtual-machine.md`) | +|---|---|---| +| Spot resource | `ec2.LaunchTemplate` + `autoscaling.Group` (ASG) | Single VM with `Priority="Spot"` + `MaxPrice` | +| Load balancer | Required — ASG registers target groups | Not applicable | +| Selection source | `AllocationResult.SpotPrice != nil` → `Spot=true` | `AllocationResult.Price != nil` → non-nil `SpotPrice` | + +--- + +## Readiness Check + +| | AWS | Azure | +|---|---|---| +| Method | `Compute.Readiness()` — built into the module | Remote command run directly in the action | +| Linux command | `sudo cloud-init status --long --wait` | Same command, called differently | +| Windows command | `echo ping` | Equivalent inline | +| Timeout | 40 minutes | Varies by action | + +--- + +## Host Address + +| | AWS | Azure | +|---|---|---| +| DNS/IP source | `Compute.GetHostDnsName()` — returns LB DNS or EIP public DNS | `Network.PublicIP.IpAddress` — from the network module, not the VM | +| Export key | `-host` | `-host` | + +--- + +## Provider Comparison + +| | AWS (`specs/api/aws/compute.md`) | Azure (`specs/api/azure/virtual-machine.md`) | +|---|---|---| +| Disk size | Configurable via `DiskSize *int` | Fixed at 200 GiB | +| LB support | Yes (for spot ASG) | No | +| Airgap 
| Yes — bastion passed to `Readiness()` | No | +| Readiness helper | `Compute.Readiness()` + `RunCommand()` | No equivalent yet | + +--- + +## Implementation References + +- AWS: `specs/api/aws/compute.md` +- Azure: `specs/api/azure/virtual-machine.md` diff --git a/specs/api/concepts/network.md b/specs/api/concepts/network.md new file mode 100644 index 000000000..a70afaa27 --- /dev/null +++ b/specs/api/concepts/network.md @@ -0,0 +1,46 @@ +# Concept: Network + +The network module is always the **first Pulumi resource created** in a `deploy()` function. +It establishes the virtual network, subnet, and public IP that all subsequent resources depend on. + +--- + +## Provider-Agnostic Contract + +1. Accept a prefix, component ID, and location/region from `AllocationResult`. +2. Create a virtual network + subnet with fixed CIDRs (`10.0.0.0/16` / `10.0.2.0/24`). +3. Produce a public IP (or EIP) and a subnet reference consumed by the compute module. +4. Return a result struct — downstream modules must not re-query network state. + +--- + +## Creation Order in `deploy()` + +``` +network.Create() ← first +securityGroup.Create() ← depends on network (AWS only; Azure reverses this) +keypair / password +compute.NewCompute() ← last +``` + +Azure is the exception: the security group is created **before** `network.Create()` because +`NetworkArgs.SecurityGroup` is a required input. See `specs/api/concepts/security-group.md`. 
+ +--- + +## Provider Comparison + +| | AWS (`specs/api/aws/network.md`) | Azure (`specs/api/azure/network.md`) | +|---|---|---| +| Airgap support | Yes — two-phase NAT removal, private subnet, bastion | No | +| Load balancer | Optional, created internally when spot is used | Not managed by this module | +| Security group | Created after network; passed to compute | Created before network; passed in as input | +| Public address output | EIP (`NetworkResult.Eip`) or LB DNS | `Network.PublicIP.IpAddress` | +| Bastion | Automatic when `Airgap=true` | Not available | + +--- + +## Implementation References + +- AWS: `specs/api/aws/network.md` +- Azure: `specs/api/azure/network.md` diff --git a/specs/api/concepts/security-group.md b/specs/api/concepts/security-group.md new file mode 100644 index 000000000..57076b794 --- /dev/null +++ b/specs/api/concepts/security-group.md @@ -0,0 +1,57 @@ +# Concept: Security Group + +A security group (or network security group) restricts inbound traffic to the VM/instance. +Both providers create one per target with explicit ingress rules and permissive egress (allow all). + +--- + +## Provider-Agnostic Contract + +1. Accept a list of ingress rules (port range, protocol, source CIDR). +2. Deny all inbound traffic not matched by a rule. +3. Allow all outbound traffic (permissive egress — not configurable today). +4. Return a resource reference consumed by the network or compute module. + +--- + +## Creation Order + +This is the key structural difference between providers: + +| Provider | When created | Passed to | +|---|---|---| +| AWS | After `network.Create()` | `compute.ComputeRequest.SecurityGroups` | +| Azure | Before `network.Create()` | `network.NetworkArgs.SecurityGroup` (required input) | + +The Azure network module attaches the NSG to the NIC internally, so the VM does not receive +the security group directly. + +--- + +## Pre-defined Rules + +Both providers export `SSH_TCP` and `RDP_TCP` rule constants. 
Usage differs: + +| | AWS | Azure | +|---|---|---| +| Type | Value type — **must copy** before setting `CidrBlocks` | Reference — safe to use directly | +| Source SG | Supported via `IngressRules.SG` | Not supported (CIDR only) | +| Protocol wildcard | `"-1"` (all traffic) | `"*"` | +| Priority | Not applicable | Auto-assigned from 1001 upward | + +--- + +## Provider Comparison + +| | AWS (`specs/api/aws/security-group.md`) | Azure (`specs/api/azure/security-group.md`) | +|---|---|---| +| Return type | `*SGResources{SG *ec2.SecurityGroup}` | `SecurityGroup` (alias for `*network.NetworkSecurityGroup`) | +| Source SG in rules | Yes | No | +| VPC/RG binding | Bound to VPC (`SGRequest.VPC`) | Bound to resource group (`SecurityGroupArgs.RG`) | + +--- + +## Implementation References + +- AWS: `specs/api/aws/security-group.md` +- Azure: `specs/api/azure/security-group.md` diff --git a/specs/api/output-contract.md b/specs/api/output-contract.md new file mode 100644 index 000000000..9c0e8d8c0 --- /dev/null +++ b/specs/api/output-contract.md @@ -0,0 +1,101 @@ +# API: Output Contract + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/util/output` + +Defines the files written to `ResultsOutput` after a successful `create`. These files are +the interface between mapt and the CI systems that consume it (Tekton tasks, GitHub workflows, +shell scripts). Changing a filename is a breaking change for all consumers. + +--- + +## Function + +### `output.Write` + +```go +func Write(stackResult auto.UpResult, destinationFolder string, results map[string]string) error +``` + +- `results` maps a Pulumi stack output key → destination filename +- Writes each value as a plain text file with permissions `0600` +- Silently skips outputs that are not strings (logs a debug message) +- No-op when `destinationFolder` is empty + +--- + +## Standard Output Files + +These filenames are stable across all targets that produce them. +CI consumers depend on these exact names. 
+ +| Filename | Content | Targets | +|---|---|---| +| `host` | Hostname or IP to SSH/RDP to | All | +| `username` | OS login username | All | +| `id_rsa` | PEM-encoded SSH private key | All Linux targets, Windows (SSH) | +| `userpassword` | Administrator password (plaintext) | Windows targets | +| `kubeconfig` | kubectl-compatible kubeconfig YAML | SNC, EKS, Kind | +| `kubeadmin-password` | OCP kubeadmin password | SNC only | +| `developer-password` | OCP developer password | SNC only | + +### Airgap Additional Files (written by `bastion.WriteOutputs`) + +| Filename | Content | +|---|---| +| `bastion_host` | Bastion public IP | +| `bastion_username` | Bastion SSH username (`ec2-user`) | +| `bastion_id_rsa` | Bastion SSH private key | + +--- + +## Pulumi Stack Export Keys + +Stack output keys follow the pattern `-`. The `prefix` defaults to `"main"` +when not explicitly set by the caller. + +| Stack output key | → | Filename | +|---|---|---| +| `-host` | | `host` | +| `-username` | | `username` | +| `-id_rsa` | | `id_rsa` | +| `-userpassword` | | `userpassword` | +| `-kubeconfig` | | `kubeconfig` | +| `-kubeadmin-password` | | `kubeadmin-password` | +| `-developer-password` | | `developer-password` | +| `-bastion_id_rsa` | | `bastion_id_rsa` | +| `-bastion_username` | | `bastion_username` | +| `-bastion_host` | | `bastion_host` | + +--- + +## Usage Pattern in `manageResults()` + +```go +func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string, airgap *bool) error { + results := map[string]string{ + fmt.Sprintf("%s-%s", *prefix, outputUsername): "username", + fmt.Sprintf("%s-%s", *prefix, outputUserPrivateKey): "id_rsa", + fmt.Sprintf("%s-%s", *prefix, outputHost): "host", + } + if *airgap { + if err := bastion.WriteOutputs(stackResult, *prefix, mCtx.GetResultsOutputPath()); err != nil { + return err + } + } + return output.Write(stackResult, mCtx.GetResultsOutputPath(), results) +} +``` + +Output key constants (`outputHost`, `outputUsername`, 
etc.) are defined in the action's +`constants.go` and must match the `ctx.Export(...)` calls in `deploy()`. + +--- + +## When to Change This Contract + +Any change to filenames is **breaking** — update this spec and notify consumers: +- Tekton task definitions that read the files (`tkn/template/`) +- GitHub workflow files that reference the output directory +- Any external documentation or user guides + +New output files can be added without breaking existing consumers. diff --git a/specs/api/provider-interfaces.md b/specs/api/provider-interfaces.md new file mode 100644 index 000000000..9c1adf5f5 --- /dev/null +++ b/specs/api/provider-interfaces.md @@ -0,0 +1,200 @@ +# API: Provider Interfaces (Cross-Cloud) + +**Package:** `github.com/redhat-developer/mapt/pkg/provider/api` + +Defines the hardware-constraint and spot-selection types that are **shared across all cloud +providers**. Both AWS and Azure allocations are driven by the same input structs; each provider +supplies its own implementation of the selector interfaces. + +This layer sits *below* `specs/api/aws/allocation.md` and `specs/api/azure/allocation.md` — +those allocation modules call these selectors internally. Action code interacts with these +types directly (passing `ComputeRequestArgs` and `SpotArgs` into `AllocationArgs`), but +never calls the selector interfaces itself. 
+ +--- + +## Package: `compute-request` + +**Full path:** `github.com/redhat-developer/mapt/pkg/provider/api/compute-request` + +### Types + +```go +type Arch int + +const ( + Amd64 Arch = iota + 1 + Arm64 + MaxResults = 20 // max VM types returned per selector call +) + +type ComputeRequestArgs struct { + CPUs int32 + GPUs int32 + GPUManufacturer string + GPUModel string + MemoryGib int32 + Arch Arch + NestedVirt bool + // Override: skip selector entirely, use these sizes directly + ComputeSizes []string +} + +type ComputeSelector interface { + Select(args *ComputeRequestArgs) ([]string, error) +} +``` + +`ComputeRequestArgs` is embedded in every `AllocationArgs` on both clouds. +If `ComputeSizes` is pre-populated, the selector is skipped — useful when +a specific VM type is required rather than capacity-matched selection. + +### Functions + +```go +func Validate(cpus, memory int32, arch Arch) error +func (a Arch) String() string // "x64" | "Arm64" +``` + +### Provider Implementations + +| Provider | Type | Package | +|---|---|---| +| AWS | `data.ComputeSelector` | `pkg/provider/aws/data` | +| Azure | `data.ComputeSelector` | `pkg/provider/azure/data` | + +**AWS** uses the `amazon-ec2-instance-selector` library to filter by vCPUs, memory, and arch +across all available instance types. + +**Azure** queries the ARM Resource SKUs API, then filters by vCPUs, memory, arch, HyperV Gen2 +support, nested virt eligibility, PremiumIO, and `AcceleratedNetworkingEnabled`. Results are +sorted by vCPU count ascending. Azure also exposes `FilterComputeSizesByLocation()` as a +standalone helper used by the on-demand allocation path. 
+ +--- + +## Package: `spot` + +**Full path:** `github.com/redhat-developer/mapt/pkg/provider/api/spot` + +### Types + +```go +type Tolerance int + +const ( + Lowest Tolerance = iota // eviction rate 0–5% (AWS: placement score ≥ 7) + Low // eviction rate 5–10% + Medium // eviction rate 10–15% + High // eviction rate 15–20% + Highest // eviction rate 20%+ (AWS: placement score ≥ 1) +) + +var DefaultTolerance = Lowest + +type SpotArgs struct { + Spot bool + Tolerance Tolerance + IncreaseRate int // bid price = base × (1 + IncreaseRate/100); default 30% + ExcludedHostingPlaces []string // regions/locations to skip +} + +type SpotRequestArgs struct { + ComputeRequest *cr.ComputeRequestArgs + OS *string // "linux", "windows", "RHEL", "fedora" — affects product filter + ImageName *string // AWS: scopes region search to AMI availability + SpotParams *SpotArgs +} + +type SpotResults struct { + ComputeType []string // AWS: multiple types for ASG; Azure: single type + Price float64 // bid price (already inflated by SafePrice) + HostingPlace string // AWS: region; Azure: location + AvailabilityZone string // AWS only; empty on Azure + ChanceLevel int // not yet populated (TODO in source) +} + +type SpotSelector interface { + Select(mCtx *mc.Context, args *SpotRequestArgs) (*SpotResults, error) +} +``` + +### Functions + +```go +func ParseTolerance(str string) (Tolerance, bool) +// "lowest"|"low"|"medium"|"high"|"highest" → Tolerance + +func SafePrice(basePrice float64, spotPriceIncreaseRate *int) float64 +// Returns basePrice × (1 + rate/100). Default rate = 30%. +// Called by both provider SpotInfo() implementations before returning results. 
+``` + +### Provider Implementations + +| Provider | Type | Selection strategy | +|---|---|---| +| AWS | `data.SpotSelector` | Placement scores × spot price history across all regions | +| Azure | `data.SpotSelector` | Eviction rates × spot price (via Azure Resource Graph) | + +**AWS**: Queries placement scores (API requires an opt-in region as API endpoint) and +spot price history in parallel across all regions. Filters regions where the AMI is +available. Returns up to 8 instance types for the winning AZ (used by the ASG mixed-instances +policy). + +**Azure**: Queries eviction rates and spot prices via Azure Resource Graph KQL. Crosses eviction +rate buckets against allowed tolerance, then picks the lowest-price / lowest-eviction-rate +location. Falls back to price-only ranking if eviction-rate data is unavailable. Returns a +single compute size. + +--- + +## Package: `config/userdata` + +**Full path:** `github.com/redhat-developer/mapt/pkg/provider/api/config/userdata` + +```go +type CloudConfig interface { + CloudConfig() (*string, error) +} +``` + +Implemented by cloud-init / cloud-config builder packages used to generate the +`UserData` / `UserDataAsBase64` field on compute resources. Every target that +injects software at boot implements this interface. 
+ +--- + +## Architecture Summary + +``` +pkg/provider/api/ ← provider-agnostic types & interfaces + compute-request/ + ComputeRequestArgs used in AllocationArgs (both clouds) + ComputeSelector interface + spot/ + SpotArgs, SpotResults used in AllocationArgs (both clouds) + SpotSelector interface + SafePrice() shared bid-price calculation + config/userdata/ + CloudConfig interface for cloud-init builders + +pkg/provider/aws/data/ ← AWS implementations + ComputeSelector ec2-instance-selector + SpotSelector placement scores + price history + +pkg/provider/azure/data/ ← Azure implementations + ComputeSelector ARM Resource SKUs API + SpotSelector Azure Resource Graph (eviction + price) +``` + +--- + +## When to Extend This API + +Open a spec under `specs/features/aws/` or `specs/features/azure/` and update this file when: +- Adding a third cloud provider (implement both interfaces in the new `data` package) +- Adding GPU-based compute selection (currently fields exist but filtering is partial) +- Making `CPUsRange` / `MemoryRange` filters active (currently commented out) +- Populating `SpotResults.ChanceLevel` (currently a TODO in both implementations) +- Adding `ExcludedRegions` to AWS spot path (field exists in `SpotInfoArgs` but not wired into `SpotRequestArgs`) diff --git a/specs/cmd/azure-params.md b/specs/cmd/azure-params.md new file mode 100644 index 000000000..505079c2c --- /dev/null +++ b/specs/cmd/azure-params.md @@ -0,0 +1,42 @@ +# CLI Params: Azure Shared + +**Package:** `github.com/redhat-developer/mapt/cmd/mapt/cmd/azure/params` +**File:** `cmd/mapt/cmd/azure/params/params.go` + +Azure-provider-specific shared params, used alongside the cross-provider params in +`specs/cmd/params.md`. Every Azure `create` command that accepts a location registers +this flag. + +--- + +## Location + +```go +const ( + Location = "location" + LocationDesc = "If spot is passed location will be calculated based on spot results. 
Otherwise location will be used to create resources." + LocationDefault = "westeurope" +) +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--location` | string | `westeurope` | Azure region; ignored when `--spot` is set (spot selects the location) | + +No `Add*Flags` helper — each cmd registers it directly: + +```go +flagSet.StringP(azureParams.Location, "", azureParams.LocationDefault, azureParams.LocationDesc) +``` + +Mapped to `AllocationArgs.Location` inside the action. When spot is active the allocation +module ignores this value and picks the best-priced region automatically. + +See `specs/api/azure/allocation.md`. + +--- + +## When to Extend + +Update this file when adding new Azure-wide shared params (e.g. resource group prefix, +subscription override). diff --git a/specs/cmd/params.md b/specs/cmd/params.md new file mode 100644 index 000000000..b8c259327 --- /dev/null +++ b/specs/cmd/params.md @@ -0,0 +1,273 @@ +# CLI Params Layer + +**Package:** `github.com/redhat-developer/mapt/cmd/mapt/cmd/params` +**File:** `cmd/mapt/cmd/params/params.go` + +Central registry for all reusable CLI flags. Every flag that appears on more than one +`create` command is defined here, not in the individual cmd files. Individual cmd files +only define flags that are unique to that target. + +--- + +## The Three-Part Pattern + +Every flag group follows the same structure: + +### 1. Constants + +```go +// Exported: used by cmd files to read values via viper +const FlagName string = "flag-name" +const FlagNameDesc string = "human readable description" +const FlagNameDefault string = "default-value" // optional + +// Unexported: only used within params.go +const internalFlag string = "internal-flag-name" +``` + +Use **exported** constants when the cmd file needs to call `viper.GetX(params.FlagName)` +directly. Use **unexported** when the value is only read inside a `*Args()` helper in +this package. + +### 2. 
`Add*Flags(fs *pflag.FlagSet)` + +Registers flags on the flagset passed in. Called once per `create` command that needs +this group: + +```go +func AddSpotFlags(fs *pflag.FlagSet) { + fs.Bool(spot, false, spotDesc) + fs.StringP(spotTolerance, "", spotToleranceDefault, spotToleranceDesc) + fs.StringSliceP(spotExcludedHostedZones, "", []string{}, spotExcludedHostedZonesDesc) +} +``` + +### 3. `*Args() *SomeType` + +Reads values from viper and returns a populated struct (or `nil` if the feature is not +enabled). Called inside the cmd's `RunE` when building the action args: + +```go +func SpotArgs() *spotTypes.SpotArgs { + if viper.IsSet(spot) { + return &spotTypes.SpotArgs{ ... } + } + return nil // nil = feature not requested +} +``` + +Returning `nil` is the canonical "not configured" signal — action code checks for nil +before using the result. + +--- + +## How Viper Binding Works + +Each `create` command binds its flagset to viper at the start of `RunE`: + +```go +RunE: func(cmd *cobra.Command, args []string) error { + if err := viper.BindPFlags(cmd.Flags()); err != nil { + return err + } + // now viper.GetX(flagName) works for all registered flags + ... +} +``` + +After binding, all flag values are accessible via `viper.GetString`, `viper.GetBool`, +`viper.GetInt32`, `viper.GetStringSlice`, `viper.IsSet`, etc. + +--- + +## Existing Flag Groups + +### Common (every command) + +```go +func AddCommonFlags(fs *pflag.FlagSet) +``` + +| Flag | Type | Description | +|---|---|---| +| `project-name` | string | Pulumi project name | +| `backed-url` | string | State backend URL (`file://`, `s3://`, `azblob://`) | + +Added to the parent command's `PersistentFlags` so it applies to all subcommands. 
+ +--- + +### Debug + +```go +func AddDebugFlags(fs *pflag.FlagSet) +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `debug` | bool | false | Enable debug traces | +| `debug-level` | uint | 3 | Verbosity 1–9 | + +--- + +### Compute Request + +```go +func AddComputeRequestFlags(fs *pflag.FlagSet) +func ComputeRequestArgs() *cr.ComputeRequestArgs +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `cpus` | int32 | 8 | vCPU count | +| `memory` | int32 | 64 | RAM in GiB | +| `gpus` | int32 | 0 | GPU count | +| `gpu-manufacturer` | string | — | e.g. `NVIDIA` | +| `nested-virt` | bool | false | Require nested virtualisation support | +| `compute-sizes` | []string | — | Override selector; comma-separated instance types | +| `arch` | string | `x86_64` | `x86_64` or `arm64` | + +`ComputeRequestArgs()` maps `arch` to `cr.Amd64` / `cr.Arm64`. When `--snc` is set, +`NestedVirt` is forced true regardless of `--nested-virt`. + +See `specs/api/provider-interfaces.md` for `ComputeRequestArgs` type. + +--- + +### Spot + +```go +func AddSpotFlags(fs *pflag.FlagSet) +func SpotArgs() *spotTypes.SpotArgs // returns nil when --spot not set +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `spot` | bool | false | Enable spot selection | +| `spot-eviction-tolerance` | string | `lowest` | `lowest`/`low`/`medium`/`high`/`highest` | +| `spot-increase-rate` | int | 30 | Bid price % above current price | +| `spot-excluded-regions` | []string | — | Regions to skip | + +Returns `nil` when `--spot` is not set — this signals on-demand to allocation. + +See `specs/api/provider-interfaces.md` for `SpotArgs` type. 
+ +--- + +### Network (to be added — `specs/features/aws/vpc-endpoints.md`) + +```go +func AddNetworkFlags(fs *pflag.FlagSet) +func NetworkEndpoints() []string +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `endpoints` | []string | — | VPC endpoints to create: `s3`, `ecr`, `ssm` | + +--- + +### GitHub Actions Runner + +```go +func AddGHActionsFlags(fs *pflag.FlagSet) +func GithubRunnerArgs() *github.GithubRunnerArgs // returns nil when token not set +``` + +| Flag | Type | Description | +|---|---|---| +| `ghactions-runner-token` | string | Registration token | +| `ghactions-runner-repo` | string | Repository or org URL | +| `ghactions-runner-labels` | []string | Runner labels | + +Returns `nil` when `--ghactions-runner-token` is not set. +Platform and arch are derived from `--arch`; not user-configurable at CLI level. + +--- + +### Cirrus CI Persistent Worker + +```go +func AddCirrusFlags(fs *pflag.FlagSet) +func CirrusPersistentWorkerArgs() *cirrus.PersistentWorkerArgs // returns nil when token not set +``` + +| Flag | Type | Description | +|---|---|---| +| `it-cirrus-pw-token` | string | Cirrus registration token | +| `it-cirrus-pw-labels` | map[string]string | Labels as `key=value` pairs | + +Returns `nil` when `--it-cirrus-pw-token` is not set. + +--- + +### GitLab Runner + +```go +func AddGitLabRunnerFlags(fs *pflag.FlagSet) +func GitLabRunnerArgs() *gitlab.GitLabRunnerArgs // returns nil when token not set +``` + +| Flag | Type | Default | Description | +|---|---|---|---| +| `glrunner-token` | string | — | GitLab Personal Access Token | +| `glrunner-project-id` | string | — | Project ID (mutually exclusive with group ID) | +| `glrunner-group-id` | string | — | Group ID (mutually exclusive with project ID) | +| `glrunner-url` | string | `https://gitlab.com` | GitLab instance URL | +| `glrunner-tags` | []string | — | Runner tags | + +Returns `nil` when `--glrunner-token` is not set. 
+ +--- + +### Serverless / Destroy + +```go +// No Add* helper — these are registered directly in each destroy command +``` + +| Flag | Type | Description | Command | +|---|---|---|---| +| `timeout` | string | Go duration string — schedules self-destruct | create | +| `serverless` | bool | Use role-based credentials (ECS context) | destroy | +| `force-destroy` | bool | Destroy even if locked | destroy | +| `keep-state` | bool | Keep Pulumi state in S3 after destroy | destroy | + +--- + +## Arch Conversion Helpers + +Each integration has its own `Platform`/`Arch` type. Params provides private converters: + +```go +func linuxArchAsGithubActionsArch(arch string) *github.Arch // "x86_64" → &Amd64 +func linuxArchAsCirrusArch(arch string) *cirrus.Arch +func linuxArchAsGitLabArch(arch string) *gitlab.Arch + +// Exported variants for MAC commands (different arch string convention): +func MACArchAsCirrusArch(arch string) *cirrus.Arch // "x86" → &Amd64 +func MACArchAsGitLabArch(arch string) *gitlab.Arch +``` + +--- + +## How to Add a New Flag Group + +1. **Add constants** in the `const` block — unexported flag name, exported description +2. **Add `Add*Flags(fs *pflag.FlagSet)`** — register each flag with the appropriate type + (`Bool`, `StringP`, `StringSliceP`, `Int32P`, `StringToStringP`) +3. **Add `*Args() *SomeType`** — read from viper and return a populated struct or `nil` +4. **Call `Add*Flags`** in each cmd create function that needs the group +5. **Call `*Args()`** in the `RunE` body when building the action args struct + +For a single-target flag (not shared), define it with a local constant in the target's +cmd file instead, and read it directly with `viper.GetX(localConst)`. + +--- + +## When to Extend This File + +Update this spec when: +- Adding a new shared flag group (e.g. 
`AddNetworkFlags` for VPC endpoints) +- Adding flags to an existing group +- Adding a new arch conversion helper for a new integration diff --git a/specs/features/000-template.md b/specs/features/000-template.md new file mode 100644 index 000000000..231c281ca --- /dev/null +++ b/specs/features/000-template.md @@ -0,0 +1,58 @@ +# Spec: [Title] + +## Context +Brief background. What area of the codebase this touches. Links to related existing files. + +## Problem +What is missing, broken, or needs improvement. + +## Requirements +- [ ] Concrete, testable requirement +- [ ] Another requirement + +## Out of Scope +Explicit list of what this spec does NOT cover. + +## Must Reuse +Existing modules and functions that MUST be called. Do not reimplement this logic. +Reference the API spec for each module's full type signatures. + + + + +## Must Create +New files to write. Everything not listed under Must Reuse. + +- `pkg/provider//action//.go` +- `pkg/provider//action//constants.go` +- `pkg/target/host//` or `pkg/target/service//` +- `cmd/mapt/cmd//hosts/.go` +- `tkn/template/infra--.yaml` + +## API Changes +List any `specs/api/` files that need updating alongside this feature. + +- none + +## Acceptance Criteria +- Specific observable outcome (command runs, test passes, output file exists, etc.) diff --git a/specs/features/aws/airgap-network.md b/specs/features/aws/airgap-network.md new file mode 100644 index 000000000..4f26fe668 --- /dev/null +++ b/specs/features/aws/airgap-network.md @@ -0,0 +1,61 @@ +# Spec: Airgap Network Topology + +## Context +An optional network topology that isolates the target instance from the public internet while +still allowing SSH access via a bastion host. Implemented as a two-phase Pulumi stack update. 
+ +Key files: +- `pkg/provider/aws/modules/network/airgap/airgap.go` — VPC/subnet creation +- `pkg/provider/aws/modules/network/network.go` — dispatcher (standard vs airgap) +- `pkg/provider/aws/modules/bastion/bastion.go` — bastion host resource + +The same Pulumi stack is applied twice: +1. Phase 1 (`connectivity = ON`): NAT gateway present → machine can reach internet for bootstrapping +2. Phase 2 (`connectivity = OFF`): NAT gateway removed → machine loses egress, bastion still accessible + +## Problem +This feature is implemented for AWS RHEL and Windows. This spec documents the design and gaps. + +## Requirements +- [ ] Create a VPC with a public subnet (has internet gateway + NAT gateway in phase 1) and a private subnet (target) +- [ ] Phase 1: private subnet has route to NAT gateway; cloud-init runs and machine is bootstrapped +- [ ] Phase 2: NAT gateway is removed; private subnet loses egress; machine is isolated +- [ ] Bastion host in the public subnet provides SSH proxy access throughout both phases +- [ ] Write bastion output files alongside target files (`bastion-host`, `bastion-username`, `bastion-id_rsa`) +- [ ] Targets using airgap: RHEL, Windows Server (AWS); extensible to other targets + +## Out of Scope +- Azure airgap (not currently implemented) +- Egress filtering via security groups or NACLs (only NAT removal is used) + +## Affected Areas +- `pkg/provider/aws/modules/network/` — standard and airgap network implementations +- `pkg/provider/aws/modules/bastion/` — bastion host and output writing +- `pkg/provider/aws/action/rhel/rhel.go` — `createAirgapMachine()` orchestration +- `pkg/provider/aws/action/windows/windows.go` — same + +## Known Gaps / Improvement Ideas +- The error from phase 1 of `createAirgapMachine()` is swallowed in both rhel and windows actions + (`return nil` instead of `return err`) — this is a bug; phase 2 should not run if phase 1 fails +- No validation that `Airgap=true` requires a remote BackedURL (unlike serverless 
timeout which does validate) + +## Acceptance Criteria +- `mapt aws rhel create --airgap ...` provisions an instance accessible only through the bastion +- Direct SSH to the target host's public IP fails; SSH via bastion succeeds +- Phase 2 is confirmed complete by checking the target cannot reach an external host + +--- + +## Command + +This is a cross-cutting feature, not a standalone command. It is activated via the +`--airgap` flag on individual target create commands: + +``` +mapt aws rhel create --airgap ... +mapt aws windows create --airgap ... +``` + +The `--airgap` flag is defined locally in each host cmd file (not in shared params). +No additional flags are specific to the airgap feature itself — the two-phase +connectivity behaviour is controlled internally by the action. diff --git a/specs/features/aws/eks.md b/specs/features/aws/eks.md new file mode 100644 index 000000000..c49971b77 --- /dev/null +++ b/specs/features/aws/eks.md @@ -0,0 +1,75 @@ +# Spec: AWS EKS (Elastic Kubernetes Service) + +## Context +Provisions a managed EKS cluster on AWS. Entry point: `pkg/provider/aws/action/eks/`. +CLI: `cmd/mapt/cmd/aws/services/eks.go`. + +Unlike the SNC target, EKS uses the AWS-managed control plane and worker node groups +rather than a self-managed cluster on a single EC2 instance. + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision an EKS cluster with a managed node group +- [ ] Support configurable Kubernetes version +- [ ] Support spot instances for worker nodes +- [ ] Write kubeconfig output file +- [ ] `destroy` cleans up all cluster resources and S3 state + +## Out of Scope +- OpenShift SNC (see `specs/features/aws/openshift-snc.md`) +- Azure AKS (see `specs/features/azure/aks.md`) +- AWS Kind (see `specs/features/aws/kind.md`) + +## Affected Areas +- `pkg/provider/aws/action/eks/` — orchestration +- `cmd/mapt/cmd/aws/services/eks.go` +- `tkn/template/infra-aws-kind.yaml` (verify — may share template) + +## Acceptance Criteria +- `mapt aws eks create ...` provisions a functioning EKS cluster +- Exported kubeconfig allows `kubectl get nodes` to return Ready nodes +- `mapt aws eks destroy ...` removes all resources + +--- + +## Command + +``` +mapt aws eks create [flags] +mapt aws eks destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no integration flags, no timeout (EKS cluster lifecycle is not self-destructed). 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `1.31` | Kubernetes version | +| `--workers-desired` | int | `1` | Worker node group desired size | +| `--workers-max` | int | `3` | Worker node group maximum size | +| `--workers-min` | int | `1` | Worker node group minimum size | +| `--addons` | []string | — | EKS managed addons to install (comma-separated) | +| `--load-balancer-controller` | bool | false | Install AWS Load Balancer Controller | +| `--excluded-zone-ids` | []string | — | AZ IDs to exclude from node placement | +| `--arch` | string | `x86_64` | Worker node architecture | +| `--conn-details-output` | string | — | Path to write kubeconfig | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--force-destroy`, `--keep-state` (no `--serverless`) + +### Action args struct populated + +`eks.EKSArgs` → `pkg/provider/aws/action/eks/eks.go` diff --git a/specs/features/aws/fedora-host.md b/specs/features/aws/fedora-host.md new file mode 100644 index 000000000..5bf53815b --- /dev/null +++ b/specs/features/aws/fedora-host.md @@ -0,0 +1,70 @@ +# Spec: AWS Fedora Host + +## Context +Provisions a Fedora EC2 instance on AWS. Entry point: `pkg/provider/aws/action/fedora/`. +Cloud-config: `pkg/target/host/fedora/`. CLI: `cmd/mapt/cmd/aws/hosts/fedora.go`. + +Fedora on AWS is used for Fedora-specific testing. The instance uses a cloud-init config +with the Fedora cloud image. + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision a Fedora EC2 instance (latest or specified version) +- [ ] Support spot instance allocation +- [ ] Support optional CI integrations (GitHub runner, Cirrus worker, GitLab runner) +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` cleans up stack, spot stack, S3 state + +## Out of Scope +- Azure Fedora (see docs/azure/fedora.md — currently Azure Linux target) +- RHEL (subscription-managed — see `specs/features/aws/rhel-host.md`) + +## Affected Areas +- `pkg/provider/aws/action/fedora/` +- `pkg/target/host/fedora/` — cloud-config +- `cmd/mapt/cmd/aws/hosts/fedora.go` +- `tkn/template/infra-aws-fedora.yaml` + +## Acceptance Criteria +- `mapt aws fedora create ...` provisions an accessible Fedora instance +- SSH access works with the output key +- `mapt aws fedora destroy ...` removes all resources + +--- + +## Command + +``` +mapt aws fedora create [flags] +mapt aws fedora destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Integrations | `--ghactions-runner-*`, `--it-cirrus-pw-*`, `--glrunner-*` | + +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `41` | Fedora Cloud major version | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--airgap` | bool | false | Provision as airgap machine | +| `--timeout` | string | — | Self-destruct duration | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`fedora.FedoraArgs` → `pkg/provider/aws/action/fedora/fedora.go` diff --git 
a/specs/features/aws/kind.md b/specs/features/aws/kind.md new file mode 100644 index 000000000..8171897d4 --- /dev/null +++ b/specs/features/aws/kind.md @@ -0,0 +1,73 @@ +# Spec: AWS Kind Cluster + +## Context +Provisions a Kind (Kubernetes-in-Docker) cluster on an EC2 instance. +Entry point: `pkg/provider/aws/action/kind/`. Cloud-config: `pkg/target/service/kind/`. +CLI: `cmd/mapt/cmd/aws/services/kind.go`. + +Kind is a lighter-weight alternative to EKS/SNC for CI pipelines that need a disposable +Kubernetes cluster without managed-service overhead. + +## Problem +This feature is implemented. This spec documents the current behaviour. + +## Requirements +- [ ] Provision an EC2 instance and install Kind + Docker via cloud-init +- [ ] Create a Kind cluster during cloud-init; export kubeconfig +- [ ] Support configurable Kubernetes version (via Kind node image) +- [ ] Support spot instance allocation +- [ ] Write output files: `host`, `username`, `id_rsa`, `kubeconfig` +- [ ] `destroy` cleans up stack and S3 state + +## Out of Scope +- Azure Kind (see `specs/features/azure/kind.md`) +- EKS managed clusters (see `specs/features/aws/eks.md`) + +## Affected Areas +- `pkg/provider/aws/action/kind/` +- `pkg/target/service/kind/` — cloud-config generation and test +- `cmd/mapt/cmd/aws/services/kind.go` +- `tkn/template/infra-aws-kind.yaml` + +## Acceptance Criteria +- `mapt aws kind create ...` produces a working kubeconfig +- `kubectl get nodes` returns a Ready node +- `mapt aws kind destroy ...` removes all resources + +--- + +## Command + +``` +mapt aws kind create [flags] +mapt aws kind destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `v1.34` | Kubernetes version for Kind | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--extra-port-mappings` | string | — | JSON array of `{containerPort, hostPort, protocol}` objects | +| `--timeout` | string | — | Self-destruct duration | +| `--conn-details-output` | string | — | Path to write kubeconfig | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`kind.KindArgs` → `pkg/provider/aws/action/kind/kind.go` diff --git a/specs/features/aws/mac-host.md b/specs/features/aws/mac-host.md new file mode 100644 index 000000000..f6258707d --- /dev/null +++ b/specs/features/aws/mac-host.md @@ -0,0 +1,82 @@ +# Spec: AWS Mac Host (Single) + +## Context +Provisions a single macOS instance on an AWS Dedicated Host. Entry point: +`pkg/provider/aws/action/mac/`. Modules: `pkg/provider/aws/modules/mac/`. +CLI: `cmd/mapt/cmd/aws/hosts/mac.go`. + +AWS Dedicated Hosts for Mac have a hard constraint: minimum 24-hour tenancy before release. +The mac module handles host allocation, machine setup (via root-volume replacement), and +graceful release respecting the 24h window. + +## Problem +This feature is implemented. This spec documents behaviour and the 24h constraint implications. 
+ +## Requirements +- [ ] Allocate an AWS Dedicated Host for macOS (x86_64 or arm64/Apple Silicon) +- [ ] Deploy a macOS machine via root-volume replacement (not standard AMI boot) +- [ ] Support optional CI integration: GitHub Actions runner, Cirrus persistent worker, GitLab runner +- [ ] Optionally fix the dedicated host to a specific region/AZ (`FixedLocation`) +- [ ] Enforce the 24-hour minimum tenancy: do not attempt to release a host allocated < 24h ago +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` handles the 24h wait or errors clearly if host is not yet releasable + +## Out of Scope +- Mac Pool service (managed pool of mac hosts — see `specs/features/aws/mac-pool-service.md`) +- Windows or Linux hosts + +## Affected Areas +- `pkg/provider/aws/action/mac/` — orchestration +- `pkg/provider/aws/modules/mac/host/` — dedicated host allocation +- `pkg/provider/aws/modules/mac/machine/` — machine setup via volume replacement +- `cmd/mapt/cmd/aws/hosts/mac.go` +- `tkn/template/infra-aws-mac.yaml` + +## Acceptance Criteria +- `mapt aws mac create ...` exits 0 and writes `host`, `username`, `id_rsa` +- SSH access to the macOS host works +- `mapt aws mac destroy ...` either releases the host (if >= 24h old) or fails with a clear error + +--- + +## Command + +``` +mapt aws mac create [flags] +mapt aws mac destroy [flags] +mapt aws mac request [flags] # borrow a machine from the pool +mapt aws mac release [flags] # return a machine to the pool +``` + +Note: `request` and `release` operate on the mac-pool (see `specs/features/aws/mac-pool-service.md`). +A standalone `create` provisions a dedicated host directly without a pool. + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | + +Note: no compute-request, no spot, no timeout, no integration flags. +Mac hardware is allocated as a dedicated host — instance type is fixed by arch+version. 
+ +### Target-specific flags (create) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--arch` | string | `m1` | MAC architecture: `x86`, `m1`, `m2` | +| `--version` | string | *(per arch)* | macOS version: 11/12 on x86; 13/14/15 on all | +| `--fixed-location` | bool | false | Force creation in `AWS_DEFAULT_REGION` only | +| `--airgap` | bool | false | Provision as airgap machine | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy / request / release flags + +`--dedicated-host-id` — required for `request`, `release`, and `destroy` to identify the host + +`--force-destroy`, `--keep-state` on destroy. + +### Action args struct populated + +`mac.MacArgs` → `pkg/provider/aws/action/mac/mac.go` diff --git a/specs/features/aws/mac-pool-service.md b/specs/features/aws/mac-pool-service.md new file mode 100644 index 000000000..53d54fd03 --- /dev/null +++ b/specs/features/aws/mac-pool-service.md @@ -0,0 +1,90 @@ +# Spec: AWS Mac Pool Service + +## Context +A managed pool of macOS dedicated hosts providing request/release semantics to CI pipelines. +Entry point: `pkg/provider/aws/action/mac-pool/mac-pool.go`. +CLI: `cmd/mapt/cmd/aws/services/mac-pool.go`. + +The pool runs a serverless HouseKeeper on a recurring schedule (ECS Fargate) that maintains +the desired offered capacity by adding or removing machines while respecting AWS's 24h minimum +host tenancy. State is stored per-machine in separate Pulumi stacks under a shared S3 prefix. + +## Problem +This feature is implemented. This spec documents the architecture and known gaps. 
+ +## Requirements +- [ ] `create`: provision N machines (OfferedCapacity) up to MaxSize; start the HouseKeeper scheduler +- [ ] `create`: generate a least-privilege IAM user/key pair for request/release operations (`requestReleaserAccount`) +- [ ] `housekeeper`: add machines if current offered capacity < desired and pool size < max +- [ ] `housekeeper`: remove machines if current offered capacity > desired and machines are > 24h old (destroyable) +- [ ] `request`: lock the next available (non-locked) machine and write its connection details +- [ ] `release`: unlock a machine identified by host ID, resetting it for the next user +- [ ] Reject local `file://` BackedURL — pool requires remote S3 state +- [ ] `destroy`: remove IAM resources, serverless scheduler, and S3 state + +## Out of Scope +- Single mac host (see `specs/features/aws/mac-host.md`) +- Integration-mode selection on `request` (currently hardcoded; TODO in code) + +## Affected Areas +- `pkg/provider/aws/action/mac-pool/` — orchestration +- `pkg/provider/aws/modules/mac/` — host, machine, util sub-packages +- `pkg/provider/aws/modules/serverless/` — HouseKeeper recurring task +- `pkg/provider/aws/modules/iam/` — request/releaser IAM account +- `cmd/mapt/cmd/aws/services/mac-pool.go` + +## Known Gaps / Improvement Ideas +- `Request` integration-mode is hardcoded (TODO comment at `mac-pool.go:138`) +- `destroyCapacity` has a TODO about allocation time ordering +- `getNextMachineForRequest` picks the newest machine; could be optimized (e.g. 
LRU) +- No explicit handling when all machines in the pool are locked and none available + +## Acceptance Criteria +- Pool creates N dedicated hosts and writes IAM credentials +- `housekeeper` invocation adds a machine when pool is below capacity +- `request` writes `host`, `username`, `id_rsa` for a locked machine +- `release` makes the machine available again for the next request + +--- + +## Command + +``` +mapt aws mac-pool create [flags] # create the pool of dedicated hosts +mapt aws mac-pool destroy [flags] +mapt aws mac-pool request [flags] # borrow a machine from the pool +mapt aws mac-pool release [flags] # return a machine to the pool +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | + +No compute-request, spot, timeout, or integration flags. + +### Target-specific flags (create) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--name` | string | — | Pool name (used to identify the resource group) | +| `--arch` | string | `m1` | MAC architecture: `x86`, `m1`, `m2` | +| `--version` | string | *(per arch)* | macOS version | +| `--offered-capacity` | int | *(default in action)* | Number of machines kept available in the pool | +| `--max-size` | int | *(default in action)* | Maximum number of dedicated hosts in the pool | +| `--fixed-location` | bool | false | Force creation in `AWS_DEFAULT_REGION` only | +| `--conn-details-output` | string | — | Path to write IAM credentials | +| `--tags` | map | — | Resource tags | + +### Request / release flags + +`--project-name`, `--backed-url` (from common) + +### Destroy flags + +`--force-destroy`, `--keep-state` + +### Action args struct populated + +`mac.MacPoolArgs` → `pkg/provider/aws/action/mac/mac-pool.go` diff --git a/specs/features/aws/openshift-snc.md b/specs/features/aws/openshift-snc.md new file mode 100644 index 000000000..86677b1d0 --- /dev/null +++ b/specs/features/aws/openshift-snc.md @@ -0,0 +1,95 
@@ +# Spec: AWS OpenShift Single Node Cluster (SNC) + +## Context +Provisions a single-node OpenShift cluster (CRC/SNC) on an EC2 instance using a pre-baked AMI. +Entry point: `pkg/provider/aws/action/snc/`. Profile system: `pkg/target/service/snc/profile/`. +CLI: `cmd/mapt/cmd/aws/services/snc.go`. + +The cluster setup runs inside cloud-init on boot. Sensitive values (pull secret, kubeadmin +password, developer password) are managed via AWS SSM Parameter Store. Readiness is verified +by SSH-checking the kubeconfig availability and CA rotation completion. + +## Problem +This feature is implemented. This spec documents behaviour, the profile system, and gaps. + +## Requirements +- [ ] Provision an EC2 instance using the SNC pre-baked AMI (looked up by version + arch) +- [ ] Fail early with a clear error if the AMI does not exist in the target region +- [ ] Store pull secret, kubeadmin password, and developer password in SSM; inject via cloud-init +- [ ] Verify cluster readiness: SSH up → kubeconfig exists → CA rotation complete +- [ ] Export kubeconfig (with public IP replacing internal API endpoint) as a secret output +- [ ] Support optional profiles deployed post-cluster-ready via the Kubernetes Pulumi provider: + - `virtualization` — enables nested virtualisation on the compute instance + - `serverless-serving` — installs Knative Serving + - `serverless-eventing` — installs Knative Eventing + - `serverless` — installs both Knative Serving and Eventing + - `servicemesh` — installs OpenShift Service Mesh 3 +- [ ] Validate profile names before provisioning begins +- [ ] Support spot allocation and serverless self-destruct timeout +- [ ] Write output files: `host`, `username`, `id_rsa`, `kubeconfig`, `kubeadmin-password`, `developer-password` +- [ ] `destroy` cleans up main stack, spot stack, S3 state + +## Out of Scope +- Multi-node OCP (full IPI/UPI install) +- EKS (see `specs/features/aws/eks.md`) + +## Affected Areas +- `pkg/provider/aws/action/snc/` — orchestration, 
kubeconfig extraction +- `pkg/target/service/snc/` — cloud-config, SSM management, readiness commands +- `pkg/target/service/snc/profile/` — profile registry and deployment +- `cmd/mapt/cmd/aws/services/snc.go` +- `tkn/template/infra-aws-ocp-snc.yaml` + +## Known Gaps / Improvement Ideas +- Profile deployment failures are logged as warnings, not errors (`snc.go:279`) + — consider making this configurable (fail-fast vs warn-and-continue) +- `disableClusterReadiness` flag skips the readiness wait entirely; useful for debugging + but not documented in the Tekton task +- The `--version` flag accepts a free-form string; no validation against available AMIs beyond + the early existence check + +## Acceptance Criteria +- Cluster is reachable via the exported kubeconfig +- `oc get nodes` shows one Ready node +- Profiles deploy successfully when specified +- `mapt aws openshift-snc destroy` removes all resources and state + +--- + +## Command + +``` +mapt aws openshift-snc create [flags] +mapt aws openshift-snc destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `4.21.0` | OpenShift version | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--pull-secret-file` | string | — | Path to Red Hat pull secret JSON file (required) | +| `--snc` | []string | — | SNC profiles to apply (comma-separated) | +| `--disable-cluster-readiness` | bool | false | Skip cluster readiness check after provision | +| `--timeout` | string | — | Self-destruct duration | +| `--conn-details-output` | string | — | Path to write kubeconfig | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`snc.SNCArgs` → `pkg/provider/aws/action/snc/snc.go` diff --git a/specs/features/aws/rhel-ai.md b/specs/features/aws/rhel-ai.md new file mode 100644 index 000000000..b5cb24525 --- /dev/null +++ b/specs/features/aws/rhel-ai.md @@ -0,0 +1,73 @@ +# Spec: AWS RHEL AI Host + +## Context +Provisions a RHEL AI instance on AWS, designed for AI/ML workloads. Entry point: +`pkg/provider/aws/action/rhel-ai/`. API: `pkg/target/host/rhelai/`. +CLI: `cmd/mapt/cmd/aws/hosts/rhelai.go`. + +RHEL AI differs from standard RHEL in that it uses specialised GPU-capable instance types +and a RHEL AI-specific AMI. + +## Problem +This feature is implemented. This spec documents the current behaviour. + +## Requirements +- [ ] Provision a RHEL AI instance using the RHEL AI AMI +- [ ] Target GPU-capable instance types (e.g. 
g4dn, p3 families) +- [ ] Support spot allocation +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` cleans up all resources and state + +## Out of Scope +- Standard RHEL (see `specs/features/aws/rhel-host.md`) +- Azure RHEL AI (see `specs/features/azure/rhel-ai.md`) + +## Affected Areas +- `pkg/provider/aws/action/rhel-ai/` +- `pkg/target/host/rhelai/` +- `cmd/mapt/cmd/aws/hosts/rhelai.go` +- `tkn/template/infra-aws-rhel-ai.yaml` +- `Pulumi.rhelai.yaml` — stack configuration for the rhelai Pulumi stack + +## Acceptance Criteria +- `mapt aws rhel-ai create ...` provisions an accessible RHEL AI instance +- SSH access works +- `mapt aws rhel-ai destroy ...` removes all resources + +--- + +## Command + +``` +mapt aws rhel-ai create [flags] +mapt aws rhel-ai destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `3.0.0` | RHEL AI version | +| `--accelerator` | string | `cuda` | GPU accelerator type: `cuda` or `rocm` | +| `--custom-ami` | string | — | Override with a custom AMI ID | +| `--timeout` | string | — | Self-destruct duration | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`rhelai.RHELAIArgs` → `pkg/provider/aws/action/rhelai/rhelai.go` diff --git a/specs/features/aws/rhel-host.md b/specs/features/aws/rhel-host.md new file mode 100644 index 000000000..fea12d7ae --- /dev/null +++ b/specs/features/aws/rhel-host.md @@ -0,0 +1,119 @@ +# Spec: AWS RHEL Host + +## Context +Provisions a RHEL EC2 instance on AWS. This is the reference implementation of the AWS EC2 host +pattern — all other AWS EC2 host targets follow the same structure. + +Relevant existing files: +- `pkg/provider/aws/action/rhel/` — orchestration (reference implementation) +- `pkg/target/host/rhel/cloud-config.go` — cloud-config builder +- `cmd/mapt/cmd/aws/hosts/rhel.go` — CLI + +## Problem +This feature is fully implemented. This spec documents current behaviour, the mandatory module +sequence, and known gaps. Use it as the template when adding a new AWS EC2 host target. 
+ +## Requirements +- [ ] Provision a RHEL EC2 instance (versions: 9.x, 8.x) for x86_64 or arm64 +- [ ] Register with Red Hat Subscription Manager using `SubsUsername` / `SubsPassword` via cloud-init +- [ ] Support spot instance allocation with cross-region best-bid selection +- [ ] Support on-demand allocation using the default AWS region +- [ ] Support airgap topology: two-phase stack update (connectivity ON then OFF) +- [ ] Optionally apply the `profileSNC` cloud-config variant to pre-install SNC dependencies +- [ ] Optionally schedule serverless self-destruct after a given timeout (requires remote BackedURL) +- [ ] Write output files: `host`, `username`, `id_rsa` (and bastion files when airgap) +- [ ] `destroy` cleans up main stack, spot stack (if exists), and S3 state + +## Out of Scope +- RHEL AI variant (see `specs/features/aws/rhel-ai.md`) +- Azure RHEL (see `specs/features/azure/rhel-host.md`) + +## Must Reuse + +**In `Create()`:** +- `mc.Init(mCtxArgs, aws.Provider())` — context initialisation +- `allocation.Allocation(mCtx, &AllocationArgs{Prefix, ComputeRequest, AMIProductDescription, Spot})` — resolves region/AZ/instance types for spot or on-demand + +**In `deploy()`, in this order:** +- `amiSVC.GetAMIByName(ctx, amiRegex, nil, map[string]string{"architecture": arch})` — finds the RHEL AMI +- `network.Create(ctx, mCtx, &NetworkArgs{Prefix, ID, Region, AZ, CreateLoadBalancer, Airgap, AirgapPhaseConnectivity})` — VPC/subnet/IGW/LB +- `keypair.KeyPairRequest{Name: resourcesUtil.GetResourceName(...)}.Create(ctx, mCtx)` — SSH keypair +- `securityGroup.SGRequest{...}.Create(ctx, mCtx)` — security group (SSH/22 ingress) +- `rhelApi.CloudConfigArgs{...}.GenerateCloudConfig(ctx, mCtx.RunID())` — RHEL cloud-config with subscription and optional SNC profile +- `compute.ComputeRequest{...}.NewCompute(ctx)` — EC2 instance +- `serverless.OneTimeDelayedTask(...)` — only when `Timeout != ""` +- `c.Readiness(ctx, command.CommandCloudInitWait, ...)` — waits for cloud-init to complete + +**In 
`Destroy()`:** +- `aws.DestroyStack(mCtx, DestroyStackRequest{Stackname: stackName})` +- `spot.Destroy(mCtx)` guarded by `spot.Exist(mCtx)` +- `aws.CleanupState(mCtx)` + +**In `manageResults()`:** +- `bastion.WriteOutputs(stackResult, prefix, resultsPath)` — only when `airgap=true` +- `output.Write(stackResult, resultsPath, results)` — writes `host`, `username`, `id_rsa` + +**Naming:** +- All resource names via `resourcesUtil.GetResourceName(prefix, awsRHELDedicatedID, suffix)` +- Stack name via `mCtx.StackNameByProject(stackName)` + +## Must Create +- `pkg/provider/aws/action/rhel/rhel.go` — `RHELArgs`, `Create()`, `Destroy()`, `deploy()`, `manageResults()`, `securityGroups()` +- `pkg/provider/aws/action/rhel/constants.go` — `stackName`, `awsRHELDedicatedID`, `amiRegex`, `diskSize`, `amiProduct`, `amiUserDefault`, output key constants +- `pkg/target/host/rhel/cloud-config.go` — `CloudConfigArgs`, `GenerateCloudConfig()` +- `pkg/target/host/rhel/cloud-config-base` — base cloud-config template file +- `pkg/target/host/rhel/cloud-config-snc` — SNC-variant cloud-config template file +- `cmd/mapt/cmd/aws/hosts/rhel.go` — Cobra `create` and `destroy` subcommands +- `tkn/template/infra-aws-rhel.yaml` — Tekton task template + +## Known Gaps +- `createAirgapMachine()` swallows the phase-1 error: returns `nil` instead of `err` at `rhel.go:167` + — phase 2 must not run if phase 1 fails +- No validation that `SubsUsername`/`SubsPassword` are non-empty when `profileSNC=true` +- `diskSize` is a hardcoded constant; not exposed as a CLI flag + +## Acceptance Criteria +- `mapt aws rhel create --backed-url s3://... --project-name test --version 9 --arch x86_64 --subs-username u --subs-user-pass p` exits 0 +- Output directory contains `host`, `username`, `id_rsa` +- SSH access to the provisioned host succeeds +- `mapt aws rhel destroy --backed-url s3://... 
--project-name test` exits 0 and removes state + +--- + +## Command + +``` +mapt aws rhel create [flags] +mapt aws rhel destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Compute Request | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Integrations | `--ghactions-runner-*`, `--it-cirrus-pw-*`, `--glrunner-*` | + +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `9.4` | RHEL major.minor version | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--rh-subscription-username` | string | — | Red Hat subscription username | +| `--rh-subscription-password` | string | — | Red Hat subscription password | +| `--snc` | bool | false | Apply SNC profile (sets `nested-virt=true`) | +| `--airgap` | bool | false | Provision as airgap machine (bastion access only) | +| `--timeout` | string | — | Self-destruct duration e.g. `4h` (requires remote `--backed-url`) | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags `name=value,...` | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`rhel.RHELArgs` → `pkg/provider/aws/action/rhel/rhel.go` diff --git a/specs/features/aws/serverless-self-destruct.md b/specs/features/aws/serverless-self-destruct.md new file mode 100644 index 000000000..283086fed --- /dev/null +++ b/specs/features/aws/serverless-self-destruct.md @@ -0,0 +1,66 @@ +# Spec: Serverless Self-Destruct (Timeout Mode) + +## Context +Any provisioned host or service can optionally schedule its own destruction after a given duration. +This prevents cost overruns when a CI pipeline fails to call `destroy` explicitly. 
+ +Implementation: `pkg/provider/aws/modules/serverless/`. + +Mechanism: +1. An ECS Fargate task definition is created with the `mapt` OCI image +2. An AWS EventBridge Scheduler one-time schedule fires at `now + timeout` +3. The scheduled task runs `mapt destroy --project-name ... --backed-url ... --serverless` +4. A shared ECS cluster and IAM roles are created once per region and retained (`RetainOnDelete(true)`) + +## Problem +This feature is implemented. This spec documents the design and constraints. + +## Requirements +- [ ] Accept a timeout duration string (Go `time.Duration` format, e.g. `"4h"`, `"30m"`) +- [ ] Reject timeout when BackedURL is `file://` (state must be remotely accessible by Fargate) +- [ ] Create/reuse a named ECS cluster (`mapt-serverless-cluster`) retained on delete +- [ ] Create/reuse task execution and scheduler IAM roles, retained on delete +- [ ] Create a one-time EventBridge Schedule at `now + timeout` in the region's local timezone +- [ ] The Fargate task image is the `mapt` OCI image baked in at compile time via linker flag (`-X ...context.OCI`) +- [ ] Support `--serverless` flag on destroy to use role-based credentials (no static key/secret needed inside ECS) +- [ ] Clean up the EventBridge schedule and task definition on destroy (these are not retained) + +## Out of Scope +- Recurring schedules (used internally by mac-pool HouseKeeper via `serverless.Create()` with `Repeat` type) +- Azure self-destruct (not implemented) + +## Affected Areas +- `pkg/provider/aws/modules/serverless/serverless.go` — core implementation +- `pkg/provider/aws/modules/serverless/types.go` — schedule types +- `pkg/manager/context/context.go` — `OCI` variable set by linker +- Any action that calls `serverless.OneTimeDelayedTask()` (rhel, windows, snc, fedora, kind, eks) +- `oci/Containerfile` — the container image being scheduled + +## Known Gaps / Improvement Ideas +- IAM policy for the task role is very broad (`ec2:*`, `s3:*`, `cloudformation:*`, 
`ssm:*`, `scheduler:*`) + — could be scoped down to only what destroy needs +- There is no mechanism to cancel the scheduled self-destruct once set (other than manually deleting + the EventBridge schedule from the AWS console) +- The OCI image tag used by the Fargate task is baked in at build time; if a newer binary is deployed + via a different image tag, old scheduled tasks still run the old image + +## Acceptance Criteria +- `mapt aws rhel create --timeout 1h ...` creates a visible EventBridge schedule +- After the timeout, the Fargate task fires and the stack is destroyed +- `mapt aws rhel create --timeout 1h --backed-url file:///tmp/...` returns an error immediately + +--- + +## Command + +This is a cross-cutting feature, not a standalone command. It is activated via the +`--timeout` flag on individual target create commands, and the `--serverless` flag +on destroy commands: + +``` +mapt aws rhel create --timeout 4h ... +mapt aws rhel destroy --serverless ... +``` + +Both flags are defined in shared params (`specs/cmd/params.md` — Serverless / Destroy group). +No additional flags are specific to the self-destruct feature itself. diff --git a/specs/features/aws/vpc-endpoints.md b/specs/features/aws/vpc-endpoints.md new file mode 100644 index 000000000..b8f7484aa --- /dev/null +++ b/specs/features/aws/vpc-endpoints.md @@ -0,0 +1,182 @@ +# Feature: Optional VPC Endpoints + +## Context + +Every public subnet created by mapt unconditionally creates three VPC endpoints inside +`PublicSubnetRequest.Create()` in `pkg/provider/aws/services/vpc/subnet/public.go`: + +| Name | Service | Type | +|---|---|---| +| `s3` | `com.amazonaws.{region}.s3` | Gateway | +| `ecr` | `com.amazonaws.{region}.ecr.dkr` | Interface | +| `ssm` | `com.amazonaws.{region}.ssm` | Interface | + +Interface endpoints (ECR, SSM) also create a shared security group allowing TCP 443 +inbound from the VPC CIDR — this group is also created unconditionally today. 
+ +Targets that do not need these endpoints pay for them unnecessarily. Targets that need +other endpoints cannot add them without code changes. + +--- + +## Requirements + +- [ ] Accept an `Endpoints []string` field on `NetworkArgs` — each entry is a short name + (`"s3"`, `"ecr"`, `"ssm"`) identifying the endpoint to create +- [ ] Empty slice (default) = **no endpoints created** — breaking change from current + behaviour; callers that need endpoints must opt in explicitly +- [ ] Propagate through the full call chain: + `cmd params` → action `*Args` → `NetworkArgs` → `NetworkRequest` → `PublicSubnetRequest` → `endpoints()` +- [ ] `endpoints()` creates only the endpoints present in the list; unknown names return an + error before any AWS resource is created +- [ ] The Interface-endpoint security group is only created when at least one Interface + endpoint (`ecr`, `ssm`) is in the list +- [ ] Targets that currently depend on specific endpoints (verify EKS, SNC) must pass the + required endpoint names explicitly in their action args + +--- + +## Out of Scope + +- Adding new endpoint types beyond the existing three +- Azure (no equivalent mechanism) +- Airgap path — endpoints are only created for public subnets (`standard/`) + +--- + +## Must Reuse + +- `network.Create()` — `specs/api/aws/network.md` — extend `NetworkArgs` with `Endpoints []string` +- `standard.NetworkRequest.CreateNetwork()` — pass `Endpoints` down to `PublicSubnetRequest` +- `PublicSubnetRequest.Create()` — pass `Endpoints` down to `endpoints()` + +--- + +## Must Create + +No new files. All changes are within existing files: + +### 1. Shared CLI params — `cmd/mapt/cmd/params/params.go` + +Follow the three-part pattern described in `specs/cmd/params.md`. Add the Network group: + +```go +const ( + Endpoints = "endpoints" + EndpointsDesc = "Comma-separated list of VPC endpoints to create. " + + "Accepted values: s3, ecr, ssm. Empty = no endpoints."
+) + +func AddNetworkFlags(fs *pflag.FlagSet) { + fs.StringSliceP(Endpoints, "", []string{}, EndpointsDesc) +} + +func NetworkEndpoints() []string { + return viper.GetStringSlice(Endpoints) +} +``` + +`StringSliceP` + `viper.GetStringSlice` handle comma-separated input automatically — +the same mechanism used by `--compute-sizes` and `--spot-excluded-regions`. + +### 2. Action args structs — one per target that uses network + +Add `Endpoints []string` to each action's public args struct and wire it into +`NetworkArgs` inside `deploy()`: + +| Action args struct | File | +|---|---| +| `rhel.RHELArgs` | `pkg/provider/aws/action/rhel/rhel.go` | +| `windows.WindowsArgs` | `pkg/provider/aws/action/windows/windows.go` | +| `fedora.FedoraArgs` | `pkg/provider/aws/action/fedora/fedora.go` | +| `kind.KindArgs` | `pkg/provider/aws/action/kind/kind.go` | +| `snc.SNCArgs` | `pkg/provider/aws/action/snc/snc.go` | +| `eks.EKSArgs` | `pkg/provider/aws/action/eks/eks.go` | + +In each action's `deploy()`, pass the field to `NetworkArgs`: + +```go +nw, err := network.Create(ctx, r.mCtx, &network.NetworkArgs{ + ... + Endpoints: r.endpoints, // new field +}) +``` + +### 3. cmd create files — one per target + +Call `params.AddNetworkFlags(flagSet)` and pass `params.NetworkEndpoints()` to the +action args. Pattern (shown for RHEL, identical for all others): + +```go +// in getRHELCreate() flagSet block: +params.AddNetworkFlags(flagSet) + +// in RHELArgs construction: +&rhel.RHELArgs{ + ... + Endpoints: params.NetworkEndpoints(), +} +``` + +Affected cmd files: + +| File | +|---| +| `cmd/mapt/cmd/aws/hosts/rhel.go` | +| `cmd/mapt/cmd/aws/hosts/windows.go` | +| `cmd/mapt/cmd/aws/hosts/fedora.go` | +| `cmd/mapt/cmd/aws/hosts/rhelai.go` | +| `cmd/mapt/cmd/aws/services/kind.go` | +| `cmd/mapt/cmd/aws/services/snc.go` | +| `cmd/mapt/cmd/aws/services/eks.go` | + +### 4. 
Network module — `pkg/provider/aws/modules/network/network.go` + +Add `Endpoints []string` to `NetworkArgs`; pass to `NetworkRequest`. + +### 5. Standard network — `pkg/provider/aws/modules/network/standard/standard.go` + +Add `Endpoints []string` to `NetworkRequest`; pass to `PublicSubnetRequest`. + +### 6. Public subnet — `pkg/provider/aws/services/vpc/subnet/public.go` + +Add `Endpoints []string` to `PublicSubnetRequest`. + +Refactor `endpoints()`: +- Accept the list; iterate and create only matching entries +- Unknown names: return error immediately +- Create the security group only when at least one Interface endpoint (`ecr`, `ssm`) is present +- Return without creating anything when the list is empty + +--- + +## Endpoint Identifiers + +| Name | AWS service name | Type | Needs security group | +|---|---|---|---| +| `s3` | `com.amazonaws.{region}.s3` | Gateway | No | +| `ecr` | `com.amazonaws.{region}.ecr.dkr` | Interface | Yes | +| `ssm` | `com.amazonaws.{region}.ssm` | Interface | Yes | + +The security group (TCP 443 ingress from VPC CIDR) is shared by all Interface endpoints +in the subnet. Created once if any Interface endpoint is in the list; omitted otherwise. 
+ +--- + +## API Changes + +Update `specs/api/aws/network.md`: +- Add `Endpoints []string` to `NetworkArgs` type block +- Document the accepted names and the security group behaviour + +--- + +## Acceptance Criteria + +- [ ] `mapt aws rhel create` with no `--endpoints` provisions a VPC with zero endpoints +- [ ] `mapt aws rhel create --endpoints s3,ssm` creates only S3 (Gateway) and SSM (Interface); + ECR is absent; security group is present +- [ ] `mapt aws rhel create --endpoints s3` creates only S3; no security group is created +- [ ] `mapt aws rhel create --endpoints foo` returns an error before any stack is touched +- [ ] Targets that depended on endpoints before this change (verify EKS, SNC) pass their + required endpoint names explicitly and continue to work diff --git a/specs/features/aws/windows-server-host.md b/specs/features/aws/windows-server-host.md new file mode 100644 index 000000000..6256a6914 --- /dev/null +++ b/specs/features/aws/windows-server-host.md @@ -0,0 +1,123 @@ +# Spec: AWS Windows Server Host + +## Context +Provisions a Windows Server EC2 instance on AWS. Follows the standard AWS EC2 host pattern +(see `001-aws-rhel-host.md`) with two additions: AMI cross-region copy and Fast Launch. + +Relevant existing files: +- `pkg/provider/aws/action/windows/` — orchestration +- `pkg/provider/aws/modules/ami/` — AMI copy + fast-launch (reused here, not in other targets) +- `pkg/target/host/windows-server/` — PowerShell userdata builder + +## Problem +This feature is fully implemented. This spec documents the standard and Windows-specific +module usage, and known gaps. 
+ +## Requirements +- [ ] Provision Windows Server 2019 (English or non-English variant) EC2 instance +- [ ] Accept a custom AMI name/owner/user; fall back to well-known defaults +- [ ] Copy the AMI to the target region when not natively available; optionally keep the copy +- [ ] Enable Fast Launch on copied AMI with configurable parallelism +- [ ] Support spot instance allocation with cross-region best-bid selection +- [ ] Support airgap topology (two-phase: connectivity ON → OFF) +- [ ] Generate a random administrator password; export as `userpassword` +- [ ] Open security group rules for SSH (22) and RDP (3389) +- [ ] Optionally schedule serverless self-destruct after timeout +- [ ] Write output files: `host`, `username`, `userpassword`, `id_rsa` (and bastion files when airgap) +- [ ] `destroy` cleans up main stack, AMI-copy stack (if exists), spot stack (if exists), S3 state + +## Out of Scope +- Azure Windows Desktop (see `011-azure-windows-desktop.md`) +- Non-server Windows editions + +## Must Reuse + +**In `Create()` — standard:** +- `mc.Init(mCtxArgs, aws.Provider())` +- `allocation.Allocation(mCtx, &AllocationArgs{...})` — spot or on-demand + +**In `Create()` — Windows-specific addition before `createMachine()`:** +- `data.IsAMIOffered(ctx, ImageRequest{Name, Region})` — check if AMI exists in the target region +- `amiCopy.CopyAMIRequest{..., FastLaunch: true, MaxParallel: N}.Create()` — copy AMI to region when not offered; this creates its own Pulumi stack + +**In `deploy()`, in this order — same as standard pattern:** +- `amiSVC.GetAMIByName(ctx, amiName+"*", []string{amiOwner}, nil)` +- `network.Create(ctx, mCtx, &NetworkArgs{..., CreateLoadBalancer: r.spot})` +- `keypair.KeyPairRequest{Name: resourcesUtil.GetResourceName(...)}.Create(ctx, mCtx)` +- `securityGroup.SGRequest{..., IngressRules: [SSH_TCP, RDP_TCP]}.Create(ctx, mCtx)` +- `security.CreatePassword(ctx, resourcesUtil.GetResourceName(...))` — random admin password +- 
`cloudConfigWindowsServer.GenerateUserdata(ctx, user, password, keyResources, runID)` — PowerShell userdata +- `compute.ComputeRequest{..., LBTargetGroups: []int{22, 3389}}.NewCompute(ctx)` +- `serverless.OneTimeDelayedTask(...)` — only when `Timeout != ""` +- `c.Readiness(ctx, command.CommandPing, ...)` — ICMP ping readiness (not cloud-init wait) + +**In `Destroy()` — Windows-specific additions:** +- `aws.DestroyStack(mCtx, DestroyStackRequest{Stackname: stackName})` +- `amiCopy.Destroy(mCtx)` guarded by `amiCopy.Exist(mCtx)` — additional step vs standard pattern +- `spot.Destroy(mCtx)` guarded by `spot.Exist(mCtx)` +- `aws.CleanupState(mCtx)` + +**In `manageResults()` — standard:** +- `bastion.WriteOutputs(...)` when airgap +- `output.Write(stackResult, resultsPath, results)` — writes `host`, `username`, `userpassword`, `id_rsa` + +**Naming:** +- All resource names via `resourcesUtil.GetResourceName(prefix, awsWindowsDedicatedID, suffix)` + +## Must Create +- `pkg/provider/aws/action/windows/windows.go` — `WindowsServerArgs`, `Create()`, `Destroy()`, `deploy()`, `manageResults()`, `securityGroups()` +- `pkg/provider/aws/action/windows/constants.go` — stack name, component ID, AMI defaults, disk size, fast-launch config +- `pkg/target/host/windows-server/windows-server.go` — `GenerateUserdata()` +- `pkg/target/host/windows-server/bootstrap.ps1` — embedded PowerShell bootstrap script +- `cmd/mapt/cmd/aws/hosts/windows.go` — Cobra `create` and `destroy` subcommands +- `tkn/template/infra-aws-windows-server.yaml` — Tekton task template + +## Known Gaps +- `createAirgapMachine()` swallows the phase-1 error: `return nil` instead of `return err` at `windows.go:214` +- RDP through the bastion is unfinished — TODO comment at bottom of `windows.go` +- Readiness uses `CommandPing` (ICMP) not `CommandCloudInitWait`; cloud-init completion is not explicitly verified + +## Acceptance Criteria +- `mapt aws windows create ...` provisions an accessible Windows instance +- RDP port 
3389 and SSH port 22 are reachable +- Output directory contains `host`, `username`, `userpassword`, `id_rsa` +- `mapt aws windows destroy ...` removes all stacks and S3 state + +--- + +## Command + +``` +mapt aws windows create [flags] +mapt aws windows destroy [flags] +``` + +### Shared flag groups (`specs/cmd/params.md`) + +| Group | Flags added | +|---|---| +| Common | `--project-name`, `--backed-url` | +| Spot | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no compute-request flags — Windows uses a fixed AMI-based workflow, not hardware-spec selection. No integration flags. + +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--ami-name` | string | `Windows_Server-2019-English-Full-Base*` | AMI name pattern to search | +| `--ami-username` | string | `ec2-user` | Default username on the AMI | +| `--ami-region` | string | — | Source region for cross-region AMI copy | +| `--ami-keep-copy` | bool | false | Retain the copied AMI after destroy | +| `--airgap` | bool | false | Provision as airgap machine | +| `--timeout` | string | — | Self-destruct duration | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`windows.WindowsArgs` → `pkg/provider/aws/action/windows/windows.go` diff --git a/specs/features/azure/aks.md b/specs/features/azure/aks.md new file mode 100644 index 000000000..c393a1246 --- /dev/null +++ b/specs/features/azure/aks.md @@ -0,0 +1,68 @@ +# Spec: Azure AKS (Azure Kubernetes Service) + +## Context +Provisions a managed AKS cluster on Azure. Entry point: `pkg/provider/azure/action/aks/`. +CLI: `cmd/mapt/cmd/azure/services/aks.go`. + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision an AKS cluster with a configurable node pool +- [ ] Support configurable Kubernetes version +- [ ] Support spot node pools (Azure spot VMs) +- [ ] Write kubeconfig output file +- [ ] `destroy` cleans up all resources and state + +## Out of Scope +- AWS EKS (see `006-aws-eks.md`) +- Azure Kind (see `014-azure-kind.md`) + +## Affected Areas +- `pkg/provider/azure/action/aks/` +- `cmd/mapt/cmd/azure/services/aks.go` +- `tkn/template/infra-azure-aks.yaml` + +## Acceptance Criteria +- `mapt azure aks create ...` provisions a functioning AKS cluster +- Exported kubeconfig allows `kubectl get nodes` to return Ready nodes +- `mapt azure aks destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure aks create [flags] +mapt azure aks destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | + +Note: no compute-request (VM size is explicit), no integrations, no timeout. +AKS uses its own `--location` rather than the shared azure-params one (different default: `West US`). 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--location` | string | `West US` | Azure region (ignored when spot is set) | +| `--vmsize` | string | *(default in action)* | Explicit VM size for node pool | +| `--version` | string | `1.31` | Kubernetes version | +| `--only-system-pool` | bool | false | Create system node pool only (no user pool) | +| `--enable-app-routing` | bool | false | Enable AKS App Routing add-on | +| `--conn-details-output` | string | — | Path to write kubeconfig | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +*(none beyond common)* + +### Action args struct populated + +`aks.AKSArgs` → `pkg/provider/azure/action/aks/aks.go` diff --git a/specs/features/azure/kind.md b/specs/features/azure/kind.md new file mode 100644 index 000000000..3240c9c29 --- /dev/null +++ b/specs/features/azure/kind.md @@ -0,0 +1,69 @@ +# Spec: Azure Kind Cluster + +## Context +Provisions a Kind (Kubernetes-in-Docker) cluster on an Azure VM. +Entry point: `pkg/provider/azure/action/kind/`. CLI: `cmd/mapt/cmd/azure/services/kind.go`. + +Mirrors the AWS Kind target but runs on Azure infrastructure. + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision an Azure VM and install Kind + Docker via cloud-init +- [ ] Create a Kind cluster; export kubeconfig +- [ ] Support configurable Kubernetes version +- [ ] Support spot (low-priority) VMs +- [ ] Write output files: `host`, `username`, `id_rsa`, `kubeconfig` +- [ ] `destroy` cleans up all resources and state + +## Out of Scope +- AWS Kind (see `007-aws-kind.md`) +- Azure AKS managed clusters (see `012-azure-aks.md`) + +## Affected Areas +- `pkg/provider/azure/action/kind/` +- `cmd/mapt/cmd/azure/services/kind.go` + +## Acceptance Criteria +- `mapt azure kind create ...` produces a working kubeconfig +- `kubectl get nodes` returns a Ready node +- `mapt azure kind destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure kind create [flags] +mapt azure kind destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Compute Request | `specs/cmd/params.md` | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Location | `specs/cmd/azure-params.md` | `--location` (default: `westeurope`) | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `v1.34` | Kubernetes version for Kind | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--extra-port-mappings` | string | — | JSON array of `{containerPort, hostPort, protocol}` | +| `--conn-details-output` | string | — | Path to write kubeconfig | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy` + +### Action args struct populated + +`kind.KindArgs` → `pkg/provider/azure/action/kind/kind.go` diff --git a/specs/features/azure/linux-host.md b/specs/features/azure/linux-host.md new file mode 100644 index 000000000..5174b0e8a --- /dev/null +++ b/specs/features/azure/linux-host.md @@ -0,0 +1,70 @@ +# Spec: Azure Linux Host (Fedora / Ubuntu) + +## Context +Provisions a generic Linux VM on Azure (Fedora or Ubuntu). Entry point: +`pkg/provider/azure/action/linux/`. CLI: `cmd/mapt/cmd/azure/hosts/linux.go`. +Also referenced as separate Fedora/Ubuntu targets in docs (`docs/azure/fedora.md`, `docs/azure/ubuntu.md`). + +This is a general-purpose Linux provisioner for Azure that accepts a configurable image reference. + +## Problem +This feature is implemented. This spec documents the current behaviour. + +## Requirements +- [ ] Provision a Linux VM on Azure with a configurable Marketplace image (Fedora, Ubuntu, etc.) 
+- [ ] Support spot (low-priority) VMs +- [ ] Support optional CI integrations (GitHub runner, Cirrus worker, GitLab runner) +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` cleans up all resources and state + +## Out of Scope +- Azure RHEL (subscription-managed — see `010-azure-rhel-host.md`) +- AWS Fedora (see `008-aws-fedora-host.md`) + +## Affected Areas +- `pkg/provider/azure/action/linux/` +- `pkg/provider/azure/data/` — image reference lookup +- `cmd/mapt/cmd/azure/hosts/linux.go` +- `tkn/template/infra-azure-fedora.yaml` + +## Acceptance Criteria +- `mapt azure linux create ...` provisions an accessible Linux VM +- SSH access works +- `mapt azure linux destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure linux create [flags] # Ubuntu default; reused for Fedora with different version +mapt azure linux destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Compute Request | `specs/cmd/params.md` | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Integrations | `specs/cmd/params.md` | `--ghactions-runner-*`, `--it-cirrus-pw-*`, `--glrunner-*` | +| Location | `specs/cmd/azure-params.md` | `--location` (default: `westeurope`) | + +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `24.04` | OS version (Ubuntu format; `42` for Fedora) | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--username` | string | `rhqp` | OS username for SSH access | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +*(none beyond common)* + +### Action args struct populated + +`linux.LinuxArgs` → 
`pkg/provider/azure/action/linux/linux.go` diff --git a/specs/features/azure/rhel-ai.md b/specs/features/azure/rhel-ai.md new file mode 100644 index 000000000..c13d8b594 --- /dev/null +++ b/specs/features/azure/rhel-ai.md @@ -0,0 +1,70 @@ +# Spec: Azure RHEL AI Host + +## Context +Provisions a RHEL AI VM on Azure for AI/ML workloads. Entry point: +`pkg/provider/azure/action/rhel-ai/`. CLI: `cmd/mapt/cmd/azure/hosts/rhelai.go`. + +Mirrors the AWS RHEL AI target on Azure infrastructure, using GPU-capable VM sizes +and the RHEL AI Marketplace image. + +## Problem +This feature is implemented. This spec documents the current behaviour. + +## Requirements +- [ ] Provision a RHEL AI VM on Azure using the Marketplace image +- [ ] Target GPU-capable Azure VM sizes +- [ ] Support spot (low-priority) VMs +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` cleans up all Azure resources and state + +## Out of Scope +- AWS RHEL AI (see `009-aws-rhel-ai.md`) +- Standard Azure RHEL (see `010-azure-rhel-host.md`) + +## Affected Areas +- `pkg/provider/azure/action/rhel-ai/` +- `cmd/mapt/cmd/azure/hosts/rhelai.go` +- `tkn/template/infra-azure-rhel-ai.yaml` + +## Acceptance Criteria +- `mapt azure rhel-ai create ...` provisions an accessible RHEL AI VM +- SSH access works +- `mapt azure rhel-ai destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure rhel-ai create [flags] +mapt azure rhel-ai destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Compute Request | `specs/cmd/params.md` | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Location | `specs/cmd/azure-params.md` | `--location` (default: `westeurope`) | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `3.0.0` | RHEL AI version | +| `--accelerator` | string | `cuda` | GPU accelerator: `cuda` or `rocm` | +| `--custom-ami` | string | — | Custom image override | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +`--serverless`, `--force-destroy`, `--keep-state` + +### Action args struct populated + +`rhelai.RHELAIArgs` → `pkg/provider/azure/action/rhelai/rhelai.go` diff --git a/specs/features/azure/rhel-host.md b/specs/features/azure/rhel-host.md new file mode 100644 index 000000000..069e45c75 --- /dev/null +++ b/specs/features/azure/rhel-host.md @@ -0,0 +1,75 @@ +# Spec: Azure RHEL Host + +## Context +Provisions a RHEL VM on Azure. Entry point: `pkg/provider/azure/action/rhel/`. +CLI: `cmd/mapt/cmd/azure/hosts/rhel.go`. + +Azure RHEL uses Azure Marketplace images. Root disk expansion is handled via a shell script +(`expand-root-disk.sh`) run during cloud-init since Azure RHEL images often ship with a small root partition. + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision a RHEL VM on Azure using the Marketplace image +- [ ] Expand the root disk during cloud-init to use the full allocated disk size +- [ ] Support spot (Azure low-priority / spot VMs) via `azure/modules/allocation/` +- [ ] Support optional CI integrations +- [ ] Write output files: `host`, `username`, `id_rsa` +- [ ] `destroy` cleans up all Azure resources and state + +## Out of Scope +- AWS RHEL (see `001-aws-rhel-host.md`) +- Azure RHEL AI (see `015-azure-rhel-ai.md`) + +## Affected Areas +- `pkg/provider/azure/action/rhel/` — including `expand-root-disk.sh` +- `pkg/provider/azure/modules/` — network, virtual-machine, allocation +- `cmd/mapt/cmd/azure/hosts/rhel.go` +- `tkn/template/infra-azure-rhel.yaml` + +## Acceptance Criteria +- `mapt azure rhel create ...` provisions an accessible RHEL VM +- Root disk is expanded to the configured size +- SSH access works +- `mapt azure rhel destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure rhel create [flags] +mapt azure rhel destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Compute Request | `specs/cmd/params.md` | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Integrations | `specs/cmd/params.md` | `--ghactions-runner-*`, `--it-cirrus-pw-*`, `--glrunner-*` | +| Location | `specs/cmd/azure-params.md` | `--location` (default: `westeurope`) | + +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--version` | string | `9.7` | RHEL major.minor version | +| `--arch` | string | `x86_64` | `x86_64` or `arm64` | +| `--username` | string | `rhqp` | OS username for SSH access | +| `--rh-subscription-username` | string | — | Red Hat subscription username 
| +| `--rh-subscription-password` | string | — | Red Hat subscription password | +| `--snc` | bool | false | Apply SNC profile | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +*(none beyond common)* + +### Action args struct populated + +`rhel.RhelArgs` → `pkg/provider/azure/action/rhel/rhel.go` diff --git a/specs/features/azure/windows-desktop.md b/specs/features/azure/windows-desktop.md new file mode 100644 index 000000000..573c041ab --- /dev/null +++ b/specs/features/azure/windows-desktop.md @@ -0,0 +1,73 @@ +# Spec: Azure Windows Desktop Host + +## Context +Provisions a Windows Desktop VM on Azure. Entry point: `pkg/provider/azure/action/windows/`. +CLI: `cmd/mapt/cmd/azure/hosts/windows.go`. + +This differs from the AWS Windows Server target: it targets Windows Desktop editions on Azure +and includes CI-specific setup scripts (`rhqp-ci-setup.ps1`). + +## Problem +This feature is implemented. This spec documents the current behaviour. 
+ +## Requirements +- [ ] Provision a Windows Desktop VM on Azure using the specified Marketplace image +- [ ] Run CI setup PowerShell scripts via custom script extension or userdata +- [ ] Support optional spot (low-priority) VMs +- [ ] Open security group rules for RDP (3389) and WinRM/SSH as needed +- [ ] Write output files: `host`, `username`, `userpassword` +- [ ] `destroy` cleans up all Azure resources and state + +## Out of Scope +- AWS Windows Server (see `002-aws-windows-server-host.md`) +- Azure RHEL or Linux (see `010-azure-rhel-host.md`, `013-azure-linux-host.md`) + +## Affected Areas +- `pkg/provider/azure/action/windows/` — including `rhqp-ci-setup.ps1` +- `cmd/mapt/cmd/azure/hosts/windows.go` +- `tkn/template/infra-azure-windows-desktop.yaml` + +## Acceptance Criteria +- `mapt azure windows create ...` provisions an accessible Windows VM +- RDP connection works with the output credentials +- `mapt azure windows destroy ...` removes all resources + +--- + +## Command + +``` +mapt azure windows create [flags] +mapt azure windows destroy [flags] +``` + +### Shared flag groups + +| Group | Source | Flags added | +|---|---|---| +| Common | `specs/cmd/params.md` | `--project-name`, `--backed-url` | +| Compute Request | `specs/cmd/params.md` | `--cpus`, `--memory`, `--arch`, `--nested-virt`, `--compute-sizes` | +| Spot | `specs/cmd/params.md` | `--spot`, `--spot-eviction-tolerance`, `--spot-increase-rate`, `--spot-excluded-regions` | +| Location | `specs/cmd/azure-params.md` | `--location` (default: `westeurope`) | + +Note: no integration flags. 
+ +### Target-specific flags (create only) + +| Flag | Type | Default | Description | +|---|---|---|---| +| `--windows-version` | string | `11` | Windows major version | +| `--feature` | string | — | Windows feature/edition variant | +| `--username` | string | `rhqp` | Username for SSH access | +| `--admin-username` | string | `rhqpadmin` | Admin username for RDP access | +| `--profile` | []string | — | Setup profiles to apply (comma-separated) | +| `--conn-details-output` | string | — | Path to write connection files | +| `--tags` | map | — | Resource tags | + +### Destroy flags + +*(none beyond common)* + +### Action args struct populated + +`windows.WindowsArgs` → `pkg/provider/azure/action/windows/windows.go` diff --git a/specs/integrations/cirrus-ci.md b/specs/integrations/cirrus-ci.md new file mode 100644 index 000000000..403c9a0c6 --- /dev/null +++ b/specs/integrations/cirrus-ci.md @@ -0,0 +1,109 @@ +# Integration: Cirrus CI Persistent Worker + +**Package:** `github.com/redhat-developer/mapt/pkg/integrations/cirrus` + +Registers the provisioned machine as a Cirrus CI persistent worker at boot. +The cirrus-cli binary is downloaded and configured as a long-running service. + +See `specs/integrations/overview.md` for the shared interface and config flow. 
+ +--- + +## Type + +```go +type PersistentWorkerArgs struct { + Name string // Worker name — set to mCtx.RunID() by the action + Token string // Cirrus CI registration token (required) + Platform *Platform // Target OS: Linux | Darwin | Windows + Arch *Arch // Target arch: Amd64 | Arm64 + Labels map[string]string // Worker labels as key=value pairs +} +``` + +### Platform / Arch constants + +```go +var ( + Windows Platform = "windows" + Linux Platform = "linux" + Darwin Platform = "darwin" + + Arm64 Arch = "arm64" + Amd64 Arch = "amd64" +) +``` + +--- + +## Persistent Worker Version + +```go +var version = "v0.135.0" // overridden at build time via linker flag +``` + +Makefile variable: `CIRRUS_CLI` +Linker target: `pkg/integrations/cirrus.version` + +--- + +## Download URL Pattern + +``` +https://github.com/cirruslabs/cirrus-cli/releases/download/{version}/cirrus-{platform}-{arch} +https://github.com/cirruslabs/cirrus-cli/releases/download/{version}/cirrus-{platform}-{arch}.exe (Windows) +``` + +--- + +## Listen Port + +```go +var cirrusPort = "3010" +``` + +The worker listens on port `3010`. This port must be opened in the security group when +Cirrus integration is enabled — callers use `cirrus.CirrusPort()` to conditionally add +the ingress rule: + +```go +func CirrusPort() (*int, error) // returns nil, nil if Cirrus not configured +``` + +This is the only integration that requires an additional inbound port. 
+ +--- + +## Functions + +```go +func Init(args *PersistentWorkerArgs) // stores args as package-level state +func GetRunnerArgs() *PersistentWorkerArgs // returns nil if not configured +func GetToken() string // returns token or "" if not configured +func CirrusPort() (*int, error) // returns port int or nil if not configured +``` + +--- + +## UserDataValues populated + +| Field | Source | +|---|---| +| `CliURL` | `downloadURL()` — version + platform + arch | +| `Name` | `PersistentWorkerArgs.Name` | +| `Token` | `PersistentWorkerArgs.Token` | +| `Labels` | Map entries formatted as `key=value`, joined with `,` | +| `Port` | `"3010"` (fixed) | +| `User` | Set by `GetIntegrationSnippet` from `username` arg | +| `RepoURL`, `Executor` | Not used | + +--- + +## Script Templates + +Embedded at compile time: +- `snippet-linux.sh` — downloads binary, installs as systemd service +- `snippet-darwin.sh` — same flow for macOS +- `snippet-windows.ps1` — downloads `.exe`, installs as Windows service + +Template selection is based on `PersistentWorkerArgs.Platform`. diff --git a/specs/integrations/github-actions.md b/specs/integrations/github-actions.md new file mode 100644 index 000000000..bcacb1b72 --- /dev/null +++ b/specs/integrations/github-actions.md @@ -0,0 +1,98 @@ +# Integration: GitHub Actions Self-Hosted Runner + +**Package:** `github.com/redhat-developer/mapt/pkg/integrations/github` + +Registers the provisioned machine as a GitHub Actions self-hosted runner at boot. +The runner binary is downloaded and installed by the injected setup script. + +See `specs/integrations/overview.md` for the shared interface and config flow. 
+ +--- + +## Type + +```go +type GithubRunnerArgs struct { + Token string // GitHub runner registration token (required) + RepoURL string // Repository or organisation URL to register against (required) + Name string // Runner name — set to mCtx.RunID() by the action + Platform *Platform // Target OS: Linux | Darwin | Windows + Arch *Arch // Target arch: Amd64 | Arm64 | Arm + Labels []string // Runner labels, comma-joined before injection + User string // OS user to run as (set by cloud-config builder) +} +``` + +### Platform / Arch constants + +```go +var ( + Windows Platform = "win" + Linux Platform = "linux" + Darwin Platform = "osx" + + Arm64 Arch = "arm64" + Amd64 Arch = "x64" + Arm Arch = "arm" +) +``` + +--- + +## Runner Version + +```go +var runnerVersion = "2.317.0" // overridden at build time via linker flag +``` + +Makefile variable: `GITHUB_RUNNER` +Linker target: `pkg/integrations/github.runnerVersion` + +--- + +## Download URL Pattern + +``` +https://github.com/actions/runner/releases/download/v{version}/actions-runner-{platform}-{arch}-{version}.tar.gz +https://github.com/actions/runner/releases/download/v{version}/actions-runner-{platform}-{arch}-{version}.zip (Windows) +``` + +The URL is built by `downloadURL()` and injected as `UserDataValues.CliURL`. + +--- + +## Functions + +```go +func Init(args *GithubRunnerArgs) // stores args as package-level state +func GetRunnerArgs() *GithubRunnerArgs // returns nil if not configured +func GetToken() string // returns token or "" if not configured +``` + +`GetRunnerArgs()` implements `IntegrationConfig` (via pointer receiver methods on +`*GithubRunnerArgs`) — pass directly to `GetIntegrationSnippet`. 
+ +--- + +## UserDataValues populated + +| Field | Source | +|---|---| +| `CliURL` | `downloadURL()` — version + platform + arch | +| `Name` | `GithubRunnerArgs.Name` | +| `Token` | `GithubRunnerArgs.Token` | +| `Labels` | `GithubRunnerArgs.Labels` joined with `,` | +| `RepoURL` | `GithubRunnerArgs.RepoURL` | +| `User` | Set by `GetIntegrationSnippet` from `username` arg | +| `Port`, `Executor` | Not used | + +--- + +## Script Templates + +Embedded at compile time: +- `snippet-linux.sh` — downloads `.tar.gz`, extracts, configures, starts as systemd service +- `snippet-darwin.sh` — same flow for macOS +- `snippet-windows.ps1` — downloads `.zip`, extracts, registers as Windows service + +Template selection is based on `GithubRunnerArgs.Platform`. diff --git a/specs/integrations/gitlab.md b/specs/integrations/gitlab.md new file mode 100644 index 000000000..9a3e154f7 --- /dev/null +++ b/specs/integrations/gitlab.md @@ -0,0 +1,146 @@ +# Integration: GitLab Runner + +**Package:** `github.com/redhat-developer/mapt/pkg/integrations/gitlab` + +Registers the provisioned machine as a GitLab runner. Unlike GitHub Actions and Cirrus CI, +GitLab registration requires creating a runner resource in GitLab itself to obtain an auth +token. mapt uses the Pulumi GitLab provider to create the runner as a Pulumi resource +inside the deploy stack — the auth token is resolved at provision time and injected into +the setup script. + +See `specs/integrations/overview.md` for the shared interface and config flow. + +--- + +## Type + +```go +type GitLabRunnerArgs struct { + GitLabPAT string // Personal Access Token for the Pulumi GitLab provider + ProjectID string // GitLab project ID — mutually exclusive with GroupID + GroupID string // GitLab group ID — mutually exclusive with ProjectID + URL string // GitLab instance URL (e.g. 
"https://gitlab.com") + Tags []string // Runner tags for job routing; empty = accepts untagged jobs + Name string // Runner description — set to mCtx.RunID() by the action + Platform *Platform // Target OS: Linux | Darwin | Windows + Arch *Arch // Target arch: Amd64 | Arm64 | Arm + User string // OS user to run as + AuthToken string // Set by Pulumi after CreateRunner(); not caller-supplied +} +``` + +### Platform / Arch constants + +```go +var ( + Windows Platform = "windows" + Linux Platform = "linux" + Darwin Platform = "darwin" + + Arm64 Arch = "arm64" + Amd64 Arch = "amd64" + Arm Arch = "arm" +) +``` + +--- + +## Runner Version + +```go +var version = "18.8.0" // overridden at build time via linker flag +``` + +Makefile variable: `GITLAB_RUNNER` +Linker target: `pkg/integrations/gitlab.version` + +--- + +## Download URL Pattern + +``` +https://gitlab-runner-downloads.s3.amazonaws.com/v{version}/binaries/gitlab-runner-{platform}-{arch} +https://gitlab-runner-downloads.s3.amazonaws.com/v{version}/binaries/gitlab-runner-{platform}-{arch}.exe (Windows) +``` + +--- + +## Pulumi Registration (key difference from other integrations) + +GitLab runners must be registered in GitLab before deployment. mapt handles this inside the +Pulumi deploy stack by calling `CreateRunner()`: + +```go +func CreateRunner(ctx *pulumi.Context, args *GitLabRunnerArgs) (pulumi.StringOutput, error) +``` + +This creates a `gitlab.UserRunner` Pulumi resource via the `pulumi-gitlab` provider, +authenticated with `GitLabPAT`. The resource returns an `AuthToken` as a `pulumi.StringOutput`. 
+ +The returned token is then wired via `ApplyT` into the userdata generation so it is available +when the cloud-init script is rendered: + +```go +token, err := gitlab.CreateRunner(ctx, glArgs) +// token is a pulumi.StringOutput resolved during stack apply +token.ApplyT(func(t string) string { + gitlab.SetAuthToken(t) + // generate userdata here using GetIntegrationSnippet + return t +}) +``` + +Exports added to the stack: `gitlab-runner-id`, `gitlab-runner-type`. + +### Project vs Group runner + +Exactly one of `ProjectID` or `GroupID` must be set — `CreateRunner` returns an error if +both or neither are provided: + +| Field set | Runner type | GitLab API | +|---|---|---| +| `ProjectID` | `project_type` | Scoped to a single project | +| `GroupID` | `group_type` | Shared across all projects in the group | + +--- + +## Functions + +```go +func Init(args *GitLabRunnerArgs) // stores args as package-level state +func GetRunnerArgs() *GitLabRunnerArgs // returns nil if not configured +func GetToken() string // returns AuthToken or "" if not configured +func SetAuthToken(token string) // called inside ApplyT after CreateRunner +func CreateRunner(ctx *pulumi.Context, args *GitLabRunnerArgs) (pulumi.StringOutput, error) +``` + +--- + +## UserDataValues populated + +| Field | Source | +|---|---| +| `CliURL` | `downloadURL()` — version + platform + arch | +| `Name` | `GitLabRunnerArgs.Name` | +| `Token` | `GitLabRunnerArgs.AuthToken` — set by Pulumi, not caller | +| `RepoURL` | `GitLabRunnerArgs.URL` | +| `User` | Set by `GetIntegrationSnippet` from `username` arg | +| `Labels`, `Port`, `Executor` | Not used | + +--- + +## Script Templates + +Embedded at compile time: +- `snippet-linux.sh` — downloads binary, registers runner, starts as systemd service +- `snippet-darwin.sh` — same flow for macOS +- `snippet-windows.ps1` — downloads `.exe`, installs as Windows service + +Template selection is based on `GitLabRunnerArgs.Platform`. 
+ +--- + +## Known Gaps + +- No Tekton task template includes the GitLab runner flags (verify and add) +- Tags are not surfaced in the setup script — only the Pulumi resource carries them diff --git a/specs/integrations/overview.md b/specs/integrations/overview.md new file mode 100644 index 000000000..50988f113 --- /dev/null +++ b/specs/integrations/overview.md @@ -0,0 +1,129 @@ +# Integrations: Overview + +Integrations allow any provisioned mapt target to register itself as a CI system agent +at boot, without manual setup. The integration is injected as a shell or PowerShell script +into the cloud-init `write_files` section. + +Three services are supported — each has its own spec: +- `specs/integrations/github-actions.md` — GitHub Actions self-hosted runner +- `specs/integrations/cirrus-ci.md` — Cirrus CI persistent worker +- `specs/integrations/gitlab.md` — GitLab runner (uses Pulumi for registration) + +--- + +## Shared Interface + +**Package:** `github.com/redhat-developer/mapt/pkg/integrations` + +### `IntegrationConfig` + +```go +type IntegrationConfig interface { + GetUserDataValues() *UserDataValues // nil = integration disabled + GetSetupScriptTemplate() string // embedded shell/PS1 template string +} +``` + +Every service implementation implements this interface. Returning `nil` from +`GetUserDataValues()` is the zero-value — it means the integration was not configured +and `GetIntegrationSnippet` returns an empty string. 
+ +### `UserDataValues` + +```go +type UserDataValues struct { + CliURL string // download URL for the runner binary + User string // OS username — set automatically by GetIntegrationSnippet + Name string // runner/worker name (set to mCtx.RunID()) + Token string // registration/auth token + Labels string // comma-separated labels or key=value pairs + Port string // listen port (Cirrus only) + RepoURL string // repository or GitLab instance URL + Executor string // executor type (GitLab only) +} +``` + +Not all fields are used by every service — see the per-service spec for which fields +are populated. + +--- + +## Shared Functions + +### `GetIntegrationSnippet` + +```go +func GetIntegrationSnippet(intCfg IntegrationConfig, username string) (*string, error) +``` + +Renders the service's embedded script template with `UserDataValues`. Sets `User` from +`username` before rendering. Returns an empty string (not an error) when +`GetUserDataValues()` returns nil. + +### `GetIntegrationSnippetAsCloudInitWritableFile` + +```go +func GetIntegrationSnippetAsCloudInitWritableFile(intCfg IntegrationConfig, username string) (*string, error) +``` + +Same as `GetIntegrationSnippet` but indents every line by 6 spaces, ready to embed as +a `write_files` entry in a cloud-init YAML: + +```yaml +write_files: + - content: | + #!/bin/bash + # rendered snippet here — each line indented 6 spaces +``` + +--- + +## Config Flow + +Integration args enter via `ContextArgs` at `mc.Init()` time, which calls each package's +`Init()` to store them as package-level state: + +```go +// Caller sets one of (mutually exclusive in practice, but not validated): +mCtxArgs.GHRunnerArgs = &github.GithubRunnerArgs{...} +mCtxArgs.CirrusPWArgs = &cirrus.PersistentWorkerArgs{...} +mCtxArgs.GLRunnerArgs = &gitlab.GitLabRunnerArgs{...} + +// mc.Init() calls: +github.Init(ca.GHRunnerArgs) // nil-safe; sets package-level runnerArgs +cirrus.Init(ca.CirrusPWArgs) +gitlab.Init(ca.GLRunnerArgs) +``` + +Cloud-config 
builders then retrieve via `.GetRunnerArgs()` or +`.GetIntegrationConfig()` and pass the result to `GetIntegrationSnippet`. + +--- + +## Usage Pattern in a Cloud-Config Builder + +```go +// In pkg/target/host//.go: +snippet, err := integrations.GetIntegrationSnippetAsCloudInitWritableFile( + github.GetRunnerArgs(), // returns nil if not configured → empty snippet + username, +) +// Embed snippet into the cloud-init write_files section +``` + +--- + +## Known Gaps + +- No validation that at most one integration is configured (multiple could be set simultaneously) +- Runner versions are compile-time constants; upgrading requires a full rebuild and release +- The GitLab runner integration does not appear in the Tekton task templates (verify) + +--- + +## When to Extend + +Add a new file under `specs/integrations/` when: +- Adding a new CI system (e.g. Jenkins, TeamCity) +- Making runner versions runtime-configurable instead of compile-time +- Adding support for runner groups or additional registration parameters diff --git a/specs/integrations/tekton-tasks.md b/specs/integrations/tekton-tasks.md new file mode 100644 index 000000000..a98fe5c1c --- /dev/null +++ b/specs/integrations/tekton-tasks.md @@ -0,0 +1,61 @@ +# Spec: Tekton Task Bundles + +## Context +mapt ships a set of Tekton Task definitions for use in Tekton Pipelines. These allow CI pipelines +running on OpenShift/Kubernetes to dynamically provision and destroy remote targets as pipeline steps. + +Key files: +- `tkn/template/*.yaml` — source templates with `` and `` placeholders +- `tkn/*.yaml` — rendered task files (generated by `make tkn-update`) +- `Makefile` targets: `tkn-update`, `tkn-push` + +The bundle is published to `quay.io/redhat-developer/mapt:-tkn` as an OCI artifact +using the `tkn bundle push` command. + +## Problem +This feature is implemented. This spec documents the generation process and current task coverage. 
+ +## Requirements +- [ ] Template files in `tkn/template/` define tasks parametrically (``, ``) +- [ ] `make tkn-update IMG=... VERSION=...` renders all templates to `tkn/` using `sed` +- [ ] `make tkn-push` bundles all rendered tasks and pushes to the OCI registry +- [ ] Each target has a corresponding Tekton task with `create` and `destroy` steps +- [ ] Task parameters mirror the CLI flags for the corresponding `mapt` subcommand +- [ ] Tasks use the mapt container image and pass `--serverless` flag for role-based credentials + +## Current Task Coverage +| Task file | Target | +|-----------|--------| +| `infra-aws-rhel.yaml` | AWS RHEL host | +| `infra-aws-rhel-ai.yaml` | AWS RHEL AI host | +| `infra-aws-windows-server.yaml` | AWS Windows Server host | +| `infra-aws-fedora.yaml` | AWS Fedora host | +| `infra-aws-mac.yaml` | AWS Mac host | +| `infra-aws-kind.yaml` | AWS Kind cluster | +| `infra-aws-ocp-snc.yaml` | AWS OpenShift SNC | +| `infra-azure-aks.yaml` | Azure AKS | +| `infra-azure-rhel.yaml` | Azure RHEL host | +| `infra-azure-rhel-ai.yaml` | Azure RHEL AI host | +| `infra-azure-fedora.yaml` | Azure Linux/Fedora host | +| `infra-azure-windows-desktop.yaml` | Azure Windows Desktop | + +## Out of Scope +- GitHub Actions workflow files (`.github/workflows/`) — those are for mapt's own CI, not for consumers +- Direct CLI usage (that is the primary usage documented in `docs/`) + +## Affected Areas +- `tkn/template/` — source templates +- `tkn/` — generated (do not edit directly) +- `Makefile` — `tkn-update` and `tkn-push` targets +- `.github/workflows/tkn-bundle.yaml` — CI workflow that runs `tkn-push` + +## Known Gaps / Improvement Ideas +- Azure Kind task is missing from the bundle (no `infra-azure-kind.yaml` template) +- AWS Mac Pool service has no Tekton task +- Task parameters are not validated beyond what Tekton's type system offers (string/array) +- No Tekton task for `aws mac-pool request` / `release` operations + +## Acceptance Criteria +- `make 
tkn-update IMG=quay.io/redhat-developer/mapt:v1.0.0 VERSION=1.0.0` regenerates `tkn/*.yaml` +- `make tkn-push` successfully pushes the bundle to the registry +- A Tekton Pipeline can reference the bundled tasks and successfully provision/destroy a target diff --git a/specs/project-context.md b/specs/project-context.md new file mode 100644 index 000000000..cae3402e8 --- /dev/null +++ b/specs/project-context.md @@ -0,0 +1,299 @@ +# mapt — Project Context + +## What This Project Is + +mapt (Multi Architecture Provisioning Tool) is a Go CLI that provisions ephemeral compute environments +across AWS and Azure using the Pulumi Automation API. It is used primarily by CI/CD pipelines that +need on-demand remote machines of specific OS/arch combinations. + +Key design goals: +- **Cost savings**: prefer spot instances with cross-region best-bid selection +- **Speed**: use AMI fast-launch, root volume replacement, pre-baked images +- **Safety**: self-destruct via serverless scheduled tasks (timeout mode) +- **Integration**: emit connection details (host, username, key/password) as output files consumed by CI systems + +## Repository Layout + +``` +cmd/mapt/cmd/ CLI commands (Cobra), one file per target + params/ Shared flag definitions, Add*Flags helpers, *Args() readers + — see specs/cmd/params.md + aws/hosts/ AWS host subcommands (rhel, windows, fedora, mac, rhelai) + aws/services/ AWS service subcommands (eks, kind, mac-pool, snc) + azure/hosts/ Azure host subcommands (rhel, windows, linux, rhelai) + azure/services/ Azure service subcommands (aks, kind) + +pkg/manager/ Pulumi Automation API wrapper + context/ Context type — carries project/run metadata, integrations + credentials/ Provider credential helpers + +pkg/provider/ + api/ Shared API types and interfaces (ComputeRequest, SpotArgs, SpotSelector, ComputeSelector, CloudConfig) + — see specs/api/provider-interfaces.md + aws/ + action/ Entry points per target: Create(), Destroy() orchestrate stacks + modules/ Reusable 
Pulumi stack components + allocation/ Spot vs on-demand region/AZ selection + ami/ AMI copy + fast-launch + bastion/ Bastion host for airgap scenarios + ec2/compute/ EC2 instance resource + iam/ IAM roles/policies + mac/ Mac dedicated host + machine lifecycle + network/ Standard and airgap VPC/subnet/LB + serverless/ ECS Fargate scheduled self-destruct + spot/ Best-spot-option Pulumi stack + data/ AWS SDK read-only queries (AMI, AZ, spot price, etc.) + services/ Low-level Pulumi resource wrappers (keypair, SG, S3, SSM, VPC) + azure/ + action/ Entry points per target + modules/ Azure network, VM, allocation + data/ Azure SDK queries + services/ Azure Pulumi resource wrappers + util/ Shared: command readiness, output writing, security, windows helpers + +pkg/integrations/ CI system integration snippets + github/ GitHub Actions self-hosted runner + cirrus/ Cirrus CI persistent worker + gitlab/ GitLab runner + +pkg/target/ Cloud-init / userdata builders per OS target + host/rhel/ RHEL cloud-config (base + SNC variant) + host/fedora/ Fedora cloud-config + host/rhelai/ RHEL AI API wrapper + host/windows-server/ Windows PowerShell userdata + service/kind/ Kind cloud-config + service/snc/ OpenShift SNC cloud-config + profile deployment + profile/ SNC profiles: virtualization, serverless, servicemesh + +pkg/util/ Generic utilities (cache, cloud-init, file, logging, maps, network, slices) + +tkn/ Tekton Task YAML files (generated from tkn/template/ by make tkn-update) +docs/ User-facing documentation per target +specs/ Developer/contributor artifacts + project-context.md Project knowledge base (this file) + features/ Feature specifications +``` + +## Key Types + +```go +// manager/context.ContextArgs — input to every action Create()/Destroy() +type ContextArgs struct { + ProjectName string + BackedURL string // "s3://bucket/path" or "file:///local/path" + ResultsOutput string // directory where output files are written + Serverless bool // use role-based credentials (ECS 
task context) + ForceDestroy bool + KeepState bool + Tags map[string]string + GHRunnerArgs *github.GithubRunnerArgs // optional integration + CirrusPWArgs *cirrus.PersistentWorkerArgs + GLRunnerArgs *gitlab.GitLabRunnerArgs +} + +// manager.Stack — describes a Pulumi stack to run +type Stack struct { + ProjectName string + StackName string + BackedURL string + DeployFunc pulumi.RunFunc + ProviderCredentials credentials.ProviderCredentials +} + +// provider/aws/modules/allocation.AllocationResult — result of spot/on-demand selection +type AllocationResult struct { + Region *string + AZ *string + SpotPrice *float64 // nil if on-demand + InstanceTypes []string +} +``` + +## Module Reuse Contract + +**This is the most important architectural rule in mapt.** + +Logic that exists in a module MUST be reused, never reimplemented. The layers are: + +- `modules/` — reusable Pulumi stack components. Always call these; never inline their logic into an action. +- `services/` — low-level Pulumi resource wrappers. Always use these; never call Pulumi provider resources directly from an action. +- `data/` — read-only cloud API queries. Always use these; never call AWS/Azure SDKs directly from an action. +- `action/` — the only layer allowed to contain orchestration logic specific to a single target. + +When writing a spec or implementing a feature, explicitly list which existing modules are called +(Must Reuse) separately from which new files are created (Must Create). This is the distinction +the spec template enforces. + +### AWS EC2 Host — Mandatory Module Sequence + +Every AWS EC2-based host target calls these modules in this order. Deviation requires justification. 
+ +**`Create()` function:** +``` +mc.Init(mCtxArgs, aws.Provider()) +allocation.Allocation(mCtx, &AllocationArgs{...}) // spot or on-demand +r.createMachine() | r.createAirgapMachine() +``` + +**`deploy()` Pulumi RunFunc — always in this order:** +``` +amiSVC.GetAMIByName() // AMI lookup +network.Create() // VPC, subnet, IGW, optional LB, optional airgap +keypair.KeyPairRequest.Create() // TLS keypair → export -id_rsa +securityGroup.SGRequest.Create() // security group with ingress rules +.Generate() // cloud-init / userdata +compute.ComputeRequest.NewCompute() // EC2 instance +serverless.OneTimeDelayedTask() // only when Timeout != "" +c.Readiness() // remote command readiness check +``` + +**`Destroy()` function — always in this order:** +``` +aws.DestroyStack() +spot.Destroy() guarded by spot.Exist() // only if spot was used +amiCopy.Destroy() guarded by amiCopy.Exist() // only if AMI copy was needed (Windows) +aws.CleanupState() +``` + +**`manageResults()` function:** +``` +bastion.WriteOutputs() // only when airgap=true +output.Write() // always — writes host/username/key files +``` + +**Naming — non-negotiable:** +``` +resourcesUtil.GetResourceName(prefix, componentID, suffix) // all resource names +mCtx.StackNameByProject(stackName) // all Pulumi stack names +``` + +### AWS EC2 Host — Files to Create (only these) + +For each new AWS EC2 target, exactly these files are created — everything else is reused: + +``` +pkg/provider/aws/action//.go // Args struct, Create, Destroy, deploy, manageResults, securityGroups +pkg/provider/aws/action//constants.go // stackName, componentID, AMI regex, disk size, ports +pkg/target/host// // cloud-config or userdata builder +cmd/mapt/cmd/aws/hosts/.go // Cobra create/destroy subcommands +tkn/template/infra-aws-.yaml // Tekton task template +``` + +### Azure VM Host — Mandatory Module Sequence + +**`Create()` function:** +``` +mc.Init(mCtxArgs, azure.Provider()) +allocation.Allocation(mCtx, &AllocationArgs{...}) // azure spot 
or on-demand +``` + +**`deploy()` Pulumi RunFunc:** +``` +azure resource group +azure/modules/network.Create() // VNet, subnet, NIC, optional public IP +keypair or password generation +azure/services/network/security-group.SGRequest.Create() +virtualmachine.NewVM() // Azure VM resource +readiness check via remote command +``` + +**`Destroy()` function:** +``` +azure.DestroyStack() +azure.CleanupState() +``` + +### Adding a New AWS Host Target + +1. **Args struct** in `pkg/provider/aws/action//.go` + - Embed `*cr.ComputeRequestArgs`, `*spotTypes.SpotArgs` + - Include `Prefix`, `Airgap bool`, `Timeout string` + +2. **`Create()`**: `mc.Init` → `allocation.Allocation` → `createMachine` or `createAirgapMachine` + +3. **`deploy()`**: follow the mandatory module sequence above exactly + +4. **`Destroy()`**: follow the mandatory destroy sequence above exactly + +5. **`manageResults()`**: `bastion.WriteOutputs` (if airgap) then `output.Write` + +6. **Cobra command** in `cmd/mapt/cmd/aws/hosts/.go` + - Subcommands: `create`, `destroy`; bind all flags + +7. **Tekton template** in `tkn/template/infra-aws-.yaml` + +### Airgap Orchestration + +Two-phase stack update on the same stack: +1. `airgapPhaseConnectivity = network.ON` — creates NAT gateway, bootstraps machine +2. `airgapPhaseConnectivity = network.OFF` — removes NAT gateway, machine loses egress + +### Spot vs On-Demand (Allocation Module) + +`allocation.Allocation()` is the single entry point. It: +- If `Spot.Spot == true`: creates/reuses a `spotOption` Pulumi stack that selects best region + AZ + price +- If on-demand: uses the provider's default region, iterates AZs until instance types are available + +The spot stack is idempotent — if it already exists, outputs are reused (region stays stable across re-creates). + +### Serverless Self-Destruct + +`serverless.OneTimeDelayedTask()` creates an AWS EventBridge Scheduler + Fargate task that runs +`mapt destroy` at `now + timeout`. 
Requires a remote BackedURL (not `file://`). + +### Integration Snippets + +Each integration (`github`, `cirrus`, `gitlab`) implements `IntegrationConfig`: +- `GetUserDataValues()` returns token, repo URL, labels, etc. +- `GetSetupScriptTemplate()` returns an embedded shell/PowerShell script template +- Called from cloud-config / userdata builders in `pkg/target/` + +### SNC Profiles + +Profiles are registered in `pkg/target/service/snc/profile/profile.go`: +- `virtualization` — enables nested virt on the compute instance +- `serverless-serving`, `serverless-eventing`, `serverless` — Knative +- `servicemesh` — OpenShift Service Mesh 3 + +`profile.RequireNestedVirt()` gates the instance type selection. +`profile.Deploy()` installs operators/CRDs via the Pulumi Kubernetes provider post-cluster-ready. + +## Build & Test Commands + +```bash +make build # compile to out/mapt +make install # go install to $GOPATH/bin +make test # go test -race ./pkg/... ./cmd/... +make lint # golangci-lint +make fmt # gofmt +make check # build + test + lint + renovate-check +make oci-build # container image (amd64 + arm64) +make tkn-update # regenerate tkn/*.yaml from templates +make tkn-push # push Tekton bundle +``` + +## Naming Conventions + +- Resource names: `resourcesUtil.GetResourceName(prefix, componentID, suffix)` + e.g. `GetResourceName("main", "aws-rhel", "sg")` → `"main-aws-rhel-sg"` +- Stack names: `mCtx.StackNameByProject(stackName)` → `"-"` +- Output keys: `"-host"`, `"-username"`, `"-id_rsa"`, `"-userpassword"` +- Constants: defined in `constants.go` / `contants.go` next to the action file + +## State Backend + +Pulumi state is stored at `BackedURL`: +- Remote: `s3://bucket/prefix` (required for serverless timeout and mac pool) +- Local: `file:///path/to/dir` (dev/testing only; incompatible with timeout) + +After `Destroy`, `aws.CleanupState()` removes the S3 state files unless `KeepState` is set. 
+ +## Dependencies + +- **Pulumi Automation API** (`github.com/pulumi/pulumi/sdk/v3/go/auto`) — all infra is managed via inline stacks +- **AWS SDK v2** — read-only queries (spot prices, AMI lookup, AZ enumeration) +- **Azure SDK for Go** — read-only queries (VM sizes, image refs, locations) +- **Cobra + Viper** — CLI parsing +- **go-playground/validator** — struct validation before stack creation +- **logrus** — structured logging +- **freecache** — in-process caching for expensive cloud API calls From 6c686768848942e68274e98f5dbb191d90dc1c8a Mon Sep 17 00:00:00 2001 From: Adrian Riobo Date: Wed, 25 Mar 2026 15:59:17 +0100 Subject: [PATCH 2/2] feat(aws): Optional service endpoints This commit was implemented based in spec to define optoinal vpc endpoints for AWS, take into consideration this is a breaking change as when endpoints were introduced they were created by default, now behavior is the other way around unless they are explicit in params they will no be created Co-authored-by: Claude Signed-off-by: Adrian Riobo --- cmd/mapt/cmd/aws/hosts/fedora.go | 17 ++- cmd/mapt/cmd/aws/hosts/rhel.go | 23 ++-- cmd/mapt/cmd/aws/hosts/rhelai.go | 17 ++- cmd/mapt/cmd/aws/hosts/windows.go | 1 + cmd/mapt/cmd/aws/params/params.go | 3 + cmd/mapt/cmd/aws/services/eks.go | 3 + cmd/mapt/cmd/aws/services/kind.go | 3 + cmd/mapt/cmd/aws/services/snc.go | 3 + cmd/mapt/cmd/params/params.go | 31 +++-- go.sum | 7 - pkg/provider/aws/action/eks/eks.go | 4 + pkg/provider/aws/action/fedora/fedora.go | 16 ++- pkg/provider/aws/action/kind/kind.go | 3 + pkg/provider/aws/action/rhel-ai/rhelai.go | 13 +- pkg/provider/aws/action/rhel/rhel.go | 4 + pkg/provider/aws/action/snc/snc.go | 3 + pkg/provider/aws/action/windows/windows.go | 4 + pkg/provider/aws/modules/network/network.go | 2 + .../aws/modules/network/standard/standard.go | 2 + .../aws/services/vpc/subnet/public.go | 128 +++++++++++------- pkg/target/host/rhelai/api.go | 1 + pkg/target/service/kind/api.go | 1 + 
pkg/target/service/snc/api.go | 1 + tkn/infra-aws-fedora.yaml | 10 ++ tkn/infra-aws-kind.yaml | 10 ++ tkn/infra-aws-ocp-snc.yaml | 10 ++ tkn/infra-aws-rhel-ai.yaml | 10 ++ tkn/infra-aws-rhel.yaml | 10 ++ tkn/infra-aws-windows-server.yaml | 10 ++ tkn/template/infra-aws-fedora.yaml | 10 ++ tkn/template/infra-aws-kind.yaml | 10 ++ tkn/template/infra-aws-ocp-snc.yaml | 10 ++ tkn/template/infra-aws-rhel-ai.yaml | 10 ++ tkn/template/infra-aws-rhel.yaml | 10 ++ tkn/template/infra-aws-windows-server.yaml | 8 ++ 35 files changed, 307 insertions(+), 101 deletions(-) diff --git a/cmd/mapt/cmd/aws/hosts/fedora.go b/cmd/mapt/cmd/aws/hosts/fedora.go index db9761859..87e6eeab9 100644 --- a/cmd/mapt/cmd/aws/hosts/fedora.go +++ b/cmd/mapt/cmd/aws/hosts/fedora.go @@ -1,6 +1,7 @@ package hosts import ( + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" "github.com/redhat-developer/mapt/pkg/provider/aws/action/fedora" @@ -60,13 +61,14 @@ func getFedoraCreate() *cobra.Command { Tags: viper.GetStringMapString(params.Tags), }, &fedora.FedoraArgs{ - Prefix: "main", - Version: viper.GetString(fedoraVersion), - Arch: viper.GetString(params.LinuxArch), - ComputeRequest: params.ComputeRequestArgs(), - Spot: params.SpotArgs(), - Timeout: viper.GetString(params.Timeout), - Airgap: viper.IsSet(airgap)}) + Prefix: "main", + Version: viper.GetString(fedoraVersion), + Arch: viper.GetString(params.LinuxArch), + ComputeRequest: params.ComputeRequestArgs(), + Spot: params.SpotArgs(), + Timeout: viper.GetString(params.Timeout), + Airgap: viper.IsSet(airgap), + ServiceEndpoints: params.NetworkServiceEndpoints()}) }, } flagSet := pflag.NewFlagSet(params.CreateCmdName, pflag.ExitOnError) @@ -78,6 +80,7 @@ func getFedoraCreate() *cobra.Command { flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc) params.AddComputeRequestFlags(flagSet) 
params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) params.AddGHActionsFlags(flagSet) params.AddCirrusFlags(flagSet) params.AddGitLabRunnerFlags(flagSet) diff --git a/cmd/mapt/cmd/aws/hosts/rhel.go b/cmd/mapt/cmd/aws/hosts/rhel.go index 3d8f6f9b6..af5d969a6 100644 --- a/cmd/mapt/cmd/aws/hosts/rhel.go +++ b/cmd/mapt/cmd/aws/hosts/rhel.go @@ -1,6 +1,7 @@ package hosts import ( + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" "github.com/redhat-developer/mapt/pkg/provider/aws/action/rhel" @@ -56,16 +57,17 @@ func getRHELCreate() *cobra.Command { Tags: viper.GetStringMapString(params.Tags), }, &rhel.RHELArgs{ - Prefix: "main", - Version: viper.GetString(params.RhelVersion), - Arch: viper.GetString(params.LinuxArch), - ComputeRequest: params.ComputeRequestArgs(), - SubsUsername: viper.GetString(params.SubsUsername), - SubsUserpass: viper.GetString(params.SubsUserpass), - ProfileSNC: viper.IsSet(params.ProfileSNC), - Spot: params.SpotArgs(), - Timeout: viper.GetString(params.Timeout), - Airgap: viper.IsSet(airgap), + Prefix: "main", + Version: viper.GetString(params.RhelVersion), + Arch: viper.GetString(params.LinuxArch), + ComputeRequest: params.ComputeRequestArgs(), + SubsUsername: viper.GetString(params.SubsUsername), + SubsUserpass: viper.GetString(params.SubsUserpass), + ProfileSNC: viper.IsSet(params.ProfileSNC), + Spot: params.SpotArgs(), + Timeout: viper.GetString(params.Timeout), + Airgap: viper.IsSet(airgap), + ServiceEndpoints: params.NetworkServiceEndpoints(), }) }, } @@ -81,6 +83,7 @@ func getRHELCreate() *cobra.Command { flagSet.Bool(params.ProfileSNC, false, params.ProfileSNCDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) params.AddGHActionsFlags(flagSet) 
params.AddCirrusFlags(flagSet) params.AddGitLabRunnerFlags(flagSet) diff --git a/cmd/mapt/cmd/aws/hosts/rhelai.go b/cmd/mapt/cmd/aws/hosts/rhelai.go index 09369bbe7..cd7557444 100644 --- a/cmd/mapt/cmd/aws/hosts/rhelai.go +++ b/cmd/mapt/cmd/aws/hosts/rhelai.go @@ -1,6 +1,7 @@ package hosts import ( + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" rhelai "github.com/redhat-developer/mapt/pkg/provider/aws/action/rhel-ai" @@ -54,13 +55,14 @@ func getRHELAICreate() *cobra.Command { Tags: viper.GetStringMapString(params.Tags), }, &apiRHELAI.RHELAIArgs{ - Prefix: "main", - Version: viper.GetString(params.RhelAIVersion), - Accelerator: viper.GetString(params.RhelAIAccelerator), - CustomAMI: viper.GetString(params.RhelAIAMICustom), - ComputeRequest: params.ComputeRequestArgs(), - Spot: params.SpotArgs(), - Timeout: viper.GetString(params.Timeout), + Prefix: "main", + Version: viper.GetString(params.RhelAIVersion), + Accelerator: viper.GetString(params.RhelAIAccelerator), + CustomAMI: viper.GetString(params.RhelAIAMICustom), + ComputeRequest: params.ComputeRequestArgs(), + Spot: params.SpotArgs(), + Timeout: viper.GetString(params.Timeout), + ServiceEndpoints: params.NetworkServiceEndpoints(), }) }, } @@ -73,6 +75,7 @@ func getRHELAICreate() *cobra.Command { flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) c.PersistentFlags().AddFlagSet(flagSet) return c } diff --git a/cmd/mapt/cmd/aws/hosts/windows.go b/cmd/mapt/cmd/aws/hosts/windows.go index 07c0b7d92..13df0389c 100644 --- a/cmd/mapt/cmd/aws/hosts/windows.go +++ b/cmd/mapt/cmd/aws/hosts/windows.go @@ -78,6 +78,7 @@ func getWindowsCreate() *cobra.Command { Spot: params.SpotArgs(), Airgap: viper.IsSet(airgap), Timeout: 
viper.GetString(params.Timeout), + ServiceEndpoints: params.NetworkServiceEndpoints(), }) }, } diff --git a/cmd/mapt/cmd/aws/params/params.go b/cmd/mapt/cmd/aws/params/params.go index 379afe760..33768f906 100644 --- a/cmd/mapt/cmd/aws/params/params.go +++ b/cmd/mapt/cmd/aws/params/params.go @@ -20,4 +20,7 @@ const ( MACFixedLocationDesc string = "if this flag is set the host will be created only on the region set by the AWS Env (AWS_DEFAULT_REGION)" MACDHID string = "dedicated-host-id" MACDHIDDesc string = "id for the dedicated host" + + ServiceEndpointsDesc = "Comma-separated list of VPC endpoints to create. " + + "Accepted values: s3, ecr, ssm. Empty = no endpoints." ) diff --git a/cmd/mapt/cmd/aws/services/eks.go b/cmd/mapt/cmd/aws/services/eks.go index 4a4c7df1e..3a3ac862b 100644 --- a/cmd/mapt/cmd/aws/services/eks.go +++ b/cmd/mapt/cmd/aws/services/eks.go @@ -1,6 +1,7 @@ package services import ( + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" awsEKS "github.com/redhat-developer/mapt/pkg/provider/aws/action/eks" @@ -78,6 +79,7 @@ func getCreateEKS() *cobra.Command { Addons: viper.GetStringSlice(paramAddons), LoadBalancerController: viper.IsSet(paramLoadBalancerController), ExcludedZoneIDs: viper.GetStringSlice(excludedZoneIDs), + ServiceEndpoints: params.NetworkServiceEndpoints(), }) }, } @@ -94,6 +96,7 @@ func getCreateEKS() *cobra.Command { flagSet.StringP(params.LinuxArch, "", params.LinuxArchDefault, params.LinuxArchDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) c.PersistentFlags().AddFlagSet(flagSet) return c } diff --git a/cmd/mapt/cmd/aws/services/kind.go b/cmd/mapt/cmd/aws/services/kind.go index fa3f79189..75e36ed59 100644 --- a/cmd/mapt/cmd/aws/services/kind.go +++ b/cmd/mapt/cmd/aws/services/kind.go @@ -3,6 +3,7 @@ 
package services import ( "fmt" + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" "github.com/redhat-developer/mapt/pkg/provider/aws/action/kind" @@ -67,6 +68,7 @@ func createKind() *cobra.Command { Version: viper.GetString(params.KindK8SVersion), Arch: viper.GetString(params.LinuxArch), Timeout: viper.GetString(params.Timeout), + ServiceEndpoints: params.NetworkServiceEndpoints(), ExtraPortMappings: extraPortMappings}); err != nil { return err } @@ -82,6 +84,7 @@ func createKind() *cobra.Command { flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) c.PersistentFlags().AddFlagSet(flagSet) return c } diff --git a/cmd/mapt/cmd/aws/services/snc.go b/cmd/mapt/cmd/aws/services/snc.go index 349524975..51cb2bb74 100644 --- a/cmd/mapt/cmd/aws/services/snc.go +++ b/cmd/mapt/cmd/aws/services/snc.go @@ -1,6 +1,7 @@ package services import ( + awsParams "github.com/redhat-developer/mapt/cmd/mapt/cmd/aws/params" params "github.com/redhat-developer/mapt/cmd/mapt/cmd/params" maptContext "github.com/redhat-developer/mapt/pkg/manager/context" openshiftsnc "github.com/redhat-developer/mapt/pkg/provider/aws/action/snc" @@ -81,6 +82,7 @@ func createSNC() *cobra.Command { Arch: viper.GetString(params.LinuxArch), PullSecretFile: viper.GetString(pullSecretFile), Timeout: viper.GetString(params.Timeout), + ServiceEndpoints: params.NetworkServiceEndpoints(), Profiles: profiles}); err != nil { return err } @@ -98,6 +100,7 @@ func createSNC() *cobra.Command { flagSet.StringSliceP(sncProfile, "", []string{}, sncProfileDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) + params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) c.PersistentFlags().AddFlagSet(flagSet) return c } diff 
--git a/cmd/mapt/cmd/params/params.go b/cmd/mapt/cmd/params/params.go index a5a8c598e..dc3d8b6a6 100644 --- a/cmd/mapt/cmd/params/params.go +++ b/cmd/mapt/cmd/params/params.go @@ -78,17 +78,17 @@ const ( cirrusPWLabels string = "it-cirrus-pw-labels" cirrusPWLabelsDesc string = "additional labels to use on the persistent worker (--it-cirrus-pw-labels key1=value1,key2=value2)" - glRunnerToken string = "glrunner-token" - glRunnerTokenDesc string = "GitLab Personal Access Token with api scope" - glRunnerProjectID string = "glrunner-project-id" + glRunnerToken string = "glrunner-token" + glRunnerTokenDesc string = "GitLab Personal Access Token with api scope" + glRunnerProjectID string = "glrunner-project-id" glRunnerProjectIDDesc string = "GitLab project ID for project runner registration" - glRunnerGroupID string = "glrunner-group-id" - glRunnerGroupIDDesc string = "GitLab group ID for group runner registration (alternative to --glrunner-project-id)" - glRunnerURL string = "glrunner-url" - glRunnerURLDesc string = "GitLab instance URL (e.g., https://gitlab.com, https://gitlab.example.com)" - glRunnerURLDefault string = "https://gitlab.com" - glRunnerTags string = "glrunner-tags" - glRunnerTagsDesc string = "List of tags separated by comma to be added to the self-hosted runner" + glRunnerGroupID string = "glrunner-group-id" + glRunnerGroupIDDesc string = "GitLab group ID for group runner registration (alternative to --glrunner-project-id)" + glRunnerURL string = "glrunner-url" + glRunnerURLDesc string = "GitLab instance URL (e.g., https://gitlab.com, https://gitlab.example.com)" + glRunnerURLDefault string = "https://gitlab.com" + glRunnerTags string = "glrunner-tags" + glRunnerTagsDesc string = "List of tags separated by comma to be added to the self-hosted runner" //RHEL SubsUsername string = "rh-subscription-username" @@ -130,6 +130,9 @@ const ( KindExtraPortMappings = "extra-port-mappings" KindExtraPortMappingsDesc = "Additional port mappings for the Kind cluster. 
Value should be a JSON array of objects with containerPort, hostPort, and protocol properties. Example: '[{\"containerPort\": 8080, \"hostPort\": 8080, \"protocol\": \"TCP\"}]'" + // Network + ServiceEndpoints = "service-endpoints" + // Spot spot = "spot" spotDesc = "if spot is set the spot prices across all regions will be checked and machine will be started on best spot option (price / eviction)" @@ -143,6 +146,14 @@ const ( spotExcludedHostedZonesDesc = "Comma-separated list of zone IDs to exclude from spot selection" ) +func AddNetworkFlags(fs *pflag.FlagSet, desc string) { + fs.StringSliceP(ServiceEndpoints, "", []string{}, desc) +} + +func NetworkServiceEndpoints() []string { + return viper.GetStringSlice(ServiceEndpoints) +} + func AddSpotFlags(fs *pflag.FlagSet) { fs.Bool(spot, false, spotDesc) fs.StringP(spotTolerance, "", spotToleranceDefault, spotToleranceDesc) diff --git a/go.sum b/go.sum index 61f63dc56..50971d056 100644 --- a/go.sum +++ b/go.sum @@ -553,17 +553,10 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -<<<<<<< HEAD -google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 h1:tu/dtnW1o3wfaxCOjSLn5IRX4YDcJrtlpzYkhHhGaC4= -google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171/go.mod h1:M5krXqk4GhBKvB596udGL3UyjL4I1+cTbK0orROM9ng= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 h1:ggcbiqK8WWh6l1dnltU4BgWGIGo+EVYxCaAPih/zQXQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= -======= google.golang.org/genproto/googleapis/api v0.0.0-20260319201613-d00831a3d3e7 
h1:41r6JMbpzBMen0R/4TZeeAmGXSJC7DftGINUodzTkPI= google.golang.org/genproto/googleapis/api v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:EIQZ5bFCfRQDV4MhRle7+OgjNtZ6P1PiZBgAKuxXu/Y= google.golang.org/genproto/googleapis/rpc v0.0.0-20260319201613-d00831a3d3e7 h1:ndE4FoJqsIceKP2oYSnUZqhTdYufCYYkqwtFzfrhI7w= google.golang.org/genproto/googleapis/rpc v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= ->>>>>>> 00d223cf (fix(deps): update all dependencies) google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= diff --git a/pkg/provider/aws/action/eks/eks.go b/pkg/provider/aws/action/eks/eks.go index ffeee46ba..5bdb3c608 100644 --- a/pkg/provider/aws/action/eks/eks.go +++ b/pkg/provider/aws/action/eks/eks.go @@ -46,6 +46,7 @@ type EKSArgs struct { Addons []string LoadBalancerController bool ExcludedZoneIDs []string + ServiceEndpoints []string } type eksRequest struct { @@ -61,6 +62,7 @@ type eksRequest struct { allocationData *allocation.AllocationResult availabilityZones []string excludedZoneIDs []string + serviceEndpoints []string } func (r *eksRequest) validate() error { @@ -90,6 +92,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *EKSArgs) (err error) { loadBalancerController: &args.LoadBalancerController, addons: args.Addons, excludedZoneIDs: args.ExcludedZoneIDs, + serviceEndpoints: args.ServiceEndpoints, } if args.Spot != nil { r.spot = args.Spot.Spot @@ -158,6 +161,7 @@ func (r *eksRequest) deployer(ctx *pulumi.Context) error { Region: *r.allocationData.Region, NatGatewayMode: &network.NatGatewayModeSingle, MapPublicIp: true, + ServiceEndpoints: r.serviceEndpoints, }.CreateNetwork(ctx) if err != nil { return err diff --git a/pkg/provider/aws/action/fedora/fedora.go b/pkg/provider/aws/action/fedora/fedora.go index 62c9ac4cc..383c58522 
100644 --- a/pkg/provider/aws/action/fedora/fedora.go +++ b/pkg/provider/aws/action/fedora/fedora.go @@ -39,6 +39,7 @@ type FedoraArgs struct { ComputeRequest *cr.ComputeRequestArgs Spot *spotTypes.SpotArgs Airgap bool + ServiceEndpoints []string // If timeout is set a severless scheduled task will be created to self destroy the resources Timeout string } @@ -50,6 +51,7 @@ type fedoraRequest struct { arch *string spot bool timeout *string + serviceEndpoints []string allocationData *allocation.AllocationResult airgap *bool // internal management @@ -80,12 +82,13 @@ func Create(mCtxArgs *mc.ContextArgs, args *FedoraArgs) (err error) { // Compose request prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main") r := fedoraRequest{ - mCtx: mCtx, - prefix: &prefix, - version: &args.Version, - arch: &args.Arch, - timeout: &args.Timeout, - airgap: &args.Airgap} + mCtx: mCtx, + prefix: &prefix, + version: &args.Version, + arch: &args.Arch, + timeout: &args.Timeout, + serviceEndpoints: args.ServiceEndpoints, + airgap: &args.Airgap} if args.Spot != nil { r.spot = args.Spot.Spot } @@ -196,6 +199,7 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error { CreateLoadBalancer: r.spot, Airgap: *r.airgap, AirgapPhaseConnectivity: r.airgapPhaseConnectivity, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/action/kind/kind.go b/pkg/provider/aws/action/kind/kind.go index 7dd560a9b..8f89b3697 100644 --- a/pkg/provider/aws/action/kind/kind.go +++ b/pkg/provider/aws/action/kind/kind.go @@ -34,6 +34,7 @@ type kindRequest struct { arch *string spot bool timeout *string + serviceEndpoints []string allocationData *allocation.AllocationResult extraPortMappings []utilKind.PortMapping } @@ -63,6 +64,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *utilKind.KindArgs) (kr *utilKind.Kin version: &args.Version, arch: &args.Arch, timeout: &args.Timeout, + serviceEndpoints: args.ServiceEndpoints, extraPortMappings: args.ExtraPortMappings} if 
args.Spot != nil { r.spot = args.Spot.Spot @@ -155,6 +157,7 @@ func (r *kindRequest) deploy(ctx *pulumi.Context) error { Region: *r.allocationData.Region, AZ: *r.allocationData.AZ, CreateLoadBalancer: r.allocationData.SpotPrice != nil, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/action/rhel-ai/rhelai.go b/pkg/provider/aws/action/rhel-ai/rhelai.go index 9c3dde761..231bd421a 100644 --- a/pkg/provider/aws/action/rhel-ai/rhelai.go +++ b/pkg/provider/aws/action/rhel-ai/rhelai.go @@ -37,6 +37,7 @@ type rhelAIRequest struct { arch *string spot bool timeout *string + serviceEndpoints []string allocationData *allocation.AllocationResult } @@ -65,11 +66,12 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) { } prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main") r := rhelAIRequest{ - mCtx: mCtx, - prefix: &prefix, - amiName: &amiName, - arch: &args.Arch, - timeout: &args.Timeout} + mCtx: mCtx, + prefix: &prefix, + amiName: &amiName, + arch: &args.Arch, + timeout: &args.Timeout, + serviceEndpoints: args.ServiceEndpoints} if args.Spot != nil { r.spot = args.Spot.Spot } @@ -158,6 +160,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error { Region: *r.allocationData.Region, AZ: *r.allocationData.AZ, CreateLoadBalancer: true, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/action/rhel/rhel.go b/pkg/provider/aws/action/rhel/rhel.go index d351a52a5..1ca325105 100644 --- a/pkg/provider/aws/action/rhel/rhel.go +++ b/pkg/provider/aws/action/rhel/rhel.go @@ -41,6 +41,7 @@ type RHELArgs struct { ProfileSNC bool Spot *spotTypes.SpotArgs Airgap bool + ServiceEndpoints []string // If timeout is set a severless scheduled task will be created to self destroy the resources Timeout string } @@ -55,6 +56,7 @@ type rhelRequest struct { subsUserpass *string profileSNC *bool timeout *string + serviceEndpoints []string allocationData 
*allocation.AllocationResult airgap *bool // internal management @@ -93,6 +95,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *RHELArgs) (err error) { subsUsername: &args.SubsUsername, subsUserpass: &args.SubsUserpass, profileSNC: &args.ProfileSNC, + serviceEndpoints: args.ServiceEndpoints, airgap: &args.Airgap} if args.Spot != nil { r.spot = args.Spot.Spot @@ -201,6 +204,7 @@ func (r *rhelRequest) deploy(ctx *pulumi.Context) error { CreateLoadBalancer: r.allocationData.SpotPrice != nil, Airgap: *r.airgap, AirgapPhaseConnectivity: r.airgapPhaseConnectivity, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/action/snc/snc.go b/pkg/provider/aws/action/snc/snc.go index b68e76d01..c7672c556 100644 --- a/pkg/provider/aws/action/snc/snc.go +++ b/pkg/provider/aws/action/snc/snc.go @@ -43,6 +43,7 @@ type openshiftSNCRequest struct { spot bool timeout *string pullSecretFile *string + serviceEndpoints []string allocationData *allocation.AllocationResult profiles []string } @@ -79,6 +80,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiSNC.SNCArgs) (_ *apiSNC.SNCResult arch: &args.Arch, pullSecretFile: &args.PullSecretFile, timeout: &args.Timeout, + serviceEndpoints: args.ServiceEndpoints, profiles: args.Profiles} if args.Spot != nil { r.spot = args.Spot.Spot @@ -174,6 +176,7 @@ func (r *openshiftSNCRequest) deploy(ctx *pulumi.Context) error { AZ: *r.allocationData.AZ, CreateLoadBalancer: r.allocationData.SpotPrice != nil, Airgap: false, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/action/windows/windows.go b/pkg/provider/aws/action/windows/windows.go index 6be9c264f..6860bb6e2 100644 --- a/pkg/provider/aws/action/windows/windows.go +++ b/pkg/provider/aws/action/windows/windows.go @@ -49,6 +49,7 @@ type WindowsServerArgs struct { ComputeRequest *cr.ComputeRequestArgs Spot *spotTypes.SpotArgs Airgap bool + ServiceEndpoints []string // If timeout is set a severless 
scheduled task will be created to self destroy the resources Timeout string } @@ -65,6 +66,7 @@ type windowsServerRequest struct { spot bool timeout *string + serviceEndpoints []string allocationData *allocation.AllocationResult airgap *bool // internal management @@ -111,6 +113,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *WindowsServerArgs) (err error) { amiKeepCopy: &args.AMIKeepCopy, amiLang: &args.AMILang, timeout: &args.Timeout, + serviceEndpoints: args.ServiceEndpoints, airgap: &args.Airgap} if args.Spot != nil { r.spot = args.Spot.Spot @@ -251,6 +254,7 @@ func (r *windowsServerRequest) deploy(ctx *pulumi.Context) error { CreateLoadBalancer: r.spot, Airgap: *r.airgap, AirgapPhaseConnectivity: r.airgapPhaseConnectivity, + ServiceEndpoints: r.serviceEndpoints, }) if err != nil { return err diff --git a/pkg/provider/aws/modules/network/network.go b/pkg/provider/aws/modules/network/network.go index f8d2800de..58d75eeb4 100644 --- a/pkg/provider/aws/modules/network/network.go +++ b/pkg/provider/aws/modules/network/network.go @@ -36,6 +36,7 @@ type NetworkArgs struct { CreateLoadBalancer bool Airgap bool AirgapPhaseConnectivity Connectivity + ServiceEndpoints []string } type NetworkResult struct { @@ -105,6 +106,7 @@ func standardNetwork(ctx *pulumi.Context, mCtx *mc.Context, args *NetworkArgs) ( AvailabilityZones: []string{args.AZ}, PublicSubnetsCIDRs: []string{cidrPublicSN}, NatGatewayMode: &ns.NatGatewayModeNone, + ServiceEndpoints: args.ServiceEndpoints, }.CreateNetwork(ctx) if err != nil { return nil, err diff --git a/pkg/provider/aws/modules/network/standard/standard.go b/pkg/provider/aws/modules/network/standard/standard.go index c1488aaf5..f55278376 100644 --- a/pkg/provider/aws/modules/network/standard/standard.go +++ b/pkg/provider/aws/modules/network/standard/standard.go @@ -73,6 +73,7 @@ type NetworkRequest struct { NatGatewayMode *NatGatewayMode PublicToIntra *bool MapPublicIp bool + ServiceEndpoints []string } type NetworkResources struct { @@ 
-173,6 +174,7 @@ func (r NetworkRequest) managePublicSubnets(mCtx *mc.Context, vpc *ec2.Vpc, Name: fmt.Sprintf("%s%s%d", namePrefix, r.Name, i), AddNatGateway: r.checkIfNatGatewayRequired(i), MapPublicIp: r.MapPublicIp, + ServiceEndpoints: r.ServiceEndpoints, } subnet, err := publicSNRequest.Create(ctx, mCtx) if err != nil { diff --git a/pkg/provider/aws/services/vpc/subnet/public.go b/pkg/provider/aws/services/vpc/subnet/public.go index 0135a260f..decfd77ac 100644 --- a/pkg/provider/aws/services/vpc/subnet/public.go +++ b/pkg/provider/aws/services/vpc/subnet/public.go @@ -18,6 +18,7 @@ type PublicSubnetRequest struct { Name string AddNatGateway bool MapPublicIp bool + ServiceEndpoints []string } type PublicSubnetResources struct { @@ -91,7 +92,7 @@ func (r PublicSubnetRequest) Create(ctx *pulumi.Context, mCtx *mc.Context) (*Pub return nil, err } // Manage endpoints - err = endpoints(ctx, r.Name, r.Region, r.VPC, sn, rt) + err = endpoints(ctx, r.ServiceEndpoints, r.Name, r.Region, r.VPC, sn, rt) if err != nil { return nil, err } @@ -104,58 +105,87 @@ func (r PublicSubnetRequest) Create(ctx *pulumi.Context, mCtx *mc.Context) (*Pub nil } -func endpoints(ctx *pulumi.Context, name, region string, +var validEndpoints = map[string]bool{"s3": true, "ecr": true, "ssm": true} + +func endpoints(ctx *pulumi.Context, endpointList []string, name, region string, vpc *ec2.Vpc, sn *ec2.Subnet, rt *ec2.RouteTable) error { - sg, err := ec2.NewSecurityGroup(ctx, - fmt.Sprintf("%s-%s", "endpoints", name), - &ec2.SecurityGroupArgs{ - VpcId: vpc.ID(), - Ingress: ec2.SecurityGroupIngressArray{ - &ec2.SecurityGroupIngressArgs{ - Protocol: pulumi.String("tcp"), - FromPort: pulumi.Int(443), - ToPort: pulumi.Int(443), - CidrBlocks: pulumi.StringArray{vpc.CidrBlock}, - }, - }, - }) - if err != nil { - return err + if len(endpointList) == 0 { + return nil } - _, err = ec2.NewVpcEndpoint(ctx, - fmt.Sprintf("%s-%s", "endpoint-s3", name), - &ec2.VpcEndpointArgs{ - VpcId: vpc.ID(), - ServiceName: 
pulumi.Sprintf("com.amazonaws.%s.s3", region), - VpcEndpointType: pulumi.String("Gateway"), - RouteTableIds: pulumi.StringArray{rt.ID()}, - }) - if err != nil { - return err + for _, e := range endpointList { + if !validEndpoints[e] { + return fmt.Errorf("unknown VPC endpoint %q: accepted values are s3, ecr, ssm", e) + } } - _, err = ec2.NewVpcEndpoint(ctx, - fmt.Sprintf("%s-%s", "endpoint-ecr", name), - &ec2.VpcEndpointArgs{ - VpcId: vpc.ID(), - ServiceName: pulumi.Sprintf("com.amazonaws.%s.ecr.dkr", region), - VpcEndpointType: pulumi.String("Interface"), - SubnetIds: pulumi.StringArray{sn.ID()}, - SecurityGroupIds: pulumi.StringArray{sg.ID()}, - }) - if err != nil { - return err + // Create interface-endpoint security group only when needed + needInterfaceSG := false + for _, e := range endpointList { + if e == "ecr" || e == "ssm" { + needInterfaceSG = true + break + } } - _, err = ec2.NewVpcEndpoint(ctx, - fmt.Sprintf("%s-%s", "endpoint-ssm", name), - &ec2.VpcEndpointArgs{ - VpcId: vpc.ID(), - ServiceName: pulumi.Sprintf("com.amazonaws.%s.ssm", region), - VpcEndpointType: pulumi.String("Interface"), - SubnetIds: pulumi.StringArray{sn.ID()}, - SecurityGroupIds: pulumi.StringArray{sg.ID()}, - }) - if err != nil { - return err + var sg *ec2.SecurityGroup + if needInterfaceSG { + var err error + sg, err = ec2.NewSecurityGroup(ctx, + fmt.Sprintf("%s-%s", "endpoints", name), + &ec2.SecurityGroupArgs{ + VpcId: vpc.ID(), + Ingress: ec2.SecurityGroupIngressArray{ + &ec2.SecurityGroupIngressArgs{ + Protocol: pulumi.String("tcp"), + FromPort: pulumi.Int(443), + ToPort: pulumi.Int(443), + CidrBlocks: pulumi.StringArray{vpc.CidrBlock}, + }, + }, + }) + if err != nil { + return err + } + } + for _, e := range endpointList { + switch e { + case "s3": + _, err := ec2.NewVpcEndpoint(ctx, + fmt.Sprintf("%s-%s", "endpoint-s3", name), + &ec2.VpcEndpointArgs{ + VpcId: vpc.ID(), + ServiceName: pulumi.Sprintf("com.amazonaws.%s.s3", region), + VpcEndpointType: pulumi.String("Gateway"), 
+ RouteTableIds: pulumi.StringArray{rt.ID()}, + }) + if err != nil { + return err + } + case "ecr": + _, err := ec2.NewVpcEndpoint(ctx, + fmt.Sprintf("%s-%s", "endpoint-ecr", name), + &ec2.VpcEndpointArgs{ + VpcId: vpc.ID(), + ServiceName: pulumi.Sprintf("com.amazonaws.%s.ecr.dkr", region), + VpcEndpointType: pulumi.String("Interface"), + SubnetIds: pulumi.StringArray{sn.ID()}, + SecurityGroupIds: pulumi.StringArray{sg.ID()}, + }) + if err != nil { + return err + } + case "ssm": + _, err := ec2.NewVpcEndpoint(ctx, + fmt.Sprintf("%s-%s", "endpoint-ssm", name), + &ec2.VpcEndpointArgs{ + VpcId: vpc.ID(), + ServiceName: pulumi.Sprintf("com.amazonaws.%s.ssm", region), + VpcEndpointType: pulumi.String("Interface"), + SubnetIds: pulumi.StringArray{sn.ID()}, + SecurityGroupIds: pulumi.StringArray{sg.ID()}, + }) + if err != nil { + return err + } + } } return nil } diff --git a/pkg/target/host/rhelai/api.go b/pkg/target/host/rhelai/api.go index 90e04bc96..e9de1f9d6 100644 --- a/pkg/target/host/rhelai/api.go +++ b/pkg/target/host/rhelai/api.go @@ -13,6 +13,7 @@ type RHELAIArgs struct { Arch string ComputeRequest *cr.ComputeRequestArgs Spot *spotTypes.SpotArgs + ServiceEndpoints []string // If timeout is set a severless scheduled task will be created to self destroy the resources Timeout string } diff --git a/pkg/target/service/kind/api.go b/pkg/target/service/kind/api.go index ddf56402c..10aecda93 100644 --- a/pkg/target/service/kind/api.go +++ b/pkg/target/service/kind/api.go @@ -40,6 +40,7 @@ type KindArgs struct { HostingPlace string Spot *spotTypes.SpotArgs Timeout string + ServiceEndpoints []string ExtraPortMappings []PortMapping } diff --git a/pkg/target/service/snc/api.go b/pkg/target/service/snc/api.go index a39bfd137..cc1a71325 100644 --- a/pkg/target/service/snc/api.go +++ b/pkg/target/service/snc/api.go @@ -49,6 +49,7 @@ type SNCArgs struct { PullSecretFile string Spot *spotTypes.SpotArgs Timeout string + ServiceEndpoints []string Profiles []string } diff --git 
a/tkn/infra-aws-fedora.yaml b/tkn/infra-aws-fedora.yaml index 2856bc328..95bff8a33 100644 --- a/tkn/infra-aws-fedora.yaml +++ b/tkn/infra-aws-fedora.yaml @@ -134,6 +134,13 @@ spec: To access the target machine we need to go through the bastion default: "false" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "" + # Metadata params - name: tags description: Tags for the resources created on the providers @@ -261,6 +268,9 @@ spec: if [[ "$(params.airgap)" == "true" ]]; then cmd+="--airgap " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/infra-aws-kind.yaml b/tkn/infra-aws-kind.yaml index a1785cd67..ed83f8243 100644 --- a/tkn/infra-aws-kind.yaml +++ b/tkn/infra-aws-kind.yaml @@ -108,6 +108,13 @@ spec: description: Version for k8s cluster. If not version set it will pick the latest stable default: "''" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -220,6 +227,9 @@ spec: if [[ $(params.timeout) != "" ]]; then cmd+="--timeout $(params.timeout) " fi + if [[ $(params.service-endpoints) != "" ]]; then + cmd+="--service-endpoints $(params.service-endpoints) " + fi cmd+="--tags $(params.tags) " fi diff --git a/tkn/infra-aws-ocp-snc.yaml b/tkn/infra-aws-ocp-snc.yaml index c2f568dda..a682a384d 100644 --- a/tkn/infra-aws-ocp-snc.yaml +++ b/tkn/infra-aws-ocp-snc.yaml @@ -129,6 +129,13 @@ spec: description: Comma-separated list of profiles to install on the cluster (e.g. virtualization, serverless-serving, serverless-eventing, serverless, servicemesh). 
When virtualization is selected, a bare metal instance is used. default: "''" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -252,6 +259,9 @@ spec: if [[ $(params.timeout) != "" ]]; then cmd+="--timeout $(params.timeout) " fi + if [[ $(params.service-endpoints) != "" ]]; then + cmd+="--service-endpoints $(params.service-endpoints) " + fi cmd+="--tags $(params.tags) " fi diff --git a/tkn/infra-aws-rhel-ai.yaml b/tkn/infra-aws-rhel-ai.yaml index 2820ddfcd..aca6be506 100644 --- a/tkn/infra-aws-rhel-ai.yaml +++ b/tkn/infra-aws-rhel-ai.yaml @@ -138,6 +138,13 @@ spec: description: Version of RHEL AI OS (default 3.0.0) default: "3.0.0" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -268,6 +275,9 @@ spec: cmd+="--spot-eviction-tolerance '$(params.spot-eviction-tolerance)' " cmd+="--spot-excluded-regions '$(params.spot-excluded-regions)' " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/infra-aws-rhel.yaml b/tkn/infra-aws-rhel.yaml index 9852dc863..d606fd96e 100644 --- a/tkn/infra-aws-rhel.yaml +++ b/tkn/infra-aws-rhel.yaml @@ -153,6 +153,13 @@ spec: To access the target machine we need to go through the bastion default: "false" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. 
+ default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -286,6 +293,9 @@ spec: if [[ "$(params.profile-snc)" == "true" ]]; then cmd+="--snc " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/infra-aws-windows-server.yaml b/tkn/infra-aws-windows-server.yaml index 09c91892b..b5512a799 100644 --- a/tkn/infra-aws-windows-server.yaml +++ b/tkn/infra-aws-windows-server.yaml @@ -111,6 +111,13 @@ spec: To access the target machine we need to go through the bastion default: 'false' + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "''" + - name: tags description: tags for the resources created on the providers default: "''" @@ -220,6 +227,9 @@ spec: cmd+="--spot --spot-increase-rate $(params.spot-increase-rate) --spot-eviction-tolerance $(params.spot-eviction-tolerance) " fi if [[ $(params.airgap) == "true" ]]; then cmd+="--airgap "; fi + if [[ $(params.service-endpoints) != "" ]]; then + cmd+="--service-endpoints $(params.service-endpoints) " + fi cmd+="--tags $(params.tags) " fi diff --git a/tkn/template/infra-aws-fedora.yaml b/tkn/template/infra-aws-fedora.yaml index 917768ccb..f958b17b0 100644 --- a/tkn/template/infra-aws-fedora.yaml +++ b/tkn/template/infra-aws-fedora.yaml @@ -134,6 +134,13 @@ spec: To access the target machine we need to go through the bastion default: "false" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. 
+ default: "" + # Metadata params - name: tags description: Tags for the resources created on the providers @@ -261,6 +268,9 @@ spec: if [[ "$(params.airgap)" == "true" ]]; then cmd+="--airgap " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/template/infra-aws-kind.yaml b/tkn/template/infra-aws-kind.yaml index bf037e21d..8aaf7c7e9 100644 --- a/tkn/template/infra-aws-kind.yaml +++ b/tkn/template/infra-aws-kind.yaml @@ -108,6 +108,13 @@ spec: description: Version for k8s cluster. If not version set it will pick the latest stable default: "''" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "''" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -220,6 +227,9 @@ spec: if [[ $(params.timeout) != "" ]]; then cmd+="--timeout $(params.timeout) " fi + if [[ $(params.service-endpoints) != "" ]]; then + cmd+="--service-endpoints $(params.service-endpoints) " + fi cmd+="--tags $(params.tags) " fi diff --git a/tkn/template/infra-aws-ocp-snc.yaml b/tkn/template/infra-aws-ocp-snc.yaml index b9e04c35a..a37c87ad5 100644 --- a/tkn/template/infra-aws-ocp-snc.yaml +++ b/tkn/template/infra-aws-ocp-snc.yaml @@ -129,6 +129,13 @@ spec: description: Comma-separated list of profiles to install on the cluster (e.g. virtualization, serverless-serving, serverless-eventing, serverless, servicemesh). When virtualization is selected, a bare metal instance is used. default: "''" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. 
+ default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -252,6 +259,9 @@ spec: if [[ $(params.timeout) != "" ]]; then cmd+="--timeout $(params.timeout) " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints $(params.service-endpoints) " + fi cmd+="--tags $(params.tags) " fi diff --git a/tkn/template/infra-aws-rhel-ai.yaml b/tkn/template/infra-aws-rhel-ai.yaml index 32a33124c..3211e00d1 100644 --- a/tkn/template/infra-aws-rhel-ai.yaml +++ b/tkn/template/infra-aws-rhel-ai.yaml @@ -138,6 +138,13 @@ spec: description: Version of RHEL AI OS (default 3.0.0) default: "3.0.0" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -268,6 +275,9 @@ spec: cmd+="--spot-eviction-tolerance '$(params.spot-eviction-tolerance)' " cmd+="--spot-excluded-regions '$(params.spot-excluded-regions)' " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/template/infra-aws-rhel.yaml b/tkn/template/infra-aws-rhel.yaml index 4ccdabd37..69342fdd0 100644 --- a/tkn/template/infra-aws-rhel.yaml +++ b/tkn/template/infra-aws-rhel.yaml @@ -153,6 +153,13 @@ spec: To access the target machine we need to go through the bastion default: "false" + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. 
+ default: "" + # Metadata params - name: tags description: tags for the resources created on the providers @@ -286,6 +293,9 @@ spec: if [[ "$(params.profile-snc)" == "true" ]]; then cmd+="--snc " fi + if [[ "$(params.service-endpoints)" != "" ]]; then + cmd+="--service-endpoints '$(params.service-endpoints)' " + fi cmd+="--tags '$(params.tags)' " fi diff --git a/tkn/template/infra-aws-windows-server.yaml b/tkn/template/infra-aws-windows-server.yaml index 67a2d03a5..41156d9e5 100644 --- a/tkn/template/infra-aws-windows-server.yaml +++ b/tkn/template/infra-aws-windows-server.yaml @@ -111,6 +111,13 @@ spec: To access the target machine we need to go through the bastion default: 'false' + # Network params + - name: service-endpoints + description: | + Comma-separated list of VPC service endpoints to create. + Accepted values: s3, ecr, ssm. Empty = no endpoints created. + default: "''" + - name: tags description: tags for the resources created on the providers default: "''" @@ -220,6 +227,7 @@ spec: cmd+="--spot --spot-increase-rate $(params.spot-increase-rate) --spot-eviction-tolerance $(params.spot-eviction-tolerance) " fi if [[ $(params.airgap) == "true" ]]; then cmd+="--airgap "; fi + if [[ $(params.service-endpoints) != "" ]]; then cmd+="--service-endpoints $(params.service-endpoints) "; fi cmd+="--tags $(params.tags) " fi