From bd81c108061275717b4e9031c5201bb808984f2c Mon Sep 17 00:00:00 2001 From: geremyCohen Date: Mon, 1 Jun 2026 18:25:58 -0700 Subject: [PATCH 1/3] go gc lp --- .../go-gc-default-settings/_index.md | 76 ++++++++++ .../go-gc-default-settings/_next-steps.md | 9 ++ .../choose_aws_instance.md | 64 ++++++++ .../create_gc_benchmark.md | 109 ++++++++++++++ .../install_go_tools.md | 72 +++++++++ .../interpret_gc_results.md | 84 +++++++++++ .../run_default_gc_benchmark.md | 139 ++++++++++++++++++ 7 files changed, 553 insertions(+) create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_next-steps.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md new file mode 100644 index 0000000000..c85a745541 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md @@ -0,0 +1,76 @@ +--- +title: Measure Go GC behavior on AWS Graviton with default runtime settings +description: Learn how to run a Go allocation benchmark on AWS Graviton and measure garbage collection behavior without changing Go runtime settings. 
+ +minutes_to_complete: 75 + +who_is_this_for: This Learning Path is for Go developers and performance engineers who want to measure garbage collection behavior on Arm servers without changing Go runtime GC settings. + +learning_objectives: + - Select an AWS Graviton instance for repeatable Go GC measurements + - Install Go and Benchstat on an Arm Linux server + - Confirm that Go runtime tuning variables are unset + - Run a Go benchmark that reports allocation, GC, and pause-time metrics + - Capture CPU and heap profiles without changing GC behavior + +prerequisites: + - An [AWS account](https://aws.amazon.com/) with permission to launch AWS Graviton EC2 instances + - The [AWS CLI](/install-guides/aws-cli/) installed and configured on your local machine + - An AWS Graviton instance running Ubuntu 24.04 LTS or another Arm Linux distribution + - Basic familiarity with Go benchmarks and Linux shell commands + +author: Geremy Cohen + +### Tags +skilllevels: Introductory +subjects: Performance and Architecture +cloud_service_providers: + - AWS +armips: + - Neoverse +tools_software_languages: + - AWS + - Go +operatingsystems: + - Linux + +further_reading: + - resource: + title: Amazon EC2 M8g instances + link: https://aws.amazon.com/ec2/instance-types/m8g/ + type: documentation + - resource: + title: Go GC guide + link: https://go.dev/doc/gc-guide + type: documentation + - resource: + title: Go runtime package + link: https://pkg.go.dev/runtime + type: documentation + - resource: + title: Go testing package + link: https://pkg.go.dev/testing + type: documentation + - resource: + title: Graviton Performance Runbook + link: https://github.com/aws/aws-graviton-getting-started/blob/main/perfrunbook/README.md + type: documentation + - resource: + title: Benchmark Go performance with Sweet and Benchstat + link: /learning-paths/servers-and-cloud-computing/go-benchmarking-with-sweet/ + type: learning path + +### FIXED, DO NOT MODIFY +# 
================================================================================ +weight: 1 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. +--- + +## Measure default Go GC behavior on Arm servers + +Go applications can spend meaningful time allocating memory and running garbage collection (GC). You should measure that behavior before you change runtime settings. + +In this Learning Path, you run Go benchmarks on an AWS Graviton instance and keep the Go runtime in its default GC mode. You do not set `GOGC`, `GOMEMLIMIT`, `GODEBUG`, or `GOMAXPROCS`. + +The goal is to build a clean baseline. You will measure operation time, allocation rate, GC frequency, GC pause cost, and profiles before making tuning decisions. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_next-steps.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_next-steps.md new file mode 100644 index 0000000000..a45a267360 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_next-steps.md @@ -0,0 +1,9 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # The weight controls the order of the pages. _index.md always has weight 1. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. 
+--- + diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md new file mode 100644 index 0000000000..1e9f03b1fc --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md @@ -0,0 +1,64 @@ +--- +title: Choose an AWS Graviton instance +weight: 2 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Select an instance for Go GC measurements + +Use an AWS Graviton instance that has enough CPU and memory to make Go runtime behavior visible, while keeping the Learning Path inexpensive to run. + +For the first prototype, use `m8g.xlarge`. + +`m8g.xlarge` is a good starting point because it provides four vCPUs and 16 GiB of memory on AWS Graviton4. Four vCPUs are enough to observe default Go CPU parallelism and GC worker behavior without requiring a large benchmark host. The 16 GiB memory size is enough for allocation-heavy benchmarks without immediately making the lab memory-bound. + +Avoid burstable `t4g` instances for this Learning Path. CPU credits can affect benchmark repeatability and make GC measurements harder to explain. + +If `m8g.xlarge` is not available in your AWS Region or Availability Zone, use `m7g.xlarge` as the fallback. It has the same vCPU and memory shape on an earlier Graviton generation, so the commands and benchmark workflow remain the same. + +## Recommended prototype machine + +Use this instance shape for the first version of the Learning Path: + +| Purpose | Instance type | Processor | vCPUs | Memory | +| --- | --- | --- | ---: | ---: | +| Default prototype | `m8g.xlarge` | AWS Graviton4 | 4 | 16 GiB | +| Fallback | `m7g.xlarge` | AWS Graviton3 | 4 | 16 GiB | + +{{% notice Note %}} +You can use larger instances, such as `m8g.2xlarge`, when you want more CPU width or more memory headroom. 
Start with `m8g.xlarge` so the first benchmark run is easy to reproduce and inexpensive. +{{% /notice %}} + +The commands in this Learning Path were validated on an `m8g.xlarge` instance running Ubuntu 24.04 LTS Arm64 and Go 1.26.3. + +## Check instance availability + +Use the AWS CLI to check whether `m8g.xlarge` is available in your selected Region. + +Replace `us-east-1` with the Region you want to use. + +```console +aws ec2 describe-instance-type-offerings \ + --region us-east-1 \ + --location-type availability-zone \ + --filters Name=instance-type,Values=m8g.xlarge \ + --query 'InstanceTypeOfferings[].Location' \ + --output table +``` + +If the command returns one or more Availability Zones, you can use `m8g.xlarge` in that Region. + +Run the same command for `m7g.xlarge` if `m8g.xlarge` is not available: + +```console +aws ec2 describe-instance-type-offerings \ + --region us-east-1 \ + --location-type availability-zone \ + --filters Name=instance-type,Values=m7g.xlarge \ + --query 'InstanceTypeOfferings[].Location' \ + --output table +``` + +You have now selected a repeatable AWS Graviton test machine. You will confirm the default Go runtime environment before running the benchmark. 
diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md new file mode 100644 index 0000000000..143ee8f74b --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md @@ -0,0 +1,109 @@ +--- +title: Create a Go GC benchmark +weight: 4 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Create a benchmark module + +Create a small Go module for the benchmark: + +```console +mkdir -p $HOME/go-gc-default/parsebench +cd $HOME/go-gc-default +go mod init example.com/go-gc-default +``` + +Create the benchmark file: + +```console +cat > parsebench/parsebench_test.go <<'EOF' +package parsebench + +import ( + "runtime" + "strconv" + "strings" + "testing" +) + +var sink []string + +func BenchmarkParseAndAllocate(b *testing.B) { + payload := strings.Repeat("name=arm&runtime=go&gc=default&value=12345;", 2048) + + b.ReportAllocs() + + var before runtime.MemStats + runtime.ReadMemStats(&before) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + parts := strings.Split(payload, ";") + out := make([]string, 0, len(parts)) + + for _, part := range parts { + if part == "" { + continue + } + fields := strings.SplitN(part, "=", 2) + if len(fields) == 2 { + out = append(out, fields[0]+":"+strconv.Itoa(len(fields[1]))) + } + } + + sink = out + } + b.StopTimer() + + var after runtime.MemStats + runtime.ReadMemStats(&after) + + ops := float64(b.N) + gcCycles := after.NumGC - before.NumGC + pauseNs := after.PauseTotalNs - before.PauseTotalNs + + if ops > 0 { + b.ReportMetric(float64(gcCycles)/ops, "gc/op") + b.ReportMetric(float64(pauseNs)/ops, "stw-ns/op") + } + if gcCycles > 0 { + b.ReportMetric(float64(pauseNs)/float64(gcCycles), "stw-ns/GC") + } +} +EOF +``` + +This benchmark repeatedly parses and allocates strings. 
It reports the default Go benchmark metrics plus three GC-specific metrics: + +| Metric | Meaning | +| --- | --- | +| `gc/op` | GC cycles per completed benchmark operation | +| `stw-ns/op` | GC stop-the-world pause nanoseconds per completed operation | +| `stw-ns/GC` | GC stop-the-world pause nanoseconds per GC cycle | + +The benchmark reads `runtime.MemStats` before and after the timed loop. It does not set Go runtime tuning variables. + +## Confirm the benchmark builds + +Run one short benchmark pass: + +```console +cd $HOME/go-gc-default +go test ./parsebench -run '^$' -bench BenchmarkParseAndAllocate -benchmem -count 1 -benchtime=2s +``` + +You should see output with `ns/op`, `B/op`, `allocs/op`, and the GC-specific metrics: + +```output +goos: linux +goarch: arm64 +pkg: example.com/go-gc-default/parsebench +BenchmarkParseAndAllocate-4 14014 170814 ns/op 0.04553 gc/op 102956 stw-ns/GC 4687 stw-ns/op 163840 B/op 4098 allocs/op +PASS +ok example.com/go-gc-default/parsebench 4.127s +``` + +Your exact numbers will differ by instance type, Go version, operating system, and system load. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md new file mode 100644 index 0000000000..4c601351d4 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md @@ -0,0 +1,72 @@ +--- +title: Install Go and benchmark tools +weight: 3 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Install Go on Arm Linux + +Install Go on the AWS Graviton instance. The commands below use the Linux Arm64 archive from `go.dev`. + +{{% notice Note %}} +The following commands use Go 1.26.3. The same commands work with other Go versions. Replace the archive name and checksum with the values for your version of choice. 
To find the latest version, see the [Go downloads page](https://go.dev/dl/). +{{% /notice %}} + +Download the Go archive and verify the checksum: + +```console +cd $HOME +curl -LO https://go.dev/dl/go1.26.3.linux-arm64.tar.gz +echo "9d89a3ea57d141c2b22d70083f2c8459ba3890f2d9e818e7e933b75614936565 go1.26.3.linux-arm64.tar.gz" | sha256sum -c - +``` + +Install Go under `/usr/local`: + +```console +sudo rm -rf /usr/local/go +sudo tar -C /usr/local -xzf go1.26.3.linux-arm64.tar.gz +``` + +Add Go to your shell path: + +```console +export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH +``` + +To make the path update persistent, add it to your shell profile: + +```console +echo 'export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH' >> $HOME/.profile +``` + +Verify that Go is installed for Arm64 Linux: + +```console +go version +go env GOOS GOARCH +``` + +The output should show `linux` and `arm64`: + +```output +linux +arm64 +``` + +## Install Benchstat + +Install Benchstat to summarize repeated Go benchmark runs: + +```console +go install golang.org/x/perf/cmd/benchstat@latest +``` + +Verify that Benchstat is available: + +```console +benchstat -h +``` + +You now have Go and Benchstat installed on the AWS Graviton instance. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md new file mode 100644 index 0000000000..404f940e4b --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md @@ -0,0 +1,84 @@ +--- +title: Interpret the default GC results +weight: 6 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Read the benchmark metrics + +Open the Benchstat summary: + +```console +cat default_gc_benchstat.txt +``` + +Use the metrics as follows: + +| Metric | Read | +| --- | --- | +| `ns/op` | Time per completed operation. Lower is better for throughput. 
| +| `B/op` | Heap bytes allocated per operation. Lower usually reduces GC pressure. | +| `allocs/op` | Heap allocation count per operation. Lower usually reduces GC pressure. | +| `gc/op` | GC cycles per operation. Lower means GC runs less often per completed operation. | +| `stw-ns/op` or `stw-sec/op` | GC pause cost per operation. Lower means less stop-the-world pause time is paid per completed operation. | +| `stw-ns/GC` or `stw-sec/GC` | Pause cost per GC cycle. Lower means each GC cycle pauses for less time. | + +These metrics answer different questions. For example, `stw-ns/GC` can increase while `stw-ns/op` stays flat or decreases if GC runs less often per completed operation. + +## Read the profiles + +Open the CPU profile summary: + +```console +cat cpu_default_top.txt +``` + +Look for functions that dominate CPU time. In an allocation-heavy benchmark, you can expect to see time in string handling, allocation paths, and some runtime or GC support functions. + +On the validated `m8g.xlarge` instance, the top CPU profile entries included string scanning, string concatenation, split handling, and allocation paths: + +```output + flat flat% sum% cum cum% + 2.45s 15.53% 15.53% 2.45s 15.53% internal/bytealg.IndexByteString + 2.37s 15.02% 30.54% 4.65s 29.47% runtime.concatstrings + 1.28s 8.11% 38.66% 8.07s 51.14% strings.genSplit + 0.85s 5.39% 44.04% 1.35s 8.56% runtime.mallocgcTiny + 0.72s 4.56% 53.36% 5.17s 32.76% runtime.mallocgc +``` + +Open the heap allocation profile summary: + +```console +cat mem_default_alloc_top.txt +``` + +Look for application functions that allocate the most heap memory. Reducing allocation volume in those functions usually gives the Go GC less work to do. 
+ +On the validated `m8g.xlarge` instance, the allocation profile showed that `strings.genSplit` and the benchmark function accounted for nearly all allocated bytes: + +```output + flat flat% sum% cum cum% + 7.57GB 64.49% 64.49% 7.57GB 64.49% strings.genSplit + 4.16GB 35.45% 99.94% 11.74GB 99.94% example.com/go-gc-default/parsebench.BenchmarkParseAndAllocate + 0 0% 99.94% 2.90GB 24.70% strings.Split (inline) + 0 0% 99.94% 4.67GB 39.79% strings.SplitN (inline) +``` + +## Keep this result as the baseline + +This result is your default Go GC baseline on AWS Graviton. Keep the following files together when you compare future changes: + +```output +default_runtime_baseline.txt +default_gc_benchmark.txt +default_gc_benchstat.txt +default_gc_profile_run.txt +cpu_default.out +cpu_default_top.txt +mem_default.out +mem_default_alloc_top.txt +``` + +When you test code changes, compare against this baseline before changing Go runtime settings. If you later tune `GOGC`, `GOMEMLIMIT`, `GODEBUG`, or `GOMAXPROCS`, treat that as a separate experiment because it changes the runtime operating mode. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md new file mode 100644 index 0000000000..639010e1f2 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md @@ -0,0 +1,139 @@ +--- +title: Run the benchmark with default Go GC settings +weight: 5 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Confirm runtime tuning variables are unset + +Before you run the benchmark, confirm that the shell is not setting Go runtime tuning variables: + +```console +env | grep -E '^(GOGC|GOMEMLIMIT|GODEBUG|GOMAXPROCS)=' || true +``` + +The command should not print any matching variables. 
+ +If it prints one or more variables, unset them: + +```console +unset GOGC +unset GOMEMLIMIT +unset GODEBUG +unset GOMAXPROCS +``` + +This keeps GC pacing, memory-limit behavior, debug behavior, and CPU parallelism at the Go runtime defaults. + +## Record the runtime baseline + +Record the Go version, architecture, CPU count, and memory size before the benchmark: + +```console +cd $HOME/go-gc-default +{ + go version + go env GOOS GOARCH + nproc + free -h +} | tee default_runtime_baseline.txt +``` + +On the validated `m8g.xlarge` instance, the output was: + +```output +go version go1.26.3 linux/arm64 +linux +arm64 +4 + total used free shared buff/cache available +Mem: 15Gi 841Mi 13Gi 1.1Mi 921Mi 14Gi +Swap: 0B 0B 0B +``` + +## Run repeated benchmark samples + +Run the benchmark with repeated samples and save the output: + +```console +go test ./parsebench \ + -run '^$' \ + -bench BenchmarkParseAndAllocate \ + -benchmem \ + -count 10 \ + -benchtime=5s | tee default_gc_benchmark.txt +``` + +The benchmark output includes operation time, allocation rate, allocation count, GC cycles per operation, pause time per operation, and pause time per GC cycle. + +Summarize the repeated samples with Benchstat: + +```console +benchstat default_gc_benchmark.txt | tee default_gc_benchstat.txt +``` + +Benchstat may scale nanosecond metrics to seconds in the summary. For example, raw `stw-ns/op` benchmark output can appear as `stw-sec/op` in the Benchstat table. 
+ +On the validated `m8g.xlarge` instance, the Benchstat summary was: + +```output +goos: linux +goarch: arm64 +pkg: example.com/go-gc-default/parsebench + │ default_gc_benchmark.txt │ + │ sec/op │ +ParseAndAllocate-4 169.5µ ± 0% + + │ default_gc_benchmark.txt │ + │ gc/op │ +ParseAndAllocate-4 45.59m ± 0% + + │ default_gc_benchmark.txt │ + │ stw-sec/GC │ +ParseAndAllocate-4 99.55µ ± 3% + + │ default_gc_benchmark.txt │ + │ stw-sec/op │ +ParseAndAllocate-4 4.538µ ± 3% + + │ default_gc_benchmark.txt │ + │ B/op │ +ParseAndAllocate-4 160.0Ki ± 0% + + │ default_gc_benchmark.txt │ + │ allocs/op │ +ParseAndAllocate-4 4.098k ± 0% +``` + +## Capture CPU and heap profiles + +Create a test binary and run one longer benchmark pass with CPU and heap profiles enabled: + +```console +go test -c -o parsebench.test ./parsebench + +./parsebench.test \ + -test.run '^$' \ + -test.bench BenchmarkParseAndAllocate \ + -test.benchmem \ + -test.count 1 \ + -test.benchtime 10s \ + -test.cpuprofile cpu_default.out \ + -test.memprofile mem_default.out | tee default_gc_profile_run.txt +``` + +Inspect the CPU profile: + +```console +go tool pprof -top ./parsebench.test cpu_default.out | tee cpu_default_top.txt +``` + +Inspect the heap allocation profile: + +```console +go tool pprof -top -alloc_space ./parsebench.test mem_default.out | tee mem_default_alloc_top.txt +``` + +You now have a default-GC benchmark result, a Benchstat summary, and CPU and heap profiles from the same workload. 
From 405e35c79f8c231086402578e8a5bc7734cdf20c Mon Sep 17 00:00:00 2001 From: Geremy Cohen Date: Tue, 2 Jun 2026 18:44:04 -0700 Subject: [PATCH 2/3] final draft --- .../go-gc-default-settings/_index.md | 15 +- .../choose_aws_instance.md | 32 ++-- .../create_gc_benchmark.md | 138 ++++++++++++++---- .../install_go_tools.md | 59 ++++---- .../interpret_gc_results.md | 90 ++++++++++-- .../run_default_gc_benchmark.md | 41 +++--- 6 files changed, 257 insertions(+), 118 deletions(-) diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md index c85a745541..e5a16cb39e 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/_index.md @@ -1,15 +1,14 @@ --- -title: Measure Go GC behavior on AWS Graviton with default runtime settings -description: Learn how to run a Go allocation benchmark on AWS Graviton and measure garbage collection behavior without changing Go runtime settings. +title: Measure Go GC behavior on AWS Graviton +description: Learn how to measure and observe Go garbage collection metrics on AWS Graviton instances. minutes_to_complete: 75 -who_is_this_for: This Learning Path is for Go developers and performance engineers who want to measure garbage collection behavior on Arm servers without changing Go runtime GC settings. +who_is_this_for: This Learning Path is for engineers interested in learning more about Go garbage collection (GC) behavior on Arm. 
learning_objectives: - Select an AWS Graviton instance for repeatable Go GC measurements - Install Go and Benchstat on an Arm Linux server - - Confirm that Go runtime tuning variables are unset - Run a Go benchmark that reports allocation, GC, and pause-time metrics - Capture CPU and heap profiles without changing GC behavior @@ -66,11 +65,3 @@ weight: 1 # _index.md always has weight of 1 to order corr layout: "learningpathall" # All files under learning paths have this same wrapper learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. --- - -## Measure default Go GC behavior on Arm servers - -Go applications can spend meaningful time allocating memory and running garbage collection (GC). You should measure that behavior before you change runtime settings. - -In this Learning Path, you run Go benchmarks on an AWS Graviton instance and keep the Go runtime in its default GC mode. You do not set `GOGC`, `GOMEMLIMIT`, `GODEBUG`, or `GOMAXPROCS`. - -The goal is to build a clean baseline. You will measure operation time, allocation rate, GC frequency, GC pause cost, and profiles before making tuning decisions. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md index 1e9f03b1fc..244fe8d85f 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/choose_aws_instance.md @@ -5,35 +5,31 @@ weight: 2 ### FIXED, DO NOT MODIFY layout: learningpathall --- +## What is garbage collection (GC)? +Memory management is a critical aspect of application performance, and garbage collection (GC) plays a central role in automating that process. 
GC continuously identifies and removes objects that are no longer needed, freeing memory for reuse. -## Select an instance for Go GC measurements +While this automation improves productivity and application safety, inefficient garbage collection can lead to increased CPU usage, longer response times, and unexpected application pauses. -Use an AWS Graviton instance that has enough CPU and memory to make Go runtime behavior visible, while keeping the Learning Path inexpensive to run. +Tracking GC metrics provides a window into an application's memory health, helping engineers optimize performance and ensure the system can scale efficiently under load. -For the first prototype, use `m8g.xlarge`. +## Measuring default Go GC behavior on Arm servers -`m8g.xlarge` is a good starting point because it provides four vCPUs and 16 GiB of memory on AWS Graviton4. Four vCPUs are enough to observe default Go CPU parallelism and GC worker behavior without requiring a large benchmark host. The 16 GiB memory size is enough for allocation-heavy benchmarks without immediately making the lab memory-bound. +Go is a garbage-collected language. Because Go applications can spend meaningful time allocating memory and running garbage collection, it is important to understand how the Go runtime behaves under default settings. -Avoid burstable `t4g` instances for this Learning Path. CPU credits can affect benchmark repeatability and make GC measurements harder to explain. +In this Learning Path, you'll run Go benchmarks on an AWS Graviton instance. The goal is to build a clean baseline, measuring operation time, allocation rate, GC frequency, and GC pause cost. -If `m8g.xlarge` is not available in your AWS Region or Availability Zone, use `m7g.xlarge` as the fallback. It has the same vCPU and memory shape on an earlier Graviton generation, so the commands and benchmark workflow remain the same. 
+## Selecting an instance for Go GC measurements -## Recommended prototype machine +An AWS Graviton `m8g.xlarge` instance has enough CPU and memory to make Go runtime behavior visible, while keeping costs minimal. It's a good starting point as it provides four vCPUs and 16 GiB of memory on AWS Graviton4. If you choose to run this Learning Path on a different instance, make sure it has at least 4 vCPUs and 16 GiB of memory to ensure the benchmark runs smoothly and provides meaningful GC metrics. -Use this instance shape for the first version of the Learning Path: - -| Purpose | Instance type | Processor | vCPUs | Memory | -| --- | --- | --- | ---: | ---: | -| Default prototype | `m8g.xlarge` | AWS Graviton4 | 4 | 16 GiB | -| Fallback | `m7g.xlarge` | AWS Graviton3 | 4 | 16 GiB | +Avoid burstable `t4g` instances as CPU credits can affect benchmark repeatability and make GC measurements harder to explain. {{% notice Note %}} You can use larger instances, such as `m8g.2xlarge`, when you want more CPU width or more memory headroom. Start with `m8g.xlarge` so the first benchmark run is easy to reproduce and inexpensive. {{% /notice %}} -The commands in this Learning Path were validated on an `m8g.xlarge` instance running Ubuntu 24.04 LTS Arm64 and Go 1.26.3. -## Check instance availability +## Checking instance availability Use the AWS CLI to check whether `m8g.xlarge` is available in your selected Region. @@ -48,9 +44,7 @@ aws ec2 describe-instance-type-offerings \ --output table ``` -If the command returns one or more Availability Zones, you can use `m8g.xlarge` in that Region. - -Run the same command for `m7g.xlarge` if `m8g.xlarge` is not available: +If the command returns one or more Availability Zones, you can use `m8g.xlarge` in that Region. 
If you are unable to find `m8g.xlarge` in your Region, you can try a different Region, or fall back to an `m7g.xlarge` instance, which is based on the previous-generation AWS Graviton3: ```console aws ec2 describe-instance-type-offerings \ @@ -61,4 +55,4 @@ aws ec2 describe-instance-type-offerings \ --output table ``` -You have now selected a repeatable AWS Graviton test machine. You will confirm the default Go runtime environment before running the benchmark. +After you have chosen an instance type, provision it with Ubuntu 24.04 LTS Arm64. When the instance is running and you have connected to it over SSH, you can proceed to the next step. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md index 143ee8f74b..cc1e5c1895 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/create_gc_benchmark.md @@ -6,96 +6,167 @@ weight: 4 layout: learningpathall --- -## Create a benchmark module +## Creating a benchmark module -Create a small Go module for the benchmark: +You'll first create a small Go benchmark module. The high-level flow is: + +1. Generate a large input string. +2. Repeatedly parse it and create new objects/strings. +3. Force memory allocations so the garbage collector has work to do. +4. Measure how long the workload takes. +5. Measure how much GC activity occurred during the benchmark. +6. Report both performance metrics and GC-related metrics. + +Paste the following commands into your terminal to create the module and benchmark file: + +```bash + +# Create the module directory and initialize it. 
-```console mkdir -p $HOME/go-gc-default/parsebench cd $HOME/go-gc-default go mod init example.com/go-gc-default -``` -Create the benchmark file: +# Create the benchmark file: -```console cat > parsebench/parsebench_test.go <<'EOF' package parsebench import ( + "runtime" "strconv" "strings" "testing" + ) +// Global variable used to store benchmark results. + var sink []string func BenchmarkParseAndAllocate(b *testing.B) { - payload := strings.Repeat("name=arm&runtime=go&gc=default&value=12345;", 2048) + // This simulates a large payload by creating a large test string by + // repeating the same key=value data many times. + // + // Example: + // name=arm&runtime=go&gc=default&value=12345; + // + + payload := strings.Repeat("name=arm&runtime=go&gc=default&value=12345;",2048) + + // Next, we tell the benchmark framework to track memory allocations. + // + // This will show metrics such as allocations per operation, and bytes allocated per operation + b.ReportAllocs() - + + // Capture runtime memory statistics before the benchmark starts. We will later compare these + // values to see: + // - how many garbage collections occurred + // - how much pause time was spent in GC + var before runtime.MemStats runtime.ReadMemStats(&before) - + + // Reset benchmark timing so that any setup work performed above will not be included + // in the benchmark measurements. + b.ResetTimer() + + // The benchmark loop is where the actual work is done. The number of times this loop is + // executed is controlled by the b.N variable. The value of b.N is automatically chosen by + // the Go benchmark framework to obtain stable and statistically useful measurements. + + // The reason for this design is that timing a single operation is often unreliable; running + // it many times reduces noise from: + // * OS scheduling + // * CPU frequency changes + // * background processes + for i := 0; i < b.N; i++ { + // split the large payload into individual records. 
+ // Example: + // "a=1;b=2;c=3;" becomes: ["a=1", "b=2", "c=3", ""] parts := strings.Split(payload, ";") + // Create a new slice to store parsed output. This allocation is intentional because we want + // the benchmark to generate memory pressure and trigger garbage collection activity. + out := make([]string, 0, len(parts)) - + + // Process each record. + for _, part := range parts { + // Ignore the empty string created by the trailing semicolon. if part == "" { continue } + // Split the string into key and value. + fields := strings.SplitN(part, "=", 2) + + // Make sure both key and value exist. if len(fields) == 2 { - out = append(out, fields[0]+":"+strconv.Itoa(len(fields[1]))) + // Build a new string containing: key:length_of_value + // This creates additional allocations and string objects, increasing GC activity. + out = append(out,fields[0]+":"+strconv.Itoa(len(fields[1])),) } } - + // Save the result so the compiler cannot eliminate the work as unused. sink = out } + // Stop benchmark timing. + // + // Everything below is measurement/reporting logic and should not affect benchmark performance results. b.StopTimer() - + + // Capture memory statistics after the benchmark completes. + var after runtime.MemStats runtime.ReadMemStats(&after) - + + // Number of benchmark operations executed. ops := float64(b.N) + + // Total number of garbage collection cycles that occurred while the benchmark was running: + gcCycles := after.NumGC - before.NumGC + + // Total "stop-the-world" pause time spent in GC. During these pauses, application execution + // is temporarily halted while the runtime performs parts of garbage collection. + pauseNs := after.PauseTotalNs - before.PauseTotalNs - + + // Report GC events per benchmark operation. Example: 0.002 gc/op means one GC cycle + // every 500 operations. + if ops > 0 { b.ReportMetric(float64(gcCycles)/ops, "gc/op") + + // Report average GC pause time per operation. 
b.ReportMetric(float64(pauseNs)/ops, "stw-ns/op") } + // If at least one GC occurred, report the average stop-the-world pause duration for each GC cycle. if gcCycles > 0 { - b.ReportMetric(float64(pauseNs)/float64(gcCycles), "stw-ns/GC") + b.ReportMetric( + float64(pauseNs)/float64(gcCycles), + "stw-ns/GC", + ) } + } EOF ``` -This benchmark repeatedly parses and allocates strings. It reports the default Go benchmark metrics plus three GC-specific metrics: - -| Metric | Meaning | -| --- | --- | -| `gc/op` | GC cycles per completed benchmark operation | -| `stw-ns/op` | GC stop-the-world pause nanoseconds per completed operation | -| `stw-ns/GC` | GC stop-the-world pause nanoseconds per GC cycle | +The benchmark code is now ready to run! Give it a try by running the following command: -The benchmark reads `runtime.MemStats` before and after the timed loop. It does not set Go runtime tuning variables. - -## Confirm the benchmark builds - -Run one short benchmark pass: - -```console +```bash cd $HOME/go-gc-default go test ./parsebench -run '^$' -bench BenchmarkParseAndAllocate -benchmem -count 1 -benchtime=2s ``` -You should see output with `ns/op`, `B/op`, `allocs/op`, and the GC-specific metrics: +You should see output similar to below: ```output goos: linux @@ -106,4 +177,7 @@ PASS ok example.com/go-gc-default/parsebench 4.127s ``` -Your exact numbers will differ by instance type, Go version, operating system, and system load. +Your exact numbers will differ by instance type, Go version, operating system, and system load. If this test run yields results with no errors, you're ready to move on to the next step. 
+ + + diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md index 4c601351d4..f3f6211ccc 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/install_go_tools.md @@ -1,72 +1,81 @@ --- -title: Install Go and benchmark tools +title: Installing Go and Benchstat weight: 3 ### FIXED, DO NOT MODIFY layout: learningpathall --- -## Install Go on Arm Linux +## Installing Go on Arm Linux -Install Go on the AWS Graviton instance. The commands below use the Linux Arm64 archive from `go.dev`. +Once the instance is up, the next step is to install Go. The commands below install Go from the Linux Arm64 'go.dev' archive. {{% notice Note %}} The following commands use Go 1.26.3. The same commands work with other Go versions. Replace the archive name and checksum with the values for your version of choice. To find the latest version, see the [Go downloads page](https://go.dev/dl/). 
{{% /notice %}} -Download the Go archive and verify the checksum: -```console +```bash +# Download the Go archive and verify the checksum: + cd $HOME curl -LO https://go.dev/dl/go1.26.3.linux-arm64.tar.gz echo "9d89a3ea57d141c2b22d70083f2c8459ba3890f2d9e818e7e933b75614936565 go1.26.3.linux-arm64.tar.gz" | sha256sum -c - -``` -Install Go under `/usr/local`: +# Install Go under `/usr/local`: -```console sudo rm -rf /usr/local/go sudo tar -C /usr/local -xzf go1.26.3.linux-arm64.tar.gz -``` - -Add Go to your shell path: -```console +# Add Go to your shell path: export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH -``` -To make the path update persistent, add it to your shell profile: +# To make the path update persistent, add it to your shell profile: -```console echo 'export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH' >> $HOME/.profile -``` - -Verify that Go is installed for Arm64 Linux: -```console +# Verify that Go is installed for Arm64 Linux: +echo go version go env GOOS GOARCH ``` -The output should show `linux` and `arm64`: +The output should look like this: ```output +go version go1.26.3 linux/arm64 linux arm64 ``` -## Install Benchstat +## Installing Benchstat -Install Benchstat to summarize repeated Go benchmark runs: +`benchstat` is a Go performance analysis tool that compares benchmark results and provides statistical analysis of performance differences between runs. It helps developers determine whether observed changes in benchmark metrics are statistically significant rather than simply the result of normal measurement variability. We'll use `benchstat` for that purpose in this learning path. 
-```console +To install `benchstat`: + +```bash go install golang.org/x/perf/cmd/benchstat@latest ``` -Verify that Benchstat is available: +You should see the following output after running that command: + +```output +go: downloading golang.org/x/perf v0.0.0-20260512194132-3cf34090a3db +go: downloading github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 +``` +Finally, do a quick check to make sure `benchstat` is installed: ```console benchstat -h ``` -You now have Go and Benchstat installed on the AWS Graviton instance. +You should see the following output: + +```output +Usage: benchstat [flags] inputs... + +benchstat computes statistical summaries and A/B comparisons of Go +... +``` +If you see this output, `benchstat` is installed and ready to use. With Go and `benchstat` installed on the AWS Graviton instance, you're ready to move on to the next section. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md index 404f940e4b..6a0e997a68 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md @@ -6,15 +6,15 @@ weight: 6 layout: learningpathall --- -## Read the benchmark metrics +## Understanding the benchmark metrics -Open the Benchstat summary: +To better understand what these benchmark results show, first open the Benchstat summary: -```console +```bash cat default_gc_benchstat.txt ``` -Use the metrics as follows: +Read the metrics in the summary as follows: | Metric | Read | | --- | --- | @@ -27,11 +27,11 @@ These metrics answer different questions. For example, `stw-ns/GC` can increase while `stw-ns/op` stays flat or decreases if GC runs less often per completed operation.
-## Read the profiles +## Reading the profiles -Open the CPU profile summary: +Next, open the CPU profile summary: -```console +```bash cat cpu_default_top.txt ``` @@ -50,7 +50,7 @@ On the validated `m8g.xlarge` instance, the top CPU profile entries included str Open the heap allocation profile summary: -```console +```bash cat mem_default_alloc_top.txt ``` @@ -66,9 +66,9 @@ On the validated `m8g.xlarge` instance, the allocation profile showed that `stri 0 0% 99.94% 4.67GB 39.79% strings.SplitN (inline) ``` -## Keep this result as the baseline +## Keeping a baseline to compare to future changes -This result is your default Go GC baseline on AWS Graviton. Keep the following files together when you compare future changes: +These results are your default Go GC baseline for this benchmark on AWS Graviton. Keep the following files together (for example, in a single folder or zip archive) so that you can easily see how code changes affect your app's overall performance: ```output default_runtime_baseline.txt @@ -81,4 +81,74 @@ mem_default.out mem_default_alloc_top.txt ``` +Example of changes you can experiment with include: + +### Simple changes that can improve GC performance + +1. **Reduce the payload size** + ```go + payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 512, + ) + ``` + A smaller payload creates fewer temporary objects and less garbage each iteration. + +2. **Move `strings.Split(payload, ";")` outside the benchmark loop** + ```go + parts := strings.Split(payload, ";") + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + ... + } + ``` + This avoids repeatedly allocating the same slice of records on every iteration. + +3. **Reuse the output slice** + ```go + out := make([]string, 0, len(parts)) + + for i := 0; i < b.N; i++ { + out = out[:0] + ... + } + ``` + Reusing the backing array reduces allocations and GC pressure. + +4.
**Replace `strings.SplitN()` with `strings.IndexByte()`** + ```go + idx := strings.IndexByte(part, '=') + if idx >= 0 { + key := part[:idx] + value := part[idx+1:] + ... + } + ``` + This avoids allocating a temporary `[]string` for every record processed. + +5. **Avoid creating new strings in the hot loop** + ```go + out = append(out, fields[0]) + ``` + Instead of building `"key:length"` strings, store existing strings or simpler values to reduce allocations. + +6. **Reduce the number of records processed** + ```go + payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 1024, // instead of 2048 + ) + ``` + Fewer records means less allocation work and fewer GC cycles. + +### Biggest GC wins + +For this benchmark, the largest improvements typically come from: + +- Moving `strings.Split(payload, ";")` outside the benchmark loop. +- Reusing the `out` slice instead of allocating a new one every iteration. +- Replacing `strings.SplitN()` with `strings.IndexByte()`. + When you test code changes, compare against this baseline before changing Go runtime settings. If you later tune `GOGC`, `GOMEMLIMIT`, `GODEBUG`, or `GOMAXPROCS`, treat that as a separate experiment because it changes the runtime operating mode. diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md index 639010e1f2..12b276b85c 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/run_default_gc_benchmark.md @@ -8,9 +8,9 @@ layout: learningpathall ## Confirm runtime tuning variables are unset -Before you run the benchmark, confirm that the shell is not setting Go runtime tuning variables: +Before you run the benchmark, confirm that the shell is not setting Go runtime tuning variables. 
This can happen if you have previously experimented with Go GC tuning variables on the same machine. -```console +```bash env | grep -E '^(GOGC|GOMEMLIMIT|GODEBUG|GOMAXPROCS)=' || true ``` @@ -18,20 +18,18 @@ The command should not print any matching variables. If it prints one or more variables, unset them: -```console +```bash unset GOGC unset GOMEMLIMIT unset GODEBUG unset GOMAXPROCS ``` -This keeps GC pacing, memory-limit behavior, debug behavior, and CPU parallelism at the Go runtime defaults. - ## Record the runtime baseline -Record the Go version, architecture, CPU count, and memory size before the benchmark: +Before running the benchmark, record the Go version, architecture, CPU count, and memory size: -```console +```bash cd $HOME/go-gc-default { go version @@ -41,7 +39,7 @@ cd $HOME/go-gc-default } | tee default_runtime_baseline.txt ``` -On the validated `m8g.xlarge` instance, the output was: +Your output should look similar to this: ```output go version go1.26.3 linux/arm64 @@ -55,9 +53,9 @@ Swap: 0B 0B 0B ## Run repeated benchmark samples -Run the benchmark with repeated samples and save the output: +Run the benchmark with repeated samples and save the output. In this example, the benchmark runs for 5 seconds and repeats 10 times: -```console +```bash go test ./parsebench \ -run '^$' \ -bench BenchmarkParseAndAllocate \ @@ -66,17 +64,17 @@ go test ./parsebench \ -benchtime=5s | tee default_gc_benchmark.txt ``` -The benchmark output includes operation time, allocation rate, allocation count, GC cycles per operation, pause time per operation, and pause time per GC cycle. +The `tee` command also saves the output to `default_gc_benchmark.txt`. The output includes operation time, allocation rate, allocation count, GC cycles per operation, pause time per operation, and pause time per GC cycle.
-Summarize the repeated samples with Benchstat: +With this information saved, we can now aggregate the repeated samples with Benchstat: -```console +```bash benchstat default_gc_benchmark.txt | tee default_gc_benchstat.txt ``` Benchstat may scale nanosecond metrics to seconds in the summary. For example, raw `stw-ns/op` benchmark output can appear as `stw-sec/op` in the Benchstat table. -On the validated `m8g.xlarge` instance, the Benchstat summary was: +You should see output similar to this: ```output goos: linux @@ -111,7 +109,7 @@ ParseAndAllocate-4 4.098k ± 0% Create a test binary and run one longer benchmark pass with CPU and heap profiles enabled: -```console +```bash go test -c -o parsebench.test ./parsebench ./parsebench.test \ @@ -124,16 +122,19 @@ go test -c -o parsebench.test ./parsebench -test.memprofile mem_default.out | tee default_gc_profile_run.txt ``` -Inspect the CPU profile: +Inspect the CPU profile to display the functions that consumed the most CPU time during benchmark execution, ranked from highest to lowest: -```console +```bash go tool pprof -top ./parsebench.test cpu_default.out | tee cpu_default_top.txt ``` -Inspect the heap allocation profile: +Inspect the heap allocation profile to display the functions responsible for allocating the most total memory over the lifetime of the benchmark, ranked from highest to lowest. -```console +```bash go tool pprof -top -alloc_space ./parsebench.test mem_default.out | tee mem_default_alloc_top.txt ``` -You now have a default-GC benchmark result, a Benchstat summary, and CPU and heap profiles from the same workload. +You now have a default-GC benchmark result, a Benchstat summary, and CPU and heap profiles from the same workload. From here, we can dive deeper into analyzing all of these results. 
+ + + From ef8a7b050aad3ebe650d0ee186674fa8a7ff5ca8 Mon Sep 17 00:00:00 2001 From: Geremy Cohen Date: Wed, 3 Jun 2026 09:11:25 -0700 Subject: [PATCH 3/3] tested and added experiments section --- .../interpret_gc_results.md | 201 +++++++++++------- .../go-gc-default-settings/try_on_your_own.md | 191 +++++++++++++++++ 2 files changed, 321 insertions(+), 71 deletions(-) create mode 100644 content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/try_on_your_own.md diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md index 6a0e997a68..e19c8b65de 100644 --- a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/interpret_gc_results.md @@ -81,74 +81,133 @@ mem_default.out mem_default_alloc_top.txt ``` -Example of changes you can experiment with include: - -### Simple changes that can improve GC performance - -1. **Reduce the payload size** - ```go - payload := strings.Repeat( - "name=arm&runtime=go&gc=default&value=12345;", - 512, - ) - ``` - A smaller payload creates fewer temporary objects and less garbage each iteration. - -2. **Move `strings.Split(payload, ";")` outside the benchmark loop** - ```go - parts := strings.Split(payload, ";") - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - ... - } - ``` - This avoids repeatedly allocating the same slice of records on every iteration. - -3. **Reuse the output slice** - ```go - out := make([]string, 0, len(parts)) - - for i := 0; i < b.N; i++ { - out = out[:0] - ... - } - ``` - Reusing the backing array reduces allocations and GC pressure. - -4. **Replace `strings.SplitN()` with `strings.IndexByte()`** - ```go - idx := strings.IndexByte(part, '=') - if idx >= 0 { - key := part[:idx] - value := part[idx+1:] - ... 
- } - ``` - This avoids allocating a temporary `[]string` for every record processed. - -5. **Avoid creating new strings in the hot loop** - ```go - out = append(out, fields[0]) - ``` - Instead of building `"key:length"` strings, store existing strings or simpler values to reduce allocations. - -6. **Reduce the number of records processed** - ```go - payload := strings.Repeat( - "name=arm&runtime=go&gc=default&value=12345;", - 1024, // instead of 2048 - ) - ``` - Fewer records means less allocation work and fewer GC cycles. - -### Biggest GC wins - -For this benchmark, the largest improvements typically come from: - -- Moving `strings.Split(payload, ";")` outside the benchmark loop. -- Reusing the `out` slice instead of allocating a new one every iteration. -- Replacing `strings.SplitN()` with `strings.IndexByte()`. - -When you test code changes, compare against this baseline before changing Go runtime settings. If you later tune `GOGC`, `GOMEMLIMIT`, `GODEBUG`, or `GOMAXPROCS`, treat that as a separate experiment because it changes the runtime operating mode. +## Experiment with Code Changes that influence GC Behavior +Now that you have a baseline, you can experiment with code changes that influence GC behavior. For example, you could try: + +## Challenge 1 + +You just found out that the payload size this benchmark is intended to represent is actually only 512 records instead of 2048. What changes can we make from the baseline to test whether optimizing for this smaller workload affects GC frequency, pause times, and overall application performance? + +### Idea: Reduce the payload size + +### How + +```go +payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 512, +) +``` + +### Why + +A smaller payload creates fewer temporary objects and less garbage each iteration. + +--- + +## Challenge 2 + +After profiling the application, you discover that the input payload rarely changes between requests.
What modifications can we make to reuse preprocessing work and determine whether reducing repeated allocations improves GC behavior and throughput? + +### Idea: Move `strings.Split(payload, ";")` outside the benchmark loop + +### How + +```go +parts := strings.Split(payload, ";") + +b.ResetTimer() + +for i := 0; i < b.N; i++ { + ... +} +``` + +### Why + +This avoids repeatedly allocating the same slice of records on every iteration. + +--- + +## Challenge 3 + +The benchmark currently creates a new output buffer for every operation, but production code processes millions of requests using the same worker. How can we modify the benchmark to reuse memory and evaluate the impact on GC activity and memory consumption? + +### Idea: Reuse the output slice + +### How + +```go +out := make([]string, 0, len(parts)) + +for i := 0; i < b.N; i++ { + out = out[:0] + ... +} +``` + +### Why + +Reusing the backing array reduces allocations and GC pressure. + +--- + +## Challenge 4 + +A CPU profile shows that string parsing is one of the hottest code paths in the application. What changes can we make to reduce temporary allocations during parsing and measure whether this reduces GC overhead? + +### Idea: Replace `strings.SplitN()` with `strings.IndexByte()` + +### How + +```go +idx := strings.IndexByte(part, '=') +if idx >= 0 { + key := part[:idx] + value := part[idx+1:] + ... +} +``` + +### Why + +This avoids allocating a temporary `[]string` for every record processed. + +--- + +## Challenge 5 + +Product requirements change and the application no longer needs to generate derived `"key:length"` strings. What modifications can we make to avoid unnecessary string allocations and test their effect on garbage collection performance? + +### Idea: Avoid creating new strings in the hot loop + +### How + +```go +out = append(out, fields[0]) +``` + +### Why + +Instead of building `"key:length"` strings, store existing strings or simpler values to reduce allocations. 
+ +--- + +## Challenge 6 + +Usage analytics show that most customers send payloads that are half the size represented by the current benchmark. How can we adjust the workload to better reflect real-world traffic and evaluate whether the resulting reduction in allocations improves GC efficiency? + +### Idea: Reduce the number of records processed + +### How + +```go +payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 1024, +) +``` + +### Why + +Fewer records means less allocation work and fewer GC cycles. \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/try_on_your_own.md b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/try_on_your_own.md new file mode 100644 index 0000000000..a73f1a9462 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/go-gc-default-settings/try_on_your_own.md @@ -0,0 +1,191 @@ +--- +title: Experiment with Optimization Ideas +weight: 7 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Experiment with GC Optimizations + +Now that you have an idea of what GC is, and how to measure it, you can experiment with code changes that influence GC behavior. For example, you could try: + +## Challenge 1 + +You just found out that the payload size this benchmark is intended to represent is actually only 512 records instead of 2048. What changes can we make from the baseline to test whether optimizing for this smaller workload affects GC frequency, pause times, and overall application performance? + +### Idea: Reduce the payload size + +### How + +**Before** + +```go +payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 2048, +) +``` + +**After** + +```go +payload := strings.Repeat( + "name=arm&runtime=go&gc=default&value=12345;", + 512, +) +``` + +### Why + +A smaller payload creates fewer temporary objects and less garbage each iteration.
+ +--- + +## Challenge 2 + +After profiling the application, you discover that the input payload rarely changes between requests. What modifications can we make to reuse preprocessing work and determine whether reducing repeated allocations improves GC behavior and throughput? + +### Idea: Move payload split logic outside the benchmark loop + +### How + +**Before** + +```go +for i := 0; i < b.N; i++ { + parts := strings.Split(payload, ";") + + out := make([]string, 0, len(parts)) + + ... +} +``` + +**After** + +```go +parts := strings.Split(payload, ";") + +for i := 0; i < b.N; i++ { + out := make([]string, 0, len(parts)) + + ... +} +``` + +### Why + +This avoids repeatedly allocating the same slice of records on every iteration. + +--- + +## Challenge 3 + +The benchmark currently creates a new output buffer for every operation, but production code processes millions of requests using the same worker. How can we modify the benchmark to reuse memory and evaluate the impact on GC activity and memory consumption? + +### Idea: Reuse the output slice + +### How + +**Before** + +```go +for i := 0; i < b.N; i++ { + out := make([]string, 0, len(parts)) + + ... +} +``` + +**After** + +```go +out := make([]string, 0, len(parts)) + +for i := 0; i < b.N; i++ { + out = out[:0] + + ... +} +``` + +### Why + +Reusing the backing array reduces allocations and GC pressure. + +--- + +## Challenge 4 + +A CPU profile shows that string parsing is one of the hottest code paths in the application. What changes can we make to reduce temporary allocations during parsing and measure whether this reduces GC overhead? 
+ +### Idea: Replace SplitN() with IndexByte() + +### How + +**Before** + +```go +fields := strings.SplitN(part, "=", 2) + +if len(fields) == 2 { + out = append( + out, + fields[0]+":"+strconv.Itoa(len(fields[1])), + ) +} +``` + +**After** + +```go +idx := strings.IndexByte(part, '=') + +if idx >= 0 { + key := part[:idx] + value := part[idx+1:] + + out = append( + out, + key+":"+strconv.Itoa(len(value)), + ) +} +``` + +### Why + +This avoids allocating a temporary `[]string` for every record processed. + +--- + +## Challenge 5 + +Product requirements change and the application no longer needs to generate derived `"key:length"` strings. What modifications can we make to avoid unnecessary string allocations and test their effect on garbage collection performance? + +### Idea: Avoid creating new strings in the hot loop + +### How + +**Before** + +```go +out = append( + out, + fields[0]+":"+strconv.Itoa(len(fields[1])), +) +``` + +**After** + +```go +out = append( + out, + fields[0], +) +``` + +### Why + +Instead of building `"key:length"` strings, store existing strings or simpler values to reduce allocations. +