From 6bb8fb934cc1a77966bde806c4418eb49d6ecb57 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 21:58:21 +0100 Subject: [PATCH 01/29] Bump Turing version --- Manifest.toml | 143 +++++++++++++++++++++++++++++++++++--------------- Project.toml | 6 +-- 2 files changed, 104 insertions(+), 45 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 2d5d84133..b3881e6ae 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.11.9" manifest_format = "2.0" -project_hash = "75240642ac9209edc6934c6d8a26cefb5226af2f" +project_hash = "4eb7c34874f749cc35761c71488d3f3c287a5d0f" [[deps.ADTypes]] git-tree-sha1 = "bbc22a9a08a0ef6460041086d8a7b27940ed4ffd" @@ -121,13 +121,18 @@ deps = ["AbstractMCMC", "Distributions", "DocStringExtensions", "FillArrays", "L git-tree-sha1 = "62ddbccf0ce5c26f8ef3cebe4bedef6b1599d616" uuid = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" version = "0.8.10" -weakdeps = ["DiffResults", "ForwardDiff", "MCMCChains", "StructArrays"] [deps.AdvancedMH.extensions] AdvancedMHForwardDiffExt = ["DiffResults", "ForwardDiff"] AdvancedMHMCMCChainsExt = "MCMCChains" AdvancedMHStructArraysExt = "StructArrays" + [deps.AdvancedMH.weakdeps] + DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" + StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" + [[deps.AdvancedPS]] deps = ["AbstractMCMC", "Distributions", "Random", "Random123", "Requires", "SSMProblems", "StatsFuns"] git-tree-sha1 = "d92dd3fb4cc2748860ae8d5dd1d324cf0715a53b" @@ -248,12 +253,6 @@ git-tree-sha1 = "01b8ccb13d68535d73d2b0c23e39bd23155fb712" uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" version = "1.1.0" -[[deps.AxisArrays]] -deps = ["Dates", "IntervalSets", "IterTools", "RangeArrays"] -git-tree-sha1 = "4126b08903b777c88edf1754288144a0492c05ad" -uuid = "39de3d68-74b9-583c-8d2d-e117c070f3a9" -version = "0.4.8" - [[deps.BangBang]] deps = ["Accessors", "ConstructionBase", "InitialValues", "LinearAlgebra"] git-tree-sha1 = "cceb62468025be98d42a5dc581b163c20896b040" @@ -753,6 +752,42 @@ version = "0.7.17" Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +[[deps.DimensionalData]] +deps = ["ConstructionBase", "DataAPI", "Dates", "Extents", "Interfaces", "IntervalSets", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "PrecompileTools", "Random", "Statistics", "TableTraits", "Tables"] +git-tree-sha1 = "57bbee194533adaa755b5cae528eabdea5d05039" +uuid = "0703355e-b756-11e9-17c0-8b28908087d0" +version = "0.30.1" + + [deps.DimensionalData.extensions] + DimensionalDataAbstractFFTsExt = "AbstractFFTs" + DimensionalDataAdaptExt = "Adapt" + DimensionalDataAlgebraOfGraphicsExt = "AlgebraOfGraphics" + DimensionalDataArrayInterfaceExt = "ArrayInterface" + DimensionalDataCategoricalArraysExt = "CategoricalArrays" + DimensionalDataChainRulesCoreExt = "ChainRulesCore" + DimensionalDataDiskArraysExt = "DiskArrays" + DimensionalDataMakieExt = "Makie" + DimensionalDataNearestNeighborsExt = "NearestNeighbors" + DimensionalDataPythonCallExt = "PythonCall" + DimensionalDataRecipesBaseExt = "RecipesBase" + DimensionalDataSparseArraysExt = "SparseArrays" + DimensionalDataStatsBaseExt = "StatsBase" + + [deps.DimensionalData.weakdeps] + AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" + Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" + AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67" + ArrayInterface = 
"4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" + CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" + PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" + RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" + [[deps.DispatchDoctor]] deps = ["MacroTools", "Preferences"] git-tree-sha1 = "42cd00edaac86f941815fe557c1d01e11913e07c" @@ -810,11 +845,12 @@ version = "3.6.0" [[deps.DynamicPPL]] deps = ["ADTypes", "AbstractMCMC", "AbstractPPL", "Accessors", "BangBang", "Bijectors", "Chairmarks", "Compat", "ConstructionBase", "DifferentiationInterface", "Distributions", "DocStringExtensions", "FillArrays", "InteractiveUtils", "LinearAlgebra", "LogDensityProblems", "MacroTools", "OrderedCollections", "PartitionedDistributions", "PrecompileTools", "Preferences", "Printf", "Random", "Statistics", "Test"] -git-tree-sha1 = "23ad4cb791a4725698268684555d937c26da0d54" +git-tree-sha1 = "c46e2b68a20a695df01ce316c08c977a1bb4c5b8" uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8" -version = "0.41.6" +version = "0.41.7" [deps.DynamicPPL.extensions] + DynamicPPLComponentArraysExt = ["ComponentArrays"] DynamicPPLEnzymeCoreExt = ["EnzymeCore"] DynamicPPLForwardDiffExt = ["ForwardDiff"] DynamicPPLMCMCChainsExt = ["MCMCChains"] @@ -823,6 +859,7 @@ version = "0.41.6" DynamicPPLReverseDiffExt = ["ReverseDiff"] [deps.DynamicPPL.weakdeps] + ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" @@ -924,6 +961,11 @@ git-tree-sha1 = "c13f0b150373771b0fdc1713c97860f8df12e6c2" uuid = "55351af7-c7e9-48d6-89ff-24e801d99491" version = "0.10.14" +[[deps.Extents]] +git-tree-sha1 = "b309b36a9e02fe7be71270dd8c0fd873625332b4" +uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" +version = "0.1.6" + [[deps.FFMPEG]] deps = ["FFMPEG_jll"] git-tree-sha1 = "95ecf07c2eea562b5adbd0696af6db62c0f52560" @@ -1061,6 +1103,38 @@ git-tree-sha1 = "05882d6995ae5c12bb5f36dd2ed3f61c98cbb172" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.5" +[[deps.FlexiChains]] +deps = ["AbstractMCMC", "AbstractPPL", "DelimitedFiles", "DimensionalData", "DocStringExtensions", "MCMCDiagnosticTools", "OrderedCollections", "PrecompileTools", "Printf", "Random", "Statistics", "StatsBase", "Tables"] +git-tree-sha1 = "81e93592ce8a9afc00e15c942c78de5fbe35e17d" +uuid = "4a37a8b9-6e57-4b92-8664-298d46e639f7" +version = "0.6.0" + + [deps.FlexiChains.extensions] + FlexiChainsAdvancedHMCExt = ["AdvancedHMC", "DimensionalData"] + FlexiChainsComponentArraysExt = ["ComponentArrays"] + FlexiChainsDynamicPPLExt = ["DynamicPPL", "Random", "DimensionalData", "OrderedCollections", "Distributions"] + FlexiChainsMCMCChainsExt = ["MCMCChains", "OrderedCollections"] + FlexiChainsMakieExt = ["Makie"] + FlexiChainsPairPlotsExt = ["PairPlots", "Makie"] + FlexiChainsPosteriorDBExt = ["PosteriorDB", "OrderedCollections"] + FlexiChainsPosteriorStatsDynamicPPLExt = ["PosteriorStats", "DynamicPPL"] + FlexiChainsPosteriorStatsExt = ["PosteriorStats", "DimensionalData"] + FlexiChainsRecipesBaseExt = ["RecipesBase", "StatsBase"] + FlexiChainsStanSampleExt = ["StanSample"] + + [deps.FlexiChains.weakdeps] + 
AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" + ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" + Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" + DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" + MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + PairPlots = "43a3c2be-4208-490b-832a-a21dcd55d7da" + PosteriorDB = "1c4bc282-d2f5-44f9-b6d1-8c4424a23ad4" + PosteriorStats = "7f36be82-ad55-44ba-a5c0-b8b5480d7aa5" + RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" + StanSample = "c1514b29-d3a0-5178-b312-660c88baa699" + [[deps.Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Zlib_jll"] git-tree-sha1 = "f85dac9a96a01087df6e3a749840015a0ca3817d" @@ -1297,6 +1371,11 @@ deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" version = "1.11.0" +[[deps.Interfaces]] +git-tree-sha1 = "331ff37738aea1a3cf841ddf085442f31b84324f" +uuid = "85a1e053-f937-4924-92a5-1367d23b7b87" +version = "0.3.2" + [[deps.Interpolations]] deps = ["Adapt", "AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] git-tree-sha1 = "65d505fa4c0d7072990d659ef3fc086eb6da8208" @@ -1342,11 +1421,6 @@ git-tree-sha1 = "b2d91fe939cae05960e760110b328288867b5758" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" version = "0.2.6" -[[deps.IterTools]] -git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023" -uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -version = "1.10.0" - [[deps.IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" @@ -1585,9 +1659,9 @@ version = "2.42.0+0" [[deps.LineSearch]] deps = ["ADTypes", "CommonSolve", "ConcreteStructs", "FastClosures", "LinearAlgebra", "MaybeInplace", "PrecompileTools", "SciMLBase", "SciMLJacobianOperators", "StaticArraysCore"] -git-tree-sha1 = "25454bc65c3eec4656cbe201c3d9336af49178c7" +git-tree-sha1 = "fd58a77c92e7c8f1db25c9839127d52943a49349" uuid = "87fe0de2-c867-4266-b59a-2f0a94fc965b" -version = "0.1.8" +version = "0.1.9" weakdeps = ["LineSearches"] [deps.LineSearch.extensions] @@ -1844,12 +1918,6 @@ version = "1.15.9" Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" -[[deps.MCMCChains]] -deps = ["AbstractMCMC", "AxisArrays", "DataAPI", "Dates", "Distributions", "IteratorInterfaceExtensions", "KernelDensity", "LinearAlgebra", "MCMCDiagnosticTools", "MLJModelInterface", "NaturalSort", "OrderedCollections", "PrettyTables", "Random", "RecipesBase", "Statistics", "StatsBase", "StatsFuns", "TableTraits", "Tables"] -git-tree-sha1 = "060d6bc7cf60e621dfd056ed2c1a2db1e68db0fe" -uuid = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -version = "7.7.0" - [[deps.MCMCDiagnosticTools]] deps = ["AbstractFFTs", "DataAPI", "DataStructures", "Distributions", "LinearAlgebra", "MLJModelInterface", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns", "Tables"] git-tree-sha1 = "2f464b68e84673727b4e4216a6254fba7da5cf4e" @@ -1870,9 +1938,9 @@ version = "1.0.0" [[deps.MLDataDevices]] deps = ["Adapt", "Functors", "Preferences", "Random", "SciMLPublic"] -git-tree-sha1 = "2dfe3b4b96c6ecbea7c798dfbe96d493fd7a1848" +git-tree-sha1 = "29b00f22be6fd821a214760f0224329f21998a05" uuid = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" -version = "1.17.8" +version = "1.17.10" [deps.MLDataDevices.extensions] AMDGPUExt = "AMDGPU" @@ -2133,11 
+2201,6 @@ git-tree-sha1 = "1a0fa0e9613f46c9b8c11eee38ebb4f590013c5e" uuid = "71a1bf82-56d0-4bbc-8a3c-48b961074391" version = "0.1.5" -[[deps.NaturalSort]] -git-tree-sha1 = "eda490d06b9f7c00752ee81cfa451efe55521e21" -uuid = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" -version = "1.0.0" - [[deps.NearestNeighbors]] deps = ["AbstractTrees", "Distances", "StaticArrays"] git-tree-sha1 = "e2c3bba08dd6dedfe17a17889131b885b8c082f0" @@ -2828,11 +2891,6 @@ git-tree-sha1 = "c6ec94d2aaba1ab2ff983052cf6a606ca5985902" uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.6.0" -[[deps.RangeArrays]] -git-tree-sha1 = "b9039e93773ddcfc828f12aadf7115b4b4d225f5" -uuid = "b3c3ace0-ae52-54e7-9d0b-2c1406fd6b9d" -version = "0.3.2" - [[deps.Ratios]] deps = ["Requires"] git-tree-sha1 = "1342a47bf3260ee108163042310d26f2be5ec90b" @@ -2989,9 +3047,9 @@ version = "0.6.1" [[deps.SciMLBase]] deps = ["ADTypes", "Accessors", "Adapt", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "Moshi", "PreallocationTools", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLLogging", "SciMLOperators", "SciMLPublic", "SciMLStructures", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface"] -git-tree-sha1 = "908c0bf271604d09393a21c142116ab26f66f67c" +git-tree-sha1 = "a017ed325ac5e11438c888864fe83b124bb171b7" uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" -version = "2.154.0" +version = "2.155.1" [deps.SciMLBase.extensions] SciMLBaseChainRulesCoreExt = "ChainRulesCore" @@ -3580,15 +3638,18 @@ uuid = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" version = "1.6.0" [[deps.Turing]] -deps = ["ADTypes", "AbstractMCMC", "AbstractPPL", "Accessors", "AdvancedHMC", "AdvancedMH", "AdvancedPS", "AdvancedVI", "BangBang", "Bijectors", "Compat", "DataStructures", "DifferentiationInterface", "Distributions", "DocStringExtensions", "DynamicPPL", "EllipticalSliceSampling", "ForwardDiff", "Libtask", "LinearAlgebra", "LogDensityProblems", "MCMCChains", "Optimization", "OptimizationOptimJL", "OrderedCollections", "Printf", "Random", "Reexport", "SciMLBase", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] -git-tree-sha1 = "666a4c4a3758e5b25eb100bce144f378addd8697" +deps = ["ADTypes", "AbstractMCMC", "AbstractPPL", "Accessors", "AdvancedHMC", "AdvancedMH", "AdvancedPS", "AdvancedVI", "BangBang", "Bijectors", "Compat", "DataStructures", "DifferentiationInterface", "Distributions", "DocStringExtensions", "DynamicPPL", "EllipticalSliceSampling", "FlexiChains", "ForwardDiff", "Libtask", "LinearAlgebra", "LogDensityProblems", "Optimization", "OptimizationOptimJL", "OrderedCollections", "Printf", "Random", "Reexport", "SciMLBase", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] +git-tree-sha1 = "96cfabe16a96096d4864df1760187ee8814c17bf" uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" -version = "0.44.5" -weakdeps = ["DynamicHMC"] +version = "0.45.0" [deps.Turing.extensions] TuringDynamicHMCExt = "DynamicHMC" + [deps.Turing.weakdeps] + DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" + MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" + [[deps.URIs]] git-tree-sha1 = "bef26fb046d031353ef97a82e3fdb6afe7f21b1a" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" diff --git a/Project.toml b/Project.toml index 02fe26958..6fb4dee5d 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ 
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +FlexiChains = "4a37a8b9-6e57-4b92-8664-298d46e639f7" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" @@ -26,7 +27,6 @@ LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" -MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" @@ -51,6 +51,4 @@ StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] -Turing = "0.44" -# https://github.com/SciML/SciMLBase.jl/issues/1325 -SciMLBase = "=2.154.0" +Turing = "0.45" From ba3eecaf25bb54a0c01a2bb6c976bb2902390500 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:01:29 +0100 Subject: [PATCH 02/29] := is not a problem with FlexiChains --- usage/tracking-extra-quantities/index.qmd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/usage/tracking-extra-quantities/index.qmd b/usage/tracking-extra-quantities/index.qmd index 8ce77151c..b88cb9a0d 100755 --- a/usage/tracking-extra-quantities/index.qmd +++ b/usage/tracking-extra-quantities/index.qmd @@ -93,10 +93,7 @@ nts[1] There are some pros and cons of using `returned`, as opposed to `:=`. -Firstly, `returned` is more flexible, as it allows you to track any type of object; `:=` only works with variables that can be inserted into an `MCMCChains.Chains` object. -(Notice that `x` is a vector, and in the first case where we used `:=`, reconstructing the vector value of `x` can also be rather annoying as the chain stores each individual element of `x` separately.) - -A drawback is that naively using `returned` can lead to unnecessary computation during inference. +One drawback is that naively using `returned` can lead to unnecessary computation during inference. This is because during the sampling process, the return values are also calculated (since they are part of the model function), but then thrown away. So, if the extra quantities are expensive to compute, this can be a problem. 
From 0925d6ef03bed70c5aab43c1028609183010f30e Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:11:37 +0100 Subject: [PATCH 03/29] First batch of FlexiChains changes --- core-functionality/index.qmd | 29 ++++++++++--------- .../compiler/minituring-compiler/index.qmd | 4 +-- .../compiler/minituring-contexts/index.qmd | 2 +- developers/transforms/bijectors/index.qmd | 2 +- getting-started/index.qmd | 2 +- .../bayesian-logistic-regression/index.qmd | 14 ++++----- .../bayesian-poisson-regression/index.qmd | 4 +-- .../bayesian-time-series-analysis/index.qmd | 2 -- tutorials/coin-flipping/index.qmd | 4 +-- .../multinomial-logistic-regression/index.qmd | 4 +-- tutorials/probabilistic-pca/index.qmd | 2 +- tutorials/variational-inference/index.qmd | 7 +++-- usage/external-samplers/index.qmd | 2 +- usage/predictive-distributions/index.qmd | 6 ++-- usage/sampling-options/index.qmd | 27 +++++++++-------- usage/threadsafe-evaluation/index.qmd | 12 ++++---- 16 files changed, 60 insertions(+), 63 deletions(-) diff --git a/core-functionality/index.qmd b/core-functionality/index.qmd index 2f78b0ade..c6af9664e 100755 --- a/core-functionality/index.qmd +++ b/core-functionality/index.qmd @@ -130,12 +130,14 @@ The arguments for each sampler are: More information about each sampler can be found in [Turing.jl's API docs](https://turinglang.org/Turing.jl). -The `MCMCChains` module (which is re-exported by Turing) provides plotting tools for the `Chain` objects returned by a `sample` function. -See the [MCMCChains](https://github.com/TuringLang/MCMCChains.jl) repository for more information on the suite of tools available for diagnosing MCMC chains. +FlexiChains.jl provides plotting tools for the `VNChain` objects returned by a `sample` function. +For more information about FlexiChains and its plotting capabilities, please see the [FlexiChains.jl documentation](https://pysm.dev/FlexiChains.jl/stable). ```julia +using FlexiChains + # Summarise results -describe(c3) +summarystats(c3) # Plot results plot(c3) @@ -182,13 +184,12 @@ model = model_name() | (arg_1 = 1, arg_2 = 2) The returned chain contains samples of the variables in the model. ```julia -var_1 = mean(chn[:var_1]) # Taking the mean of a variable named var_1. +var_1 = mean(chn[@varname(var_1)]) # Taking the mean of a variable named var_1. ``` -The key (`:var_1`) can either be a `Symbol` or a `String`. -For example, to fetch `x[1]`, one can use `chn[Symbol("x[1]")]` or `chn["x[1]"]`. -If you want to retrieve all parameters associated with a specific symbol, you can use `group`. -As an example, if you have the parameters `"x[1]"`, `"x[2]"`, and `"x[3]"`, calling `group(chn, :x)` or `group(chn, "x")` will return a new chain with only `"x[1]"`, `"x[2]"`, and `"x[3]"`. +The key should be a `VarName`, constructed with the `@varname` macro. + +FlexiChains has a very powerful indexing interface, which includes the ability to index into sub-variables: please see [the FlexiChains docs](https://pysm.dev/FlexiChains.jl/stable/turing/#Accessing-data) for more information. ### Tilde-statement ordering @@ -573,19 +574,19 @@ end end ``` -### Working with MCMCChains.jl +### Working with FlexiChains.jl -Turing.jl wraps its samples using `MCMCChains.Chains` so that all the functions working for `MCMCChains.Chain` can be re-used in Turing.jl. -Two typical functions are `MCMCChains.describe` and `MCMCChains.plot`, which can be used as follows for an obtained chain `chn`. 
-For more information on `MCMCChains`, please see the [GitHub repository](https://github.com/TuringLang/MCMCChains.jl). +Turing.jl wraps its samples using `FlexiChains.VNChain` so that all the functions working for `FlexiChains.VNChain` can be re-used in Turing.jl. +Two typical functions are `describe` and `plot`, which can be used as follows for an obtained chain `chn`. +For more information on `FlexiChains`, please see the [GitHub repository](https://github.com/TuringLang/FlexiChains.jl). ```{julia} describe(chn) # Lists statistics of the samples. plot(chn) # Plots statistics of the samples. ``` -There are numerous functions in addition to `describe` and `plot` in the `MCMCChains` package, such as those used in convergence diagnostics. -For more information on the package, please see the [GitHub repository](https://github.com/TuringLang/MCMCChains.jl). +There are numerous functions in addition to `describe` and `plot` in the `FlexiChains` package, such as those used in convergence diagnostics. +For more information on the package, please see the [GitHub repository](https://github.com/TuringLang/FlexiChains.jl). ### Changing Default Settings diff --git a/developers/compiler/minituring-compiler/index.qmd b/developers/compiler/minituring-compiler/index.qmd index 2be489c4e..f142d0f31 100755 --- a/developers/compiler/minituring-compiler/index.qmd +++ b/developers/compiler/minituring-compiler/index.qmd @@ -52,7 +52,7 @@ Specifically, we demand that First, we import some required packages: ```{julia} -using MacroTools, Distributions, Random, AbstractMCMC, MCMCChains +using MacroTools, Distributions, Random, AbstractMCMC, FlexiChains ``` Before getting to the actual "compiler", we first build the data structure for the program trace. @@ -242,7 +242,7 @@ function AbstractMCMC.step( end; ``` -To make it easier to analyse the samples and compare them with results from Turing, additionally we define a version of `AbstractMCMC.bundle_samples` for our model and sampler that returns a `MCMCChains.Chains` object of samples. +To make it easier to analyse the samples and compare them with results from Turing, additionally we define a version of `AbstractMCMC.bundle_samples` for our model and sampler that returns a `FlexiChains.VNChain` object of samples. 
```{julia} function AbstractMCMC.bundle_samples( diff --git a/developers/compiler/minituring-contexts/index.qmd b/developers/compiler/minituring-contexts/index.qmd index 5439e719b..3bffbcacb 100755 --- a/developers/compiler/minituring-contexts/index.qmd +++ b/developers/compiler/minituring-contexts/index.qmd @@ -25,7 +25,7 @@ If you haven't read [Mini Turing]({{< meta minituring >}}) yet, you should do th ```{julia} import MacroTools, Random, AbstractMCMC using Distributions: Normal, logpdf -using MCMCChains: Chains +using FlexiChains: Chains using AbstractMCMC: sample struct VarInfo{V,L} diff --git a/developers/transforms/bijectors/index.qmd b/developers/transforms/bijectors/index.qmd index a0d41476d..8fa15ee87 100644 --- a/developers/transforms/bijectors/index.qmd +++ b/developers/transforms/bijectors/index.qmd @@ -306,7 +306,7 @@ One way to do this is to calculate the _effective sample size_ (ESS), which is d A larger ESS implies that the samples are less correlated, and thus more representative of the underlying distribution: ```{julia} -using MCMCChains: Chains, ess +using FlexiChains: Chains, ess rejection = first(mh(logp, 10000, x -> x > 0)) transformation = f_inv(first(mh(logq, 10000, x -> true))) diff --git a/getting-started/index.qmd b/getting-started/index.qmd index 5a8fb8fae..ba7d332da 100644 --- a/getting-started/index.qmd +++ b/getting-started/index.qmd @@ -79,7 +79,7 @@ plot(chain) and obtain summary statistics by indexing the chain: ```{julia} -mean(chain[:m]), mean(chain[:s²]) +mean(chain[@varname(m)]), mean(chain[@varname(s²)]) ``` ### Where to go next diff --git a/tutorials/bayesian-logistic-regression/index.qmd b/tutorials/bayesian-logistic-regression/index.qmd index 2ab4e04e2..1eed9429c 100755 --- a/tutorials/bayesian-logistic-regression/index.qmd +++ b/tutorials/bayesian-logistic-regression/index.qmd @@ -23,7 +23,7 @@ To start, let's import all the libraries we'll need. ```{julia} using Turing using RDatasets -using MCMCChains, Plots, StatsPlots +using Plots, StatsPlots using StatsFuns: logistic using MLUtils: splitobs using StatsBase: fit, transform!, ZScoreTransform @@ -205,14 +205,12 @@ The `prediction` function below takes a `Matrix` and a `Chain` object. It takes the mean of each parameter's sampled values and re-runs the logistic function using those mean values for every element in the test set. ```{julia} -using MCMCChains: MCMCChains - -function prediction(x::AbstractMatrix, chain::MCMCChains.Chains, threshold) +function prediction(x::AbstractMatrix, chain, threshold) # Pull the means from each parameter's sampled values in the chain. - intercept = mean(chain[:intercept]) - student = mean(chain[:student]) - balance = mean(chain[:balance]) - income = mean(chain[:income]) + intercept = mean(chain[@varname(intercept)]) + student = mean(chain[@varname(student)]) + balance = mean(chain[@varname(balance)]) + income = mean(chain[@varname(income)]) # Retrieve the number of observations. n = size(x, 2) diff --git a/tutorials/bayesian-poisson-regression/index.qmd b/tutorials/bayesian-poisson-regression/index.qmd index 74f12d8c8..3713f6d86 100755 --- a/tutorials/bayesian-poisson-regression/index.qmd +++ b/tutorials/bayesian-poisson-regression/index.qmd @@ -26,8 +26,8 @@ We start by importing the required libraries. #Import Turing, Distributions and DataFrames using Turing, Distributions, DataFrames, Distributed -# Import MCMCChain, Plots, and StatsPlots for visualisations and diagnostics. 
-using MCMCChains, Plots, StatsPlots +# Import Plots and StatsPlots for visualisations and diagnostics. +using Plots, StatsPlots # Set a seed for reproducibility. using Random diff --git a/tutorials/bayesian-time-series-analysis/index.qmd b/tutorials/bayesian-time-series-analysis/index.qmd index 80399e64a..5522d9131 100755 --- a/tutorials/bayesian-time-series-analysis/index.qmd +++ b/tutorials/bayesian-time-series-analysis/index.qmd @@ -165,8 +165,6 @@ scatter!(t, yf; color=2, label="Data") With the model specified and with a reasonable prior we can now let Turing decompose the time series for us! ```{julia} -using MCMCChains: get_sections - function mean_ribbon(samples) qs = quantile(samples) low = qs[:, Symbol("2.5%")] diff --git a/tutorials/coin-flipping/index.qmd b/tutorials/coin-flipping/index.qmd index d57c79181..118b4f5a2 100755 --- a/tutorials/coin-flipping/index.qmd +++ b/tutorials/coin-flipping/index.qmd @@ -134,10 +134,10 @@ To do so, we first need to load `Turing`. using Turing ``` -Additionally, we load `MCMCChains`, a library for analysing and visualising the samples with which we approximate the posterior distribution. +Additionally, we load `FlexiChains`, a library for analysing and visualising the samples with which we approximate the posterior distribution. ```{julia} -using MCMCChains +using FlexiChains ``` First, we define the coin-flip model using Turing. diff --git a/tutorials/multinomial-logistic-regression/index.qmd b/tutorials/multinomial-logistic-regression/index.qmd index 1377ad9a2..c8d50e654 100755 --- a/tutorials/multinomial-logistic-regression/index.qmd +++ b/tutorials/multinomial-logistic-regression/index.qmd @@ -24,7 +24,6 @@ To start, let's import all the libraries we'll need. ```{julia} using Turing -using MCMCChains: Chains using StatsPlots # Functionality for splitting and normalising the data. using MLUtils: shuffleobs, splitobs, load_iris @@ -165,7 +164,8 @@ It computes the mean of the sampled parameters and calculates the species with t Note that we do not have to evaluate the `softmax` function since it does not affect the order of its inputs. ```{julia} -function prediction(x::AbstractMatrix{<:Real}, chain::MCMCChains.Chains) +import FlexiChains +function prediction(x::AbstractMatrix{<:Real}, chain::FlexiChains.VNChain) # Pull the means from each parameter's sampled values in the chain. intercept_versicolor = mean(chain, :intercept_versicolor) intercept_virginica = mean(chain, :intercept_virginica) diff --git a/tutorials/probabilistic-pca/index.qmd b/tutorials/probabilistic-pca/index.qmd index 4eae66da7..900eb2124 100755 --- a/tutorials/probabilistic-pca/index.qmd +++ b/tutorials/probabilistic-pca/index.qmd @@ -209,7 +209,7 @@ ppca = pPCA(mat_exp', k) # instantiate the probabilistic model chain_ppca = sample(ppca, NUTS(; adtype=AutoMooncake()), 500); ``` -The samples are saved in `chain_ppca`, which is an `MCMCChains.Chains` object. +The samples are saved in `chain_ppca`, which is an `FlexiChains.VNChain` object. 
We can check its shape: ```{julia} diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index d56e60e68..aa3776112 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -352,7 +352,8 @@ Each sample is a [`DynamicPPL.VarNamedTuple`]({{< meta usage-varnamedtuple >}}), ```{julia} z = rand(result_fr, 100_000, 1) using AbstractMCMC: AbstractMCMC -vi_chain = AbstractMCMC.from_samples(MCMCChains.Chains, z) +import FlexiChains +vi_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, z) ``` Now, we can, for example, look at expectations: @@ -418,8 +419,8 @@ first(test_cut, 6) ```{julia} # Construct the Chains from the Variational Approximations -vi_mf_chain = AbstractMCMC.from_samples(MCMCChains.Chains, rand(result_mf, 10_000, 1)) -vi_fr_chain = AbstractMCMC.from_samples(MCMCChains.Chains, rand(result_fr, 10_000, 1)) +vi_mf_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, rand(result_mf, 10_000, 1)) +vi_fr_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, rand(result_fr, 10_000, 1)) ``` ```{julia} diff --git a/usage/external-samplers/index.qmd b/usage/external-samplers/index.qmd index 4d5c839ca..4a1ba703b 100755 --- a/usage/external-samplers/index.qmd +++ b/usage/external-samplers/index.qmd @@ -127,7 +127,7 @@ As shown above, there must be two `step` methods: - A method that performs the following steps and takes an extra input, `state`, which carries the initialisation information. The output of both of these methods must be a tuple containing: - - a 'transition', which is essentially the 'visible output' of the sampler: this object is later used to construct an `MCMCChains.Chains`; + - a 'transition', which is essentially the 'visible output' of the sampler: this object is later used to construct an `FlexiChains.VNChain`; - a 'state', representing the current state of the sampler, which is passed to the next step of the MCMC algorithm. Apart from this, your sampler state should also implement `AbstractMCMC.getparams(model, state)` to return the parameters of the model as a vector. diff --git a/usage/predictive-distributions/index.qmd b/usage/predictive-distributions/index.qmd index 96e595c45..3c9de1864 100755 --- a/usage/predictive-distributions/index.qmd +++ b/usage/predictive-distributions/index.qmd @@ -44,12 +44,12 @@ model = f(N) | (; X = X) # Sample from the posterior chain = sample(model, NUTS(), 1_000; progress=false) -mean(chain[:m]) +mean(chain[@varname(m)]) ``` ## Posterior predictive distribution -`chain[:m]` now contains samples from the posterior distribution of `m`. +`chain[@varname(m)]` now contains samples from the posterior distribution of `m`. If we use these samples of the parameters to generate new data points, we obtain the *posterior predictive distribution*. 
Statistically, this is defined as @@ -77,7 +77,7 @@ If you only want to decondition a single variable `X`, you can use `decondition( To demonstrate how this deconditioned model can generate new data, we can fix the value of `m` to be its mean and evaluate the model: ```{julia} -predictive_model_with_mean_m = predictive_model | (; m = mean(chain[:m])) +predictive_model_with_mean_m = predictive_model | (; m = mean(chain[@varname(m)])) rand(predictive_model_with_mean_m) ``` diff --git a/usage/sampling-options/index.qmd b/usage/sampling-options/index.qmd index aa23c0ad9..904ddee67 100644 --- a/usage/sampling-options/index.qmd +++ b/usage/sampling-options/index.qmd @@ -85,7 +85,7 @@ If you require absolute reproducibility, you should use [the StableRNGs.jl packa ## Switching the output type -By default, the results of MCMC sampling are bundled up in an `MCMCChains.Chains` object. +By default, the results of MCMC sampling are bundled up in an `FlexiChains.VNChain` object. ```{julia} chn = sample(demo_model(), HMC(0.1, 20), 5) @@ -128,11 +128,11 @@ If `initial_params` is unspecified, each sampler will use its own default initia ```{julia} chn = sample(demo_model(), MH(), 5; initial_params=InitFromParams((x = 1.0, y = -5.0))) -chn[:x][1], chn[:y][1] +chn[@varname(x), iter=1, chain=1], chn[@varname(y), iter=1, chain=1] ``` ::: {.callout-note} -Note that a number of samplers use warm-up steps by default (see the [Thinning and Warmup section below](#thinning-and-warmup)), so `chn[:param][1]` may not correspond to the exact initial parameters you provided. +Note that a number of samplers use warm-up steps by default (see the [Thinning and Warmup section below](#thinning-and-warmup)), so `chn[@varname(param), iter=1, chain=1]` may not correspond to the exact initial parameters you provided. `MH()` does not do this, which is why we use it here. ::: @@ -155,7 +155,7 @@ If you want to use the same initial parameters for all chains, you can use `fill ```{julia} initial_params = fill(InitFromParams((x=1.0, y=-5.0)), 3) chn = sample(demo_model(), MH(), MCMCThreads(), 5, 3; initial_params=initial_params) -chn[:x][1,:], chn[:y][1,:] +chn[@varname(x), iter=1], chn[@varname(y), iter=1] ``` In Turing v0.41, initialisation with a raw `NamedTuple` is still supported (it will simply be wrapped in `InitFromParams()`); but we expect to remove this eventually, so it will likely be more future-proof to wrap this in `InitFromParams()` yourself. @@ -174,7 +174,7 @@ rng = Xoshiro(468) chn1 = sample(rng, demo_model(), MH(), 5; save_state=true); ``` -For `MCMCChains.Chains`, this results in the final sampler state being stored inside the chain metadata. +For `FlexiChains.VNChain`, this results in the final sampler state being stored inside the chain metadata. You can access it using `Turing.loadstate`: ```{julia} @@ -183,19 +183,16 @@ typeof(saved_state) ``` ::: {.callout-note} -You can also directly access the saved sampler state with `chn1.info.samplerstate`, but we recommend _not_ using this as it relies on the internal structure of `MCMCChains.Chains`. +You can also directly access the saved sampler state with `FlexiChains.last_sampler_state(chn1)`, which is equivalent. ::: Sampling can then be resumed from this state by providing it as the `initial_state` keyword argument. 
+Note that `loadstate` always returns a vector of states, even for single-chain sampling, so before passing it to `sample` you should extract the single state inside it: ```{julia} -chn2 = sample(demo_model(), MH(), 5; initial_state=saved_state) +chn2 = sample(demo_model(), MH(), 5; initial_state=only(saved_state)) ``` -Note that the exact format saved in `chn.info.samplerstate`, and that expected by `initial_state`, depends on the invocation of `sample` used. -For single-chain sampling, the saved state, and the required initial state, is just a single sampler state. -For multiple-chain sampling, it is a vector of states, one per chain. - ::: {.callout-warning} ## `resume_from` @@ -203,10 +200,11 @@ The `resume_from` argument has been removed in Turing v0.41; please use `initial In v0.41, `loadstate` is also exported from Turing rather than DynamicPPL. ::: +For multiple-chain sampling, `sample()` expects a vector of states, one per chain. This means that, for example, after sampling a single chain, you could sample three chains that branch off from that final state: ```{julia} -initial_states = fill(saved_state, 3) +initial_states = fill(only(saved_state), 3) chn3 = sample(demo_model(), MH(), MCMCThreads(), 5, 3; initial_state=initial_states) ``` @@ -220,7 +218,7 @@ If both are provided, `initial_params` will be silently ignored. chn2 = sample(rng, demo_model(), MH(), 5; initial_state=saved_state, initial_params=InitFromParams((x=0.0, y=0.0)) ) -chn2[:x][1], chn2[:y][1] +chn2[@varname(x)][1], chn2[@varname(y)][1] ``` In general, the saved state will contain a set of parameters (which will be the last parameters in the previous chain). @@ -232,7 +230,8 @@ Finally, note that the first sample in the resumed chain will not be the same as ```{julia} # In general these will not be the same (although it _could_ be if the MH step # was rejected -- that is why we seed the sampling in this section). -chn1[:x][end], chn2[:x][1] + +chn1[@varname(x)][end], chn1[@varname(y)][end] ``` ::: diff --git a/usage/threadsafe-evaluation/index.qmd b/usage/threadsafe-evaluation/index.qmd index edec292c1..c45e1f2cc 100755 --- a/usage/threadsafe-evaluation/index.qmd +++ b/usage/threadsafe-evaluation/index.qmd @@ -135,20 +135,20 @@ This means if your model contains parallelised random variables, you are not gua using Random: Xoshiro chn = sample(Xoshiro(468), threadsafe_model, NUTS(), 100; verbose=false) -@show mean(chn[Symbol("x[1]")]) +@show mean(chn[@varname(x[1])]) chn = sample(Xoshiro(468), threadsafe_model, NUTS(), 100; verbose=false) -@show mean(chn[Symbol("x[1]")]); +@show mean(chn[@varname(x[1])]); ``` Some samplers do indeed yield the same results (but NUTS is not one of them, and we cannot make any concrete guarantees at this point in time): ```{julia} chn = sample(Xoshiro(468), threadsafe_model, MH(), 100) -@show mean(chn[Symbol("x[1]")]) +@show mean(chn[@varname(x[1])]) chn = sample(Xoshiro(468), threadsafe_model, MH(), 100) -@show mean(chn[Symbol("x[1]")]); +@show mean(chn[@varname(x[1])]); ``` Now consider a different situation where you only have parallelised data, and not random variables. 
@@ -165,10 +165,10 @@ end threadsafe_model_data_only = setthreadsafe(threaded_data(N) | (; y = y), true) chn = sample(Xoshiro(468), threadsafe_model_data_only, NUTS(), 100; verbose=false) -@show mean(chn[:x]) +@show mean(chn[@varname(x)]) chn = sample(Xoshiro(468), threadsafe_model_data_only, NUTS(), 100; verbose=false) -@show mean(chn[:x]); +@show mean(chn[@varname(x)]); ``` ## When is threadsafe evaluation really needed? From db11c42b57a69178934d0f347c73a0dd3609facd Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:12:59 +0100 Subject: [PATCH 04/29] remove redundant info --- core-functionality/index.qmd | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/core-functionality/index.qmd b/core-functionality/index.qmd index c6af9664e..9ae653b25 100755 --- a/core-functionality/index.qmd +++ b/core-functionality/index.qmd @@ -574,20 +574,6 @@ end end ``` -### Working with FlexiChains.jl - -Turing.jl wraps its samples using `FlexiChains.VNChain` so that all the functions working for `FlexiChains.VNChain` can be re-used in Turing.jl. -Two typical functions are `describe` and `plot`, which can be used as follows for an obtained chain `chn`. -For more information on `FlexiChains`, please see the [GitHub repository](https://github.com/TuringLang/FlexiChains.jl). - -```{julia} -describe(chn) # Lists statistics of the samples. -plot(chn) # Plots statistics of the samples. -``` - -There are numerous functions in addition to `describe` and `plot` in the `FlexiChains` package, such as those used in convergence diagnostics. -For more information on the package, please see the [GitHub repository](https://github.com/TuringLang/FlexiChains.jl). - ### Changing Default Settings Some of Turing.jl's default settings can be changed for better usage. 
From 06c3822794f326d32046275eab0077f6d6136b4c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:14:07 +0100 Subject: [PATCH 05/29] Remove minituring --- _quarto.yml | 8 - .../compiler/minituring-compiler/index.qmd | 306 ----------------- .../compiler/minituring-contexts/index.qmd | 310 ------------------ 3 files changed, 624 deletions(-) delete mode 100755 developers/compiler/minituring-compiler/index.qmd delete mode 100755 developers/compiler/minituring-contexts/index.qmd diff --git a/_quarto.yml b/_quarto.yml index cad276703..dd0e4e8aa 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -121,12 +121,6 @@ website: - section: "Developers" contents: - - section: "DynamicPPL's Compiler" - collapse-level: 1 - contents: - - developers/compiler/minituring-compiler/index.qmd - - developers/compiler/minituring-contexts/index.qmd - - section: "DynamicPPL Contexts" collapse-level: 1 contents: @@ -232,8 +226,6 @@ contributing-pull-requests: contributing/pull-requests contributing-code-formatting: contributing/code-formatting contributing-core-developers: contributing/core-developers dev-model-manual: developers/compiler/model-manual -contexts: developers/compiler/minituring-contexts -minituring: developers/compiler/minituring-compiler dev-variational-inference: developers/inference/variational-inference using-turing-implementing-samplers: developers/inference/implementing-samplers dev-transforms-distributions: developers/transforms/distributions diff --git a/developers/compiler/minituring-compiler/index.qmd b/developers/compiler/minituring-compiler/index.qmd deleted file mode 100755 index f142d0f31..000000000 --- a/developers/compiler/minituring-compiler/index.qmd +++ /dev/null @@ -1,306 +0,0 @@ ---- -title: "A Mini Turing Implementation I: Compiler" -engine: julia -aliases: - - ../../../tutorials/14-minituring/index.html ---- - -```{julia} -#| echo: false -#| output: false -using Pkg; -Pkg.instantiate(); -``` - -In this tutorial we develop a very simple probabilistic programming language. -The implementation is similar to [DynamicPPL](https://github.com/TuringLang/DynamicPPL.jl). -This is intentional as we want to demonstrate some key ideas from Turing's internal implementation. - -To make things easy to understand and to implement we restrict our language to a very simple subset of the language that Turing actually supports. -Defining an accurate syntax description is not our goal here, instead, we give a simple example and all similar programs should work. - -# Consider a probabilistic model defined by - -$$ -\begin{aligned} -a &\sim \operatorname{Normal}(0.5, 1^2) \\ -b &\sim \operatorname{Normal}(a, 2^2) \\ -x &\sim \operatorname{Normal}(b, 0.5^2) -\end{aligned} -$$ - -We assume that `x` is data, i.e., an observed variable. -In our small language this model will be defined as - -```{julia} -#| eval: false -@mini_model function m(x) - a ~ Normal(0.5, 1) - b ~ Normal(a, 2) - x ~ Normal(b, 0.5) - return nothing -end -``` - -Specifically, we demand that - - - all observed variables are arguments of the program, - - the model definition does not contain any control flow, - - all variables are scalars, and - - the function returns `nothing`. - -First, we import some required packages: - -```{julia} -using MacroTools, Distributions, Random, AbstractMCMC, FlexiChains -``` - -Before getting to the actual "compiler", we first build the data structure for the program trace. 
-A program trace for a probabilistic programming language needs to at least record the values of stochastic variables and their log-probabilities. - -```{julia} -struct VarInfo{V,L} - values::V - logps::L -end - -VarInfo() = VarInfo(Dict{Symbol,Float64}(), Dict{Symbol,Float64}()) - -function Base.setindex!(varinfo::VarInfo, (value, logp), var_id) - varinfo.values[var_id] = value - varinfo.logps[var_id] = logp - return varinfo -end -``` - -Internally, our probabilistic programming language works with two main functions: - - - `assume` for sampling unobserved variables and computing their log-probabilities, and - - `observe` for computing log-probabilities of observed variables (but not sampling them). - -For different inference algorithms we may have to use different sampling procedures and different log-probability computations. -For instance, in some cases we might want to sample all variables from their prior distributions and in other cases we might only want to compute the log-likelihood of the observations based on a given set of values for the unobserved variables. -Thus depending on the inference algorithm we want to use different `assume` and `observe` implementations. -We can achieve this by providing this `context` information as a function argument to `assume` and `observe`. - -**Note:** *Although the context system in this tutorial is inspired by DynamicPPL, it is very simplistic. -We expand this mini Turing example in the [contexts]({{}}) tutorial with some more complexity, to illustrate how and why contexts are central to Turing's design. For the full details one still needs to go to the actual source of DynamicPPL though.* - -Here we can see the implementation of a sampler that draws values of unobserved variables from the prior and computes the log-probability for every variable. - -```{julia} -struct SamplingContext{S<:AbstractMCMC.AbstractSampler,R<:Random.AbstractRNG} - rng::R - sampler::S -end - -struct PriorSampler <: AbstractMCMC.AbstractSampler end - -function observe(context::SamplingContext, varinfo, dist, var_id, var_value) - logp = logpdf(dist, var_value) - varinfo[var_id] = (var_value, logp) - return nothing -end - -function assume(context::SamplingContext{PriorSampler}, varinfo, dist, var_id) - sample = Random.rand(context.rng, dist) - logp = logpdf(dist, sample) - varinfo[var_id] = (sample, logp) - return sample -end; -``` - -Next we define the "compiler" for our simple programming language. -The term compiler is actually a bit misleading here since its only purpose is to transform the function definition in the `@mini_model` macro by - - - adding the context information (`context`) and the tracing data structure (`varinfo`) as additional arguments, and - - replacing tildes with calls to `assume` and `observe`. - -Afterwards, as usual the Julia compiler will just-in-time compile the model function when it is called. - -The manipulation of Julia expressions is an advanced part of the Julia language. -The [Julia documentation](https://docs.julialang.org/en/v1/manual/metaprogramming/) provides an introduction to and more details about this so-called metaprogramming. - -```{julia} -macro mini_model(expr) - return esc(mini_model(expr)) -end - -function mini_model(expr) - # Split the function definition into a dictionary with its name, arguments, body etc. 
- def = MacroTools.splitdef(expr) - - # Replace tildes in the function body with calls to `assume` or `observe` - def[:body] = MacroTools.postwalk(def[:body]) do sub_expr - if MacroTools.@capture(sub_expr, var_ ~ dist_) - if var in def[:args] - # If the variable is an argument of the model function, it is observed - return :($(observe)(context, varinfo, $dist, $(Meta.quot(var)), $var)) - else - # Otherwise it is unobserved - return :($var = $(assume)(context, varinfo, $dist, $(Meta.quot(var)))) - end - else - return sub_expr - end - end - - # Add `context` and `varinfo` arguments to the model function - def[:args] = vcat(:varinfo, :context, def[:args]) - - # Reassemble the function definition from its name, arguments, body etc. - return MacroTools.combinedef(def) -end; -``` - -For inference, we make use of the [AbstractMCMC interface](https://turinglang.github.io/AbstractMCMC.jl/dev/). -It provides a default implementation of a `sample` function for sampling a Markov chain. -The default implementation already supports e.g. sampling of multiple chains in parallel, thinning of samples, or discarding initial samples. - -The AbstractMCMC interface requires us to at least - - - define a model that is a subtype of `AbstractMCMC.AbstractModel`, - - define a sampler that is a subtype of `AbstractMCMC.AbstractSampler`, - - implement `AbstractMCMC.step` for our model and sampler. - -Thus here we define a `MiniModel` model. -In this model we store the model function and the observed data. - -```{julia} -struct MiniModel{F,D} <: AbstractMCMC.AbstractModel - f::F - data::D # a NamedTuple of all the data -end -``` - -In the Turing compiler, the model-specific `DynamicPPL.Model` is constructed automatically when calling the model function. -But for the sake of simplicity here we construct the model manually. - -To illustrate probabilistic inference with our mini language we implement an extremely simplistic Random-Walk Metropolis-Hastings sampler. -We hard-code the proposal step as part of the sampler and only allow normal distributions with zero mean and fixed standard deviation. -The Metropolis-Hastings sampler in Turing is more flexible. - -```{julia} -struct MHSampler{T<:Real} <: AbstractMCMC.AbstractSampler - sigma::T -end - -MHSampler() = MHSampler(1) - -function assume(context::SamplingContext{<:MHSampler}, varinfo, dist, var_id) - sampler = context.sampler - old_value = varinfo.values[var_id] - - # propose a random-walk step, i.e, add the current value to a random - # value sampled from a Normal distribution centred at 0 - value = rand(context.rng, Normal(old_value, sampler.sigma)) - logp = Distributions.logpdf(dist, value) - varinfo[var_id] = (value, logp) - - return value -end; -``` - -We need to define two `step` functions, one for the first step and the other for the following steps. -In the first step we sample values from the prior distributions and in the following steps we sample with the random-walk proposal. -The two functions are identified by the different arguments they take. - -```{julia} -# The first step: Sampling from the prior distributions -function AbstractMCMC.step( - rng::Random.AbstractRNG, model::MiniModel, sampler::MHSampler; kwargs... -) - vi = VarInfo() - ctx = SamplingContext(rng, PriorSampler()) - model.f(vi, ctx, values(model.data)...) 
- return vi, vi -end - -# The following steps: Sampling with random-walk proposal -function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::MiniModel, - sampler::MHSampler, - prev_state::VarInfo; # is just the old trace - kwargs... -) - vi = prev_state - new_vi = deepcopy(vi) - ctx = SamplingContext(rng, sampler) - model.f(new_vi, ctx, values(model.data)...) - - # Compute log acceptance probability - # Since the proposal is symmetric the computation can be simplified - logα = sum(values(new_vi.logps)) - sum(values(vi.logps)) - - # Accept proposal with computed acceptance probability - if -randexp(rng) < logα - return new_vi, new_vi - else - return prev_state, prev_state - end -end; -``` - -To make it easier to analyse the samples and compare them with results from Turing, additionally we define a version of `AbstractMCMC.bundle_samples` for our model and sampler that returns a `FlexiChains.VNChain` object of samples. - -```{julia} -function AbstractMCMC.bundle_samples( - samples, model::MiniModel, ::MHSampler, ::Any, ::Type{Chains}; kwargs... -) - # We get a vector of traces - values = [sample.values for sample in samples] - params = [key for key in keys(values[1]) if key ∉ keys(model.data)] - vals = reduce(hcat, [value[p] for value in values] for p in params) - # Composing the `Chains` data-structure, of which analysing infrastructure is provided - chains = Chains(vals, params) - return chains -end; -``` - -Let us check how our mini probabilistic programming language works. -We define the probabilistic model: - -```{julia} -@mini_model function m(x) - a ~ Normal(0.5, 1) - b ~ Normal(a, 2) - x ~ Normal(b, 0.5) - return nothing -end; -``` - -The `@mini_model` macro expands this into another function, `m`, which effectively calls either `assume` or `observe` on each variable as needed: - -```{julia} -@macroexpand @mini_model function m(x) - a ~ Normal(0.5, 1) - b ~ Normal(a, 2) - x ~ Normal(b, 0.5) - return nothing -end -``` - -We can use this function to construct the `MiniModel`, and then perform inference with data `x = 3.0`: - -```{julia} -sample(MiniModel(m, (x=3.0,)), MHSampler(), 1_000_000; chain_type=Chains, progress=false) -``` - -We compare these results with Turing. - -```{julia} -using Turing -using PDMats - -@model function turing_m(x) - a ~ Normal(0.5, 1) - b ~ Normal(a, 2) - x ~ Normal(b, 0.5) - return nothing -end - -sample(turing_m(3.0), MH(ScalMat(2, 1.0)), 1_000_000, progress=false) -``` - -As you can see, with our simple probabilistic programming language and custom samplers we get similar results as Turing. \ No newline at end of file diff --git a/developers/compiler/minituring-contexts/index.qmd b/developers/compiler/minituring-contexts/index.qmd deleted file mode 100755 index 3bffbcacb..000000000 --- a/developers/compiler/minituring-contexts/index.qmd +++ /dev/null @@ -1,310 +0,0 @@ ---- -title: "A Mini Turing Implementation II: Contexts" -engine: julia -aliases: - - ../../../tutorials/16-contexts/index.html ---- - -```{julia} -#| echo: false -#| output: false -using Pkg; -Pkg.instantiate(); -``` - -In the [Mini Turing]({{< meta minituring >}}) tutorial we developed a miniature version of the Turing language, to illustrate its core design. A passing mention was made of contexts. In this tutorial we develop that aspect of our mini Turing language further to demonstrate how and why contexts are an important part of Turing's design. - -::: {.callout-important} -Note: The way Turing actually uses contexts changed somewhat in releases 0.39 and 0.40. 
The content of this page remains relevant, the principles of how contexts operate remain the same, and concepts like leaf and parent contexts still exist. However, we've moved away from using contexts for quite as many things as we used to. Most importantly, whether to accumulate the log joint, log prior, or log likelihood is no longer done using different contexts. Please keep this in mind as you read this page: The principles remain, but the details have changed. We will update this page once the refactoring of internals that is happening around releases like 0.39 and 0.40 is done. -::: - -# Mini Turing expanded, now with more contexts - -If you haven't read [Mini Turing]({{< meta minituring >}}) yet, you should do that first. We start by repeating verbatim much of the code from there. Define the type for holding values for variables: - -```{julia} -import MacroTools, Random, AbstractMCMC -using Distributions: Normal, logpdf -using FlexiChains: Chains -using AbstractMCMC: sample - -struct VarInfo{V,L} - values::V - logps::L -end - -VarInfo() = VarInfo(Dict{Symbol,Float64}(), Dict{Symbol,Float64}()) - -function Base.setindex!(varinfo::VarInfo, (value, logp), var_id) - varinfo.values[var_id] = value - varinfo.logps[var_id] = logp - return varinfo -end -``` - -Define the macro that expands `~` expressions to calls to `assume` and `observe`: - -```{julia} -# Methods will be defined for these later. -function assume end -function observe end - -macro mini_model(expr) - return esc(mini_model(expr)) -end - -function mini_model(expr) - # Split the function definition into a dictionary with its name, arguments, body etc. - def = MacroTools.splitdef(expr) - - # Replace tildes in the function body with calls to `assume` or `observe` - def[:body] = MacroTools.postwalk(def[:body]) do sub_expr - if MacroTools.@capture(sub_expr, var_ ~ dist_) - if var in def[:args] - # If the variable is an argument of the model function, it is observed - return :($(observe)(context, varinfo, $dist, $(Meta.quot(var)), $var)) - else - # Otherwise it is unobserved - return :($var = $(assume)(context, varinfo, $dist, $(Meta.quot(var)))) - end - else - return sub_expr - end - end - - # Add `context` and `varinfo` arguments to the model function - def[:args] = vcat(:varinfo, :context, def[:args]) - - # Reassemble the function definition from its name, arguments, body etc. - return MacroTools.combinedef(def) -end - - -struct MiniModel{F,D} <: AbstractMCMC.AbstractModel - f::F - data::D # a NamedTuple of all the data -end -``` - -Define an example model: - -```{julia} -@mini_model function m(x) - a ~ Normal(0.5, 1) - b ~ Normal(a, 2) - x ~ Normal(b, 0.5) - return nothing -end; - -mini_m = MiniModel(m, (x=3.0,)) -``` - -Previously in the mini Turing case, at this point we defined `SamplingContext`, a structure that holds a random number generator and a sampler, and gets passed to `observe` and `assume`. We then used it to implement a simple Metropolis-Hastings sampler. - -The notion of a context may have seemed overly complicated just to implement the sampler, but there are other things we may want to do with a model than sample from the posterior. Having the context passing in place lets us do that without having to touch the above macro at all. For instance, let's say we want to evaluate the log joint probability of the model for a given set of data and parameters. Using a new context type we can use the previously defined `model` function, but change its behaviour by changing what the `observe` and `assume` functions do. 
- - - -```{julia} -struct JointContext end - -function observe(context::JointContext, varinfo, dist, var_id, var_value) - logp = logpdf(dist, var_value) - varinfo[var_id] = (var_value, logp) - return nothing -end - -function assume(context::JointContext, varinfo, dist, var_id) - if !haskey(varinfo.values, var_id) - error("Can't evaluate the log probability if the variable $(var_id) is not set.") - end - var_value = varinfo.values[var_id] - logp = logpdf(dist, var_value) - varinfo[var_id] = (var_value, logp) - return var_value -end - -function logjoint(model, parameter_values::NamedTuple) - vi = VarInfo() - for (var_id, value) in pairs(parameter_values) - # Set the log prob to NaN for now. These will get overwritten when model.f is - # called with JointContext. - vi[var_id] = (value, NaN) - end - model.f(vi, JointContext(), values(model.data)...) - return sum(values(vi.logps)) -end - -logjoint(mini_m, (a=0.5, b=1.0)) -``` - -When using the `JointContext` no sampling whatsoever happens in calling `mini_m`. Rather only the log probability of each given variable value is evaluated. `logjoint` then sums these results to get the total log joint probability. - -We can similarly define a context for evaluating the log prior probability: - -```{julia} -struct PriorContext end - -function observe(context::PriorContext, varinfo, dist, var_id, var_value) - # Since we are evaluating the prior, the log probability of all the observations - # is set to 0. This has the effect of ignoring the likelihood. - varinfo[var_id] = (var_value, 0.0) - return nothing -end - -function assume(context::PriorContext, varinfo, dist, var_id) - if !haskey(varinfo.values, var_id) - error("Can't evaluate the log probability if the variable $(var_id) is not set.") - end - var_value = varinfo.values[var_id] - logp = logpdf(dist, var_value) - varinfo[var_id] = (var_value, logp) - return var_value -end - -function logprior(model, parameter_values::NamedTuple) - vi = VarInfo() - for (var_id, value) in pairs(parameter_values) - vi[var_id] = (value, NaN) - end - model.f(vi, PriorContext(), values(model.data)...) - return sum(values(vi.logps)) -end - -logprior(mini_m, (a=0.5, b=1.0)) -``` - -Notice that the definition of `assume(context::PriorContext, args...)` is identical to the one for `JointContext`, and `logprior` and `logjoint` are also identical except for the context type they create. There's clearly an opportunity here for some refactoring using abstract types, but that's outside the scope of this tutorial. Rather, the point here is to demonstrate that we can extract different sorts of things from our model by defining different context types, and specialising `observe` and `assume` for them. - - -## Contexts within contexts - -Let's use the above two contexts to provide a slightly more general definition of the `SamplingContext` and the Metropolis-Hastings sampler we wrote in the mini Turing tutorial. - -```{julia} -struct SamplingContext{S<:AbstractMCMC.AbstractSampler,R<:Random.AbstractRNG} - rng::R - sampler::S - subcontext::Union{PriorContext, JointContext} -end -``` - -The new aspect here is the `subcontext` field. Note that this is a context within a context! The idea is that we don't need to hard code how the MCMC sampler evaluates the log probability, but rather can pass that work onto the subcontext. This way the same sampler can be used to sample from either the joint or the prior distribution. 
- -The methods for `SamplingContext` are largely as in the our earlier mini Turing case, except they now pass some of the work onto the subcontext: - -```{julia} -function observe(context::SamplingContext, args...) - # Sampling doesn't affect the observed values, so nothing to do here other than pass to - # the subcontext. - return observe(context.subcontext, args...) -end - -struct PriorSampler <: AbstractMCMC.AbstractSampler end - -function assume(context::SamplingContext{PriorSampler}, varinfo, dist, var_id) - sample = Random.rand(context.rng, dist) - varinfo[var_id] = (sample, NaN) - # Once the value has been sampled, let the subcontext handle evaluating the log - # probability. - return assume(context.subcontext, varinfo, dist, var_id) -end; - -# The subcontext field of the MHSampler determines which distribution this sampler -# samples from. -struct MHSampler{D, T<:Real} <: AbstractMCMC.AbstractSampler - sigma::T - subcontext::D -end - -MHSampler(subcontext) = MHSampler(1, subcontext) - -function assume(context::SamplingContext{<:MHSampler}, varinfo, dist, var_id) - sampler = context.sampler - old_value = varinfo.values[var_id] - - # propose a random-walk step, i.e, add the current value to a random - # value sampled from a Normal distribution centred at 0 - value = rand(context.rng, Normal(old_value, sampler.sigma)) - varinfo[var_id] = (value, NaN) - # Once the value has been sampled, let the subcontext handle evaluating the log - # probability. - return assume(context.subcontext, varinfo, dist, var_id) -end; - -# The following three methods are identical to before, except for passing -# `sampler.subcontext` to the context SamplingContext. -function AbstractMCMC.step( - rng::Random.AbstractRNG, model::MiniModel, sampler::MHSampler; kwargs... -) - vi = VarInfo() - ctx = SamplingContext(rng, PriorSampler(), sampler.subcontext) - model.f(vi, ctx, values(model.data)...) - return vi, vi -end - -function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::MiniModel, - sampler::MHSampler, - prev_state::VarInfo; # is just the old trace - kwargs..., -) - vi = prev_state - new_vi = deepcopy(vi) - ctx = SamplingContext(rng, sampler, sampler.subcontext) - model.f(new_vi, ctx, values(model.data)...) - - # Compute log acceptance probability - # Since the proposal is symmetric the computation can be simplified - logα = sum(values(new_vi.logps)) - sum(values(vi.logps)) - - # Accept proposal with computed acceptance probability - if -Random.randexp(rng) < logα - return new_vi, new_vi - else - return prev_state, prev_state - end -end; - -function AbstractMCMC.bundle_samples( - samples, model::MiniModel, ::MHSampler, ::Any, ::Type{Chains}; kwargs... 
-) - # We get a vector of traces - values = [sample.values for sample in samples] - params = [key for key in keys(values[1]) if key ∉ keys(model.data)] - vals = reduce(hcat, [value[p] for value in values] for p in params) - # Composing the `Chains` data-structure, of which analysing infrastructure is provided - chains = Chains(vals, params) - return chains -end; -``` - -We can use this to sample from the joint distribution just like before: - -```{julia} -sample(MiniModel(m, (x=3.0,)), MHSampler(JointContext()), 1_000_000; chain_type=Chains, progress=false) -``` - -or we can choose to sample from the prior instead - -```{julia} -sample(MiniModel(m, (x=3.0,)), MHSampler(PriorContext()), 1_000_000; chain_type=Chains, progress=false) -``` - -Of course, using an MCMC algorithm to sample from the prior is unnecessary and silly (`PriorSampler` exists, after all), but the point is to illustrate the flexibility of the context system. We could, for instance, use the same setup to implement an _Approximate Bayesian Computation_ (ABC) algorithm. - - -The use of contexts also goes far beyond just evaluating log probabilities and sampling. Some examples from Turing are - -* `FixedContext`, which fixes some variables to given values and removes them completely from the evaluation of any log probabilities. They power the `Turing.fix` and `Turing.unfix` functions. -* `ConditionContext` conditions the model on fixed values for some parameters. They are used by `Turing.condition` and `Turing.decondition`, i.e. the `model | (parameter=value,)` syntax. The difference between `fix` and `condition` is whether the log probability for the corresponding variable is included in the overall log density. - -* `PriorExtractorContext` collects information about what the prior distribution of each variable is. -* `PrefixContext` adds prefixes to variable names, allowing models to be used within other models without variable name collisions. -* `PointwiseLikelihoodContext` records the log likelihood of each individual variable. -* `DebugContext` collects useful debugging information while executing the model. - -All of the above are what Turing calls _parent contexts_, which is to say that they all keep a subcontext just like our above `SamplingContext` did. Their implementations of `assume` and `observe` call the implementation of the subcontext once they are done doing their own work of fixing/conditioning/prefixing/etc. Contexts are often chained, so that e.g. a `DebugContext` may wrap within it a `PrefixContext`, which may in turn wrap a `ConditionContext`, etc. The only contexts that _don't_ have a subcontext in the Turing are the ones for evaluating the prior, likelihood, and joint distributions. These are called _leaf contexts_. - -The above version of mini Turing is still much simpler than the full Turing language, but the principles of how contexts are used are the same. 
\ No newline at end of file From f79cddc517612c50f5f50e5d7afebceb57d7dc9f Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:16:32 +0100 Subject: [PATCH 06/29] Don't wrap in chains for ESS --- Manifest.toml | 2 +- Project.toml | 1 + developers/transforms/bijectors/index.qmd | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index b3881e6ae..2405dc65d 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.11.9" manifest_format = "2.0" -project_hash = "4eb7c34874f749cc35761c71488d3f3c287a5d0f" +project_hash = "9618fd89a9e953023338fe3e34956d162287090a" [[deps.ADTypes]] git-tree-sha1 = "bbc22a9a08a0ef6460041086d8a7b27940ed4ffd" diff --git a/Project.toml b/Project.toml index 6fb4dee5d..91e38fb86 100644 --- a/Project.toml +++ b/Project.toml @@ -27,6 +27,7 @@ LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Lux = "b2108857-7c20-44ae-9111-449ecde12c47" +MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" diff --git a/developers/transforms/bijectors/index.qmd b/developers/transforms/bijectors/index.qmd index 8fa15ee87..9ac63d324 100644 --- a/developers/transforms/bijectors/index.qmd +++ b/developers/transforms/bijectors/index.qmd @@ -302,16 +302,16 @@ We can see from this small study that although both methods give us the correct ::: {.callout-note} Alternatively, we could also try to directly measure how correlated the samples are. -One way to do this is to calculate the _effective sample size_ (ESS), which is described in [the Stan documentation](https://mc-stan.org/docs/reference-manual/analysis.html#effective-sample-size.section), and implemented in [MCMCChains.jl](https://github.com/TuringLang/MCMCChains.jl/). +One way to do this is to calculate the _effective sample size_ (ESS), which is described in [the Stan documentation](https://mc-stan.org/docs/reference-manual/analysis.html#effective-sample-size.section), and implemented in [MCMCDiagnosticTools.jl](https://github.com/TuringLang/MCMCDiagnosticTools.jl). 
A larger ESS implies that the samples are less correlated, and thus more representative of the underlying distribution: ```{julia} -using FlexiChains: Chains, ess +using MCMCDiagnosticTools: ess rejection = first(mh(logp, 10000, x -> x > 0)) transformation = f_inv(first(mh(logq, 10000, x -> true))) -chn = Chains(hcat(rejection, transformation), [:rejection, :transformation]) -ess(chn) + +ess(rejection), ess(transformation) ``` ::: From 48b282a85ff2ba259ebb460b3e57f58701c2ce91 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:47:33 +0100 Subject: [PATCH 07/29] More updates --- .../bayesian-logistic-regression/index.qmd | 29 +++----- tutorials/bayesian-neural-networks/index.qmd | 2 +- tutorials/hidden-markov-models/index.qmd | 72 ++++++++++++------- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/tutorials/bayesian-logistic-regression/index.qmd b/tutorials/bayesian-logistic-regression/index.qmd index 1eed9429c..e61516f84 100755 --- a/tutorials/bayesian-logistic-regression/index.qmd +++ b/tutorials/bayesian-logistic-regression/index.qmd @@ -158,8 +158,8 @@ For more information, see [the Turing documentation on sampling multiple chains] #| echo: false let mean_params = mean(chain) - @assert mean_params[:student, :mean] < 0.1 - @assert mean_params[:balance, :mean] > 1 + @assert mean_params[@varname(student)] < 0.1 + @assert mean_params[@varname(balance)] > 1 end ``` @@ -171,30 +171,17 @@ plot(chain) ```{julia} #| echo: false +using FlexiChains let - mean_params = mapreduce(hcat, mean(chain; append_chains=false)) do df - return df[:, :mean] - end - for i in (2, 3) - @assert mean_params[:, i] != mean_params[:, 1] - @assert isapprox(mean_params[:, i], mean_params[:, 1]; rtol=5e-2) + mean_per_chain = mean(chain; dims=:iter) + for parameter in FlexiChains.parameters(mean_per_chain) + @assert isapprox(mean_per_chain[parameter, chain=2], mean_per_chain[parameter, chain=1]; rtol=0.1) + @assert isapprox(mean_per_chain[parameter, chain=3], mean_per_chain[parameter, chain=1]; rtol=0.1) end end ``` -Looks good! - -We can also use the `corner` function from StatsPlots to show the distributions of the various parameters of our logistic regression. - -```{julia} -# The labels to use. -l = [:student, :balance, :income] - -# Use the corner function. Requires StatsPlots and MCMCChains. -corner(chain, l) -``` - -Fortunately the corner plot appears to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. +Fortunately the plot appears to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. ## Making Predictions diff --git a/tutorials/bayesian-neural-networks/index.qmd b/tutorials/bayesian-neural-networks/index.qmd index 1032622f8..41e02f757 100755 --- a/tutorials/bayesian-neural-networks/index.qmd +++ b/tutorials/bayesian-neural-networks/index.qmd @@ -212,7 +212,7 @@ Now we extract the parameter samples from the sampled chain as `θ` (this is of ```{julia} # Extract all weight and bias parameters. 
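# NB: below, `stack=true` is assumed to concatenate the per-sample parameter
# vectors into a single 2D array with one row per MCMC sample.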
-θ = MCMCChains.group(ch, :parameters).value; +θ = ch[@varname(parameters), chain=1, stack=true]; ``` ## Prediction Visualization diff --git a/tutorials/hidden-markov-models/index.qmd b/tutorials/hidden-markov-models/index.qmd index 54456d606..f34e25a75 100755 --- a/tutorials/hidden-markov-models/index.qmd +++ b/tutorials/hidden-markov-models/index.qmd @@ -16,7 +16,8 @@ This tutorial illustrates training Bayesian [hidden Markov models](https://en.wi The main goals are learning the transition matrix, emission parameter, and hidden states. For a more rigorous academic overview of hidden Markov models, see [An Introduction to Hidden Markov Models and Bayesian Networks](https://mlg.eng.cam.ac.uk/zoubin/papers/ijprai.pdf) (Ghahramani, 2001). -In this tutorial, we assume there are $k$ discrete hidden states; the observations are continuous and normally distributed - centred around the hidden states. This assumption reduces the number of parameters to be estimated in the emission matrix. +In this tutorial, we assume there are $k$ discrete hidden states; the observations are continuous and normally distributed, and centred around the hidden states. +This assumption reduces the number of parameters to be estimated in the emission matrix. Let's load the libraries we'll need, and set a random seed for reproducibility. @@ -44,7 +45,8 @@ plot(y; xlim=(0, 30), ylim=(-1, 5), size=(500, 250), legend = false) scatter!(y, color = :blue; xlim=(0, 30), ylim=(-1, 5), size=(500, 250), legend = false) ``` -We can see that we have three states, one for each height of the plot (1, 2, 3). This height is also our emission parameter, so state one produces a value of one, state two produces a value of two, and so on. +We can see that we have three states, one for each height of the plot (1, 2, 3). +This height is also our emission parameter, so state one produces a value of one, state two produces a value of two, and so on. Ultimately, we would like to understand three major parameters: @@ -52,15 +54,22 @@ Ultimately, we would like to understand three major parameters: 2. The emission parameters, which describes a typical value emitted by some state. In the plot above, the emission parameter for state one is simply one. 3. The state sequence is our understanding of what state we were actually in when we observed some data. This is very important in more sophisticated HMMs, where the emission value does not equal our state. -With this in mind, let's set up our model. We are going to use some of our knowledge as modelers to provide additional information about our system. This takes the form of the prior on our emission parameter. +With this in mind, let's set up our model. +We are going to use some of our knowledge as modelers to provide additional information about our system. +This takes the form of the prior on our emission parameter. $$ m_i \sim \mathrm{Normal}(i, 0.5) \quad \text{where} \quad m = \{1,2,3\} $$ -Simply put, this says that we expect state one to emit values in a Normally distributed manner, where the mean of each state's emissions is that state's value. The variance of 0.5 helps the model converge more quickly — consider the case where we have a variance of 1 or 2. In this case, the likelihood of observing a 2 when we are in state 1 is actually quite high, as it is within a standard deviation of the true emission value. Applying the prior that we are likely to be tightly centred around the mean prevents our model from being too confused about the state that is generating our observations. 
+Simply put, this says that we expect state one to emit values in a Normally distributed manner, where the mean of each state's emissions is that state's value. +The variance of 0.5 helps the model converge more quickly — consider the case where we have a variance of 1 or 2. +In this case, the likelihood of observing a 2 when we are in state 1 is actually quite high, as it is within a standard deviation of the true emission value. +Applying the prior that we are likely to be tightly centred around the mean prevents our model from being too confused about the state that is generating our observations. -The priors on our transition matrix are noninformative, using `T[i] ~ Dirichlet(ones(K)/K)`. The Dirichlet prior used in this way assumes that the state is likely to change to any other state with equal probability. As we'll see, this transition matrix prior will be overwritten as we observe data. +The priors on our transition matrix are noninformative, using `T[i] ~ Dirichlet(ones(K)/K)`. +The Dirichlet prior used in this way assumes that the state is likely to change to any other state with equal probability. +As we'll see, this transition matrix prior will be overwritten as we observe data. ```{julia} # Turing model definition. @@ -96,11 +105,15 @@ The priors on our transition matrix are noninformative, using `T[i] ~ Dirichlet( end; ``` -We will use a combination of two samplers (HMC and Particle Gibbs) by passing them to the [Gibbs sampler](https://en.wikipedia.org/wiki/Gibbs_sampling). The Gibbs sampler allows for compositional inference, where different samplers can be applied to different parameters based on their properties. (For API details of these samplers, please see [Turing.jl's API documentation](https://turinglang.org/Turing.jl/stable/api/Inference/).) +We will use a combination of two samplers (HMC and Particle Gibbs) by passing them to the [Gibbs sampler](https://en.wikipedia.org/wiki/Gibbs_sampling). +The Gibbs sampler allows for compositional inference, where different samplers can be applied to different parameters based on their properties. +(For API details of these samplers, please see [Turing.jl's API documentation](https://turinglang.org/Turing.jl/stable/api/Inference/).) -In this case, we use HMC for `m` and `T`, representing the emission and transition matrices respectively. We use the Particle Gibbs sampler for `s`, the state sequence. You may wonder why it is that we are not assigning `s` to the HMC sampler, and why it is that we need compositional Gibbs sampling at all. +In this case, we use HMC for `m` and `T`, representing the emission and transition matrices respectively. +We use the Particle Gibbs sampler for `s`, the state sequence. +You may wonder why it is that we are not assigning `s` to the HMC sampler, and why it is that we need compositional Gibbs sampling at all. -The parameter `s` is not a continuous variable. +This is because the parameter `s` is not a continuous variable. It is a vector of **integers**, and thus Hamiltonian methods like HMC and NUTS won't work correctly. Gibbs allows us to apply the right tools to the best effect. If you are a particularly advanced user interested in higher performance, you may benefit from setting up your Gibbs sampler to use [different automatic differentiation]({{}}#compositional-sampling-with-differing-ad-modes) backends for each parameter space. @@ -126,16 +139,22 @@ The code below generates an animation showing the graph of the data above, and t ```{julia} # Extract our m and s parameters from the chain. 
-m_set = MCMCChains.group(chn, :m).value
-s_set = MCMCChains.group(chn, :s).value
+m_set = chn[@varname(m), chain=1]
+s_set = chn[@varname(s), chain=1]
+```
+
+Note that each element of `s_set` corresponds to one MCMC sample, and is a vector of integers (that means that `s_set` is itself a `Vector` of `Vector`s!).
+If you prefer to have a stacked 2D array, you can use `chn[@varname(s), chain=1, stack=true]` instead.
+We won't demonstrate that here.
 
+```{julia}
 # Iterate through the MCMC samples.
 Ns = 1:length(chn)
 
 # Make an animation.
 animation = @gif for i in Ns
-    m = m_set[i, :]
-    s = Int.(s_set[i, :])
+    m = m_set[i]
+    s = s_set[i]
     emissions = m[s]
 
     p = plot(
@@ -157,19 +176,14 @@ Looks like our model did a pretty good job, but we should also check to make sur
 
 ```{julia}
 # Index the chain with the persistence probabilities.
-subchain = chn[["T[1][1]", "T[2][2]", "T[3][3]"]]
+subchain = chn[[@varname(T[1][1]), @varname(T[2][2]), @varname(T[3][3])]]
 
 plot(subchain; seriestype=:traceplot, title="Persistence Probability", legend=false)
 ```
 
 A cursory examination of the traceplot above indicates that all three chains converged to something resembling
-stationary. We can use the diagnostic functions provided by [MCMCChains](https://github.com/TuringLang/MCMCChains.jl) to engage in some more formal tests, like the Heidelberg and Welch diagnostic:
-
-```{julia}
-heideldiag(MCMCChains.group(chn, :T))[1]
-```
-
-The p-values on the test suggest that we cannot reject the hypothesis that the observed sequence comes from a stationary distribution, so we can be reasonably confident that our transition matrix has converged to something reasonable.
+stationarity.
+We can use the diagnostic functions provided by [MCMCDiagnosticTools](https://github.com/TuringLang/MCMCDiagnosticTools.jl) to engage in some more formal tests, like the Heidelberg and Welch diagnostic.
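For instance, we could run it on the samples of one of the persistence probabilities.
(This is a minimal sketch: it assumes that `heideldiag` accepts a plain vector of samples, and that indexing `chn` with a single `@varname` and `chain=1` returns such a vector.)

```{julia}
using MCMCDiagnosticTools: heideldiag

# Heidelberg-Welch diagnostic for the first chain's samples of T[1][1].
heideldiag(chn[@varname(T[1][1]), chain=1])
```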
## Efficient Inference With The Forward Algorithm @@ -203,12 +217,12 @@ We can compare the chains of these two models, confirming the posterior estimate #| code-fold: true #| code-summary: "Plotting Chains" -plot(chn["m[1]"], label = "m[1], Model 1, Gibbs", color = :lightblue) -plot!(chn2["m[1]"], label = "m[1], Model 2, NUTS", color = :blue) -plot!(chn["m[2]"], label = "m[2], Model 1, Gibbs", color = :pink) -plot!(chn2["m[2]"], label = "m[2], Model 2, NUTS", color = :red) -plot!(chn["m[3]"], label = "m[3], Model 1, Gibbs", color = :yellow) -plot!(chn2["m[3]"], label = "m[3], Model 2, NUTS", color = :orange) +plot(chn[@varname(m[1])], label = "m[1], Model 1, Gibbs", color = :lightblue) +plot!(chn2[@varname(m[1])], label = "m[1], Model 2, NUTS", color = :blue) +plot!(chn[@varname(m[2])], label = "m[2], Model 1, Gibbs", color = :pink) +plot!(chn2[@varname(m[2])], label = "m[2], Model 2, NUTS", color = :red) +plot!(chn[@varname(m[3])], label = "m[3], Model 1, Gibbs", color = :yellow) +plot!(chn2[@varname(m[3])], label = "m[3], Model 2, NUTS", color = :orange) ``` @@ -237,9 +251,13 @@ Plotting the estimated states, we can see that the results align well with our e ```{julia} p = plot(xlim=(0, 30), ylim=(-1, 5), size=(500, 250)) + for i in 1:100 ind = rand(DiscreteUniform(1, 1000)) - plot!(MCMCChains.group(chn_recover, :s).value[ind,:], color = :grey, opacity = 0.1, legend = :false) + plot!( + chn_recover[@varname(s), iter=ind, chain=1], + color = :grey, opacity = 0.1, legend = :false + ) end scatter!(y, color = :blue) From 8957cfc5fd167e67756fe2060866f8a0f3f43885 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:58:20 +0100 Subject: [PATCH 08/29] more changes --- .../bayesian-poisson-regression/index.qmd | 19 ++++------- tutorials/gaussian-mixture-models/index.qmd | 33 ++++++++----------- tutorials/infinite-mixture-models/index.qmd | 2 +- .../multinomial-logistic-regression/index.qmd | 21 +++--------- 4 files changed, 27 insertions(+), 48 deletions(-) diff --git a/tutorials/bayesian-poisson-regression/index.qmd b/tutorials/bayesian-poisson-regression/index.qmd index 3713f6d86..5676aae97 100755 --- a/tutorials/bayesian-poisson-regression/index.qmd +++ b/tutorials/bayesian-poisson-regression/index.qmd @@ -182,10 +182,7 @@ We use the Gelman, Rubin, and Brooks Diagnostic to check whether our chains have We expect the chains to have converged. This is because we have taken sufficient number of iterations (1500) for the NUTS sampler. However, in case the test fails, then we will have to take a larger number of iterations, resulting in longer computation time. ```{julia} -# Because some of the sampler statistics are `missing`, we need to extract only -# the parameters and then concretize the array so that `gelmandiag` can be computed. -parameter_chain = MCMCChains.concretize(MCMCChains.get_sections(chain, :parameters)) -gelmandiag(parameter_chain) +gelmandiag(chain) ``` From the above diagnostic, we can conclude that the chains have converged because the PSRF values of the coefficients are close to 1. @@ -193,14 +190,11 @@ From the above diagnostic, we can conclude that the chains have converged becaus So, we have obtained the posterior distributions of the parameters. We transform the coefficients and recover theta values by taking the exponent of the meaned values of the coefficients `b0`, `b1`, `b2` and `b3`. We take the exponent of the means to get a better comparison of the relative values of the coefficients. 
We then compare this with the intuitive meaning that was described earlier. ```{julia} -# Taking the first chain -c1 = chain[:, :, 1] - # Calculating the exponentiated means -b0_exp = exp(mean(c1[:b0])) -b1_exp = exp(mean(c1[:b1])) -b2_exp = exp(mean(c1[:b2])) -b3_exp = exp(mean(c1[:b3])) +b0_exp = exp(mean(chain[@varname(b0), chain=1])) +b1_exp = exp(mean(chain[@varname(b1), chain=1])) +b2_exp = exp(mean(chain[@varname(b2), chain=1])) +b3_exp = exp(mean(chain[@varname(b3), chain=1])) print("The exponent of the meaned values of the weights (or coefficients are): \n") println("b0: ", b0_exp) @@ -228,7 +222,8 @@ Thus, we remove these warmup samples and view the diagnostics again. We discard the first 200 samples, corresponding to the adaptation phase used by the NUTS sampler (which we explicitly chose not to discard earlier with the `discard_adapt=false` argument). ```{julia} -chains_new = chain[201:end, :, :] +N = FlexiChains.niters(chain) +chains_new = chain[iter=201:N] ``` ```{julia} diff --git a/tutorials/gaussian-mixture-models/index.qmd b/tutorials/gaussian-mixture-models/index.qmd index aaea5f5e0..d0d672112 100755 --- a/tutorials/gaussian-mixture-models/index.qmd +++ b/tutorials/gaussian-mixture-models/index.qmd @@ -139,12 +139,11 @@ For more information, see [the Turing documentation on sampling multiple chains. ```{julia} #| echo: false +using FlexiChains let # Verify that the output of the chain is as expected. - for i in MCMCChains.chains(chains) - # μ[1] and μ[2] can switch places, so we sort the values first. - chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) - μ_mean = vec(mean(chain; dims=1)) + for i in FlexiChains.chain_indices(chains) + μ_mean = vec(mean(chains[@varname(μ), chain=i, stack=true]; dims=1)) @assert isapprox(sort(μ_mean), μ; atol=0.5) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end @@ -157,7 +156,7 @@ After sampling we can visualise the trace and density of the parameters of inter We consider the samples of the location parameters $\mu_1$ and $\mu_2$ for the two clusters. ```{julia} -plot(chains[["μ[1]", "μ[2]"]]; legend=true) +plot(chains[[@varname(μ[1]), @varname(μ[2])]]; legend=true) ``` From the plots above, we can see that the chains have converged to seemingly different values for the parameters $\mu_1$ and $\mu_2$. @@ -208,31 +207,29 @@ chains = sample(model, sampler, MCMCThreads(), nsamples, nchains, discard_initia #| echo: false let # Verify that the output of the chain is as expected - for i in MCMCChains.chains(chains) - # μ[1] and μ[2] can no longer switch places. Check that they've found the mean - chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) - μ_mean = vec(mean(chain; dims=1)) + for i in FlexiChains.chain_indices(chains) + μ_mean = vec(mean(chains[@varname(μ), chain=i, stack=true]; dims=1)) @assert isapprox(sort(μ_mean), μ; atol=0.5) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end ``` ```{julia} -plot(chains[["μ[1]", "μ[2]"]]; legend=true) +plot(chains[[@varname(μ[1]), @varname(μ[2])]]; legend=true) ``` We also inspect the samples of the mixture weights $w$. ```{julia} -plot(chains[["w[1]", "w[2]"]]; legend=true) +plot(chains[[@varname(w[1]), @varname(w[2])]]; legend=true) ``` As the distributions of the samples for the parameters $\mu_1$, $\mu_2$, $w_1$, and $w_2$ are unimodal, we can safely visualise the density region of our model using the average values. ```{julia} # Model with mean of samples as parameters. 
-μ_mean = [mean(chains, "μ[$i]") for i in 1:2] -w_mean = [mean(chains, "w[$i]") for i in 1:2] +μ_mean = [mean(chains[@varname(μ[i])]) for i in 1:2] +w_mean = [mean(chains[@varname(w[i])]) for i in 1:2] mixturemodel_mean = MixtureModel([MvNormal(Fill(μₖ, 2), I) for μₖ in μ_mean], w_mean) contour( range(-7.5, 3; length=1_000), @@ -249,7 +246,7 @@ Finally, we can inspect the assignments of the data points inferred using Turing As we can see, the dataset is partitioned into two distinct groups. ```{julia} -assignments = [mean(chains, "k[$i]") for i in 1:N] +assignments = [mean(chains[@varname(k[i])]) for i in 1:N] scatter( x[1, :], x[2, :]; @@ -355,10 +352,8 @@ chains = sample(model, sampler, MCMCThreads(), nsamples, nchains; discard_initia #| echo: false let # Verify for marginalized model that the output of the chain is as expected - for i in MCMCChains.chains(chains) - # μ[1] and μ[2] can no longer switch places. Check that they've found the mean - chain = Array(chains[:, ["μ[1]", "μ[2]"], i]) - μ_mean = vec(mean(chain; dims=1)) + for i in FlexiChains.chain_indices(chains) + μ_mean = vec(mean(chains[@varname(μ), chain=i, stack=true]; dims=1)) @assert isapprox(sort(μ_mean), μ; atol=0.5) "Difference between estimated mean of μ ($(sort(μ_mean))) and data-generating μ ($μ) unexpectedly large!" end end @@ -367,7 +362,7 @@ end `NUTS()` significantly outperforms our compositional Gibbs sampler, in large part because our model is now Rao-Blackwellized thanks to the marginalization of our assignment parameter. ```{julia} -plot(chains[["μ[1]", "μ[2]"]], legend=true) +plot(chains[[@varname(μ[1]), @varname(μ[2])]], legend=true) ``` ## Inferred Assignments With The Marginalized Model diff --git a/tutorials/infinite-mixture-models/index.qmd b/tutorials/infinite-mixture-models/index.qmd index 676af6ee4..44f538c28 100755 --- a/tutorials/infinite-mixture-models/index.qmd +++ b/tutorials/infinite-mixture-models/index.qmd @@ -260,7 +260,7 @@ Finally, we can plot the number of clusters in each sample. ```{julia} # Extract the number of clusters for each sample of the Markov chain. k = map( - t -> length(unique(vec(chain[t, MCMCChains.namesingroup(chain, :z), :].value))), + t -> length(unique(chain[@varname(z), iter=t])), 1:iterations, ); diff --git a/tutorials/multinomial-logistic-regression/index.qmd b/tutorials/multinomial-logistic-regression/index.qmd index c8d50e654..54a177b18 100755 --- a/tutorials/multinomial-logistic-regression/index.qmd +++ b/tutorials/multinomial-logistic-regression/index.qmd @@ -145,14 +145,7 @@ plot(chain) Looks good! -We can also use the `corner` function from StatsPlots to show the distributions of the various parameters of our multinomial logistic regression. -`corner(chain)` will show the distributions of all parameters, but here we will only show the first three to avoid cluttering the plot. - -```{julia} -corner(chain, MCMCChains.namesingroup(chain, :coefficients_versicolor)) -``` - -Fortunately the corner plots appear to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. +It should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. 
## Making Predictions @@ -167,14 +160,10 @@ Note that we do not have to evaluate the `softmax` function since it does not af import FlexiChains function prediction(x::AbstractMatrix{<:Real}, chain::FlexiChains.VNChain) # Pull the means from each parameter's sampled values in the chain. - intercept_versicolor = mean(chain, :intercept_versicolor) - intercept_virginica = mean(chain, :intercept_virginica) - coefficients_versicolor = [ - mean(chain, k) for k in MCMCChains.namesingroup(chain, :coefficients_versicolor) - ] - coefficients_virginica = [ - mean(chain, k) for k in MCMCChains.namesingroup(chain, :coefficients_virginica) - ] + intercept_versicolor = mean(chain[@varname(intercept_versicolor)]) + intercept_virginica = mean(chain[@varname(intercept_virginica)]) + coefficients_versicolor = mean(chain[@varname(coefficients_versicolor)]) + coefficients_virginica = mean(chain[@varname(coefficients_virginica)]) # Compute the index of the species with the highest probability for each observation. values_versicolor = intercept_versicolor .+ (coefficients_versicolor' * x) From 70b3a4a647b551b30f299c8a55913fe61db499ce Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:58:51 +0100 Subject: [PATCH 09/29] fix quarto.yml version --- _quarto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_quarto.yml b/_quarto.yml index dd0e4e8aa..309e3cd9e 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -43,7 +43,7 @@ website: href: https://turinglang.org/team/ right: # Current version - - text: "v0.44" + - text: "v0.45" menu: - text: Changelog href: https://turinglang.org/docs/changelog.html From b36098b8645563238d15be61d8349e959acae392 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sat, 2 May 2026 22:59:25 +0100 Subject: [PATCH 10/29] fix import --- developers/inference/implementing-samplers/index.qmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/developers/inference/implementing-samplers/index.qmd b/developers/inference/implementing-samplers/index.qmd index 33ba67995..5ab0cc9ce 100644 --- a/developers/inference/implementing-samplers/index.qmd +++ b/developers/inference/implementing-samplers/index.qmd @@ -474,6 +474,8 @@ and so the samples that fall outside of the range are always rejected. But do notice how much worse all the diagnostics are, e.g. `ess_tail` is very poor compared to when we use `unconstrained=true`: ```{julia} +using MCMCDiagnosticTools: ess + ess(chain) ``` From 3b4c9d3dcea52d004c22688179f7ada3411b34c1 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 02:02:47 +0100 Subject: [PATCH 11/29] fix --- tutorials/coin-flipping/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/coin-flipping/index.qmd b/tutorials/coin-flipping/index.qmd index 118b4f5a2..79679966b 100755 --- a/tutorials/coin-flipping/index.qmd +++ b/tutorials/coin-flipping/index.qmd @@ -201,7 +201,7 @@ Now we can build our plot: ```{julia} #| echo: false -@assert isapprox(mean(chain, :p), 0.5; atol=0.1) "Estimated mean of parameter p: $(mean(chain, :p)) - not in [0.4, 0.6]!" +@assert isapprox(mean(chain[@varname(p)]), 0.5; atol=0.1) "Estimated mean of parameter p: $(mean(chain[@varname(p)])) - not in [0.4, 0.6]!" ``` ```{julia} @@ -229,4 +229,4 @@ vline!([p_true]; label="True probability", c=:red) As we can see, the samples obtained with Turing closely approximate the true posterior distribution. Hopefully this tutorial has provided an easy-to-follow, yet informative introduction to Turing's simpler applications. 
-More advanced usage is demonstrated in other tutorials. \ No newline at end of file +More advanced usage is demonstrated in other tutorials. From 18f966752dc990398496816d3dfc27b50d8b25dc Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 13:19:21 +0100 Subject: [PATCH 12/29] update FlexiChains and add cornerplots back --- Manifest.toml | 6 ++++-- tutorials/bayesian-logistic-regression/index.qmd | 10 +++++++++- tutorials/multinomial-logistic-regression/index.qmd | 9 ++++++++- tutorials/variational-inference/index.qmd | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 2405dc65d..581066c53 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1105,9 +1105,9 @@ version = "0.8.5" [[deps.FlexiChains]] deps = ["AbstractMCMC", "AbstractPPL", "DelimitedFiles", "DimensionalData", "DocStringExtensions", "MCMCDiagnosticTools", "OrderedCollections", "PrecompileTools", "Printf", "Random", "Statistics", "StatsBase", "Tables"] -git-tree-sha1 = "81e93592ce8a9afc00e15c942c78de5fbe35e17d" +git-tree-sha1 = "d959cdc91cc71191f7097b02d21c3520c7993fd8" uuid = "4a37a8b9-6e57-4b92-8664-298d46e639f7" -version = "0.6.0" +version = "0.6.2" [deps.FlexiChains.extensions] FlexiChainsAdvancedHMCExt = ["AdvancedHMC", "DimensionalData"] @@ -1121,6 +1121,7 @@ version = "0.6.0" FlexiChainsPosteriorStatsExt = ["PosteriorStats", "DimensionalData"] FlexiChainsRecipesBaseExt = ["RecipesBase", "StatsBase"] FlexiChainsStanSampleExt = ["StanSample"] + FlexiChainsStatsPlotsExt = ["StatsPlots", "DimensionalData"] [deps.FlexiChains.weakdeps] AdvancedHMC = "0bf59076-c3b1-5ca4-86bd-e02cd72cde3d" @@ -1134,6 +1135,7 @@ version = "0.6.0" PosteriorStats = "7f36be82-ad55-44ba-a5c0-b8b5480d7aa5" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" StanSample = "c1514b29-d3a0-5178-b312-660c88baa699" + StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" [[deps.Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Zlib_jll"] diff --git a/tutorials/bayesian-logistic-regression/index.qmd b/tutorials/bayesian-logistic-regression/index.qmd index e61516f84..205aba5ba 100755 --- a/tutorials/bayesian-logistic-regression/index.qmd +++ b/tutorials/bayesian-logistic-regression/index.qmd @@ -181,7 +181,15 @@ let end ``` -Fortunately the plot appears to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. +Looks good! + +We can also use the `cornerplot` function from StatsPlots to show the distributions of the various parameters of our logistic regression. + +```{julia} +StatsPlots.cornerplot(chain, [@varname(student), @varname(balance), @varname(income)]) +``` + +Fortunately the corner plot appears to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. ## Making Predictions diff --git a/tutorials/multinomial-logistic-regression/index.qmd b/tutorials/multinomial-logistic-regression/index.qmd index 54a177b18..73b5b4bcf 100755 --- a/tutorials/multinomial-logistic-regression/index.qmd +++ b/tutorials/multinomial-logistic-regression/index.qmd @@ -145,7 +145,14 @@ plot(chain) Looks good! -It should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. 
+We can also use the `cornerplot` function from StatsPlots to show the distributions of the various parameters of our multinomial logistic regression. +`StatsPlots.cornerplot(chain)` will show the distributions of all parameters, but here we will only show the versicolor coefficients to avoid cluttering the plot. + +```{julia} +StatsPlots.cornerplot(chain, [@varname(coefficients_versicolor)]; size=(900, 900)) +``` + +Fortunately the corner plots appear to demonstrate unimodal distributions for each of our parameters, so it should be straightforward to take the means of each parameter's sampled values to estimate our model to make predictions. ## Making Predictions diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index aa3776112..1ec1d35b8 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -359,7 +359,8 @@ vi_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, z) Now, we can, for example, look at expectations: ```{julia} -describe(vi_chain) +using FlexiChains +summarystats(vi_chain) ``` (Since we're drawing independent samples, we can simply ignore the ESS and Rhat metrics.) From 20481d761aca96809abd9eeedd841496e661e275 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 13:30:57 +0100 Subject: [PATCH 13/29] Fix VI doc to work with FlexiChains --- tutorials/variational-inference/index.qmd | 29 +++++++++++++---------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tutorials/variational-inference/index.qmd b/tutorials/variational-inference/index.qmd index 1ec1d35b8..b4fec3d5f 100755 --- a/tutorials/variational-inference/index.qmd +++ b/tutorials/variational-inference/index.qmd @@ -347,7 +347,7 @@ Calling `rand(result_fr.ldf)` will return a vector of samples in the transformed However, note that these vectors are harder to interpret and work with. ::: -Each sample is a [`DynamicPPL.VarNamedTuple`]({{< meta usage-varnamedtuple >}}), and a matrix of these can be converted into a `Chains` object. +Each sample is a [`DynamicPPL.VarNamedTuple`]({{< meta usage-varnamedtuple >}}), and a matrix of these can be converted into a `FlexiChain` object. ```{julia} z = rand(result_fr, 100_000, 1) @@ -370,8 +370,8 @@ Let's compare this against samples from `NUTS`: ```{julia} mcmc_chain = sample(m, NUTS(), 10_000; progress=false); -vi_mean = mean(vi_chain)[:, 2] -mcmc_mean = mean(mcmc_chain, names(mcmc_chain, :parameters))[:, 2] +vi_mean = Array(mean(vi_chain)) +mcmc_mean = Array(mean(mcmc_chain)) plot(mcmc_mean; xticks=1:1:length(mcmc_mean), label="mean of NUTS") plot!(vi_mean; label="mean of VI") @@ -400,9 +400,8 @@ test_cut.OLSPrediction = unstandardize(GLM.predict(ols, test_cut), train_unstand ```{julia} # Make a prediction given an input vector, using mean parameter values from a chain. 
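# Note: indexing the chain with a single `@varname` is assumed to return one value
# per MCMC sample, so taking `mean` of it gives the element-wise posterior mean.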
function prediction(chain, x) - p = get_params(chain) - α = mean(p.intercept) - β = collect(mean.(p.coefficients)) + α = mean(chain[@varname(intercept)]) + β = mean(chain[@varname(coefficients)]) return α .+ x * β end ``` @@ -419,7 +418,7 @@ first(test_cut, 6) ``` ```{julia} -# Construct the Chains from the Variational Approximations +# Construct the chains from the Variational Approximations vi_mf_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, rand(result_mf, 10_000, 1)) vi_fr_chain = AbstractMCMC.from_samples(FlexiChains.VNChain, rand(result_fr, 10_000, 1)) ``` @@ -475,8 +474,10 @@ Interestingly the squared difference between true- and mean-prediction on the te But, as Bayesians, we know that the mean doesn't tell the entire story. One quick check is to look at the mean predictions ± standard deviation of the two different approaches: ```{julia} -preds_vi_mf = mapreduce(hcat, 1:5:size(vi_mf_chain, 1)) do i - return unstandardize(prediction(vi_mf_chain[i], test), train_unstandardized.MPG) +Niters = FlexiChains.niters(vi_mf_chain) + +preds_vi_mf = mapreduce(hcat, 1:5:Niters) do i + return unstandardize(prediction(vi_mf_chain[iter=i], test), train_unstandardized.MPG) end p1 = scatter( @@ -487,13 +488,14 @@ p1 = scatter( size=(900, 500), markersize=8, ) + scatter!(1:size(test, 1), unstandardize(test_label, train_unstandardized.MPG); label="true") xaxis!(1:size(test, 1)) ylims!(10, 40) title!("VI Mean-Field") -preds_vi_fr = mapreduce(hcat, 1:5:size(vi_mf_chain, 1)) do i - return unstandardize(prediction(vi_fr_chain[i], test), train_unstandardized.MPG) +preds_vi_fr = mapreduce(hcat, 1:5:Niters) do i + return unstandardize(prediction(vi_fr_chain[iter=i], test), train_unstandardized.MPG) end p2 = scatter( @@ -509,8 +511,9 @@ xaxis!(1:size(test, 1)) ylims!(10, 40) title!("VI Full-Rank") -preds_mcmc = mapreduce(hcat, 1:5:size(mcmc_chain, 1)) do i - return unstandardize(prediction(mcmc_chain[i], test), train_unstandardized.MPG) +Niters_mcmc = FlexiChains.niters(mcmc_chain) +preds_mcmc = mapreduce(hcat, 1:5:Niters_mcmc) do i + return unstandardize(prediction(mcmc_chain[iter=i], test), train_unstandardized.MPG) end p3 = scatter( From 734af46419770eed017b003830e95d4ca3fd79f5 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 16:06:05 +0100 Subject: [PATCH 14/29] fix time series docs --- Manifest.toml | 2 +- Project.toml | 1 + .../bayesian-time-series-analysis/index.qmd | 47 ++++++++++++------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 581066c53..5ad1ef2ed 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.11.9" manifest_format = "2.0" -project_hash = "9618fd89a9e953023338fe3e34956d162287090a" +project_hash = "b115e243a73218a9880a8c1e62286d045b25694d" [[deps.ADTypes]] git-tree-sha1 = "bbc22a9a08a0ef6460041086d8a7b27940ed4ffd" diff --git a/Project.toml b/Project.toml index 91e38fb86..c7d3a4a7e 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DelayDiffEq = "bcd4f6db-9728-5f36-b5f7-82caef46ccdb" +DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" diff --git a/tutorials/bayesian-time-series-analysis/index.qmd b/tutorials/bayesian-time-series-analysis/index.qmd index 
5522d9131..f0e2b4357 100755 --- a/tutorials/bayesian-time-series-analysis/index.qmd +++ b/tutorials/bayesian-time-series-analysis/index.qmd @@ -165,11 +165,11 @@ scatter!(t, yf; color=2, label="Data") With the model specified and with a reasonable prior we can now let Turing decompose the time series for us! ```{julia} -function mean_ribbon(samples) - qs = quantile(samples) - low = qs[:, Symbol("2.5%")] - up = qs[:, Symbol("97.5%")] - m = mean(samples)[:, :mean] +function mean_ribbon(chn) + subsetted_chn = chn[[@varname(y)]] + low = Array(quantile(subsetted_chn, 0.025)) + up = Array(quantile(subsetted_chn, 0.975)) + m = Array(mean(subsetted_chn)) return m, (m - low, up - m) end @@ -216,6 +216,7 @@ model = decomp_model(x, cyclic_features, +) | (; y = yf) chain = sample(model, NUTS(), 2000, progress=false) yf_samples = predict(decondition(model), chain) m, conf = mean_ribbon(yf_samples) + predictive_plt = plot( t, m .+ yf_max; @@ -235,7 +236,9 @@ plot(predictive_plt, decomposed_plt...; layout=(3, 1), size=(700, 1000)) ```{julia} #| echo: false let - @assert mean(ess(chain)[:, :ess]) > 500 "Mean ESS: $(mean(ess(chain)[:, :ess])) - not > 500" + using MCMCDiagnosticTools: ess + + @assert mean(Array(ess(chain))) > 500 "Mean ESS: $(mean(Array(ess(chain)))) - not > 500" lower_quantile = m .- conf[1] # 2.5% quantile upper_quantile = m .+ conf[2] # 97.5% quantile @assert mean(lower_quantile .≤ yf .≤ upper_quantile) ≥ 0.9 "Surprisingly few observations in predicted 95% interval: $(mean(lower_quantile .≤ yf .≤ upper_quantile))" @@ -247,15 +250,28 @@ We see that the least squares linear fit deviates somewhat from the posterior tr Since our model takes cyclic effects into account separately, we get a better estimate of the true overall trend than if we would have just fitted a line. But what frequency content did the model identify? +Let's start by wrangling the data into a nice format for plotting: +```{julia} +import DimensionalData as DD + +function get_cyclic_coefficients(chain) + βc = chain[@varname(βc), stack=true] # iters x chains x params + βc = dropdims(βc; dims=2) # iters x params + βc = DD.set(βc, DD.AnonDim => :freq) # rename the parameter dimension "freq" + return βc +end + +βc = get_cyclic_coefficients(chain) +``` ```{julia} function plot_cyclic_features(βsin, βcos) - labels = reshape(["freq = $i" for i in freqs], 1, :) + labels = permutedims(["$i" for i in freqs]) colors = collect(freqs)' style = reshape([i <= 10 ? :solid : :dash for i in 1:length(labels)], 1, :) sin_features_plt = density( - βsin[:, :, 1]; + βsin; title="Sine features posterior", label=labels, ylabel="Density", @@ -265,16 +281,16 @@ function plot_cyclic_features(βsin, βcos) legend=nothing, ) cos_features_plt = density( - βcos[:, :, 1]; + βcos; title="Cosine features posterior", ylabel="Density", xlabel="Weight", - label=nothing, + label=labels, color=colors, linestyle=style, ) - return seasonal_features_plt = plot( + return plot( sin_features_plt, cos_features_plt; layout=(2, 1), @@ -283,8 +299,7 @@ function plot_cyclic_features(βsin, βcos) ) end -βc = Array(group(chain, :βc)) -plot_cyclic_features(βc[:, begin:num_freqs, :], βc[:, (num_freqs + 1):end, :]) +plot_cyclic_features(βc[:, begin:num_freqs], βc[:, (num_freqs + 1):end]) ``` Plotting the posterior over the cyclic features reveals that the model managed to extract the true frequency content. 
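As a quick numerical companion to the plot, we can also read off the posterior mean of each cyclic weight directly.
(A minimal sketch, assuming `βc` is the iterations-by-coefficients array built by `get_cyclic_coefficients` above.)

```{julia}
using Statistics: mean

# Posterior means of the sine weights (first half) and cosine weights (second half).
βc_mean = vec(mean(Array(βc); dims=1))
βc_mean[begin:num_freqs], βc_mean[(num_freqs + 1):end]
```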
@@ -325,7 +340,7 @@ plot(predictive_plt, decomposed_plt...; layout=(3, 1), size=(700, 1000)) ```{julia} #| echo: false let - @assert mean(ess(chain)[:, :ess]) > 500 "Mean ESS: $(mean(ess(chain)[:, :ess])) - not > 500" + @assert mean(Array(ess(chain))) > 500 "Mean ESS: $(mean(Array(ess(chain)))) - not > 500" lower_quantile = m .- conf[1] # 2.5% quantile upper_quantile = m .+ conf[2] # 97.5% quantile @assert mean(lower_quantile .≤ yg .≤ upper_quantile) ≥ 0.9 "Surprisingly few observations in predicted 95% interval: $(mean(lower_quantile .≤ yg .≤ upper_quantile))" @@ -336,8 +351,8 @@ The model fits! What about the inferred cyclic components? ```{julia} -βc = Array(group(chain, :βc)) -plot_cyclic_features(βc[:, begin:num_freqs, :], βc[:, (num_freqs + 1):end, :]) +βc = get_cyclic_coefficients(chain) +plot_cyclic_features(βc[:, begin:num_freqs], βc[:, (num_freqs + 1):end]) ``` While a multiplicative model does manage to fit the data, it does not recover the true parameters for this dataset. From f4f99e2699ecd594cb51e9651fcae40dcefc3d10 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 16:54:41 +0100 Subject: [PATCH 15/29] linreg --- tutorials/bayesian-linear-regression/index.qmd | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tutorials/bayesian-linear-regression/index.qmd b/tutorials/bayesian-linear-regression/index.qmd index 8d02a014c..7d7e6fc92 100755 --- a/tutorials/bayesian-linear-regression/index.qmd +++ b/tutorials/bayesian-linear-regression/index.qmd @@ -156,10 +156,11 @@ It looks like all parameters have converged. ```{julia} #| echo: false let - ess_df = ess(chain) - @assert minimum(ess_df[:, :ess]) > 500 "Minimum ESS: $(minimum(ess_df[:, :ess])) - not > 500" - @assert mean(ess_df[:, :ess]) > 2_000 "Mean ESS: $(mean(ess_df[:, :ess])) - not > 2000" - @assert maximum(ess_df[:, :ess]) > 3_500 "Maximum ESS: $(maximum(ess_df[:, :ess])) - not > 3500" + using MCMCDiagnosticTools: ess + ess_arr = Array(ess(chain)) + @assert minimum(ess_arr) > 500 "Minimum ESS: $(minimum(ess_arr)) - not > 500" + @assert mean(ess_arr) > 2_000 "Mean ESS: $(mean(ess_arr)) - not > 2000" + @assert maximum(ess_arr) > 3_500 "Maximum ESS: $(maximum(ess_arr)) - not > 3500" end ``` @@ -189,9 +190,13 @@ The function below accepts a chain and an input matrix and calculates prediction ```{julia} # Make a prediction given an input vector. +import FlexiChains + function prediction(chain, x) - p = get_params(chain[200:end, :, :]) - targets = p.intercept' .+ x * reduce(hcat, p.coefficients)' + Niters = FlexiChains.niters(chain) + intercept = chain[@varname(intercept), iter=200:Niters, chain=1] + coefficients = chain[@varname(coefficients), iter=200:Niters, chain=1, stack=true] + targets = intercept' .+ x * coefficients' return vec(mean(targets; dims=2)) end ``` From 0a895d6da07f158eb564237c1a62791b285ab154 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 18:45:28 +0100 Subject: [PATCH 16/29] ppca --- tutorials/probabilistic-pca/index.qmd | 34 ++++++++++++--------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/tutorials/probabilistic-pca/index.qmd b/tutorials/probabilistic-pca/index.qmd index 900eb2124..afd1185d3 100755 --- a/tutorials/probabilistic-pca/index.qmd +++ b/tutorials/probabilistic-pca/index.qmd @@ -171,9 +171,9 @@ As per the p-PCA formula, we think of each row (i.e. 
each gene feature) followin Z ~ filldist(Normal(), k, N) # mean offset - μ ~ MvNormal(Eye(D)) + μ ~ MvNormal(Zeros(D), I) genes_mean = W * Z .+ reshape(μ, n_genes, 1) - return X ~ arraydist([MvNormal(m, Eye(N)) for m in eachcol(genes_mean')]) + return X ~ arraydist([MvNormal(m, I) for m in eachcol(genes_mean')]) end; ``` @@ -209,17 +209,13 @@ ppca = pPCA(mat_exp', k) # instantiate the probabilistic model chain_ppca = sample(ppca, NUTS(; adtype=AutoMooncake()), 500); ``` -The samples are saved in `chain_ppca`, which is an `FlexiChains.VNChain` object. -We can check its shape: +The samples are saved in `chain_ppca`, which is a `FlexiChains.VNChain` object. +Sampling statistics such as R-hat, ESS, mean estimates, and so on can be obtained from this: ```{julia} -size(chain_ppca) # (no. of iterations, no. of vars, no. of chains) = (500, 159, 1) -``` - -Sampling statistics such as R-hat, ESS, mean estimates, and so on can also be obtained from this: +using FlexiChains -```{julia} -describe(chain_ppca) +summarystats(chain_ppca) ``` #### Step 5: posterior predictive checks @@ -230,9 +226,9 @@ Then we calculate the mean value for each element in $W$, averaging over the who ```{julia} # Extract parameter estimates for predicting x - mean of posterior -W = reshape(mean(group(chain_ppca, :W))[:, 2], (n_genes, k)) -Z = reshape(mean(group(chain_ppca, :Z))[:, 2], (k, n_cells)) -μ = mean(group(chain_ppca, :μ))[:, 2] +W = mean(chain_ppca[@varname(W)]) +Z = mean(chain_ppca[@varname(Z)]) +μ = mean(chain_ppca[@varname(μ)]) mat_rec = W * Z .+ repeat(μ; inner=(1, n_cells)) ``` @@ -321,7 +317,7 @@ We instantiate the model and ask Turing to sample from it using NUTS sampler. Th ```{julia} ppca_ARD = pPCA_ARD(mat_exp') # instantiate the probabilistic model chain_ppcaARD = sample(ppca_ARD, NUTS(; adtype=AutoMooncake()), 500) # sampling -plot(group(chain_ppcaARD, :α); margin=6.0mm) +plot(chain_ppcaARD, [@varname(α)]; margin=6.0mm) ``` Again, we do some inference diagnostics. @@ -333,9 +329,9 @@ We can clearly see from the values of $\alpha$ that there should be two dimensio ```{julia} # Extract parameter mean estimates of the posterior -W = permutedims(reshape(mean(group(chain_ppcaARD, :W))[:, 2], (n_genes, n_genes))) -Z = permutedims(reshape(mean(group(chain_ppcaARD, :Z))[:, 2], (n_genes, n_cells)))' -α = mean(group(chain_ppcaARD, :α))[:, 2] +W = mean(chain_ppcaARD[@varname(W)]) +Z = mean(chain_ppcaARD[@varname(Z)]) +α = mean(chain_ppcaARD[@varname(α)]) plot(α; label="α") ``` @@ -347,7 +343,7 @@ We obtain a posterior predicted matrix $\mathbf{X} \in \mathbb{R}^{2 \times 60}$ ```{julia} α_indices = sortperm(α)[1:2] k = size(α_indices)[1] -X_rec = W[:, α_indices] * Z[α_indices, :] +X_rec = W[:, α_indices]' * Z[α_indices, :] df_rec = DataFrame(X_rec', :auto) heatmap( @@ -387,4 +383,4 @@ It can also be thought of as a matrix factorisation method, in which $\mathbf{X} [^2]: Probabilistic PCA by TensorFlow, "https://www.tensorflow.org/probability/examples/Probabilistic_PCA". [^3]: Gareth M. James, Daniela Witten, Trevor Hastie, Robert Tibshirani, *An Introduction to Statistical Learning*, Springer, 2013. [^4]: David Wipf, Srikantan Nagarajan, *A New View of Automatic Relevance Determination*, NIPS 2007. -[^5]: Christopher Bishop, *Pattern Recognition and Machine Learning*, Springer, 2006. \ No newline at end of file +[^5]: Christopher Bishop, *Pattern Recognition and Machine Learning*, Springer, 2006. 
From 46b1f0336673fc8b99d82b766baf29995bb68d33 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 18:56:39 +0100 Subject: [PATCH 17/29] fix --- tutorials/gaussian-process-latent-variable-models/index.qmd | 2 +- tutorials/probabilistic-pca/index.qmd | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tutorials/gaussian-process-latent-variable-models/index.qmd b/tutorials/gaussian-process-latent-variable-models/index.qmd index 906332315..7cbdab7a9 100755 --- a/tutorials/gaussian-process-latent-variable-models/index.qmd +++ b/tutorials/gaussian-process-latent-variable-models/index.qmd @@ -12,7 +12,7 @@ using Pkg; Pkg.instantiate(); ``` -In a previous tutorial, we have discussed latent variable models, in particular probabilistic principal component analysis (pPCA). +In a previous tutorial, we have discussed latent variable models, in particular [probabilistic principal component analysis (pPCA)]({{< meta probabilistic-pca >}}). Here, we show how we can extend the mapping provided by pPCA to non-linear mappings between input and output. For more details about the Gaussian Process Latent Variable Model (GPLVM), we refer the reader to the [original publication](https://jmlr.org/papers/v6/lawrence05a.html) and a [further extension](http://proceedings.mlr.press/v9/titsias10a/titsias10a.pdf). diff --git a/tutorials/probabilistic-pca/index.qmd b/tutorials/probabilistic-pca/index.qmd index afd1185d3..3226e51e1 100755 --- a/tutorials/probabilistic-pca/index.qmd +++ b/tutorials/probabilistic-pca/index.qmd @@ -221,7 +221,7 @@ summarystats(chain_ppca) #### Step 5: posterior predictive checks We try to reconstruct the input data using the posterior mean as parameter estimates. -We first retrieve the samples for the projection matrix `W` from `chain_ppca`. This can be done using the Julia `group(chain, parameter_name)` function. +We first retrieve the samples for the projection matrix `W` from `chain_ppca`. Then we calculate the mean value for each element in $W$, averaging over the whole chain of samples. ```{julia} @@ -325,7 +325,7 @@ Here we look at the convergence of the chains for the $α$ parameter. This parameter determines the relevance of individual components. We see that the chains have converged and the posterior of the $\alpha$ parameters is centred around much smaller values in two instances. In the following, we will use the mean of the small values to select the *relevant* dimensions (remember that, smaller values of $\alpha$ correspond to more important components.). -We can clearly see from the values of $\alpha$ that there should be two dimensions (corresponding to $\bar{\alpha}_3=\bar{\alpha}_5≈0.05$) for this dataset. +We can clearly see from the values of $\alpha$ that two of its values are smaller, which implies that there should be two dimensions for this dataset. ```{julia} # Extract parameter mean estimates of the posterior @@ -343,7 +343,7 @@ We obtain a posterior predicted matrix $\mathbf{X} \in \mathbb{R}^{2 \times 60}$ ```{julia} α_indices = sortperm(α)[1:2] k = size(α_indices)[1] -X_rec = W[:, α_indices]' * Z[α_indices, :] +X_rec = (W[α_indices, :])' * Z[α_indices, :] df_rec = DataFrame(X_rec', :auto) heatmap( From d773323bfee58ec3c91fbbdcdd08b6006dbef60c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 19:36:53 +0100 Subject: [PATCH 18/29] reenable gplvm? 
--- .../index.qmd | 47 ++++++++----------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/tutorials/gaussian-process-latent-variable-models/index.qmd b/tutorials/gaussian-process-latent-variable-models/index.qmd index 7cbdab7a9..7f16e297e 100755 --- a/tutorials/gaussian-process-latent-variable-models/index.qmd +++ b/tutorials/gaussian-process-latent-variable-models/index.qmd @@ -20,10 +20,11 @@ we refer the reader to the [original publication](https://jmlr.org/papers/v6/law In short, the GPVLM is a dimensionality reduction technique that allows us to embed a high-dimensional dataset in a lower-dimensional embedding. Importantly, it provides the advantage that the linear mappings from the embedded space can be non-linearised through the use of Gaussian Processes. -### Let's start by loading some dependencies. +## Setup + +Let's start by loading some dependencies. ```{julia} -#| eval: false using Turing using AbstractGPs using FillArrays @@ -32,6 +33,7 @@ using Plots using RDatasets using ReverseDiff using StatsBase +using Mooncake using LinearAlgebra using Random @@ -47,7 +49,6 @@ We will briefly touch on some ways to speed things up at the end of this tutoria We transform the original data with non-linear operations in order to demonstrate the power of GPs to work on non-linear relationships, while keeping the problem reasonably small. ```{julia} -#| eval: false data = dataset("datasets", "iris") species = data[!, "Species"] index = shuffle(1:150) @@ -73,7 +74,6 @@ Indeed, pPCA is basically equivalent to running the GPLVM model with an automati First, we re-introduce the pPCA model (see the tutorial on pPCA for details) ```{julia} -#| eval: false @model function pPCA(x) # Dimensionality of the problem. N, D = size(x) @@ -94,7 +94,6 @@ squared exponential kernel. ```{julia} -#| eval: false linear_kernel(α) = LinearKernel() ∘ ARDTransform(α) sekernel(α, σ) = σ * SqExponentialKernel() ∘ ARDTransform(α); ``` @@ -103,7 +102,6 @@ And here is the GPLVM model. We create separate models for the two types of kernel. ```{julia} -#| eval: false @model function GPLVM_linear(Y, K) # Dimensionality of the problem. N, D = size(Y) @@ -145,46 +143,43 @@ end; ``` ```{julia} -#| eval: false -# Standard GPs don't scale very well in n, so we use a small subsample for the purpose of this tutorial +# Standard GPs don't scale very well in n, so we use a small subsample for +# the purpose of this tutorial. n_data = 40 + # number of features to use from dataset n_features = 4 + # latent dimension for GP case ndim = 4; ``` ```{julia} -#| eval: false ppca = pPCA(dat[1:n_data, 1:n_features]) -chain_ppca = sample(ppca, NUTS{Turing.ReverseDiffAD{true}}(), 1000); +chain_ppca = sample(ppca, NUTS(; adtype=AutoMooncake()), 1000); ``` ```{julia} -#| eval: false # we extract the posterior mean estimates of the parameters from the chain -z_mean = reshape(mean(group(chain_ppca, :z))[:, 2], (n_features, n_data)) +z_mean = mean(chain_ppca[@varname(z)]) scatter(z_mean[1, :], z_mean[2, :]; group=labels[1:n_data], xlabel=L"z_1", ylabel=L"z_2") ``` We can see that the pPCA fails to distinguish the groups. -In particular, the `setosa` species is not clearly separated from `versicolor` and `virginica`. -This is due to the non-linearities that we introduced, as without them the two groups can be clearly distinguished -using pPCA (see the pPCA tutorial). +This is due to the non-linearities that we introduced, as without them the groups can be clearly distinguished using pPCA (see the pPCA tutorial). 
Let's try the same with our linear kernel GPLVM model. ```{julia} -#| eval: false gplvm_linear = GPLVM_linear(dat[1:n_data, 1:n_features], ndim) -chain_linear = sample(gplvm_linear, NUTS{Turing.ReverseDiffAD{true}}(), 500); +chain_linear = sample(gplvm_linear, NUTS(; adtype=AutoMooncake()), 500); ``` ```{julia} #| eval: false # we extract the posterior mean estimates of the parameters from the chain -z_mean = reshape(mean(group(chain_linear, :Z))[:, 2], (n_features, n_data)) -alpha_mean = mean(group(chain_linear, :α))[:, 2] +z_mean = mean(chain_linear[@varname(Z)]) +alpha_mean = mean(chain_linear[@varname(α)]) alpha1, alpha2 = partialsortperm(alpha_mean, 1:2; rev=true) scatter( @@ -196,22 +191,18 @@ scatter( ) ``` -We can see that similar to the pPCA case, the linear kernel GPLVM fails to distinguish between the two groups -(`setosa` on the one hand, and `virginica` and `verticolor` on the other). +We can see that similar to the pPCA case, the linear kernel GPLVM fails to distinguish between the groups. Finally, we demonstrate that by changing the kernel to a non-linear function, we are able to separate the data again. ```{julia} -#| eval: false gplvm = GPLVM(dat[1:n_data, 1:n_features], ndim) -chain_gplvm = sample(gplvm, NUTS{Turing.ReverseDiffAD{true}}(), 500); +chain_gplvm = sample(gplvm, NUTS(; adtype=AutoMooncake()), 500); ``` ```{julia} -#| eval: false -# we extract the posterior mean estimates of the parameters from the chain -z_mean = reshape(mean(group(chain_gplvm, :Z))[:, 2], (ndim, n_data)) -alpha_mean = mean(group(chain_gplvm, :α))[:, 2] +z_mean = mean(chain_gplvm[@varname(Z)]) +alpha_mean = mean(chain_gplvm[@varname(α)]) alpha1, alpha2 = partialsortperm(alpha_mean, 1:2; rev=true) scatter( @@ -229,7 +220,7 @@ let @assert abs( mean(z_mean[alpha1, labels[1:n_data] .== "setosa"]) - mean(z_mean[alpha1, labels[1:n_data] .!= "setosa"]), - ) > 1 + ) > 0.5 end ``` From 018431ada3869330be863db300d5fe04e8992a9c Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 19:45:49 +0100 Subject: [PATCH 19/29] bde --- .../bayesian-differential-equations/index.qmd | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tutorials/bayesian-differential-equations/index.qmd b/tutorials/bayesian-differential-equations/index.qmd index ab2e5ed0c..64a2df341 100755 --- a/tutorials/bayesian-differential-equations/index.qmd +++ b/tutorials/bayesian-differential-equations/index.qmd @@ -156,12 +156,29 @@ plot(chain) In Bayesian analysis it is often useful to retrodict the data, i.e. generate simulated data using samples from the posterior distribution, and compare to the original data (see for instance section 3.3.2 - model checking of McElreath's book "Statistical Rethinking"). Here, we solve the ODE for 300 randomly picked posterior samples in the `chain`. + +```{julia} +using FlexiChains: VNChain, DimArray + +function draw_posterior_samples(chain::VNChain, N::Int) + # Extract only the parameters of interest, and convert to an array. + params = DimArray(chain[[@varname(α), @varname(β), @varname(γ), @varname(δ)]]) + # Reshape to (number of samples, number of parameters). + params = reshape(params, size(params, 1) * size(params, 2), :) + # Sample N parameter combinations from the posterior without replacement. + sample_indices = sample(1:size(params, 1), N; replace=false) + # Return (N * 4) array of parameter samples. 
+ return params[sample_indices, :] +end + +posterior_samples = draw_posterior_samples(chain, 300) +``` + We plot the ensemble of solutions to check if the solution resembles the data. The 300 retrodicted time courses from the posterior are plotted in gray, the noisy observations are shown as blue and red dots, and the green and purple lines are the ODE solution that was used to generate the data. ```{julia} plot(; legend=false) -posterior_samples = sample(chain[[:α, :β, :γ, :δ]], 300; replace=false) for p in eachrow(Array(posterior_samples)) sol_p = solve(prob, Tsit5(); p=p, saveat=0.1) plot!(sol_p; alpha=0.1, color="#BBBBBB") @@ -209,8 +226,9 @@ chain2 = sample(model2, NUTS(0.45), MCMCSerial(), 5000, 3; progress=false) Again we inspect the trajectories of 300 randomly selected posterior samples. ```{julia} +posterior_samples = draw_posterior_samples(chain2, 300) + plot(; legend=false) -posterior_samples = sample(chain2[[:α, :β, :γ, :δ]], 300; replace=false) for p in eachrow(Array(posterior_samples)) sol_p = solve(prob, Tsit5(); p=p, saveat=0.1) plot!(sol_p; alpha=0.1, color="#BBBBBB") @@ -318,8 +336,9 @@ Finally, we plot trajectories of 300 randomly selected samples from the posterio Again, the dots indicate our observations, the coloured lines are the "true" simulations without noise, and the gray lines are trajectories from the posterior samples. ```{julia} +posterior_samples = draw_posterior_samples(chain_dde, 300) + plot(; legend=false) -posterior_samples = sample(chain_dde[[:α, :β, :γ, :δ]], 300; replace=false) for p in eachrow(Array(posterior_samples)) sol_p = solve(prob_dde, MethodOfSteps(Tsit5()); p=p, saveat=0.1) plot!(sol_p; alpha=0.1, color="#BBBBBB") From b9fc7fb9bf20e8dbca028a8fed6a40253431cf21 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 19:49:11 +0100 Subject: [PATCH 20/29] correct prose in core functionality --- core-functionality/index.qmd | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core-functionality/index.qmd b/core-functionality/index.qmd index 9ae653b25..6ae6d7d34 100755 --- a/core-functionality/index.qmd +++ b/core-functionality/index.qmd @@ -233,10 +233,12 @@ If you want to sample multiple chains without using parallelism, you can use `MC ```julia # Sample 3 chains in a serial fashion. -chains = sample(model, sampler, MCMCSerial(), 1000, 3) +chn = sample(model, sampler, MCMCSerial(), 1000, 3) ``` -The `chains` variable now contains a `Chains` object which can be indexed by chain. To pull out the first chain from the `chains` object, use `chains[:,:,1]`. The method is the same if you use either of the below parallel sampling methods. +The `chn` variable now contains multiple chains, as can be seen from the printout. +To pull out the first chain from the `chn` object, use `chn[chain=1]`. +The method is the same if you use either of the below parallel sampling methods. #### Multithreaded sampling @@ -346,7 +348,7 @@ c = sample(model, HMC(0.05, 20), 500) ``` Note the need to initialise `x` when missing since we are iterating over its elements later in the model. -The generated values for `x` can be extracted from the `Chains` object using `c[:x]`. +The generated values for `x` can be extracted from the `FlexiChain` object using `c[@varname(x)]`. Turing also supports mixed `missing` and non-`missing` values in `x`, where the missing ones will be treated as random variables to be sampled while the others get treated as observations. 
For example: @@ -476,7 +478,7 @@ logpdf(InverseGamma(2, 3), 1.0) + logpdf(Normal(0, sqrt(1.0)), 1.0) ``` -Querying with `Chains` object is easy as well: +Querying with `FlexiChain` objects is easy as well: ```{julia} chn = sample(model, Prior(), 10) From 667c1fd45792a94c501e372fe1be796c177baf37 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 20:58:15 +0100 Subject: [PATCH 21/29] hmm --- tutorials/hidden-markov-models/index.qmd | 26 +++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tutorials/hidden-markov-models/index.qmd b/tutorials/hidden-markov-models/index.qmd index f34e25a75..178c25fb9 100755 --- a/tutorials/hidden-markov-models/index.qmd +++ b/tutorials/hidden-markov-models/index.qmd @@ -139,22 +139,20 @@ The code below generates an animation showing the graph of the data above, and t ```{julia} # Extract our m and s parameters from the chain. -m_set = chn[@varname(m), chain=1] -s_set = chn[@varname(s), chain=1] +m_set = chn[@varname(m), chain=1, stack=true] +s_set = chn[@varname(s), chain=1, stack=true] ``` -Note that each element of `s_set` corresponds to one MCMC sample, and is a vector of integers (that means that `s_set` is itself an `Vector` of `Vector`s!) -If you prefer to have a stacked 2D array, you can use `chn[@varname(s), chain=1, stack=true]` instead. -We won't demonstrate that here. +Notice that `m_set` and `s_set` are both 2D arrays, where the first dimension is the iteration dimension, and the second dimension the parameter dimension. +(Without the `stack=true` option, we would get a vector of vectors.) ```{julia} -# Iterate through the MCMC samples. -Ns = 1:length(chn) +# Iterate through the MCMC samples and make an animation. +Niters = size(m_set, 1) -# Make an animation. 
-animation = @gif for i in Ns - m = m_set[i] - s = s_set[i] +animation = @gif for i in 1:Niters + m = m_set[i, :] + s = s_set[i, :] emissions = m[s] p = plot( @@ -252,10 +250,10 @@ Plotting the estimated states, we can see that the results align well with our e ```{julia} p = plot(xlim=(0, 30), ylim=(-1, 5), size=(500, 250)) -for i in 1:100 - ind = rand(DiscreteUniform(1, 1000)) +idxs = sample(1:FlexiChains.niters(chn_recover), 100; replace=false) +for i in idxs plot!( - chn_recover[@varname(s), iter=ind, chain=1], + chn_recover[@varname(s), iter=i, chain=1], color = :grey, opacity = 0.1, legend = :false ) end From 2b715f397ec7a515317d00950df3ef990991c5c5 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 21:23:46 +0100 Subject: [PATCH 22/29] hmm --- tutorials/hidden-markov-models/index.qmd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tutorials/hidden-markov-models/index.qmd b/tutorials/hidden-markov-models/index.qmd index 178c25fb9..eccb6b449 100755 --- a/tutorials/hidden-markov-models/index.qmd +++ b/tutorials/hidden-markov-models/index.qmd @@ -248,9 +248,11 @@ chn_recover = sample(BayesHmmRecover(y, 3, true), NUTS(), 1000) Plotting the estimated states, we can see that the results align well with our expectations: ```{julia} -p = plot(xlim=(0, 30), ylim=(-1, 5), size=(500, 250)) +import FlexiChains -idxs = sample(1:FlexiChains.niters(chn_recover), 100; replace=false) +Niters = FlexiChains.niters(chn_recover) +p = plot(xlim=(0, 30), ylim=(-1, 5), size=(500, 250)) +idxs = sample(1:Niters, 100; replace=false) for i in idxs plot!( chn_recover[@varname(s), iter=i, chain=1], From b1ad50322bcf9221b0cbd15d61437d445957d9b5 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 22:31:14 +0100 Subject: [PATCH 23/29] pois --- tutorials/bayesian-poisson-regression/index.qmd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tutorials/bayesian-poisson-regression/index.qmd b/tutorials/bayesian-poisson-regression/index.qmd index 5676aae97..e740e35d5 100755 --- a/tutorials/bayesian-poisson-regression/index.qmd +++ b/tutorials/bayesian-poisson-regression/index.qmd @@ -182,6 +182,8 @@ We use the Gelman, Rubin, and Brooks Diagnostic to check whether our chains have We expect the chains to have converged. This is because we have taken sufficient number of iterations (1500) for the NUTS sampler. However, in case the test fails, then we will have to take a larger number of iterations, resulting in longer computation time. ```{julia} +using MCMCDiagnosticTools: gelmandiag + gelmandiag(chain) ``` @@ -222,7 +224,9 @@ Thus, we remove these warmup samples and view the diagnostics again. We discard the first 200 samples, corresponding to the adaptation phase used by the NUTS sampler (which we explicitly chose not to discard earlier with the `discard_adapt=false` argument). 
```{julia} +import FlexiChains N = FlexiChains.niters(chain) + chains_new = chain[iter=201:N] ``` From 2985fd3ef44a0cecd23a5015b4ca6d55b81056c3 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 23:27:53 +0100 Subject: [PATCH 24/29] predict --- usage/predictive-distributions/index.qmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/usage/predictive-distributions/index.qmd b/usage/predictive-distributions/index.qmd index 3c9de1864..b0973fb8e 100755 --- a/usage/predictive-distributions/index.qmd +++ b/usage/predictive-distributions/index.qmd @@ -97,8 +97,8 @@ This controls the generation of new `X` samples, and makes your results reproduc ::: ::: {.callout-note} -`predict` returns a Chains object itself, which will only contain the newly predicted variables. -If you want to also retain the original parameters, you can use `predict(rng, predictive_model, chain; include_all=true)`. +`predict` returns a FlexiChain object itself, which will contain all the original parameters plus the newly predicted variables. +If you want only include the newly predicted variables, you can use `predict(rng, predictive_model, chain; include_all=false)`. ::: We can visualise the predictive distribution by combining all the samples and making a density plot: @@ -106,7 +106,7 @@ We can visualise the predictive distribution by combining all the samples and ma ```{julia} using StatsPlots: density, density!, vline! -predicted_X = vcat([predictive_samples[Symbol("X[$i]")] for i in 1:N]...) +predicted_X = vec(predictive_samples[@varname(X), stack=true]) density(predicted_X, label="Posterior predictive") ``` @@ -139,7 +139,7 @@ We can visualise the prior predictive distribution in the same way as before. Let's compare the two predictive distributions: ```{julia} -prior_predicted_X = vcat([prior_predictive_samples[Symbol("X[$i]")] for i in 1:N]...) +prior_predicted_X = vec(prior_predictive_samples[@varname(X), stack=true]) density(prior_predicted_X, label="Prior predictive") density!(predicted_X, label="Posterior predictive") vline!([true_m], label="True mean", linestyle=:dash, color=:black) From 04896beee1b054969077e9ba37f7f0c99b1855fa Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Sun, 3 May 2026 23:40:57 +0100 Subject: [PATCH 25/29] fix --- usage/sampling-options/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usage/sampling-options/index.qmd b/usage/sampling-options/index.qmd index 904ddee67..56d058e30 100644 --- a/usage/sampling-options/index.qmd +++ b/usage/sampling-options/index.qmd @@ -216,7 +216,7 @@ If both are provided, `initial_params` will be silently ignored. ```{julia} chn2 = sample(rng, demo_model(), MH(), 5; - initial_state=saved_state, initial_params=InitFromParams((x=0.0, y=0.0)) + initial_state=only(saved_state), initial_params=InitFromParams((x=0.0, y=0.0)) ) chn2[@varname(x)][1], chn2[@varname(y)][1] ``` From 8a504779004a4d270c3b39af8ffea5e6a02a4e37 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Mon, 4 May 2026 01:03:24 +0100 Subject: [PATCH 26/29] fix sampler viz --- usage/sampler-visualisation/index.qmd | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/usage/sampler-visualisation/index.qmd b/usage/sampler-visualisation/index.qmd index 79adc54e1..74a31d4f5 100755 --- a/usage/sampler-visualisation/index.qmd +++ b/usage/sampler-visualisation/index.qmd @@ -50,10 +50,9 @@ evaluate(m1, m2) = logjoint(model, (m=m2, s²=exp(m1))) function plot_sampler(chain; label="") # Extract values from chain. 
- val = get(chain, [:s², :m, :logjoint]) - ss = log.(val.s²) # Convert to unconstrained space - ms = val.m - lps = val.logjoint + ss = log.(chain[@varname(s²)]) # map to unconstrained space + ms = chain[@varname(m)] + lps = chain[:logjoint] # How many surface points to sample. granularity = 100 From c97c7138d4cf330ca25c4ac305bd99c0f4c41ac2 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 5 May 2026 13:17:09 +0100 Subject: [PATCH 27/29] Address review comments --- tutorials/hidden-markov-models/index.qmd | 2 +- tutorials/infinite-mixture-models/index.qmd | 2 +- usage/predictive-distributions/index.qmd | 2 +- usage/sampling-options/index.qmd | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tutorials/hidden-markov-models/index.qmd b/tutorials/hidden-markov-models/index.qmd index eccb6b449..5597c04b8 100755 --- a/tutorials/hidden-markov-models/index.qmd +++ b/tutorials/hidden-markov-models/index.qmd @@ -181,7 +181,7 @@ plot(subchain; seriestype=:traceplot, title="Persistence Probability", legend=fa A cursory examination of the traceplot above indicates that all three chains converged to something resembling stationary. -We can use the diagnostic functions provided by [MCMCDiagnosticTools](https://github.com/TuringLang/MCMCDiagnosticTools.jl) to engage in some more formal tests, like the Heidelberg and Welch diagnostic. +We can use the diagnostic functions provided by [MCMCDiagnosticTools](https://github.com/TuringLang/MCMCDiagnosticTools.jl) to engage in some more formal tests, like the [Heidelberg and Welch diagnostic](https://turinglang.org/MCMCDiagnosticTools.jl/stable/#MCMCDiagnosticTools.heideldiag). ## Efficient Inference With The Forward Algorithm diff --git a/tutorials/infinite-mixture-models/index.qmd b/tutorials/infinite-mixture-models/index.qmd index 44f538c28..10b7e215a 100755 --- a/tutorials/infinite-mixture-models/index.qmd +++ b/tutorials/infinite-mixture-models/index.qmd @@ -260,7 +260,7 @@ Finally, we can plot the number of clusters in each sample. ```{julia} # Extract the number of clusters for each sample of the Markov chain. k = map( - t -> length(unique(chain[@varname(z), iter=t])), + t -> length(unique(vec(chain[@varname(z), iter=t]))), 1:iterations, ); diff --git a/usage/predictive-distributions/index.qmd b/usage/predictive-distributions/index.qmd index b0973fb8e..95a423698 100755 --- a/usage/predictive-distributions/index.qmd +++ b/usage/predictive-distributions/index.qmd @@ -98,7 +98,7 @@ This controls the generation of new `X` samples, and makes your results reproduc ::: {.callout-note} `predict` returns a FlexiChain object itself, which will contain all the original parameters plus the newly predicted variables. -If you want only include the newly predicted variables, you can use `predict(rng, predictive_model, chain; include_all=false)`. +If you want to only include the newly predicted variables, you can use `predict(rng, predictive_model, chain; include_all=false)`. ::: We can visualise the predictive distribution by combining all the samples and making a density plot: diff --git a/usage/sampling-options/index.qmd b/usage/sampling-options/index.qmd index 56d058e30..680802af5 100644 --- a/usage/sampling-options/index.qmd +++ b/usage/sampling-options/index.qmd @@ -196,7 +196,7 @@ chn2 = sample(demo_model(), MH(), 5; initial_state=only(saved_state)) ::: {.callout-warning} ## `resume_from` -The `resume_from` argument has been removed in Turing v0.41; please use `initial_state=loadstate(chn)` instead, as described here. 
+The `resume_from` argument has been removed in Turing v0.41; please extract the states with `loadstate(chn)` and pass a state or states as the `initial_state` keyword argument instead, as described here. In v0.41, `loadstate` is also exported from Turing rather than DynamicPPL. ::: @@ -231,7 +231,7 @@ Finally, note that the first sample in the resumed chain will not be the same as # In general these will not be the same (although it _could_ be if the MH step # was rejected -- that is why we seed the sampling in this section). -chn1[@varname(x)][end], chn1[@varname(y)][end] +chn1[@varname(x)][end], chn2[@varname(x)][1] ``` ::: From dc584ea729b9cc1cd1e3685920fe1ae20b6a84fb Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 5 May 2026 13:42:45 +0100 Subject: [PATCH 28/29] Fix stackg --- tutorials/infinite-mixture-models/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/infinite-mixture-models/index.qmd b/tutorials/infinite-mixture-models/index.qmd index 10b7e215a..fe845966e 100755 --- a/tutorials/infinite-mixture-models/index.qmd +++ b/tutorials/infinite-mixture-models/index.qmd @@ -260,7 +260,7 @@ Finally, we can plot the number of clusters in each sample. ```{julia} # Extract the number of clusters for each sample of the Markov chain. k = map( - t -> length(unique(vec(chain[@varname(z), iter=t]))), + t -> length(unique(vec(chain[@varname(z), iter=t, stack=true]))), 1:iterations, ); From a02f80f9ce8ceb6fdb08f278523674c01ed8d3d0 Mon Sep 17 00:00:00 2001 From: Penelope Yong Date: Tue, 5 May 2026 13:45:39 +0100 Subject: [PATCH 29/29] No need vec --- tutorials/infinite-mixture-models/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/infinite-mixture-models/index.qmd b/tutorials/infinite-mixture-models/index.qmd index fe845966e..a5d5d4558 100755 --- a/tutorials/infinite-mixture-models/index.qmd +++ b/tutorials/infinite-mixture-models/index.qmd @@ -260,7 +260,7 @@ Finally, we can plot the number of clusters in each sample. ```{julia} # Extract the number of clusters for each sample of the Markov chain. k = map( - t -> length(unique(vec(chain[@varname(z), iter=t, stack=true]))), + t -> length(unique(chain[@varname(z), iter=t, stack=true])), 1:iterations, );
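
For reference, a minimal sketch (not part of the patches) of the per-iteration indexing pattern that the final two patches converge on. It assumes the FlexiChains behaviour shown in this series: `FlexiChains.niters` gives the number of iterations, and `chain[@varname(z), iter=t, stack=true]` returns the draw of `z` at iteration `t` as a flat array, so no `vec` is needed. The helper name `n_clusters_per_iteration` is illustrative only.

```julia
# Illustrative sketch only; `chain` stands for a FlexiChains.VNChain containing a
# vector-valued variable `z`, such as the one sampled in
# tutorials/infinite-mixture-models/index.qmd.
using Turing            # brings @varname into scope
import FlexiChains

function n_clusters_per_iteration(chain)
    Niters = FlexiChains.niters(chain)
    # For each iteration, extract the assignment vector z and count how many
    # distinct cluster labels it uses (mirroring the final form of the patch).
    return map(1:Niters) do t
        z_t = chain[@varname(z), iter=t, stack=true]
        length(unique(z_t))
    end
end
```
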