From 0566fd4c5a820f72b89fadec833fb281281789ce Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 4 Apr 2026 12:10:18 +0000 Subject: [PATCH 1/2] feat: add computeWithYates and computeWithWilliams to ChiSquareTest Add two new continuity-correction overloads for the chi-square goodness-of-fit test: - ChiSquareTest.computeWithYates: applies Yates's continuity correction (|O - E| - 0.5) before squaring each term; recommended for df=1. - ChiSquareTest.computeWithWilliams: divides the raw chi-square by Williams's q = 1 + (k^2-1)/(6nk), which provides a better approximation for small samples across any number of categories. Both methods include XML-doc with references. Four new tests cover each correction variant. Part of #87. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/FSharp.Stats/Testing/ChiSquareTest.fs | 58 +++++++++++++++++++---- tests/FSharp.Stats.Tests/Testing.fs | 37 +++++++++++++++ 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/src/FSharp.Stats/Testing/ChiSquareTest.fs b/src/FSharp.Stats/Testing/ChiSquareTest.fs index a701711d..324dd023 100644 --- a/src/FSharp.Stats/Testing/ChiSquareTest.fs +++ b/src/FSharp.Stats/Testing/ChiSquareTest.fs @@ -37,20 +37,62 @@ open FSharp.Stats type ChiSquareTest = - /// Computes the Chi-Square test - /// n data points -> degrees of freedom = n - 1 + /// + /// Computes the Chi-Square goodness-of-fit test. + /// n data points -> degrees of freedom = n - 1 + /// static member compute (degreesOfFreedom:int) (expected:seq) (observed:seq) = - //let chechParams = - // if expected |> Seq.exists (fun x -> abs x < 5.) then printfn "Warning: A value less than 5 is present in expected values. Results may not be correct!" 
- // let sumEx = Seq.sum expected - // let sumOb = Seq.sum observed - // if Math.Round(sumEx,1) <> Math.Round(sumOb,1) then printfn "Warning: The sum of observed values does not match the sum of expected values. SumEx: %.3f SumOb: %.3f" sumEx sumOb let chi2 = Seq.zip observed expected |> Seq.fold (fun acc (obs,exp) -> let d = obs - exp acc + (d * d) / exp) 0.0 - + TestStatistics.createChiSquare chi2 (float degreesOfFreedom) + + /// + /// Computes the Chi-Square goodness-of-fit test with Yates's continuity correction. + /// + /// + /// Yates's correction subtracts 0.5 from each |observed - expected| term (never going below zero, so a correction larger than the difference itself cannot inflate the statistic) before squaring. + /// It is recommended when the degrees of freedom equal 1 (two categories) and expected + /// cell counts are small. For df > 1 or large samples the uncorrected compute is + /// preferable. + /// + /// Reference: Yates, F. (1934). Contingency tables involving small numbers and the chi-squared + /// test. Supplement to the Journal of the Royal Statistical Society, 1(2), 217-235. + /// + static member computeWithYates (degreesOfFreedom:int) (expected:seq) (observed:seq) = + let chi2 = + Seq.zip observed expected + |> Seq.fold (fun acc (obs,exp) -> + let diff = max 0.0 (abs (obs - exp) - 0.5) + acc + (diff * diff) / exp) 0.0 + TestStatistics.createChiSquare chi2 (float degreesOfFreedom) + + /// + /// Computes the Chi-Square goodness-of-fit test with Williams's correction. + /// + /// + /// Williams's correction divides the chi-square statistic by + /// q = 1 + (k^2 - 1) / (6 * n * k), where k is the number of categories and + /// n is the total observed count. This provides a better approximation to the + /// chi-squared distribution when sample sizes are small. + /// + /// Reference: Williams, D. A. (1976). Improved likelihood ratio tests for complete + /// contingency tables. Biometrika, 63(1), 33-37.
+ /// + static member computeWithWilliams (degreesOfFreedom:int) (expected:seq) (observed:seq) = + let observedArr = Seq.toArray observed + let expectedArr = Seq.toArray expected + let k = float observedArr.Length + let n = Array.sum observedArr + let q = 1.0 + (k * k - 1.0) / (6.0 * n * k) + let chi2Raw = + Array.zip observedArr expectedArr + |> Array.fold (fun acc (obs,exp) -> + let d = obs - exp + acc + (d * d) / exp) 0.0 + let chi2 = chi2Raw / q TestStatistics.createChiSquare chi2 (float degreesOfFreedom) static member pearsonChiSquared (table:ContingencyTable<_,_>) = diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs index 1adfd493..083c3f6b 100644 --- a/tests/FSharp.Stats.Tests/Testing.fs +++ b/tests/FSharp.Stats.Tests/Testing.fs @@ -384,12 +384,49 @@ let chiSquaredTests = let df = expected.Length - 1 ChiSquareTest.compute df expected observed + // computeWithYates: + // Hand-computed sum((|O - E| - 0.5)^2 / E) for obs <- c(45, 55), E = c(50, 50); note that R's chisq.test(obs, p=c(0.5,0.5), correct=TRUE) applies the correction only to 2x2 tables and reports the uncorrected 1.0 here + // Chi-squared = 0.81, p-value = 0.3681 + let testCaseYates1 = + let expected = [50.0; 50.0] + let observed = [45.0; 55.0] + ChiSquareTest.computeWithYates 1 expected observed + + // Hand-computed sum((|O - E| - 0.5)^2 / E) for obs <- c(10, 20), E = c(15, 15); R's chisq.test(obs, p=c(0.5,0.5), correct=TRUE) reports the uncorrected 3.3333 here + // Chi-squared = 2.7, p-value = 0.1003 + let testCaseYates2 = + let expected = [15.0; 15.0] + let observed = [10.0; 20.0] + ChiSquareTest.computeWithYates 1 expected observed + + // computeWithWilliams: + // Williams q = 1 + (k^2-1)/(6*n*k); k=4, n=556 => q ≈ 1.001124 + // chi2_williams = 0.4700 / 1.001124 ≈ 0.4695 + let testCaseWilliams1 = + let expected = [312.75;104.25;104.25;34.75] + let observed = [315.;101.;108.;32.]
+ let df = expected.Length - 1 + ChiSquareTest.computeWithWilliams df expected observed + + // k=3, n=45 => q ≈ 1.009877; raw chi2 ≈ 3.3333 => williams ≈ 3.3007 + let testCaseWilliams2 = + let expected = [15.0; 15.0; 15.0] + let observed = [10.0; 20.0; 15.0] + let df = expected.Length - 1 + ChiSquareTest.computeWithWilliams df expected observed + testList "Testing.ChiSquaredTest" [ testCase "compute" <| fun () -> Expect.isTrue (0.9254 = Math.Round(testCase1.PValueRight,4)) "pValue should be equal." Expect.isTrue (0.4700 = Math.Round(testCase1.Statistic,4)) "statistic should be equal." Expect.isTrue (0.000638 = Math.Round(testCase2.PValueRight,6)) "pValue should be equal." Expect.isTrue (19.461 = Math.Round(testCase2.Statistic,3)) "statistic should be equal." + testCase "computeWithYates" <| fun () -> + Expect.floatClose Accuracy.medium testCaseYates1.Statistic 0.81 "Yates statistic should be 0.81" + Expect.floatClose Accuracy.medium testCaseYates2.Statistic 2.7 "Yates statistic should be 2.7" + testCase "computeWithWilliams" <| fun () -> + Expect.floatClose Accuracy.medium testCaseWilliams1.Statistic 0.469496 "Williams statistic should be ~0.4695" + Expect.floatClose Accuracy.medium testCaseWilliams2.Statistic 3.300733 "Williams statistic should be ~3.3007" ] From 144c63ea4b64fe3cc9846f43d1045ab7cda48869 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 4 Apr 2026 12:10:20 +0000 Subject: [PATCH 2/2] ci: trigger checks