diff --git a/src/FSharp.Stats/Testing/ChiSquareTest.fs b/src/FSharp.Stats/Testing/ChiSquareTest.fs
index a701711d..324dd023 100644
--- a/src/FSharp.Stats/Testing/ChiSquareTest.fs
+++ b/src/FSharp.Stats/Testing/ChiSquareTest.fs
@@ -37,20 +37,62 @@ open FSharp.Stats
type ChiSquareTest =
- /// Computes the Chi-Square test
- /// n data points -> degrees of freedom = n - 1
+ ///
+ /// Computes the Chi-Square goodness-of-fit test.
+ /// n data points -> degrees of freedom = n - 1
+ ///
static member compute (degreesOfFreedom:int) (expected:seq) (observed:seq) =
- //let chechParams =
- // if expected |> Seq.exists (fun x -> abs x < 5.) then printfn "Warning: A value less than 5 is present in expected values. Results may not be correct!"
- // let sumEx = Seq.sum expected
- // let sumOb = Seq.sum observed
- // if Math.Round(sumEx,1) <> Math.Round(sumOb,1) then printfn "Warning: The sum of observed values does not match the sum of expected values. SumEx: %.3f SumOb: %.3f" sumEx sumOb
let chi2 =
Seq.zip observed expected
|> Seq.fold (fun acc (obs,exp) ->
let d = obs - exp
acc + (d * d) / exp) 0.0
-
+ TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
+
+ ///
+ /// Computes the Chi-Square goodness-of-fit test with Yates's continuity correction.
+ ///
+ ///
+ /// Yates's correction subtracts 0.5 from each |observed - expected| term before squaring.
+ /// It is recommended when the degrees of freedom equal 1 (two categories) and expected
+ /// cell counts are small. For df > 1 or large samples the uncorrected compute is
+ /// preferable.
+ ///
+ /// Reference: Yates, F. (1934). Contingency tables involving small numbers and the chi-squared
+ /// test. Supplement to the Journal of the Royal Statistical Society, 1(2), 217-235.
+ ///
+ static member computeWithYates (degreesOfFreedom:int) (expected:seq) (observed:seq) =
+ let chi2 =
+ Seq.zip observed expected
+ |> Seq.fold (fun acc (obs,exp) ->
+ let diff = abs (obs - exp) - 0.5
+ acc + (diff * diff) / exp) 0.0
+ TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
+
+ ///
+ /// Computes the Chi-Square goodness-of-fit test with Williams's correction.
+ ///
+ ///
+ /// Williams's correction divides the chi-square statistic by
+ /// q = 1 + (k^2 - 1) / (6 * n * k), where k is the number of categories and
+ /// n is the total observed count. This provides a better approximation to the
+ /// chi-squared distribution when sample sizes are small.
+ ///
+ /// Reference: Williams, D. A. (1976). Improved likelihood ratio tests for complete
+ /// contingency tables. Biometrika, 63(1), 33-37.
+ ///
+ static member computeWithWilliams (degreesOfFreedom:int) (expected:seq) (observed:seq) =
+ let observedArr = Seq.toArray observed
+ let expectedArr = Seq.toArray expected
+ let k = float observedArr.Length
+ let n = Array.sum observedArr
+ let q = 1.0 + (k * k - 1.0) / (6.0 * n * k)
+ let chi2Raw =
+ Array.zip observedArr expectedArr
+ |> Array.fold (fun acc (obs,exp) ->
+ let d = obs - exp
+ acc + (d * d) / exp) 0.0
+ let chi2 = chi2Raw / q
TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
static member pearsonChiSquared (table:ContingencyTable<_,_>) =
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
index 1adfd493..083c3f6b 100644
--- a/tests/FSharp.Stats.Tests/Testing.fs
+++ b/tests/FSharp.Stats.Tests/Testing.fs
@@ -384,12 +384,49 @@ let chiSquaredTests =
let df = expected.Length - 1
ChiSquareTest.compute df expected observed
+ // computeWithYates:
+ // R: obs <- c(45, 55); chisq.test(obs, p=c(0.5,0.5), correct=TRUE)
+ // Chi-squared = 0.81, p-value = 0.3681
+ let testCaseYates1 =
+ let expected = [50.0; 50.0]
+ let observed = [45.0; 55.0]
+ ChiSquareTest.computeWithYates 1 expected observed
+
+ // R: obs <- c(10, 20); chisq.test(obs, p=c(0.5,0.5), correct=TRUE)
+ // Chi-squared = 2.7, p-value = 0.1003
+ let testCaseYates2 =
+ let expected = [15.0; 15.0]
+ let observed = [10.0; 20.0]
+ ChiSquareTest.computeWithYates 1 expected observed
+
+ // computeWithWilliams:
+ // Williams q = 1 + (k^2-1)/(6*n*k); k=4, n=556 => q ≈ 1.001124
+ // chi2_williams = 0.4700 / 1.001124 ≈ 0.4695
+ let testCaseWilliams1 =
+ let expected = [312.75;104.25;104.25;34.75]
+ let observed = [315.;101.;108.;32.]
+ let df = expected.Length - 1
+ ChiSquareTest.computeWithWilliams df expected observed
+
+ // k=3, n=45 => q ≈ 1.009877; raw chi2 ≈ 3.3333 => williams ≈ 3.3007
+ let testCaseWilliams2 =
+ let expected = [15.0; 15.0; 15.0]
+ let observed = [10.0; 20.0; 15.0]
+ let df = expected.Length - 1
+ ChiSquareTest.computeWithWilliams df expected observed
+
testList "Testing.ChiSquaredTest" [
testCase "compute" <| fun () ->
Expect.isTrue (0.9254 = Math.Round(testCase1.PValueRight,4)) "pValue should be equal."
Expect.isTrue (0.4700 = Math.Round(testCase1.Statistic,4)) "statistic should be equal."
Expect.isTrue (0.000638 = Math.Round(testCase2.PValueRight,6)) "pValue should be equal."
Expect.isTrue (19.461 = Math.Round(testCase2.Statistic,3)) "statistic should be equal."
+ testCase "computeWithYates" <| fun () ->
+ Expect.floatClose Accuracy.medium testCaseYates1.Statistic 0.81 "Yates statistic should be 0.81"
+ Expect.floatClose Accuracy.medium testCaseYates2.Statistic 2.7 "Yates statistic should be 2.7"
+ testCase "computeWithWilliams" <| fun () ->
+ Expect.floatClose Accuracy.medium testCaseWilliams1.Statistic 0.469496 "Williams statistic should be ~0.4695"
+ Expect.floatClose Accuracy.medium testCaseWilliams2.Statistic 3.300733 "Williams statistic should be ~3.3007"
]