Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 50 additions & 8 deletions src/FSharp.Stats/Testing/ChiSquareTest.fs
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,62 @@ open FSharp.Stats
type ChiSquareTest =


/// <summary>
/// Computes Pearson's Chi-Square goodness-of-fit statistic: the sum of
/// (observed - expected)^2 / expected over all paired categories.
/// </summary>
/// <remarks>
/// For n data points the degrees of freedom are usually n - 1.
/// Observed and expected values are paired element by element; if one sequence is
/// shorter, the surplus elements of the longer one are ignored. Inputs are not validated.
/// </remarks>
/// <param name="degreesOfFreedom">Degrees of freedom handed to the resulting test statistic.</param>
/// <param name="expected">Expected value for each category.</param>
/// <param name="observed">Observed value for each category.</param>
/// <returns>The value built by <c>TestStatistics.createChiSquare</c> from the statistic and the degrees of freedom.</returns>
static member compute (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
    // Disabled sanity checks, kept for reference:
    //let checkParams =
    //    if expected |> Seq.exists (fun x -> abs x < 5.) then printfn "Warning: A value less than 5 is present in expected values. Results may not be correct!"
    //    let sumEx = Seq.sum expected
    //    let sumOb = Seq.sum observed
    //    if Math.Round(sumEx,1) <> Math.Round(sumOb,1) then printfn "Warning: The sum of observed values does not match the sum of expected values. SumEx: %.3f SumOb: %.3f" sumEx sumOb

    // Contribution of a single category to the statistic.
    let categoryTerm (observedValue:float) (expectedValue:float) =
        let deviation = observedValue - expectedValue
        (deviation * deviation) / expectedValue

    let statistic =
        Seq.map2 categoryTerm observed expected
        |> Seq.fold (+) 0.0

    TestStatistics.createChiSquare statistic (float degreesOfFreedom)

/// <summary>
/// Computes the Chi-Square goodness-of-fit test with Yates's continuity correction.
/// </summary>
/// <remarks>
/// Yates's correction shrinks each |observed - expected| term by 0.5 before squaring.
/// The correction is capped at the absolute deviation itself (i.e. min(0.5, |observed - expected|)),
/// so a deviation smaller than 0.5 contributes 0 instead of being over-corrected into a
/// spurious positive term.
/// It is recommended when the degrees of freedom equal 1 (two categories) and expected
/// cell counts are small. For more than one degree of freedom or large samples the
/// uncorrected <c>compute</c> is preferable.
///
/// Observed and expected values are paired element by element; if one sequence is
/// shorter, the surplus elements of the longer one are ignored. Inputs are not validated.
///
/// Reference: Yates, F. (1934). Contingency tables involving small numbers and the chi-squared
/// test. Supplement to the Journal of the Royal Statistical Society, 1(2), 217-235.
/// </remarks>
/// <param name="degreesOfFreedom">Degrees of freedom handed to the resulting test statistic.</param>
/// <param name="expected">Expected value for each category.</param>
/// <param name="observed">Observed value for each category.</param>
/// <returns>The value built by <c>TestStatistics.createChiSquare</c> from the corrected statistic and the degrees of freedom.</returns>
static member computeWithYates (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
    let chi2 =
        Seq.zip observed expected
        |> Seq.fold (fun acc (obs,exp) ->
            let absDeviation = abs (obs - exp)
            // Never correct past zero: without the cap, |obs - exp| < 0.5 would yield a
            // negative difference whose square wrongly increases the statistic.
            let correction = min 0.5 absDeviation
            let diff = absDeviation - correction
            acc + (diff * diff) / exp) 0.0
    TestStatistics.createChiSquare chi2 (float degreesOfFreedom)

/// <summary>
/// Computes the Chi-Square goodness-of-fit test with Williams's correction.
/// </summary>
/// <remarks>
/// The uncorrected statistic, sum of (observed - expected)^2 / expected, is divided by
/// q = 1 + (k^2 - 1) / (6 * n * k), where k is the number of observed categories and
/// n is the sum of the observed values.
///
/// NOTE(review): the commonly cited form of Williams's factor for goodness-of-fit tests is
/// q = 1 + (k^2 - 1) / (6 * n * (k - 1)), i.e. with the degrees of freedom rather than k in
/// the denominator, and it is usually applied to the likelihood-ratio (G) statistic.
/// Confirm the intended formula against the reference below; the unit tests currently pin
/// the formula as implemented here.
///
/// Both inputs are materialised as arrays. Unlike <c>compute</c>, inputs of different
/// length are not truncated: <c>Array.zip</c> raises an exception in that case.
///
/// Reference: Williams, D. A. (1976). Improved likelihood ratio tests for complete
/// contingency tables. Biometrika, 63(1), 33-37.
/// </remarks>
/// <param name="degreesOfFreedom">Degrees of freedom handed to the resulting test statistic.</param>
/// <param name="expected">Expected value for each category.</param>
/// <param name="observed">Observed value for each category.</param>
/// <returns>The value built by <c>TestStatistics.createChiSquare</c> from the corrected statistic and the degrees of freedom.</returns>
static member computeWithWilliams (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
    let observedValues = Seq.toArray observed
    let expectedValues = Seq.toArray expected

    // Correction factor q, built from the category count and the observed total.
    let categoryCount = float observedValues.Length
    let observedTotal = Array.sum observedValues
    let correctionFactor =
        1.0 + (categoryCount * categoryCount - 1.0) / (6.0 * observedTotal * categoryCount)

    // Plain (uncorrected) Pearson statistic.
    let uncorrectedStatistic =
        Array.zip observedValues expectedValues
        |> Array.map (fun (observedValue, expectedValue) ->
            let deviation = observedValue - expectedValue
            (deviation * deviation) / expectedValue)
        |> Array.fold (+) 0.0

    TestStatistics.createChiSquare (uncorrectedStatistic / correctionFactor) (float degreesOfFreedom)

static member pearsonChiSquared (table:ContingencyTable<_,_>) =
Expand Down
37 changes: 37 additions & 0 deletions tests/FSharp.Stats.Tests/Testing.fs
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,49 @@ let chiSquaredTests =
let df = expected.Length - 1
ChiSquareTest.compute df expected observed

// computeWithYates:
// Hand calculation: 2 * (|45 - 50| - 0.5)^2 / 50 = 0.81; p-value at df = 1 is 0.3681.
// NOTE(review): R documents the `correct` argument of chisq.test as applying to 2-by-2
// tables only, so chisq.test(c(45, 55), p=c(0.5,0.5), correct=TRUE) is not expected to
// reproduce this value - verify before citing R as the source.
let testCaseYates1 =
    let expectedCounts = [50.0; 50.0]
    let observedCounts = [45.0; 55.0]
    ChiSquareTest.computeWithYates 1 expectedCounts observedCounts

// Hand calculation: 2 * (|10 - 15| - 0.5)^2 / 15 = 2.7; p-value at df = 1 is 0.1003.
// NOTE(review): R documents the `correct` argument of chisq.test as applying to 2-by-2
// tables only, so chisq.test(c(10, 20), p=c(0.5,0.5), correct=TRUE) is not expected to
// reproduce this value - verify before citing R as the source.
let testCaseYates2 =
    let expectedCounts = [15.0; 15.0]
    let observedCounts = [10.0; 20.0]
    ChiSquareTest.computeWithYates 1 expectedCounts observedCounts

// computeWithWilliams:
// Williams q = 1 + (k^2-1)/(6*n*k); k=4, n=556 => q ≈ 1.001124
// chi2_williams = 0.4700 / 1.001124 ≈ 0.4695
let testCaseWilliams1 =
    let expectedCounts = [312.75;104.25;104.25;34.75]
    let observedCounts = [315.;101.;108.;32.]
    // One degree of freedom less than the number of categories.
    let degreesOfFreedom = List.length expectedCounts - 1
    ChiSquareTest.computeWithWilliams degreesOfFreedom expectedCounts observedCounts

// k=3, n=45 => q ≈ 1.009877; raw chi2 ≈ 3.3333 => williams ≈ 3.3007
let testCaseWilliams2 =
    let expectedCounts = [15.0; 15.0; 15.0]
    let observedCounts = [10.0; 20.0; 15.0]
    // One degree of freedom less than the number of categories.
    let degreesOfFreedom = List.length expectedCounts - 1
    ChiSquareTest.computeWithWilliams degreesOfFreedom expectedCounts observedCounts

testList "Testing.ChiSquaredTest" [
    // Uncorrected statistic: compare values rounded to a fixed number of decimals.
    testCase "compute" <| fun () ->
        let roundedTo (digits:int) (value:float) = Math.Round(value, digits)
        Expect.isTrue (roundedTo 4 testCase1.PValueRight = 0.9254) "pValue should be equal."
        Expect.isTrue (roundedTo 4 testCase1.Statistic = 0.4700) "statistic should be equal."
        Expect.isTrue (roundedTo 6 testCase2.PValueRight = 0.000638) "pValue should be equal."
        Expect.isTrue (roundedTo 3 testCase2.Statistic = 19.461) "statistic should be equal."
    // Yates-corrected statistic: (actual, expected, failure message) triples.
    testCase "computeWithYates" <| fun () ->
        [ testCaseYates1.Statistic, 0.81, "Yates statistic should be 0.81"
          testCaseYates2.Statistic, 2.7, "Yates statistic should be 2.7" ]
        |> List.iter (fun (actual, expected, message) ->
            Expect.floatClose Accuracy.medium actual expected message)
    // Williams-corrected statistic: (actual, expected, failure message) triples.
    testCase "computeWithWilliams" <| fun () ->
        [ testCaseWilliams1.Statistic, 0.469496, "Williams statistic should be ~0.4695"
          testCaseWilliams2.Statistic, 3.300733, "Williams statistic should be ~3.3007" ]
        |> List.iter (fun (actual, expected, message) ->
            Expect.floatClose Accuracy.medium actual expected message)

]

Expand Down
Loading