-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataFrameExtensionsNullsNaNs.cs
More file actions
115 lines (98 loc) · 3.02 KB
/
DataFrameExtensionsNullsNaNs.cs
File metadata and controls
115 lines (98 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using Microsoft.Data.Analysis;
namespace Dimension.DataFrame.Extensions;
/// <summary>
/// Extension methods for removing nulls and NaNs, making Microsoft's DataFrame a little more user-friendly.
/// </summary>
public static class DataFrameExtensionsNullsNaNs
{
/// <summary>
/// Builds a new column holding only the usable values of <paramref name="column"/>;
/// entries that are null or NaN are left out.
/// </summary>
/// <typeparam name="T">Numeric element type of the column.</typeparam>
/// <param name="column">Column to read from; it is only enumerated, never modified.</param>
/// <returns>A new column with the same name containing the remaining values in their original order.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="column"/> is null.</exception>
public static PrimitiveDataFrameColumn<T> DropNulls<T>(this PrimitiveDataFrameColumn<T> column)
    where T : unmanaged, INumber<T>
{
    System.ArgumentNullException.ThrowIfNull(column);

    var validValues = new List<T?>();
    foreach (var value in column)
    {
        // T.IsNaN is available through the INumber<T> constraint, so NaN is recognised for
        // every floating-point T (not just float and double) and is simply false for
        // element types that cannot represent NaN.
        if (value is { } number && !T.IsNaN(number))
        {
            validValues.Add(number);
        }
    }

    return new PrimitiveDataFrameColumn<T>(column.Name, validValues);
}
/// <summary>
/// Returns a data frame made of only those rows of <paramref name="df"/> that contain no null cell.
/// </summary>
/// <remarks>
/// NOTE(review): Microsoft.Data.Analysis.DataFrame appears to declare its own instance method named
/// DropNulls; if so, <c>df.DropNulls()</c> binds to that method and this extension is only reachable
/// through static call syntax. Confirm against the library version in use.
/// </remarks>
/// <param name="df">The data frame whose rows are examined.</param>
/// <returns>The result of filtering <paramref name="df"/> down to the indices of its null-free rows.</returns>
public static Microsoft.Data.Analysis.DataFrame DropNulls(this Microsoft.Data.Analysis.DataFrame df)
{
    var rowCount = (int) df.Rows.Count;
    var keptIndices = new List<int>();

    foreach (var rowIndex in Enumerable.Range(0, rowCount))
    {
        if (df.Rows[rowIndex].HasNulls())
        {
            continue;
        }

        keptIndices.Add(rowIndex);
    }

    return df.Filter(keptIndices);
}
/// <summary>
/// Returns a data frame made of only those rows of <paramref name="df"/> in which no cell is
/// null or a floating-point NaN.
/// </summary>
/// <remarks>
/// The per-row test is delegated to <c>HasNullsOrNAs</c>, so this method and
/// <c>DropNullsOrNAs</c> apply the same criterion.
/// </remarks>
/// <param name="df">The data frame whose rows are examined.</param>
/// <returns>The result of filtering <paramref name="df"/> down to the indices of its clean rows.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="df"/> is null.</exception>
public static Microsoft.Data.Analysis.DataFrame DropNAs(this Microsoft.Data.Analysis.DataFrame df)
{
    System.ArgumentNullException.ThrowIfNull(df);

    var rowCount = df.Rows.Count;
    var rowsToKeep = new List<int>();
    for (var i = 0; i < rowCount; i++)
    {
        if (!df.Rows[i].HasNullsOrNAs())
        {
            rowsToKeep.Add(i);
        }
    }

    return df.Filter(rowsToKeep);
}
/// <summary>
/// Returns a data frame made of only those rows of <paramref name="df"/> for which
/// <c>HasNullsOrNAs</c> reports false, i.e. rows without any null or NaN cell.
/// </summary>
/// <param name="df">The data frame whose rows are examined.</param>
/// <returns>The result of filtering <paramref name="df"/> down to the indices of its clean rows.</returns>
public static Microsoft.Data.Analysis.DataFrame DropNullsOrNAs(this Microsoft.Data.Analysis.DataFrame df)
{
    var rowCount = (int) df.Rows.Count;

    var cleanRowIndices =
        from rowIndex in Enumerable.Range(0, rowCount)
        where !df.Rows[rowIndex].HasNullsOrNAs()
        select rowIndex;

    return df.Filter(cleanRowIndices.ToList());
}
/// <summary>
/// Tells whether <paramref name="row"/> contains at least one cell that is null or NaN.
/// </summary>
/// <param name="row">The row whose cells are scanned in enumeration order.</param>
/// <returns>True as soon as a null or NaN cell is met; false when every cell passes.</returns>
private static bool HasNullsOrNAs(this DataFrameRow row)
{
    foreach (var value in row)
    {
        var isUsable = value != null && !IsNaN(value);
        if (!isUsable)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Tells whether <paramref name="row"/> contains at least one null cell.
/// </summary>
/// <param name="row">The row whose cells are scanned in enumeration order.</param>
/// <returns>True if a null cell is found; otherwise false.</returns>
public static bool HasNulls(this DataFrameRow row)
{
    var foundNull = false;

    foreach (var value in row)
    {
        if (value is null)
        {
            // One null is enough; no need to look at the remaining cells.
            foundNull = true;
            break;
        }
    }

    return foundNull;
}
/// <summary>
/// Tells whether <paramref name="column"/> contains at least one null value.
/// </summary>
/// <remarks>
/// Uses the column's own <c>NullCount</c> instead of enumerating the column, which avoids
/// visiting (and boxing) every cell just to answer a yes/no question.
/// </remarks>
/// <param name="column">The column to inspect.</param>
/// <returns>True if the column reports one or more nulls; otherwise false.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="column"/> is null.</exception>
public static bool HasNulls(this DataFrameColumn column)
{
    System.ArgumentNullException.ThrowIfNull(column);

    return column.NullCount > 0;
}
/// <summary>
/// Tells whether <paramref name="cell"/> is a boxed <see cref="float"/> or <see cref="double"/> holding NaN.
/// </summary>
/// <param name="cell">The value to test; any other type (or null) yields false.</param>
/// <returns>True only for a float or double NaN.</returns>
private static bool IsNaN(object cell)
{
    return cell switch
    {
        float single => float.IsNaN(single),
        double dbl => double.IsNaN(dbl),
        _ => false,
    };
}
}