-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFieldNameScorer.cs
More file actions
100 lines (81 loc) · 2.72 KB
/
FieldNameScorer.cs
File metadata and controls
100 lines (81 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
namespace TxFormFieldMapper;
/// <summary>
/// Computes a similarity score between two field-name candidates by combining
/// exact/normalized equality checks with token-overlap and edit-distance heuristics.
/// </summary>
internal static class FieldNameScorer
{
    /// <summary>Score returned when the normalized names are equal but the originals are not.</summary>
    private const double NormalizedMatchScore = 0.99;

    /// <summary>Containment score returned when both token sets have the same size and fully overlap.</summary>
    private const double EqualTokenSetScore = 0.99;

    /// <summary>Containment score returned when the smaller token set is fully contained in the larger one.</summary>
    private const double TokenSubsetScore = 0.86;

    /// <summary>
    /// Scores how closely <paramref name="left"/> matches <paramref name="right"/>.
    /// </summary>
    /// <param name="left">First candidate.</param>
    /// <param name="right">Second candidate.</param>
    /// <returns>
    /// <c>0</c> when either normalized name is empty; <c>1</c> when the original names are
    /// ordinally equal; <c>0.99</c> when only the normalized names are ordinally equal;
    /// otherwise the largest of the token-set, token-containment and Levenshtein scores.
    /// </returns>
    public static double Score(FieldNameCandidate left, FieldNameCandidate right)
    {
        if (left.Normalized.Length == 0 || right.Normalized.Length == 0)
        {
            return 0;
        }

        if (string.Equals(left.Original, right.Original, StringComparison.Ordinal))
        {
            return 1;
        }

        if (string.Equals(left.Normalized, right.Normalized, StringComparison.Ordinal))
        {
            return NormalizedMatchScore;
        }

        // Every heuristic yields a finite value here, so nesting Math.Max gives the
        // same result as materializing an array and calling Max() on it.
        var tokenScore = Math.Max(
            TokenSetScore(left.Tokens, right.Tokens),
            TokenContainmentScore(left.Tokens, right.Tokens));

        return Math.Max(tokenScore, LevenshteinScore(left.Normalized, right.Normalized));
    }

    /// <summary>
    /// Jaccard-style overlap: shared tokens divided by the size of the union of both sets.
    /// </summary>
    /// <returns><c>0</c> when either set is empty; otherwise intersection / union.</returns>
    private static double TokenSetScore(IReadOnlySet<string> left, IReadOnlySet<string> right)
    {
        if (left.Count == 0 || right.Count == 0)
        {
            return 0;
        }

        var shared = CountSharedTokens(left, right);

        // Inclusion-exclusion keeps the denominator on the same equality rules as the
        // numerator. Building the union with a different (case-insensitive) comparer
        // could under-count it and push the ratio above 1.
        // NOTE(review): assumes both sets were built with the same equality comparer.
        var unionSize = left.Count + right.Count - shared;

        return (double)shared / unionSize;
    }

    /// <summary>
    /// Rewards the case where every token of the smaller set also appears in the other set.
    /// </summary>
    /// <returns>
    /// <c>0</c> when either set is empty or the smaller set is not fully matched;
    /// <c>0.99</c> when both sets have the same size; <c>0.86</c> otherwise.
    /// </returns>
    private static double TokenContainmentScore(IReadOnlySet<string> left, IReadOnlySet<string> right)
    {
        if (left.Count == 0 || right.Count == 0)
        {
            return 0;
        }

        var shared = CountSharedTokens(left, right);
        var smallerSetSize = Math.Min(left.Count, right.Count);
        var largerSetSize = Math.Max(left.Count, right.Count);

        if (shared != smallerSetSize)
        {
            return 0;
        }

        return smallerSetSize == largerSetSize ? EqualTokenSetScore : TokenSubsetScore;
    }

    /// <summary>
    /// Counts the tokens of <paramref name="left"/> that <paramref name="right"/> reports as contained.
    /// </summary>
    private static int CountSharedTokens(IReadOnlySet<string> left, IReadOnlySet<string> right)
    {
        return left.Count(token => right.Contains(token));
    }

    /// <summary>
    /// Converts the edit distance between two strings into a similarity value.
    /// </summary>
    /// <returns>
    /// <c>1</c> when both strings are empty; otherwise <c>1 - distance / max(length)</c>.
    /// </returns>
    private static double LevenshteinScore(string left, string right)
    {
        var maxLength = Math.Max(left.Length, right.Length);
        if (maxLength == 0)
        {
            return 1;
        }

        return 1 - ((double)LevenshteinDistance(left, right) / maxLength);
    }

    /// <summary>
    /// Computes the Levenshtein distance (unit-cost insert, delete, substitute) between
    /// two strings, comparing them one UTF-16 code unit at a time.
    /// </summary>
    private static int LevenshteinDistance(string left, string right)
    {
        // Only two rows of the dynamic-programming table are kept at any time.
        var previous = new int[right.Length + 1];
        var current = new int[right.Length + 1];

        // Row 0: turning an empty prefix of `left` into right[..j] takes j insertions.
        for (var j = 0; j <= right.Length; j++)
        {
            previous[j] = j;
        }

        for (var i = 1; i <= left.Length; i++)
        {
            // Column 0: turning left[..i] into an empty string takes i deletions.
            current[0] = i;

            for (var j = 1; j <= right.Length; j++)
            {
                var cost = left[i - 1] == right[j - 1] ? 0 : 1;
                current[j] = Math.Min(
                    Math.Min(current[j - 1] + 1, previous[j] + 1),
                    previous[j - 1] + cost);
            }

            // The row just filled becomes `previous`; the old row is reused as scratch.
            (previous, current) = (current, previous);
        }

        // After the final swap the last computed row lives in `previous`.
        return previous[right.Length];
    }
}