forked from trpc-group/trpc-agent-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathval.evalset.json
More file actions
70 lines (70 loc) · 1.99 KB
/
Copy pathval.evalset.json
File metadata and controls
70 lines (70 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
{
"eval_set_id": "math_word_problems_val",
"name": "小学算术应用题 - 验证集",
"description": "3 道小学水平算术应用题,覆盖乘法、单位换算和百分比;final_response 中带「答案:xxx」的标准答句供 contains 匹配,同时作为 LLM 裁判的参考答案。",
"eval_cases": [
{
"eval_id": "wp_seats_multiply",
"conversation": [
{
"invocation_id": "v1",
"user_content": {
"parts": [{"text": "教室里有 5 排座位,每排 8 个,一共多少个座位?"}],
"role": "user"
},
"final_response": {
"parts": [{"text": "答案:40 个"}],
"role": "model"
}
}
],
"session_input": {
"app_name": "math_word_problem_optimizer",
"user_id": "validator",
"state": {}
}
},
{
"eval_id": "wp_water_weight",
"conversation": [
{
"invocation_id": "v2",
"user_content": {
"parts": [{"text": "已知 1 升水重 1 千克,3.5 升水重多少千克?"}],
"role": "user"
},
"final_response": {
"parts": [{"text": "答案:3.5 千克"}],
"role": "model"
}
}
],
"session_input": {
"app_name": "math_word_problem_optimizer",
"user_id": "validator",
"state": {}
}
},
{
"eval_id": "wp_class_girls_percent",
"conversation": [
{
"invocation_id": "v3",
"user_content": {
"parts": [{"text": "班里一共有 30 人,其中 60% 是女生,请问有多少名女生?"}],
"role": "user"
},
"final_response": {
"parts": [{"text": "答案:18 人"}],
"role": "model"
}
}
],
"session_input": {
"app_name": "math_word_problem_optimizer",
"user_id": "validator",
"state": {}
}
}
]
}