-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
318 lines (296 loc) · 14.6 KB
/
index.html
File metadata and controls
318 lines (296 loc) · 14.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Agent-ValueBench</title>
<meta
name="description"
content="Agent-ValueBench is a comprehensive benchmark for evaluating the underlying values of autonomous agents across executable environments and value-conflict tasks."
>
<meta property="og:title" content="Agent-ValueBench">
<meta
property="og:description"
content="394 executable environments, 4,335 value-conflict tasks, 28 value systems, and 332 dimensions for evaluating agent values."
>
<meta property="og:image" content="assets/brand/agent-valuebench-wordmark.svg">
<link rel="icon" type="image/png" href="favicon.png">
<link rel="apple-touch-icon" href="assets/brand/favicon.png">
<link rel="stylesheet" href="styles.css">
</head>
<body id="top">
<header class="site-header">
<a class="brand-mark" href="#top" aria-label="Agent-ValueBench home">
<img src="assets/brand/favicon.png" alt="" aria-hidden="true">
<span>Agent-ValueBench</span>
</a>
<nav class="site-nav" aria-label="Primary navigation">
<a href="#overview">Overview</a>
<a href="#motivation">Motivation</a>
<a href="#construction">Construction</a>
<a href="#questions">Research Questions</a>
<a href="#results">Results</a>
</nav>
</header>
<main>
<section class="hero" id="overview" aria-labelledby="hero-title">
<div class="hero-copy">
<p class="eyebrow">A benchmark for evaluating agent values</p>
<h1 id="hero-title">What values do agents exhibit?</h1>
<p class="hero-lede">
Agent-ValueBench is the first comprehensive benchmark dedicated to evaluating the underlying values of autonomous agents. It features 394 executable environments across 16 domains, offering 4,335 value-conflict tasks that span 28 value systems and 332 dimensions.
</p>
<div class="hero-actions" aria-label="Page shortcuts">
<a class="button primary" href="#results">View Results</a>
<a class="button secondary" href="#construction">See Benchmark Design</a>
</div>
</div>
<div class="hero-emblem" aria-label="Agent-ValueBench logo">
<img src="assets/brand/agent-valuebench-wordmark.svg" alt="Agent-ValueBench logo">
<div class="hero-resource-links" aria-label="External resources">
<a class="resource-link" href="https://arxiv.org/abs/2605.10365" target="_blank" rel="noopener noreferrer">
<img src="assets/links/arxiv.svg" alt="" aria-hidden="true">
<span>Paper</span>
</a>
<a class="resource-link" href="https://github.com/ValueByte-AI/Agent-ValueBench" target="_blank" rel="noopener noreferrer">
<img src="assets/links/github.svg" alt="" aria-hidden="true">
<span>GitHub</span>
</a>
<a class="resource-link" href="https://huggingface.co/datasets/Value4AI/Agent-ValueBench" target="_blank" rel="noopener noreferrer">
<img src="assets/links/huggingface.svg" alt="" aria-hidden="true">
<span>HuggingFace</span>
</a>
</div>
</div>
</section>
<section class="stats-section" aria-label="Benchmark scale">
<div class="stat-card">
<span class="stat-icon" aria-hidden="true">✦</span>
<span class="stat-number">394</span>
<span class="stat-label">Executable environments</span>
</div>
<div class="stat-card">
<span class="stat-icon" aria-hidden="true">⚔</span>
<span class="stat-number">4,335</span>
<span class="stat-label">Value-conflict tasks</span>
</div>
<div class="stat-card">
<span class="stat-icon" aria-hidden="true">◈</span>
<span class="stat-number">28</span>
<span class="stat-label">Value systems</span>
</div>
<div class="stat-card">
<span class="stat-icon" aria-hidden="true">☉</span>
<span class="stat-number">332</span>
<span class="stat-label">System-scoped dimensions</span>
</div>
</section>
<section class="motivation-section" id="motivation" aria-labelledby="motivation-title">
<div class="motivation-heading">
<p class="eyebrow">Motivation</p>
<h2 id="motivation-title">Why agent values need their own benchmark</h2>
<div class="motivation-points" aria-label="Motivation points">
<p><span>1</span> Agent Values Are Not Identical to LLM Values.</p>
<p><span>2</span> Agent Value Evaluation Is Absent and Non-Trivial.</p>
</div>
</div>
<div class="motivation-grid">
<figure class="motivation-figure">
<img src="assets/motivation/llm-agent-modality-comparison.png" alt="Comparison of LLM and agent value priorities with a detailed case study.">
<figcaption>
Comparison of LLM and agent modalities sharing GPT-5.4. (Upper) Contrasting value priorities. (Lower) A detailed case study.
</figcaption>
</figure>
<figure class="motivation-figure">
<img src="assets/motivation/value-tide-metaphor.png" alt="Illustration of the Value Tide metaphor.">
<figcaption>Illustration of the Value Tide metaphor.</figcaption>
</figure>
</div>
</section>
<section class="scroll-section" id="construction" aria-labelledby="construction-title">
<div class="section-heading">
<p class="eyebrow">Benchmark construction</p>
<h2 id="construction-title">Automated Synthesis with Expert-in-the-Loop Curation</h2>
<p>
Agent-ValueBench is built through an automated pipeline that jointly synthesizes executable environments, value-conflict tasks, and trajectory-level rubrics, with each stage capped by per-instance expert-in-the-loop refinement.
</p>
</div>
<div class="process-grid">
<article class="process-card">
<span class="process-index">I</span>
<h3>Environment Construction</h3>
<p>
We construct realistic, cross-domain, and executable agent environments through automated discovery, synthesis, evolution, and expert-in-the-loop curation.
</p>
</article>
<article class="process-card">
<span class="process-index">II</span>
<h3>Task Construction</h3>
<p>
We generate implicit value-conflict tasks grounded in psychological value systems, each paired with pole-aligned golden trajectories and behavioral checkpoints.
</p>
</article>
<article class="process-card">
<span class="process-index">III</span>
<h3>Rubric-based Evaluation</h3>
<p>
We evaluate agents at the trajectory level using behaviorally anchored, task-specific rubrics synthesized from a psychology-grounded meta-rubric and applied by an LLM-as-Judge.
</p>
</article>
</div>
<figure class="overview-figure">
<img src="assets/overview/agent-valuebench-overview.png" alt="Overview diagram of the Agent-ValueBench construction and evaluation pipeline.">
<figcaption>Overview of Agent-ValueBench.</figcaption>
</figure>
</section>
<section class="rq-section" id="questions" aria-labelledby="rq-title">
<div class="rq-panel">
<div class="rq-heading">
<p class="eyebrow">Research questions</p>
<h2 id="rq-title">What RQs does Agent-ValueBench answer?</h2>
</div>
<p class="rq-intro">
We conduct a large-scale empirical study to answer the following research questions:
</p>
<div class="rq-grid" aria-label="Research questions">
<article class="rq-item">
<span>RQ1</span>
<p>How do state-of-the-art agents differ in their value profiles?</p>
</article>
<article class="rq-item">
<span>RQ2</span>
<p>To what extent are agent value profiles invariant across harnesses?</p>
</article>
<article class="rq-item">
<span>RQ3</span>
<p>How amenable are agent values to deliberate steering?</p>
</article>
</div>
</div>
</section>
<section class="results-section" id="results" aria-labelledby="results-title">
<div class="section-heading compact">
<p class="eyebrow">Empirical findings</p>
<h2 id="results-title">RQ1: Agent Values Exhibit a Value Tide 🌊</h2>
<p class="results-takeaway">
<strong>Takeaway ❶</strong> Agent values exhibit a Value Tide 🌊: across models, adherence levels and priority currents converge into a structured shared profile, while localized counter-currents reveal interpretable model-specific drift beneath this macroscopic homogeneity.
</p>
</div>
<figure class="result-figure model-map-figure">
<img src="assets/results/model-value-map.png" alt="Heatmaps showing value adherence and value priority for 14 models on MFT08, HEXACO, and PVQ40.">
<figcaption>
Value adherence (Upper) and value priority (Lower) of 14 models on MFT08, HEXACO, and PVQ40.
</figcaption>
</figure>
<div class="section-heading compact finding-heading">
<h2>RQ2: The Tide Bends Under Harness Pull 🌕<br>&amp; RQ3: The Tide Bends to Deliberate Steering 🧭</h2>
<p class="results-takeaway takeaway-rq2">
<strong>Takeaway ❷</strong> Under harness pull 🌕, the value tide bends non-additively in model-specific ways, signaling that the locus of agent alignment is shifting from model alignment toward harness alignment.
</p>
<p class="results-takeaway takeaway-rq3">
<strong>Takeaway ❸</strong> The skill helm exerts a deeper and more reliable pull on the value tide than the prompt helm, signaling that the lever of agent steering is shifting from prompt steering toward skill steering.
</p>
</div>
<figure class="result-figure model-map-figure">
<img src="assets/results/harness-prompt-skill-study.png" alt="Line charts comparing value adherence and value priority under ReAct, Claude Code, Codex, and OpenClaw harnesses.">
<figcaption>
Comparison of three representative models across four harnesses under unsteered, prompt-steered, and skill-steered settings.
</figcaption>
</figure>
</section>
<section class="citation-section" id="citation" aria-labelledby="citation-title">
<div class="citation-card">
<div class="citation-topline">
<div class="citation-heading">
<p class="eyebrow">Citation</p>
<h2 id="citation-title">Cite Agent-ValueBench</h2>
</div>
</div>
<div class="citation-action-row">
<p class="citation-note">
If Agent-ValueBench is useful for your research, please consider citing our paper. We sincerely appreciate your support.
</p>
<button class="copy-citation" type="button" data-copy-target="agent-valuebench-bibtex">
Copy BibTeX
</button>
</div>
<pre><code id="agent-valuebench-bibtex">@misc{dong2026agentvaluebenchcomprehensivebenchmarkevaluating,
title={Agent-ValueBench: A Comprehensive Benchmark for Evaluating Agent Values},
author={Haonan Dong and Qiguan Feng and Kehan Jiang and Haoran Ye and Xin Zhang and Guojie Song},
year={2026},
eprint={2605.10365},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2605.10365},
}</code></pre>
</div>
</section>
</main>
<footer class="site-footer">
<div class="footer-rule" aria-hidden="true"></div>
<p>Agent-ValueBench</p>
<p class="footer-note">A benchmark for evaluating agent values</p>
</footer>
<script>
(() => {
const header = document.querySelector(".site-header");
const topGap = 14;
/**
 * Compute the vertical scroll position to use for an in-page target.
 *
 * The target's document-relative top is reduced by the current height of
 * the `.site-header` element (when one was found) and by the `topGap`
 * constant, then clamped so the result is never negative.
 *
 * @param {Element} target Element to scroll to.
 * @returns {number} Scroll offset in pixels, always >= 0.
 */
function targetTop(target) {
  // Viewport-relative top plus the current scroll offset gives the
  // position relative to the document.
  const absoluteTop = window.scrollY + target.getBoundingClientRect().top;

  let headerOffset = 0;
  if (header) {
    headerOffset = header.getBoundingClientRect().height;
  }

  return Math.max(0, absoluteTop - headerOffset - topGap);
}
/**
 * Copy `text` to the clipboard.
 *
 * Uses the asynchronous Clipboard API when it is available and the page is
 * running in a secure context. Otherwise falls back to selecting the text in
 * a temporary off-screen, read-only <textarea> and issuing the legacy
 * `document.execCommand("copy")`.
 *
 * @param {string} text Text to place on the clipboard.
 * @returns {Promise<void>} Resolves once the copy has been performed.
 * @throws {Error} If the Clipboard API rejects, or if the legacy copy
 *   command reports failure, so callers can surface a "copy failed" state.
 */
async function copyText(text) {
  if (navigator.clipboard && window.isSecureContext) {
    await navigator.clipboard.writeText(text);
    return;
  }

  const textArea = document.createElement("textarea");
  textArea.value = text;
  textArea.setAttribute("readonly", "");
  // Keep the helper element out of view while it is attached.
  textArea.style.position = "fixed";
  textArea.style.top = "-9999px";
  document.body.appendChild(textArea);

  try {
    textArea.select();
    // execCommand signals failure through its boolean return value rather
    // than by throwing, so it must be checked explicitly.
    if (!document.execCommand("copy")) {
      throw new Error("Copy command was unsuccessful");
    }
  } finally {
    // Always detach the helper element, even when the copy fails.
    document.body.removeChild(textArea);
  }
}
// Intercept clicks on in-page links (href starting with "#") and scroll to
// the position computed by targetTop() instead of using the native jump.
document.querySelectorAll('a[href^="#"]').forEach((anchor) => {
  anchor.addEventListener("click", (event) => {
    // Leave modified clicks (open in new tab/window, download, etc.) to the
    // browser instead of cancelling them.
    if (event.metaKey || event.ctrlKey || event.shiftKey || event.altKey) return;

    const hash = anchor.getAttribute("href");
    if (!hash || hash === "#") return;

    // Look the target up by id. Unlike querySelector(hash), this cannot
    // throw when the fragment is not a valid CSS selector (e.g. "#1-intro").
    const target = document.getElementById(hash.slice(1));
    if (!target) return;

    event.preventDefault();
    // Keep the fragment in the address bar and in the session history.
    history.pushState(null, "", hash);

    // Do not force an animated scroll on users who asked for reduced motion;
    // "auto" defers to the page's computed CSS scroll-behavior.
    const reduceMotion = window.matchMedia("(prefers-reduced-motion: reduce)").matches;
    window.scrollTo({
      top: targetTop(target),
      behavior: reduceMotion ? "auto" : "smooth"
    });
  });
});
// Wire every element carrying data-copy-target: on click, copy the trimmed
// text content of the element whose id is named by that attribute, then show
// a temporary status label on the button.
document.querySelectorAll("[data-copy-target]").forEach((button) => {
  const statusDuration = 1800;
  // Capture the idle label once, at bind time. Reading it inside the click
  // handler would pick up "Copied" / "Copy failed" on a rapid second click
  // and later "restore" the button to that transient label.
  const idleLabel = button.textContent;
  let resetTimer = 0;

  // Show a transient label, then revert to the idle label and state.
  const showStatus = (label, copied) => {
    // Cancel any pending reset so an older timer cannot cut this status short.
    window.clearTimeout(resetTimer);
    button.textContent = label;
    button.classList.toggle("copied", copied);
    resetTimer = window.setTimeout(() => {
      button.textContent = idleLabel;
      button.classList.remove("copied");
    }, statusDuration);
  };

  button.addEventListener("click", async () => {
    const target = document.getElementById(button.dataset.copyTarget);
    if (!target) return;

    let succeeded = true;
    try {
      await copyText(target.textContent.trim());
    } catch {
      succeeded = false;
    }
    showStatus(succeeded ? "Copied" : "Copy failed", succeeded);
  });
});
})();
</script>
</body>
</html>