/*
 * Report stylesheet.
 *
 * Rule order is significant for the cascade, so sections below follow the
 * original source order: design tokens, base elements, page chrome, badges
 * and chips, tables, layout, conversation rendering, annotations, and the
 * collapsible result panel.
 */

/* ---- Design tokens -------------------------------------------------- */

:root {
  /* Text, borders and surfaces. */
  --ink: #1f2328;
  --muted: #656d76;
  --line: #d1d9e0;
  --bg: #ffffff;
  --panel: #f6f8fa;

  /* Status colours: each foreground has a matching tinted background. */
  --pass: #1a7f37;
  --pass-bg: #dafbe1;
  --fail: #cf222e;
  --fail-bg: #ffebe9;
  --safety: #9a6700;
  --safety-bg: #fff8c5;

  /* Link colour. */
  --accent: #0969da;
}

/* ---- Base elements -------------------------------------------------- */

/* Padding and borders count towards declared widths everywhere. */
* {
  box-sizing: border-box;
}

body {
  margin: 0;
  color: var(--ink);
  background: var(--bg);
  font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
}

/* Centred page column with extra bottom padding. */
.container {
  max-width: 1200px;
  margin: 0 auto;
  padding: 16px 24px 64px;
}

/* Links are underlined only on hover. */
a {
  color: var(--accent);
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

/* Inline code: small monospace text on the panel background. */
code {
  font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, monospace;
  font-size: 12px;
  background: var(--panel);
  padding: 1px 5px;
  border-radius: 4px;
}

/* Preformatted blocks scroll horizontally instead of wrapping. */
pre {
  font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, monospace;
  font-size: 12px;
  background: var(--panel);
  border: 1px solid var(--line);
  border-radius: 6px;
  padding: 10px;
  overflow-x: auto;
  margin: 6px 0 0;
}

h2 {
  margin: 24px 0 8px;
}

h3 {
  margin: 0 0 8px;
}

/* ---- Page chrome ---------------------------------------------------- */

.site-header {
  display: flex;
  align-items: baseline;
  gap: 12px;
  padding: 12px 0;
  border-bottom: 1px solid var(--line);
}

.site-title {
  font-size: 18px;
  font-weight: 600;
  color: var(--ink);
}

.site-subtitle,
.muted {
  color: var(--muted);
}

.error-note {
  color: var(--fail);
}

/* Cap the measure so long text stays readable. */
.rationale {
  max-width: 75ch;
}

/* ---- Badges and chips ----------------------------------------------- */

.badge {
  display: inline-block;
  font-size: 11px;
  font-weight: 600;
  padding: 1px 8px;
  border-radius: 10px;
  vertical-align: middle;
}

.badge-pass {
  color: var(--pass);
  background: var(--pass-bg);
}

.badge-fail {
  color: var(--fail);
  background: var(--fail-bg);
}

.badge-safety {
  color: var(--safety);
  background: var(--safety-bg);
}

/* Right and bottom margins space chips apart when they wrap. */
.chip {
  display: inline-block;
  margin: 0 4px 4px 0;
}

.chip-expect {
  color: var(--pass);
  background: var(--pass-bg);
}

.chip-forbid {
  color: var(--fail);
  background: var(--fail-bg);
}

/* ---- Tables --------------------------------------------------------- */

.data-table {
  border-collapse: collapse;
  width: 100%;
  margin: 8px 0;
}

.data-table th,
.data-table td {
  border: 1px solid var(--line);
  padding: 6px 10px;
  text-align: left;
  vertical-align: top;
}

.data-table th {
  background: var(--panel);
  font-weight: 600;
}

.notes-cell {
  max-width: 40ch;
}

/* Wrapper that lets its content scroll sideways. */
.matrix-scroll {
  overflow-x: auto;
}

/* Matrix cells: centred, tinted by outcome. */
.matrix td.cell-pass {
  background: var(--pass-bg);
  color: var(--pass);
  text-align: center;
  font-weight: 700;
}

.matrix td.cell-fail {
  background: var(--fail-bg);
  color: var(--fail);
  text-align: center;
  font-weight: 700;
}

.matrix td.cell-none {
  color: var(--muted);
  text-align: center;
}

/* ---- Layout --------------------------------------------------------- */

/* 3:2 grid; minmax(0, …) lets the tracks shrink below their content width. */
.two-col {
  display: grid;
  grid-template-columns: minmax(0, 3fr) minmax(0, 2fr);
  gap: 16px;
  align-items: start;
}

/* Collapse to a single column on narrow viewports. */
@media (max-width: 900px) {
  .two-col {
    grid-template-columns: 1fr;
  }
}

.panel {
  border: 1px solid var(--line);
  border-radius: 8px;
  padding: 14px 16px;
  margin: 10px 0;
  background: var(--bg);
}

/* Definition-list styling inside .criteria. */
.criteria dt {
  font-weight: 600;
  margin-top: 10px;
}

.criteria dd {
  margin: 4px 0 0;
}

.arg-checks {
  margin: 0;
  padding-left: 18px;
}

.arg-checks li {
  margin: 4px 0;
}

/* ---- Conversation --------------------------------------------------- */

/* Vertical stack with a fixed gap between children. */
.conversation {
  display: flex;
  flex-direction: column;
  gap: 10px;
}

.message-role {
  font-size: 11px;
  font-weight: 600;
  text-transform: uppercase;
  color: var(--muted);
  margin-bottom: 2px;
}

/* pre-wrap keeps whitespace and line breaks; anywhere breaks long tokens. */
.bubble {
  border: 1px solid var(--line);
  border-radius: 8px;
  padding: 8px 12px;
  white-space: pre-wrap;
  overflow-wrap: anywhere;
}

.bubble-user {
  background: #ddf4ff;
  border-color: #b6e3ff;
}

.bubble-assistant {
  background: var(--panel);
}

.bubble-empty {
  color: var(--muted);
  font-style: italic;
}

/* Dashed border distinguishes these boxes from the solid-bordered bubbles. */
.tool-call,
.tool-result {
  border: 1px dashed var(--line);
  border-radius: 8px;
  padding: 8px 12px;
  margin-top: 6px;
}

.tool-call-name {
  font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, monospace;
  font-size: 12px;
  font-weight: 600;
}

.json-block summary {
  cursor: pointer;
  color: var(--muted);
  font-size: 12px;
}

/* Height-capped; scrolls in both directions when content overflows. */
.json-block pre {
  max-height: 420px;
  overflow: auto;
}

/* ---- Annotations ---------------------------------------------------- */

/* Callout with a coloured left rule; only the right-hand corners are rounded. */
.annotation {
  border-left: 3px solid var(--safety);
  background: var(--safety-bg);
  border-radius: 0 6px 6px 0;
  padding: 6px 10px;
  margin: 8px 0 0;
  max-width: 75ch;
}

/* The label text is generated here in CSS rather than coming from markup. */
.annotation:before {
  content: "why it failed";
  display: block;
  font-size: 11px;
  font-weight: 600;
  text-transform: uppercase;
  color: var(--safety);
}

.model-reply {
  margin-top: 10px;
}

.reply-label {
  font-size: 11px;
  font-weight: 600;
  text-transform: uppercase;
  color: var(--muted);
}

/* ---- Collapsible result panel --------------------------------------- */

/*
 * NOTE(review): the [open] selector and the details-marker rule suggest
 * .result-panel is a <details> and .result-header its <summary>; confirm
 * against the markup.
 */
.result-panel .result-header {
  display: flex;
  align-items: baseline;
  gap: 10px;
  cursor: pointer;
  list-style: none;
}

/* Hide the built-in disclosure marker in WebKit-based engines. */
.result-panel .result-header::-webkit-details-marker {
  display: none;
}

/* Custom marker shown in place of the hidden one. */
.result-panel .result-header:before {
  content: "▸";
  color: var(--muted);
  font-size: 12px;
  align-self: center;
}

/* Swap the marker glyph while the panel carries the open attribute. */
.result-panel[open] .result-header:before {
  content: "▾";
}
