-
Notifications
You must be signed in to change notification settings - Fork 8.6k
Expand file tree
/
Copy pathcreate_models_labels.sh
More file actions
executable file
·485 lines (431 loc) · 17.1 KB
/
Copy pathcreate_models_labels.sh
File metadata and controls
executable file
·485 lines (431 loc) · 17.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
#!/usr/bin/env bash
set -euo pipefail
# Create `models:*` GitHub labels for Kibana.
#
# Usage:
# ./scripts/create_models_labels.sh models:all \
# models:llm-gateway/gpt-5.1 \
# models:llm-gateway/gpt-5.1-chat
#
# Or pass raw model group names (it will prefix `models:` automatically):
# ./scripts/create_models_labels.sh llm-gateway/gpt-5.1 llm-gateway/gpt-5.1-chat ...
#
# Generate labels from discovery artifacts:
# ./scripts/create_models_labels.sh --repo elastic/kibana \
# --from-litellm-connectors-json /tmp/litellm_connectors.json \
# --from-eis-models-json target/eis_models.json \
# --judge litellm-llm-gateway-gpt-4o
# Print the command-line help (synopsis, options, notes) on stderr.
# Takes no arguments; callers decide the exit status.
usage() {
  cat <<'EOF' 1>&2
Usage:
./scripts/create_models_labels.sh [--repo <owner/repo>] [--judge <connector-id> ...] [labels...]
Options:
--update-all-labels Update all model + judge labels (LiteLLM + EIS) using default discovery sources
--repo <owner/repo> Target repo for gh commands (default: current)
--from-litellm-connectors-json <path> Create labels from a LiteLLM connectors JSON map
--from-litellm-vault-config [path] Create LiteLLM model labels via LiteLLM discovery using kbn-evals vault config
(default: x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json)
--from-eis-models-json [path] Create labels from target/eis_models.json (default: target/eis_models.json)
--judge-from-eis-models-json [path] Create judge labels for all EIS models in eis_models.json (as models:judge:eis/<modelId>)
(default: target/eis_models.json)
--judge-from-litellm-vault-config [path]
Create judge labels for all LiteLLM models via LiteLLM discovery using kbn-evals vault config
(as models:judge:<model-group>, e.g. models:judge:llm-gateway/gpt-5.1)
(default: x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json)
--judge <connector-id> Create models:judge:<connector-id> (repeatable)
--prune Mark stale models:* labels as deprecated (renamed to "deprecated:<name>")
-h, --help Show help
Notes:
- You can pass raw model groups (script will prefix models: automatically).
- EIS model labels are created as: models:eis/<modelId>
- Use --prune with discovery flags to deprecate labels for models no longer available.
EOF
}
# Label appearance; each value can be overridden through its environment variable.
MODELS_COLOR="${MODELS_LABEL_COLOR:-505D26}"
DESC_PREFIX="${MODELS_LABEL_DESCRIPTION_PREFIX:-Run LLM evals against model: }"
JUDGE_COLOR="${MODELS_JUDGE_LABEL_COLOR:-5319E7}"
JUDGE_DESC_PREFIX="${MODELS_JUDGE_LABEL_DESCRIPTION_PREFIX:-Override LLM-as-a-judge connector for evals: }"

# Option state filled in by the command-line parser; an empty string means "not requested".
REPO=""
UPDATE_ALL_LABELS="false"
PRUNE="false"
FROM_LITELLM_CONNECTORS_JSON=""
FROM_LITELLM_VAULT_CONFIG=""
FROM_EIS_MODELS_JSON=""
JUDGE_FROM_LITELLM_VAULT_CONFIG=""
JUDGE_FROM_EIS_MODELS_JSON=""
JUDGE_CONNECTOR_IDS=()
POSITIONAL=()

# Running totals printed in the final summary line.
CREATED_COUNT=0 UPDATED_COUNT=0 DEPRECATED_COUNT=0 SKIPPED_COUNT=0
# Fail fast when a required external tool is not on PATH.
command -v gh >/dev/null 2>&1 || {
  echo "Error: 'gh' CLI is required." >&2
  exit 1
}
command -v node >/dev/null 2>&1 || {
  echo "Error: 'node' is required." >&2
  exit 1
}
#######################################
# Parse the command line into the option globals.
# Globals (written): UPDATE_ALL_LABELS, PRUNE, REPO,
#   FROM_LITELLM_CONNECTORS_JSON, FROM_LITELLM_VAULT_CONFIG,
#   FROM_EIS_MODELS_JSON, JUDGE_FROM_EIS_MODELS_JSON,
#   JUDGE_FROM_LITELLM_VAULT_CONFIG, JUDGE_CONNECTOR_IDS, POSITIONAL
# Arguments: the script's "$@"
# Exits: 0 after printing help; 1 on a flag that is missing its required
#   value or on an unrecognised `--option`.
#######################################
parse_cli_args() {
  local default_vault_config="x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json"
  local default_eis_models_json="target/eis_models.json"
  local flag value

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --update-all-labels)
        UPDATE_ALL_LABELS="true"
        shift 1
        ;;
      --prune)
        PRUNE="true"
        shift 1
        ;;
      --repo | --from-litellm-connectors-json | --judge)
        # These flags require a value. Without this check `shift 2` fails when
        # the flag is the last token and `set -e` kills the script silently.
        if [[ $# -lt 2 ]]; then
          echo "Error: option '$1' requires a value (use --help for usage)." >&2
          exit 1
        fi
        case "$1" in
          --repo) REPO="$2" ;;
          --from-litellm-connectors-json) FROM_LITELLM_CONNECTORS_JSON="$2" ;;
          --judge) JUDGE_CONNECTOR_IDS+=("$2") ;;
        esac
        shift 2
        ;;
      --from-litellm-vault-config | --from-eis-models-json | \
        --judge-from-eis-models-json | --judge-from-litellm-vault-config)
        # Optional path argument. If the next token is absent or looks like
        # another flag, fall back to the flag's default path.
        flag="$1"
        if [[ -n "${2:-}" && "${2:-}" != --* ]]; then
          value="$2"
          shift 2
        else
          case "${flag}" in
            *-vault-config) value="${default_vault_config}" ;;
            *) value="${default_eis_models_json}" ;;
          esac
          shift 1
        fi
        case "${flag}" in
          --from-litellm-vault-config) FROM_LITELLM_VAULT_CONFIG="${value}" ;;
          --from-eis-models-json) FROM_EIS_MODELS_JSON="${value}" ;;
          --judge-from-eis-models-json) JUDGE_FROM_EIS_MODELS_JSON="${value}" ;;
          --judge-from-litellm-vault-config) JUDGE_FROM_LITELLM_VAULT_CONFIG="${value}" ;;
        esac
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      --)
        # Everything after `--` is taken verbatim as a label / model group.
        shift
        POSITIONAL+=("$@")
        break
        ;;
      --*)
        # A mistyped flag must not be turned into a `models:--typo` label.
        echo "Error: unknown option: $1 (use --help for usage)." >&2
        exit 1
        ;;
      *)
        POSITIONAL+=("$1")
        shift
        ;;
    esac
  done
}

parse_cli_args "$@"
# --update-all-labels: enable every discovery source that was not given
# explicitly, using its default path.
if [[ "${UPDATE_ALL_LABELS}" == "true" ]]; then
  # LiteLLM (from vault config)
  FROM_LITELLM_VAULT_CONFIG="${FROM_LITELLM_VAULT_CONFIG:-x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json}"
  JUDGE_FROM_LITELLM_VAULT_CONFIG="${JUDGE_FROM_LITELLM_VAULT_CONFIG:-x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json}"
  # EIS (from discovery artifact)
  FROM_EIS_MODELS_JSON="${FROM_EIS_MODELS_JSON:-target/eis_models.json}"
  JUDGE_FROM_EIS_MODELS_JSON="${JUDGE_FROM_EIS_MODELS_JSON:-target/eis_models.json}"
fi
# Extra arguments forwarded to every gh call; stays empty unless --repo was given.
GH_REPO_ARGS=()
[[ -z "${REPO}" ]] || GH_REPO_ARGS+=(--repo "${REPO}")
# When --prune is active, every label created/updated is appended to a temp
# file so stale ones can be deprecated later; the file is removed on exit.
CREATED_LABELS_FILE=""
case "${PRUNE}" in
  true)
    CREATED_LABELS_FILE="$(mktemp)"
    trap 'rm -f "${CREATED_LABELS_FILE:-}"' EXIT
    ;;
esac
#######################################
# Create a GitHub label, or update it when it already exists.
# Globals:
#   GH_REPO_ARGS (read)         - optional `--repo <owner/repo>` for gh
#   CREATED_LABELS_FILE (read)  - when non-empty, each created/updated label
#                                 name is appended to this file
#   CREATED_COUNT, UPDATED_COUNT, SKIPPED_COUNT (written)
# Arguments:
#   $1 - label name, $2 - description, $3 - hex colour without '#'
# Outputs: "updated: <name>" / "created: <name>" on stdout; skip and failure
#   notices on stderr.
# Returns: 0 in all non-fatal cases; exits 1 on an empty/prefix-only name.
#######################################
create_or_update_label() {
  local name="$1"
  local description="$2"
  local color="$3"
  local create_err=""

  if [[ -z "${name}" || "${name}" == "models:" || "${name}" == "models:eis/" || "${name}" == "models:judge:" ]]; then
    echo "Error: refusing to create an invalid label name: '${name}'" >&2
    exit 1
  fi
  # GitHub label names are limited to 50 characters.
  if [[ "${#name}" -gt 50 ]]; then
    echo "skipped: $name (${#name} chars exceeds GitHub's 50-char limit)" >&2
    SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
    return 0
  fi

  # Edit first so updates are idempotent without parsing "already exists" errors.
  # `${arr[@]+"${arr[@]}"}` expands to nothing for an empty array; a plain
  # "${arr[@]}" is a fatal "unbound variable" under `set -u` on bash < 4.4.
  if gh label edit ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$name" --description "$description" --color "$color" >/dev/null 2>&1; then
    echo "updated: $name"
    UPDATED_COUNT=$((UPDATED_COUNT + 1))
  elif create_err="$(gh label create ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$name" --description "$description" --color "$color" 2>&1 >/dev/null)"; then
    echo "created: $name"
    CREATED_COUNT=$((CREATED_COUNT + 1))
  else
    echo "Warning: failed to create or update label: $name" >&2
    # Surface gh's own message so the failure (auth, rate limit, ...) is diagnosable.
    if [[ -n "${create_err}" ]]; then
      echo "  gh: ${create_err}" >&2
    fi
    SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
    return 0
  fi

  if [[ -n "${CREATED_LABELS_FILE:-}" ]]; then
    printf '%s\n' "$name" >>"$CREATED_LABELS_FILE"
  fi
  return 0
}
# Work out whether the caller asked for anything at all: a discovery source,
# an explicit --judge id, or a positional label.
HAS_INPUTS="false"
if [[ -n "${FROM_LITELLM_CONNECTORS_JSON:-}${FROM_LITELLM_VAULT_CONFIG:-}${FROM_EIS_MODELS_JSON:-}${JUDGE_FROM_EIS_MODELS_JSON:-}${JUDGE_FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  HAS_INPUTS="true"
fi
if [[ "${#JUDGE_CONNECTOR_IDS[@]}" -gt 0 || "${#POSITIONAL[@]}" -gt 0 ]]; then
  HAS_INPUTS="true"
fi
# Nothing to do unless there are inputs or --prune was requested.
if ! [[ "${HAS_INPUTS}" == "true" || "${PRUNE}" == "true" ]]; then
  usage
  exit 1
fi
# Static group labels — curated model sets that expand to multiple models in eval_pipeline.ts.
# Keep in sync with MODEL_GROUP_ALIASES in .buildkite/pipelines/evals/eval_pipeline.ts.
create_or_update_label \
  "models:weekly-eis-models" \
  "Run evals against the weekly EIS model set (see eval_pipeline.ts)" \
  "$MODELS_COLOR"
#######################################
# Run LiteLLM connector discovery using credentials from a kbn-evals vault
# config file and print the generator's JSON output.
# Arguments:
#   $1 - path to the vault config JSON (reads litellm.baseUrl,
#        litellm.teamId [optional] and litellm.virtualKey)
# Outputs: whatever generate_litellm_connectors.js prints with --format json
# Exits: 1 when the file is missing or baseUrl/virtualKey is empty.
#######################################
generate_litellm_connectors_json_from_vault_config() {
  local cfg_path="$1"
  if [[ ! -f "${cfg_path}" ]]; then
    echo "Error: missing file: ${cfg_path}" >&2
    exit 1
  fi
  # Read required fields from the config using Node (no jq dependency).
  # Fields are joined with the ASCII unit separator (0x1F), not a tab: tab is
  # IFS *whitespace*, so `read` collapses consecutive tabs and an empty teamId
  # would shift virtualKey into the team_id slot.
  local litellm_fields
  litellm_fields="$(
    node - <<'NODE' "${cfg_path}"
const { readFileSync } = require('fs');
const { resolve } = require('path');
const cfgPath = process.argv[2];
const cfg = JSON.parse(readFileSync(resolve(cfgPath), 'utf8'));
const litellm = cfg && cfg.litellm ? cfg.litellm : {};
const baseUrl = litellm.baseUrl || '';
const teamId = litellm.teamId || '';
const virtualKey = litellm.virtualKey || '';
process.stdout.write([baseUrl, teamId, virtualKey].join('\x1f'));
NODE
  )"
  local base_url team_id virtual_key
  IFS=$'\x1f' read -r base_url team_id virtual_key <<<"${litellm_fields}"
  if [[ -z "${base_url}" || -z "${virtual_key}" ]]; then
    echo "Error: missing litellm.baseUrl or litellm.virtualKey in ${cfg_path}" >&2
    exit 1
  fi
  # Do not echo the key. Pass it directly to the generator script.
  # NOTE(review): the key is still visible in the process list via argv.
  local team_args=()
  if [[ -n "${team_id}" ]]; then
    team_args+=(--team-id "${team_id}")
  fi
  # `${arr[@]+...}` keeps the empty-array case safe under `set -u` on bash < 4.4.
  node x-pack/platform/packages/shared/kbn-evals/scripts/ci/generate_litellm_connectors.js \
    --base-url "${base_url}" \
    ${team_args[@]+"${team_args[@]}"} \
    --api-key "${virtual_key}" \
    --format json
}
#######################################
# Create one `models:<defaultModel>` label per distinct
# connector.config.defaultModel found in a LiteLLM connectors JSON map.
# Globals:   DESC_PREFIX, MODELS_COLOR (read)
# Arguments: $1 - path to the connectors JSON file
# Exits:     1 when the file does not exist.
#######################################
create_model_labels_from_connectors_json() {
  local connectors_json="$1"
  local model_group
  if [[ ! -f "${connectors_json}" ]]; then
    echo "Error: missing file: ${connectors_json}" >&2
    exit 1
  fi
  # The Node snippet writes no trailing newline, so `read` returns non-zero on
  # the last line while still filling the variable. The `|| [[ -n ... ]]`
  # keeps that final model from being silently dropped.
  while IFS= read -r model_group || [[ -n "${model_group}" ]]; do
    [[ -z "$model_group" ]] && continue
    create_or_update_label "models:${model_group}" "${DESC_PREFIX}${model_group}" "$MODELS_COLOR"
  done < <(
    node - <<'NODE' "${connectors_json}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const models = new Set();
for (const connector of Object.values(obj)) {
const m = connector && connector.config && connector.config.defaultModel;
if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\n'));
NODE
  )
  return 0
}

if [[ -n "${FROM_LITELLM_CONNECTORS_JSON:-}" ]]; then
  create_model_labels_from_connectors_json "${FROM_LITELLM_CONNECTORS_JSON}"
fi
# LiteLLM model labels from live discovery: run the generator with the vault
# config, reduce its JSON to the distinct defaultModel values, and create one
# `models:<group>` label for each.
if [[ -n "${FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  litellm_model_groups="$(
    generate_litellm_connectors_json_from_vault_config "${FROM_LITELLM_VAULT_CONFIG}" | node -e "
const fs = require('fs');
const obj = JSON.parse(fs.readFileSync(0, 'utf8'));
const models = new Set();
for (const connector of Object.values(obj)) {
const m = connector && connector.config && connector.config.defaultModel;
if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\\n'));
"
  )"
  while IFS= read -r model_group; do
    if [[ -n "$model_group" ]]; then
      create_or_update_label "models:${model_group}" "${DESC_PREFIX}${model_group}" "$MODELS_COLOR"
    fi
  done <<<"${litellm_model_groups}"
fi
#######################################
# Create one `models:eis/<modelId>` label per distinct models[].modelId in an
# EIS models JSON file.
# Globals:   DESC_PREFIX, MODELS_COLOR (read)
# Arguments: $1 - path to the EIS models JSON file
# Exits:     1 when the file does not exist.
#######################################
create_eis_model_labels() {
  local eis_models_json="$1"
  local model_id
  if [[ ! -f "${eis_models_json}" ]]; then
    echo "Error: missing file: ${eis_models_json}" >&2
    exit 1
  fi
  # The Node snippet writes no trailing newline, so `read` returns non-zero on
  # the last line while still filling the variable. The `|| [[ -n ... ]]`
  # keeps that final model from being silently dropped.
  while IFS= read -r model_id || [[ -n "${model_id}" ]]; do
    [[ -z "$model_id" ]] && continue
    create_or_update_label "models:eis/${model_id}" "${DESC_PREFIX}eis/${model_id}" "$MODELS_COLOR"
  done < <(
    node - <<'NODE' "${eis_models_json}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const models = new Set();
const list = Array.isArray(obj.models) ? obj.models : [];
for (const entry of list) {
const id = entry && entry.modelId;
if (typeof id === 'string' && id.trim()) models.add(id.trim());
}
process.stdout.write([...models].sort().join('\n'));
NODE
  )
  return 0
}

if [[ -n "${FROM_EIS_MODELS_JSON:-}" ]]; then
  create_eis_model_labels "${FROM_EIS_MODELS_JSON}"
fi
# LiteLLM judge labels from live discovery: derive the model group from each
# connector (its name when it starts with 'LiteLLM ', else its defaultModel)
# and create one `models:judge:<group>` label for each distinct value.
if [[ -n "${JUDGE_FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  litellm_connector_ids="$(
    generate_litellm_connectors_json_from_vault_config "${JUDGE_FROM_LITELLM_VAULT_CONFIG}" | node -e "
const fs = require('fs');
const obj = JSON.parse(fs.readFileSync(0, 'utf8'));
const models = new Set();
for (const connector of Object.values(obj)) {
// Prefer the original model group from the connector name (e.g. 'LiteLLM llm-gateway/gpt-5.1-chat (via ...)').
const name = connector && connector.name;
if (typeof name === 'string' && name.startsWith('LiteLLM ')) {
const raw = name.slice('LiteLLM '.length);
const group = raw.replace(/ \\(via .*\\)$/, '').trim();
if (group) models.add(group);
continue;
}
// Fallback: use the request model (defaultModel).
const m = connector && connector.config && connector.config.defaultModel;
if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\\n'));
"
  )"
  while IFS= read -r connector_id; do
    if [[ -n "$connector_id" ]]; then
      create_or_update_label "models:judge:${connector_id}" "${JUDGE_DESC_PREFIX}${connector_id}" "$JUDGE_COLOR"
    fi
  done <<<"${litellm_connector_ids}"
fi
#######################################
# Create one `models:judge:eis/<modelId>` label per distinct models[].modelId
# in an EIS models JSON file.
# Globals:   JUDGE_DESC_PREFIX, JUDGE_COLOR (read)
# Arguments: $1 - path to the EIS models JSON file
# Exits:     1 when the file does not exist.
#######################################
create_eis_judge_labels() {
  local eis_models_json="$1"
  local model_id
  if [[ ! -f "${eis_models_json}" ]]; then
    echo "Error: missing file: ${eis_models_json}" >&2
    exit 1
  fi
  # The Node snippet writes no trailing newline, so `read` returns non-zero on
  # the last line while still filling the variable. The `|| [[ -n ... ]]`
  # keeps that final model from being silently dropped.
  while IFS= read -r model_id || [[ -n "${model_id}" ]]; do
    [[ -z "$model_id" ]] && continue
    create_or_update_label "models:judge:eis/${model_id}" "${JUDGE_DESC_PREFIX}eis/${model_id}" "$JUDGE_COLOR"
  done < <(
    node - <<'NODE' "${eis_models_json}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const list = Array.isArray(obj.models) ? obj.models : [];
const ids = new Set();
for (const entry of list) {
const modelId = entry && entry.modelId;
if (typeof modelId !== 'string' || !modelId.trim()) continue;
ids.add(modelId.trim());
}
process.stdout.write([...ids].sort().join('\n'));
NODE
  )
  return 0
}

if [[ -n "${JUDGE_FROM_EIS_MODELS_JSON:-}" ]]; then
  create_eis_judge_labels "${JUDGE_FROM_EIS_MODELS_JSON}"
fi
# Explicit judge connector ids given with --judge. The `${arr[@]+...}` form
# expands to nothing when the array is empty.
for judge_id in ${JUDGE_CONNECTOR_IDS[@]+"${JUDGE_CONNECTOR_IDS[@]}"}; do
  if [[ -n "$judge_id" ]]; then
    create_or_update_label "models:judge:${judge_id}" "${JUDGE_DESC_PREFIX}${judge_id}" "$JUDGE_COLOR"
  fi
done
# Positional labels: add the `models:` prefix when it is missing, then pick
# the judge or the regular description/colour from the resulting name.
for arg in ${POSITIONAL[@]+"${POSITIONAL[@]}"}; do
  case "$arg" in
    models:*) label="$arg" ;;
    *) label="models:${arg}" ;;
  esac
  case "$label" in
    models:judge:*)
      create_or_update_label "$label" "${JUDGE_DESC_PREFIX}${label#models:judge:}" "$JUDGE_COLOR"
      ;;
    *)
      create_or_update_label "$label" "${DESC_PREFIX}${label#models:}" "$MODELS_COLOR"
      ;;
  esac
done
# --- Deprecation of stale labels ---
DEPRECATED_COLOR="CCCCCC"

#######################################
# Rename every existing `models:*` label that was not created/updated in this
# run to `deprecated:<name>`.
# Globals:
#   CREATED_LABELS_FILE (read) - one created/updated label name per line
#   GH_REPO_ARGS, DEPRECATED_COLOR (read)
#   DEPRECATED_COUNT (written)
# Outputs: progress on stdout, warnings on stderr.
# Returns: 0 always; individual rename failures are reported as warnings.
#######################################
deprecate_stale_labels() {
  # The static group label is created on every run, so its presence in the
  # tracking file says nothing about whether discovery produced any labels.
  # Keep in sync with the static group label this script always creates.
  local static_group_label="models:weekly-eis-models"
  local existing_labels already_deprecated sorted_created stale_labels
  local stale_label deprecated_name

  # Safety net: if nothing besides the static label was created/updated
  # (e.g. `--prune` alone, or discovery returned no models), every other
  # models:* label would look stale and be deprecated. Refuse to prune.
  if [[ ! -s "${CREATED_LABELS_FILE}" ]] ||
    ! grep -qvxF -- "${static_group_label}" "${CREATED_LABELS_FILE}"; then
    echo ""
    echo "Warning: --prune was set but no labels besides the static group label were created/updated; skipping deprecation to avoid marking all labels stale." >&2
    return 0
  fi

  echo ""
  echo "--- Checking for stale models:* labels to deprecate"
  # Fetch all existing models:* labels from the repo (excluding already-deprecated ones).
  # `${arr[@]+...}` keeps the empty-array case safe under `set -u` on bash < 4.4.
  existing_labels="$(gh label list ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} --search "models:" --limit 500 --json name --jq '.[].name' |
    grep -E '^models:' |
    sort -u || true)"
  # Also fetch deprecated:models:* labels so we don't re-deprecate them.
  already_deprecated="$(gh label list ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} --search "deprecated:models:" --limit 500 --json name --jq '.[].name' |
    grep -E '^deprecated:models:' |
    sed 's/^deprecated://' |
    sort -u || true)"
  # Exclude labels that already have a deprecated: counterpart.
  if [[ -n "${already_deprecated}" ]]; then
    existing_labels="$(comm -23 <(printf '%s\n' "${existing_labels}") <(printf '%s\n' "${already_deprecated}") || true)"
  fi
  if [[ -z "${existing_labels}" ]]; then
    echo "No existing models:* labels found; nothing to deprecate."
    return 0
  fi

  sorted_created="$(sort -u "${CREATED_LABELS_FILE}")"
  # Set difference: existing minus created/updated = stale
  stale_labels="$(comm -23 <(printf '%s\n' "${existing_labels}") <(printf '%s\n' "${sorted_created}") || true)"
  if [[ -z "${stale_labels}" ]]; then
    echo "No stale labels found."
    return 0
  fi

  while IFS= read -r stale_label; do
    [[ -z "$stale_label" ]] && continue
    deprecated_name="deprecated:${stale_label}"
    if gh label edit ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$stale_label" --name "$deprecated_name" --description "DEPRECATED - model no longer available" --color "$DEPRECATED_COLOR" >/dev/null 2>&1; then
      echo "deprecated: $stale_label -> $deprecated_name"
      DEPRECATED_COUNT=$((DEPRECATED_COUNT + 1))
    else
      echo "Warning: failed to deprecate label: $stale_label" >&2
    fi
  done <<<"${stale_labels}"
  return 0
}

if [[ "${PRUNE}" == "true" && -n "${CREATED_LABELS_FILE:-}" ]]; then
  deprecate_stale_labels
fi

# --- Summary ---
echo ""
echo "Summary: created=${CREATED_COUNT} updated=${UPDATED_COUNT} deprecated=${DEPRECATED_COUNT} skipped=${SKIPPED_COUNT}"