Codestin Search App

executable file
485 lines (431 loc) · 17.1 KB
#!/usr/bin/env bash
set -euo pipefail
# Create `models:*` GitHub labels for Kibana.
#
# Usage:
#   ./scripts/create_models_labels.sh models:all \
#     models:llm-gateway/gpt-5.1 \
#     models:llm-gateway/gpt-5.1-chat
#
# Or pass raw model group names (the script prefixes `models:` automatically):
#   ./scripts/create_models_labels.sh llm-gateway/gpt-5.1 llm-gateway/gpt-5.1-chat ...
#
# Generate labels from discovery artifacts:
#   ./scripts/create_models_labels.sh --repo elastic/kibana \
#     --from-litellm-connectors-json /tmp/litellm_connectors.json \
#     --from-eis-models-json target/eis_models.json \
#     --judge litellm-llm-gateway-gpt-4o

#######################################
# Print the command-line help text.
# NOTE(review): the function header, the "Usage:/Options:/Notes:" headings and
# the heredoc terminator were not present in the reviewed text and have been
# restored; confirm the headings against the upstream script.
# Arguments: none
# Outputs:   help text on stderr (nothing on stdout)
#######################################
usage() {
  cat >&2 <<'EOF'
Usage:
  ./scripts/create_models_labels.sh [--repo <owner/repo>] [--judge <connector-id> ...] [labels...]

Options:
  --update-all-labels                   Update all model + judge labels (LiteLLM + EIS) using default discovery sources
  --repo <owner/repo>                   Target repo for gh commands (default: current)
  --from-litellm-connectors-json <path> Create labels from a LiteLLM connectors JSON map
  --from-litellm-vault-config [path]    Create LiteLLM model labels via LiteLLM discovery using kbn-evals vault config
                                      (default: x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json)
  --from-eis-models-json [path]         Create labels from target/eis_models.json (default: target/eis_models.json)
  --judge-from-eis-models-json [path]   Create judge labels for all EIS models in eis_models.json (as models:judge:eis/<modelId>)
                                      (default: target/eis_models.json)
  --judge-from-litellm-vault-config [path]
                                      Create judge labels for all LiteLLM models via LiteLLM discovery using kbn-evals vault config
                                      (as models:judge:<model-group>, e.g. models:judge:llm-gateway/gpt-5.1)
                                      (default: x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json)
  --judge <connector-id>                Create models:judge:<connector-id> (repeatable)
  --prune                                Mark stale models:* labels as deprecated (renamed to "deprecated:<name>")
  -h, --help                            Show help

Notes:
  - You can pass raw model groups (script will prefix models: automatically).
  - EIS model labels are created as: models:eis/<modelId>
  - Use --prune with discovery flags to deprecate labels for models no longer available.
EOF
}
# Label colours and description prefixes. Each one falls back to the literal
# default shown when the corresponding environment variable is unset or empty.
MODELS_COLOR="${MODELS_LABEL_COLOR:-505D26}"
JUDGE_COLOR="${MODELS_JUDGE_LABEL_COLOR:-5319E7}"
DESC_PREFIX="${MODELS_LABEL_DESCRIPTION_PREFIX:-Run LLM evals against model: }"
JUDGE_DESC_PREFIX="${MODELS_JUDGE_LABEL_DESCRIPTION_PREFIX:-Override LLM-as-a-judge connector for evals: }"

# Option state filled in by command-line parsing.
# REPO must be initialised here: the script runs under `set -u` and reads
# ${REPO} even when --repo was never passed.
REPO=""
FROM_LITELLM_CONNECTORS_JSON=""
FROM_LITELLM_VAULT_CONFIG=""
FROM_EIS_MODELS_JSON=""
JUDGE_FROM_EIS_MODELS_JSON=""
JUDGE_FROM_LITELLM_VAULT_CONFIG=""
UPDATE_ALL_LABELS="false"
PRUNE="false"
declare -a JUDGE_CONNECTOR_IDS=()
declare -a POSITIONAL=()

# Counters for summary reporting
CREATED_COUNT=0
UPDATED_COUNT=0
DEPRECATED_COUNT=0
SKIPPED_COUNT=0
# Fail fast when a required external tool is missing: `gh` performs every label
# operation and `node` parses the JSON inputs.
if ! command -v gh >/dev/null 2>&1; then
  echo "Error: 'gh' CLI is required." >&2
  exit 1
fi
if ! command -v node >/dev/null 2>&1; then
  echo "Error: 'node' is required." >&2
  exit 1
fi
# Abort with a clear message when a flag that needs a value is the last token.
# Previously this surfaced only as a silent `shift 2` failure under `set -e`.
# Arguments: $1 - the flag that is missing its value
parse_args_missing_value() {
  echo "Error: $1 requires a value." >&2
  exit 1
}

#######################################
# Parse the command line into the option globals.
# NOTE(review): the `--)` and `*)` patterns, the `else` branches and the
# `;;`/`esac`/`done` terminators were absent from the reviewed text and were
# restored from the surviving branch bodies.
# Globals (written): UPDATE_ALL_LABELS, PRUNE, REPO,
#   FROM_LITELLM_CONNECTORS_JSON, FROM_LITELLM_VAULT_CONFIG,
#   FROM_EIS_MODELS_JSON, JUDGE_FROM_EIS_MODELS_JSON,
#   JUDGE_FROM_LITELLM_VAULT_CONFIG, JUDGE_CONNECTOR_IDS, POSITIONAL
# Arguments: the script's own arguments
#######################################
parse_args() {
  local default_vault_config="x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json"
  local default_eis_models_json="target/eis_models.json"

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --update-all-labels)
        UPDATE_ALL_LABELS="true"
        shift 1
        ;;
      --prune)
        PRUNE="true"
        shift 1
        ;;
      --repo)
        [[ $# -ge 2 ]] || parse_args_missing_value "$1"
        REPO="$2"
        shift 2
        ;;
      --from-litellm-connectors-json)
        [[ $# -ge 2 ]] || parse_args_missing_value "$1"
        FROM_LITELLM_CONNECTORS_JSON="$2"
        shift 2
        ;;
      --from-litellm-vault-config)
        # Optional path argument. If the next token is absent or looks like another flag, use default.
        if [[ -n "${2:-}" && "${2:-}" != --* ]]; then
          FROM_LITELLM_VAULT_CONFIG="${2}"
          shift 2
        else
          FROM_LITELLM_VAULT_CONFIG="${default_vault_config}"
          shift 1
        fi
        ;;
      --from-eis-models-json)
        # Optional path argument. If the next token is absent or looks like another flag, use default.
        if [[ -n "${2:-}" && "${2:-}" != --* ]]; then
          FROM_EIS_MODELS_JSON="${2}"
          shift 2
        else
          FROM_EIS_MODELS_JSON="${default_eis_models_json}"
          shift 1
        fi
        ;;
      --judge)
        [[ $# -ge 2 ]] || parse_args_missing_value "$1"
        JUDGE_CONNECTOR_IDS+=("$2")
        shift 2
        ;;
      --judge-from-eis-models-json)
        # Optional path argument. If the next token is absent or looks like another flag, use default.
        if [[ -n "${2:-}" && "${2:-}" != --* ]]; then
          JUDGE_FROM_EIS_MODELS_JSON="${2}"
          shift 2
        else
          JUDGE_FROM_EIS_MODELS_JSON="${default_eis_models_json}"
          shift 1
        fi
        ;;
      --judge-from-litellm-vault-config)
        # Optional path argument. If the next token is absent or looks like another flag, use default.
        if [[ -n "${2:-}" && "${2:-}" != --* ]]; then
          JUDGE_FROM_LITELLM_VAULT_CONFIG="${2}"
          shift 2
        else
          JUDGE_FROM_LITELLM_VAULT_CONFIG="${default_vault_config}"
          shift 1
        fi
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      --)
        # Everything after a bare `--` is a label, even if it starts with dashes.
        shift
        POSITIONAL+=("$@")
        break
        ;;
      *)
        POSITIONAL+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# --update-all-labels switches on every discovery source. A path that was
# already given explicitly is kept; only empty settings receive the default.
if [[ "${UPDATE_ALL_LABELS}" == "true" ]]; then
  # LiteLLM (from vault config)
  FROM_LITELLM_VAULT_CONFIG="${FROM_LITELLM_VAULT_CONFIG:-x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json}"
  JUDGE_FROM_LITELLM_VAULT_CONFIG="${JUDGE_FROM_LITELLM_VAULT_CONFIG:-x-pack/platform/packages/shared/kbn-evals/scripts/vault/config.json}"
  # EIS (from discovery artifact)
  FROM_EIS_MODELS_JSON="${FROM_EIS_MODELS_JSON:-target/eis_models.json}"
  JUDGE_FROM_EIS_MODELS_JSON="${JUDGE_FROM_EIS_MODELS_JSON:-target/eis_models.json}"
fi

# Extra arguments forwarded to every `gh label` call; empty means gh's default repo.
# ${REPO:-} keeps this safe under `set -u` when --repo was not given.
GH_REPO_ARGS=()
if [[ -n "${REPO:-}" ]]; then
  GH_REPO_ARGS+=(--repo "${REPO}")
fi

# When --prune is active, track all labels created/updated so we can deprecate stale ones.
CREATED_LABELS_FILE=""
if [[ "${PRUNE}" == "true" ]]; then
  CREATED_LABELS_FILE="$(mktemp)"
  # Remove the tracking file on every exit path.
  trap 'rm -f "${CREATED_LABELS_FILE:-}"' EXIT
fi
#######################################
# Create a GitHub label, or update it when it already exists.
# Globals:
#   GH_REPO_ARGS (read)         extra arguments passed to every gh call
#   CREATED_LABELS_FILE (read)  when non-empty, each created/updated name is appended
#   UPDATED_COUNT, CREATED_COUNT, SKIPPED_COUNT (written)
# Arguments:
#   $1 - label name, $2 - description, $3 - colour (hex, no '#')
# Outputs:
#   "updated: <name>" / "created: <name>" on stdout; skips and failures on stderr
# Returns:
#   1 for an empty or prefix-only name, 0 otherwise (including skipped labels).
#   NOTE(review): the reviewed text did not show how the invalid-name branch
#   ends; a non-zero return aborts the script under `set -e` at every call site.
#######################################
create_or_update_label() {
  local name="$1"
  local description="$2"
  local color="$3"

  if [[ -z "${name}" || "${name}" == "models:" || "${name}" == "models:eis/" || "${name}" == "models:judge:" ]]; then
    echo "Error: refusing to create an invalid label name: '${name}'" >&2
    return 1
  fi

  # GitHub label names are limited to 50 characters.
  if [[ "${#name}" -gt 50 ]]; then
    echo "skipped: $name (${#name} chars exceeds GitHub's 50-char limit)" >&2
    SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
    return 0
  fi

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a plain
  # "${arr[@]}" is an unbound-variable error under `set -u` on bash < 4.4.
  # Prefer edit-first so we can update labels idempotently without relying on parsing "already exists" errors.
  if gh label edit ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$name" --description "$description" --color "$color" >/dev/null 2>&1; then
    echo "updated: $name"
    UPDATED_COUNT=$((UPDATED_COUNT + 1))
    if [[ -n "${CREATED_LABELS_FILE:-}" ]]; then
      printf '%s\n' "$name" >>"$CREATED_LABELS_FILE"
    fi
    return 0
  fi

  if gh label create ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$name" --description "$description" --color "$color" >/dev/null 2>&1; then
    echo "created: $name"
    CREATED_COUNT=$((CREATED_COUNT + 1))
    if [[ -n "${CREATED_LABELS_FILE:-}" ]]; then
      printf '%s\n' "$name" >>"$CREATED_LABELS_FILE"
    fi
    return 0
  fi

  # Both attempts failed. gh's own output is discarded above, so only the
  # label name can be reported. Counted as skipped; the run continues.
  echo "Warning: failed to create or update label: $name" >&2
  SKIPPED_COUNT=$((SKIPPED_COUNT + 1))
  return 0
}
# Work out whether the invocation asked for anything at all: a discovery
# source, an explicit --judge id, or at least one positional label.
HAS_INPUTS="false"
if [[ -n "${FROM_LITELLM_CONNECTORS_JSON:-}" ]] || [[ -n "${FROM_LITELLM_VAULT_CONFIG:-}" ]] || [[ -n "${FROM_EIS_MODELS_JSON:-}" ]] || [[ -n "${JUDGE_FROM_EIS_MODELS_JSON:-}" ]] || [[ -n "${JUDGE_FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  HAS_INPUTS="true"
fi
if [[ "${#JUDGE_CONNECTOR_IDS[@]}" -gt 0 ]] || [[ "${#POSITIONAL[@]}" -gt 0 ]]; then
  HAS_INPUTS="true"
fi

# With nothing to do (no inputs and no --prune) there is no work to perform.
# NOTE(review): the body of this branch was not present in the reviewed text;
# printing the help and exiting non-zero is the assumed behaviour - confirm.
if [[ "${HAS_INPUTS}" != "true" && "${PRUNE}" != "true" ]]; then
  usage
  exit 1
fi

# Static group labels — curated model sets that expand to multiple models in eval_pipeline.ts.
# Keep in sync with MODEL_GROUP_ALIASES in .buildkite/pipelines/evals/eval_pipeline.ts.
create_or_update_label "models:weekly-eis-models" "Run evals against the weekly EIS model set (see eval_pipeline.ts)" "$MODELS_COLOR"
#######################################
# Run the LiteLLM connector generator with the settings stored in a kbn-evals
# vault config file and print the generator's JSON on stdout.
# Arguments:
#   $1 - path to the vault config JSON (reads litellm.baseUrl, litellm.teamId,
#        litellm.virtualKey)
# Outputs:
#   whatever generate_litellm_connectors.js prints with `--format json`
# Returns:
#   1 when the file is missing or baseUrl/virtualKey are empty; otherwise the
#   generator's exit status.
#######################################
generate_litellm_connectors_json_from_vault_config() {
  local cfg_path="$1"
  if [[ ! -f "${cfg_path}" ]]; then
    echo "Error: missing file: ${cfg_path}" >&2
    return 1
  fi

  # Read required fields from the config using Node (no jq dependency).
  # The three values are joined with the ASCII unit separator (0x1f). A tab
  # must not be used here: tab is IFS *whitespace*, so `read` collapses the
  # consecutive tabs produced by an empty teamId and shifts the virtual key
  # into team_id, which then looks like a missing key.
  local litellm_fields
  litellm_fields="$(
    node - <<'NODE' "${cfg_path}"
const { readFileSync } = require('fs');
const { resolve } = require('path');
const cfgPath = process.argv[2];
const cfg = JSON.parse(readFileSync(resolve(cfgPath), 'utf8'));
const litellm = cfg && cfg.litellm ? cfg.litellm : {};
const baseUrl = litellm.baseUrl || '';
const teamId = litellm.teamId || '';
const virtualKey = litellm.virtualKey || '';
process.stdout.write([baseUrl, teamId, virtualKey].join('\x1f'));
NODE
  )" || return 1

  local base_url team_id virtual_key
  IFS=$'\x1f' read -r base_url team_id virtual_key <<<"${litellm_fields}"

  if [[ -z "${base_url}" || -z "${virtual_key}" ]]; then
    echo "Error: missing litellm.baseUrl or litellm.virtualKey in ${cfg_path}" >&2
    return 1
  fi

  # Do not echo the key. Pass it directly to the generator script.
  # NOTE(review): the key still travels in argv and is visible to `ps`; the
  # generator's interface is outside this file, so this is flagged, not changed.
  local team_args=()
  if [[ -n "${team_id}" ]]; then
    team_args+=(--team-id "${team_id}")
  fi

  # ${arr[@]+"${arr[@]}"} keeps an empty team_args from tripping `set -u` on bash < 4.4.
  node x-pack/platform/packages/shared/kbn-evals/scripts/ci/generate_litellm_connectors.js \
    --base-url "${base_url}" \
    ${team_args[@]+"${team_args[@]}"} \
    --api-key "${virtual_key}" \
    --format json
}
#######################################
# Create one `models:<group>` label per distinct `config.defaultModel` found in
# a LiteLLM connectors JSON map.
# Globals:   DESC_PREFIX, MODELS_COLOR (read)
# Arguments: $1 - path to the connectors JSON file
# Returns:   1 when the file is missing or Node fails to read/parse it
#######################################
create_labels_from_litellm_connectors_json() {
  local json_path="$1"
  if [[ ! -f "${json_path}" ]]; then
    echo "Error: missing file: ${json_path}" >&2
    return 1
  fi

  # Capture Node's output first instead of reading from a process substitution:
  #  - Node writes no trailing newline, and `read` reports EOF on an
  #    unterminated last line, so the loop used to drop the final model group;
  #    the here-string below always ends with a newline.
  #  - a Node failure (bad JSON, unreadable file) is now visible to the caller.
  local model_groups
  model_groups="$(
    node - <<'NODE' "${json_path}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const models = new Set();
for (const connector of Object.values(obj)) {
  const m = connector && connector.config && connector.config.defaultModel;
  if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\n'));
NODE
  )" || return 1

  local model_group
  while IFS= read -r model_group; do
    if [[ -z "$model_group" ]]; then
      continue
    fi
    create_or_update_label "models:${model_group}" "${DESC_PREFIX}${model_group}" "$MODELS_COLOR"
  done <<<"${model_groups}"
}

if [[ -n "${FROM_LITELLM_CONNECTORS_JSON:-}" ]]; then
  create_labels_from_litellm_connectors_json "${FROM_LITELLM_CONNECTORS_JSON}"
fi
#######################################
# Discover LiteLLM connectors via the vault config and create one
# `models:<group>` label per distinct `config.defaultModel`.
# Globals:   DESC_PREFIX, MODELS_COLOR (read)
# Arguments: $1 - path to the kbn-evals vault config JSON
# Returns:   1 when discovery or JSON parsing fails
#######################################
create_labels_from_litellm_vault_config() {
  local cfg_path="$1"

  # The connectors JSON arrives on stdin (fd 0), so the Node program is passed
  # with -e. Inside the double quotes `\\n` reaches Node as `\n`.
  local litellm_model_groups
  litellm_model_groups="$(
    generate_litellm_connectors_json_from_vault_config "${cfg_path}" | node -e "
const fs = require('fs');
const obj = JSON.parse(fs.readFileSync(0, 'utf8'));
const models = new Set();
for (const connector of Object.values(obj)) {
  const m = connector && connector.config && connector.config.defaultModel;
  if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\\n'));
"
  )" || return 1

  local model_group
  while IFS= read -r model_group; do
    if [[ -z "$model_group" ]]; then
      continue
    fi
    create_or_update_label "models:${model_group}" "${DESC_PREFIX}${model_group}" "$MODELS_COLOR"
  done <<<"${litellm_model_groups}"
}

if [[ -n "${FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  create_labels_from_litellm_vault_config "${FROM_LITELLM_VAULT_CONFIG}"
fi
#######################################
# Create one `models:eis/<modelId>` label per distinct `modelId` in the
# `models` array of an EIS models JSON file.
# Globals:   DESC_PREFIX, MODELS_COLOR (read)
# Arguments: $1 - path to the EIS models JSON file
# Returns:   1 when the file is missing or Node fails to read/parse it
#######################################
create_labels_from_eis_models_json() {
  local json_path="$1"
  if [[ ! -f "${json_path}" ]]; then
    echo "Error: missing file: ${json_path}" >&2
    return 1
  fi

  # Capture first, then loop over a here-string: Node emits no trailing newline
  # and reading straight from a process substitution silently dropped the last
  # model id; it also hid Node failures from the caller.
  local model_ids
  model_ids="$(
    node - <<'NODE' "${json_path}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const models = new Set();
const list = Array.isArray(obj.models) ? obj.models : [];
for (const entry of list) {
  const id = entry && entry.modelId;
  if (typeof id === 'string' && id.trim()) models.add(id.trim());
}
process.stdout.write([...models].sort().join('\n'));
NODE
  )" || return 1

  local model_id
  while IFS= read -r model_id; do
    if [[ -z "$model_id" ]]; then
      continue
    fi
    create_or_update_label "models:eis/${model_id}" "${DESC_PREFIX}eis/${model_id}" "$MODELS_COLOR"
  done <<<"${model_ids}"
}

if [[ -n "${FROM_EIS_MODELS_JSON:-}" ]]; then
  create_labels_from_eis_models_json "${FROM_EIS_MODELS_JSON}"
fi
#######################################
# Discover LiteLLM connectors via the vault config and create one
# `models:judge:<group>` label per connector. The group is taken from the
# connector name ("LiteLLM <group> (via ...)") when it has that shape, and from
# `config.defaultModel` otherwise.
# Globals:   JUDGE_DESC_PREFIX, JUDGE_COLOR (read)
# Arguments: $1 - path to the kbn-evals vault config JSON
# Returns:   1 when discovery or JSON parsing fails
#######################################
create_judge_labels_from_litellm_vault_config() {
  local cfg_path="$1"

  # The connectors JSON arrives on stdin (fd 0), so the Node program is passed
  # with -e. Inside the double quotes `\\n` and `\\(` reach Node as `\n` and `\(`.
  local litellm_connector_ids
  litellm_connector_ids="$(
    generate_litellm_connectors_json_from_vault_config "${cfg_path}" | node -e "
const fs = require('fs');
const obj = JSON.parse(fs.readFileSync(0, 'utf8'));
const models = new Set();
for (const connector of Object.values(obj)) {
  // Prefer the original model group from the connector name (e.g. 'LiteLLM llm-gateway/gpt-5.1-chat (via ...)').
  const name = connector && connector.name;
  if (typeof name === 'string' && name.startsWith('LiteLLM ')) {
    const raw = name.slice('LiteLLM '.length);
    const group = raw.replace(/ \\(via .*\\)$/, '').trim();
    if (group) models.add(group);
    continue;
  }
  // Fallback: use the request model (defaultModel).
  const m = connector && connector.config && connector.config.defaultModel;
  if (typeof m === 'string' && m.trim()) models.add(m.trim());
}
process.stdout.write([...models].sort().join('\\n'));
"
  )" || return 1

  local connector_id
  while IFS= read -r connector_id; do
    if [[ -z "$connector_id" ]]; then
      continue
    fi
    create_or_update_label "models:judge:${connector_id}" "${JUDGE_DESC_PREFIX}${connector_id}" "$JUDGE_COLOR"
  done <<<"${litellm_connector_ids}"
}

if [[ -n "${JUDGE_FROM_LITELLM_VAULT_CONFIG:-}" ]]; then
  create_judge_labels_from_litellm_vault_config "${JUDGE_FROM_LITELLM_VAULT_CONFIG}"
fi
#######################################
# Create one `models:judge:eis/<modelId>` label per distinct `modelId` in the
# `models` array of an EIS models JSON file.
# Globals:   JUDGE_DESC_PREFIX, JUDGE_COLOR (read)
# Arguments: $1 - path to the EIS models JSON file
# Returns:   1 when the file is missing or Node fails to read/parse it
#######################################
create_judge_labels_from_eis_models_json() {
  local json_path="$1"
  if [[ ! -f "${json_path}" ]]; then
    echo "Error: missing file: ${json_path}" >&2
    return 1
  fi

  # Capture first, then loop over a here-string: Node emits no trailing newline
  # and reading straight from a process substitution silently dropped the last
  # model id; it also hid Node failures from the caller.
  local model_ids
  model_ids="$(
    node - <<'NODE' "${json_path}"
const fs = require('fs');
const filePath = process.argv[2];
const raw = fs.readFileSync(filePath, 'utf8');
const obj = JSON.parse(raw);
const list = Array.isArray(obj.models) ? obj.models : [];
const ids = new Set();
for (const entry of list) {
  const modelId = entry && entry.modelId;
  if (typeof modelId !== 'string' || !modelId.trim()) continue;
  ids.add(modelId.trim());
}
process.stdout.write([...ids].sort().join('\n'));
NODE
  )" || return 1

  local model_id
  while IFS= read -r model_id; do
    if [[ -z "$model_id" ]]; then
      continue
    fi
    create_or_update_label "models:judge:eis/${model_id}" "${JUDGE_DESC_PREFIX}eis/${model_id}" "$JUDGE_COLOR"
  done <<<"${model_ids}"
}

if [[ -n "${JUDGE_FROM_EIS_MODELS_JSON:-}" ]]; then
  create_judge_labels_from_eis_models_json "${JUDGE_FROM_EIS_MODELS_JSON}"
fi
#######################################
# Create a `models:judge:<id>` label for every connector id given with --judge.
# Globals:   JUDGE_DESC_PREFIX, JUDGE_COLOR (read)
# Arguments: zero or more connector ids; empty ids are ignored
#######################################
create_judge_labels() {
  local judge_id
  for judge_id in "$@"; do
    if [[ -z "$judge_id" ]]; then
      continue
    fi
    create_or_update_label "models:judge:${judge_id}" "${JUDGE_DESC_PREFIX}${judge_id}" "$JUDGE_COLOR"
  done
}

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array, which keeps
# `set -u` from aborting on bash < 4.4.
create_judge_labels ${JUDGE_CONNECTOR_IDS[@]+"${JUDGE_CONNECTOR_IDS[@]}"}
#######################################
# Create a label for each positional argument. A name without the `models:`
# prefix gets it added; a `models:judge:*` name is created as a judge label
# (judge colour and description), everything else as a model label.
# Globals:   DESC_PREFIX, MODELS_COLOR, JUDGE_DESC_PREFIX, JUDGE_COLOR (read)
# Arguments: zero or more label names or raw model groups
#######################################
create_labels_for_args() {
  local arg label
  for arg in "$@"; do
    label="$arg"
    if [[ "$label" != models:* ]]; then
      label="models:${label}"
    fi

    if [[ "$label" == models:judge:* ]]; then
      create_or_update_label "$label" "${JUDGE_DESC_PREFIX}${label#models:judge:}" "$JUDGE_COLOR"
    else
      create_or_update_label "$label" "${DESC_PREFIX}${label#models:}" "$MODELS_COLOR"
    fi
  done
}

# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array, which keeps
# `set -u` from aborting on bash < 4.4.
create_labels_for_args ${POSITIONAL[@]+"${POSITIONAL[@]}"}
# --- Deprecation of stale labels ---
DEPRECATED_COLOR="CCCCCC"

#######################################
# Rename every `models:*` label in the repo that this run did not create or
# update to `deprecated:<name>`.
# Globals:
#   CREATED_LABELS_FILE (read)  one created/updated label name per line
#   GH_REPO_ARGS (read)         extra arguments passed to every gh call
#   DEPRECATED_COLOR (read), DEPRECATED_COUNT (written)
# Outputs:
#   progress on stdout, warnings on stderr
# Returns:
#   0; a failed rename is reported as a warning and does not stop the loop.
# NOTE(review): only the first 500 search results are considered (--limit 500),
# and a failing `gh label list` is indistinguishable from "no labels" because
# of the `|| true` that tolerates grep finding nothing.
#######################################
prune_stale_labels() {
  local existing_labels already_deprecated sorted_created stale_labels
  local stale_label deprecated_name

  if [[ ! -s "${CREATED_LABELS_FILE}" ]]; then
    echo ""
    echo "Warning: --prune was set but no labels were created/updated; skipping deprecation to avoid marking all labels stale." >&2
    return 0
  fi

  echo ""
  echo "--- Checking for stale models:* labels to deprecate"

  # ${arr[@]+"${arr[@]}"} keeps an empty GH_REPO_ARGS from tripping `set -u` on bash < 4.4.
  # Fetch all existing models:* labels from the repo (excluding already-deprecated ones).
  existing_labels="$(gh label list ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} --search "models:" --limit 500 --json name --jq '.[].name' \
    | grep -E '^models:' \
    | sort -u || true)"

  # Also fetch deprecated:models:* labels so we don't re-deprecate them.
  already_deprecated="$(gh label list ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} --search "deprecated:models:" --limit 500 --json name --jq '.[].name' \
    | grep -E '^deprecated:models:' \
    | sed 's/^deprecated://' \
    | sort -u || true)"

  # Exclude labels that already have a deprecated: counterpart.
  if [[ -n "${already_deprecated}" ]]; then
    existing_labels="$(comm -23 <(echo "${existing_labels}") <(echo "${already_deprecated}") || true)"
  fi

  if [[ -z "${existing_labels}" ]]; then
    echo "No existing models:* labels found; nothing to deprecate."
    return 0
  fi

  sorted_created="$(sort -u "${CREATED_LABELS_FILE}")"
  # Set difference: existing minus created/updated = stale
  stale_labels="$(comm -23 <(echo "${existing_labels}") <(echo "${sorted_created}") || true)"

  if [[ -z "${stale_labels}" ]]; then
    echo "No stale labels found."
    return 0
  fi

  while IFS= read -r stale_label; do
    if [[ -z "$stale_label" ]]; then
      continue
    fi
    deprecated_name="deprecated:${stale_label}"
    if gh label edit ${GH_REPO_ARGS[@]+"${GH_REPO_ARGS[@]}"} "$stale_label" --name "$deprecated_name" --description "DEPRECATED - model no longer available" --color "$DEPRECATED_COLOR" >/dev/null 2>&1; then
      echo "deprecated: $stale_label -> $deprecated_name"
      DEPRECATED_COUNT=$((DEPRECATED_COUNT + 1))
    else
      echo "Warning: failed to deprecate label: $stale_label" >&2
    fi
  done <<<"${stale_labels}"
}

if [[ "${PRUNE}" == "true" && -n "${CREATED_LABELS_FILE:-}" ]]; then
  prune_stale_labels
fi

# --- Summary ---
echo "Summary: created=${CREATED_COUNT} updated=${UPDATED_COUNT} deprecated=${DEPRECATED_COUNT} skipped=${SKIPPED_COUNT}"
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

create_models_labels.sh

Latest commit

History

create_models_labels.sh

File metadata and controls