commit bb5c5779d205e4a0225c499dfbd88a2d76e67884 Author: iztaylor Date: Thu Jun 18 10:06:31 2026 -0400 chore: scaffold arcade-eval repo skeleton diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1df5561 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.env +.venv/ +results/* +!results/.gitkeep +__pycache__/ +*.pyc +.cursor/mcp.json diff --git a/config/.env.example b/config/.env.example new file mode 100644 index 0000000..2210876 --- /dev/null +++ b/config/.env.example @@ -0,0 +1,16 @@ +# Arcade eval credentials — copy to .env (git-ignored) and fill in. +# Load with: set -a && . ./.env && set +a + +ARCADE_API_BASE=https://api.arcade.st.dev +ARCADE_DASHBOARD=https://dashboard.arcade.st.dev + +# Project API key — minted in the dashboard (Phase 1, Task 1.1). Never commit the real value. +ARCADE_API_KEY= + +# Headless per-user identity — the vault is keyed on the user_id string you pass. +# One API key can present as many users. (Real Entra SSO login is a cat-2 concern.) +ARCADE_USER_A=user-a@servicetitan.com +ARCADE_USER_B=user-b@servicetitan.com + +# The exact header name(s) for API-key + user_id on MCP calls are CONFIRMED live in Task 1.1 +# and recorded in LIVE-POC.md — do not guess them in code. diff --git a/config/targets.yaml b/config/targets.yaml new file mode 100644 index 0000000..a4a53eb --- /dev/null +++ b/config/targets.yaml @@ -0,0 +1,20 @@ +# Shared, append-mostly. Live fixture identifiers recorded here as they're created. +# Coordinate edits with `git pull --rebase` before push. + +endpoints: + api_base: https://api.arcade.st.dev + dashboard: https://dashboard.arcade.st.dev + coordinator: https://coordinator.arcade.st.dev + experience: https://experience.arcade.st.dev + mcp_url_pattern: https://api.arcade.st.dev/mcp/{slug} + +# slug -> {tools: [...], servers: [...], created_by, notes} (filled in Phase 1) +gateways: {} + +# name -> {kind: hosted|self-hosted, tools: [...], created_by, notes} (filled in Task 1.4) +servers: {} + +# Headless per-user identities (vault keys). Any stable string works. +users: + A: user-a@servicetitan.com + B: user-b@servicetitan.com diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bcfc860 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "arcade-eval" +version = "0.1.0" +description = "Evaluation harness for Arcade.dev as a self-hosted MCP gateway (10-category benchmark)." +requires-python = ">=3.12" +dependencies = ["mcp>=1.0", "httpx>=0.27", "pyyaml>=6.0"] + +[project.optional-dependencies] +dev = ["pytest>=8.0"] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/results/.gitkeep b/results/.gitkeep new file mode 100644 index 0000000..5bad70a --- /dev/null +++ b/results/.gitkeep @@ -0,0 +1 @@ +# Timestamped raw run artifacts land here (git-ignored).