From 9009237a146ec5e742dd5791c8f7f813f35336f0 Mon Sep 17 00:00:00 2001
From: Zeb Taylor <64664378+iztaylor@users.noreply.github.com>
Date: Mon, 22 Jun 2026 11:29:23 -0400
Subject: [PATCH] deploy: containerize arcade-eval-ref MCP server + ACR build/push action (#4)

Replace the cloudflared quick-tunnel dev pattern with a permanent in-cluster
deployment so the self-hosted Arcade engine reaches the echo/add/whoami
reference server over stable cluster DNS.

- lib/mcp_server/Dockerfile: python:3.12-slim, pip install ., HTTP transport
  via ARCADE_SERVER_{TRANSPORT,HOST,PORT} env overrides (no server.py change
  needed), non-root user, port 8000.
- .github/workflows/build-push-acr.yml: build + push
  servicetitandev.azurecr.io/arcade-eval-ref:1.0.<run_number>. Adapted from
  servicetitan/mem0; needs repo secrets ACR_DEV_USERNAME / ACR_DEV_PASSWORD.
- docs/superpowers/specs design record.

K8s manifests live in k8s-backstage-v2 apps/mcp/arcade-eval-ref/ (separate
branch).

Co-authored-by: Claude Opus 4.8 (1M context)
---
 .github/workflows/build-push-acr.yml          | 58 +++++++++++++++
 .../2026-06-22-deploy-mcp-to-k8s-design.md    | 73 +++++++++++++++++++
 lib/mcp_server/Dockerfile                     | 35 +++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 .github/workflows/build-push-acr.yml
 create mode 100644 docs/superpowers/specs/2026-06-22-deploy-mcp-to-k8s-design.md
 create mode 100644 lib/mcp_server/Dockerfile

diff --git a/.github/workflows/build-push-acr.yml b/.github/workflows/build-push-acr.yml
new file mode 100644
index 0000000..9314731
--- /dev/null
+++ b/.github/workflows/build-push-acr.yml
@@ -0,0 +1,58 @@
+name: Build and Push to ACR
+
+# Builds the arcade-eval reference MCP server image and pushes it to the
+# ServiceTitan dev Azure Container Registry. The image is consumed by
+# apps/mcp/arcade-eval-ref/ in k8s-backstage-v2 (backstage-wus2-v4).
+#
+# Adapted from servicetitan/mem0 .github/workflows/build-push-acr.yml.
+# Requires repo secrets ACR_DEV_USERNAME and ACR_DEV_PASSWORD.
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - 'lib/mcp_server/**'
+      - '.github/workflows/build-push-acr.yml'
+
+env:
+  REGISTRY: servicetitandev.azurecr.io
+  IMAGE: arcade-eval-ref
+  VERSION_PREFIX: "1.0"
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to ACR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ secrets.ACR_DEV_USERNAME }}
+          password: ${{ secrets.ACR_DEV_PASSWORD }}
+
+      - name: Generate image tag
+        id: meta
+        run: |
+          echo "tag=${{ env.VERSION_PREFIX }}.${{ github.run_number }}" >> "$GITHUB_OUTPUT"
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: lib/mcp_server
+          file: lib/mcp_server/Dockerfile
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE }}:${{ steps.meta.outputs.tag }}
+          cache-from: type=gha,scope=${{ env.IMAGE }}
+          cache-to: type=gha,mode=max,scope=${{ env.IMAGE }}
diff --git a/docs/superpowers/specs/2026-06-22-deploy-mcp-to-k8s-design.md b/docs/superpowers/specs/2026-06-22-deploy-mcp-to-k8s-design.md
new file mode 100644
index 0000000..fff886a
--- /dev/null
+++ b/docs/superpowers/specs/2026-06-22-deploy-mcp-to-k8s-design.md
@@ -0,0 +1,73 @@
+# Deploy arcade-eval reference MCP server to backstage k8s
+
+**Date:** 2026-06-22
+**Status:** Approved — implementing
+
+## Goal
+
+Replace the ephemeral cloudflared **quick tunnel** (used to register the
+`arcade-eval-ref` server with the self-hosted Arcade engine) with a permanent
+in-cluster deployment on `backstage-wus2-v4`. The engine then reaches the server
+over stable cluster DNS instead of a `trycloudflare.com` URL that dies on restart.
+
+Relevant eval categories: cat-4 (custom server dev), cat-8 (deployment), cat-9 (DX).
+
+## Architecture / data flow
+
+```
+Arcade engine (ns: arcade) ──HTTP /worker/*──▶ Service arcade-eval-ref (ns: arcade-eval-ref)
+  registered as type "Arcade"                    └─▶ Deployment: python:3.12 running
+  URI = http://arcade-eval-ref.arcade-eval-ref         mcp_server.server over HTTP :8000
+        .svc.cluster.local:8000                        (echo / add / whoami)
+  Secret = ARCADE_WORKER_SECRET ◀── same value ──▶ env ARCADE_WORKER_SECRET (SealedSecret)
+```
+
+### Runtime facts (verified by introspecting `arcade-mcp-server` 1.17)
+
+- `app.run()` honors env overrides via `_get_configuration_overrides()`:
+  `ARCADE_SERVER_TRANSPORT=http`, `ARCADE_SERVER_HOST=0.0.0.0`, `ARCADE_SERVER_PORT=8000`.
+  So the hardcoded `127.0.0.1` in `server.py`'s `__main__` is overridden at runtime —
+  **no `server.py` change needed.**
+- `ARCADE_WORKER_SECRET` (settings alias `arcade.server_secret`) → worker routes mount at
+  `/worker/*` (what the engine calls); MCP also served at `/mcp`. FastAPI app, port 8000.
+
+## Components
+
+### 1. `arcade-eval` repo (branch off `main`)
+
+- **`lib/mcp_server/Dockerfile`** — `python:3.12-slim`, `pip install .` (pulls
+  `arcade-mcp-server` + `httpx`), `ENV` transport/host/port, non-root user, `EXPOSE 8000`,
+  `CMD ["python","-m","mcp_server.server"]`.
+- **`.github/workflows/build-push-acr.yml`** — adapted from `servicetitan/mem0`. Pushes
+  `servicetitandev.azurecr.io/arcade-eval-ref:1.0.<run_number>`. Login via repo secrets
+  `ACR_DEV_USERNAME` / `ACR_DEV_PASSWORD`. Triggers: `workflow_dispatch` + push to `main`
+  filtered to `lib/mcp_server/**`.
+
+### 2. `k8s-backstage-v2` repo (branch off `master`)
+
+New dir **`apps/mcp/arcade-eval-ref/`** (Flux's `apps` Kustomization recursively applies
+everything under `apps/`; no per-dir `kustomization.yaml`):
+
+- **`namespace.yaml`** — ns `arcade-eval-ref` (labels per repo convention, `team: infra`).
+- **`server.yaml`** — plain `Deployment` (image
+  `servicetitandev.azurecr.io/arcade-eval-ref:1.0.1`; no imagePullSecret — the cluster has
+  native ACR pull, confirmed by other `apps/mcp/*` servers; `ARCADE_WORKER_SECRET` from
+  secretRef; TCP probes; modest resources) + `Service` (ClusterIP, 8000→8000).
+- **`sealedsecret.yaml`** — `arcade-eval-ref-worker-secret`, key `ARCADE_WORKER_SECRET`,
+  **strict** scope, sealed offline with `kubeseal --cert <cert-file>`.
+
+## Manual steps after merge
+
+1. Add `ACR_DEV_USERNAME` / `ACR_DEV_PASSWORD` repo secrets to `arcade-eval`.
+2. `workflow_dispatch` (or merge to `main`) to build/push the image — first run = tag `1.0.1`.
+3. Merge the k8s branch; Flux applies the namespace/secret/deployment.
+4. Dashboard → **Add Server → Arcade**, URI
+   `http://arcade-eval-ref.arcade-eval-ref.svc.cluster.local:8000`, Secret = the worker secret
+   plaintext (stored git-ignored at `results/arcade-eval-ref-worker-secret.txt`); re-point the
+   `zeb-gateway-test` gateway's ref tools at it and drop the tunnel. Delete the plaintext file
+   afterward.
+
+## Out of scope (YAGNI)
+
+No ingress (internal-only ClusterIP), no HPA, no PodMonitor/metrics (separate cat-5 work),
+single replica.
diff --git a/lib/mcp_server/Dockerfile b/lib/mcp_server/Dockerfile
new file mode 100644
index 0000000..2b3ec10
--- /dev/null
+++ b/lib/mcp_server/Dockerfile
@@ -0,0 +1,35 @@
+# syntax=docker/dockerfile:1
+#
+# arcade-eval reference MCP server (echo / add / whoami).
+#
+# Runs over HTTP so the self-hosted Arcade engine can reach it in-cluster via a
+# stable Service URL — replacing the ephemeral cloudflared tunnel used in dev.
+# Deployed to backstage-wus2-v4 under apps/mcp/arcade-eval-ref/ (k8s-backstage-v2).
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install the package + runtime deps (arcade-mcp-server, httpx) declared in pyproject.toml.
+COPY pyproject.toml ./
+COPY src ./src
+RUN pip install --no-cache-dir .
+
+# arcade_mcp_server's app.run() reads these env vars via _get_configuration_overrides():
+#   - ARCADE_SERVER_TRANSPORT=http  -> serve MCP at /mcp and worker routes at /worker/*
+#   - ARCADE_SERVER_HOST=0.0.0.0    -> bind all interfaces (server.py hardcodes 127.0.0.1;
+#                                      this env override is what makes it reachable in-cluster)
+#   - ARCADE_SERVER_PORT=8000
+# ARCADE_WORKER_SECRET is injected by Kubernetes at runtime (from a SealedSecret); it
+# authenticates the engine->worker connection and enables the /worker/* routes.
+ENV ARCADE_SERVER_TRANSPORT=http \
+    ARCADE_SERVER_HOST=0.0.0.0 \
+    ARCADE_SERVER_PORT=8000
+
+# Run as an unprivileged user.
+RUN useradd --create-home --uid 10001 appuser
+USER appuser
+
+EXPOSE 8000
+
+# server.py's __main__ calls app.run(); the env vars above override transport/host/port.
+CMD ["python", "-m", "mcp_server.server"]