# OpenAPI document header: specification version plus API metadata.
openapi: '3.1.0'
info:
  title: cachly API
  version: '1.0.0'
  # Support contact shown in generated documentation.
  contact:
    url: https://cachly.dev
    email: support@cachly.dev
  license:
    url: https://cachly.dev/license
    name: Proprietary
  # Markdown overview; lists the three authentication mechanisms by name.
  description: |
    DSGVO-nativer Managed Cache mit Semantic Caching für AI-Apps.

    **Authentifizierung:**
    - `BearerAuth` – JWT von Keycloak im `Authorization: Bearer <token>` Header
    - `AdminAuth` – statischer Bearer-Token (ADMIN_SECRET_KEY)
    - `VectorToken` – per-Instance UUID im URL-Pfad `/v1/sem/{token}/...`

# Base URLs against which the paths of this document are resolved.
servers:
  - description: Production
    url: https://api.cachly.dev
  - description: Local development
    url: http://localhost:3001

# Global default: an empty requirement list, i.e. no authentication unless an
# operation declares its own `security` entry.
security: []

# Tag catalogue used to group operations in generated documentation.
# Every tag referenced by an operation should be declared here; `analytics`
# is referenced by the Analytics 2.0 operations and was previously undeclared.
tags:
  - name: health
    description: API Health & Liveness
  - name: instances
    description: Cache Instance Management (Keycloak JWT required)
  - name: billing
    description: Stripe Billing Portal
  - name: account
    description: Tenant Account & DSGVO Data Portability
  - name: semantic
    description: Semantic Cache API (Vector Token required, no Keycloak JWT)
  - name: analytics
    description: Analytics 2.0
  - name: admin
    description: Admin API (ADMIN_SECRET_KEY required)
  - name: webhooks
    description: Stripe Webhook Endpoint

paths:
  # ── Health ──────────────────────────────────────────────────────────────────
  # Health probe; the empty `security` list marks it as unauthenticated.
  /health:
    get:
      operationId: getHealth
      tags:
        - health
      summary: API Health Check
      description: Verifies process liveness and database connectivity. Used by load balancers and Docker healthchecks.
      security: []
      responses:
        "200":
          description: API and database are healthy
          headers:
            Cache-Control:
              schema:
                example: "public, max-age=5, stale-while-revalidate=10"
                type: string
          content:
            application/json:
              schema: { $ref: "#/components/schemas/HealthResponse" }
              example: { status: ok, service: cachly-api, db: ok }
        # Same payload shape as the healthy case, with degraded status values.
        "503":
          description: Database unreachable
          content:
            application/json:
              schema: { $ref: "#/components/schemas/HealthResponse" }
              example: { status: degraded, service: cachly-api, db: unreachable }

  # ── Instances ────────────────────────────────────────────────────────────────
  # Collection endpoint: create (POST) and list (GET) cache instances.
  # Both operations require the BearerAuth scheme.
  /api/v1/instances:
    post:
      operationId: createInstance
      tags:
        - instances
      summary: Create Cache Instance
      description: |
        Creates a new managed cache instance for the authenticated tenant.

        - **Free tier:** provisioned immediately (status=running)
        - **Paid tiers:** returns a Stripe Checkout URL (status=pending_payment)
      security:
        - BearerAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema: { $ref: "#/components/schemas/CreateInstanceRequest" }
      # Responses listed in ascending status-code order.
      responses:
        "201":
          description: Instance created (free tier, immediately running)
          content:
            application/json:
              schema: { $ref: "#/components/schemas/CreateInstanceResponse" }
        "400": { $ref: "#/components/responses/BadRequest" }
        "401": { $ref: "#/components/responses/Unauthorized" }
        # 402 carries the same response schema as 201.
        "402":
          description: Payment required – Stripe Checkout URL returned
          content:
            application/json:
              schema: { $ref: "#/components/schemas/CreateInstanceResponse" }
        "429": { $ref: "#/components/responses/TooManyRequests" }

    get:
      operationId: listInstances
      tags:
        - instances
      summary: List Instances
      description: Returns all cache instances belonging to the authenticated tenant.
      security:
        - BearerAuth: []
      responses:
        "200":
          description: List of instances
          content:
            application/json:
              # Envelope: `data` array plus a `count` integer.
              schema:
                type: object
                properties:
                  data:
                    type: array
                    items: { $ref: "#/components/schemas/Instance" }
                  count: { type: integer }
        "401": { $ref: "#/components/responses/Unauthorized" }

  # Single-instance resource: read (GET) and delete (DELETE).
  /api/v1/instances/{id}:
    # Declared once at path level; applies to every operation of this path item.
    parameters:
      - $ref: "#/components/parameters/InstanceID"

    get:
      operationId: getInstance
      tags:
        - instances
      summary: Get Instance
      description: Returns details + connection string for a single instance.
      security:
        - BearerAuth: []
      responses:
        "200":
          description: Instance details
          content:
            application/json:
              schema: { $ref: "#/components/schemas/Instance" }
        "401": { $ref: "#/components/responses/Unauthorized" }
        "404": { $ref: "#/components/responses/NotFound" }

    delete:
      operationId: deleteInstance
      tags:
        - instances
      summary: Delete Instance
      description: Deprovisions the K8s resources and soft-deletes the instance.
      security:
        - BearerAuth: []
      responses:
        # Success has no response body.
        "204":
          description: Instance deleted
        "401": { $ref: "#/components/responses/Unauthorized" }
        "404": { $ref: "#/components/responses/NotFound" }

  # Instance-scoped Stripe Billing Portal session (BearerAuth).
  # A 404 response is documented because the operation addresses an instance
  # by ID, like the other `/api/v1/instances/{id}` operations.
  /api/v1/instances/{id}/portal-session:
    post:
      tags: [billing]
      summary: Create Stripe Billing Portal Session
      description: |
        Returns a single-use Stripe Billing Portal URL (valid ~5 minutes).
        Customers can upgrade, downgrade, cancel, or update payment methods.
        After plan changes, Stripe fires `customer.subscription.updated` which triggers automatic K8s re-provisioning.
      operationId: createPortalSession
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "200":
          description: Portal session URL
          content:
            application/json:
              schema:
                type: object
                properties:
                  url:
                    type: string
                    format: uri
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # Tenant invoice listing (BearerAuth). The response is a `data` array of
  # invoice objects plus a `count` integer.
  /api/v1/billing/invoices:
    get:
      operationId: listInvoices
      tags:
        - billing
      summary: List Invoices
      description: |
        Returns the last 24 Stripe invoices for the authenticated tenant with PDF download links.
        Annotates invoices with cachly instance name and tier when available.
      security:
        - BearerAuth: []
      responses:
        "200":
          description: Invoice list
          content:
            application/json:
              schema:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: string
                        number:
                          type: string
                        status:
                          type: string
                          enum:
                            - draft
                            - open
                            - paid
                            - void
                            - uncollectible
                        currency:
                          type: string
                        amount_due:
                          type: integer
                        amount_paid:
                          type: integer
                        total:
                          type: integer
                        period_start:
                          type: integer
                        period_end:
                          type: integer
                        created:
                          type: integer
                        invoice_pdf:
                          type: string
                          format: uri
                        hosted_url:
                          type: string
                          format: uri
                        instance_name:
                          type: string
                        instance_tier:
                          type: string
                  count:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Tenant billing overview (BearerAuth): per-instance subscriptions plus totals.
  /api/v1/billing/subscription:
    get:
      operationId: getSubscriptionSummary
      tags:
        - billing
      summary: Get Subscription Summary
      description: |
        Returns a billing overview including all active subscriptions mapped to cachly instances
        and total monthly spend. Free-tier instances are included with amount 0.
      security:
        - BearerAuth: []
      responses:
        "200":
          description: Subscription summary
          content:
            application/json:
              schema:
                type: object
                properties:
                  has_stripe_customer:
                    type: boolean
                  subscriptions:
                    type: array
                    items:
                      type: object
                      properties:
                        instance_id:
                          type: string
                          format: uuid
                        instance_name:
                          type: string
                        tier:
                          type: string
                        status:
                          type: string
                        current_period_end:
                          type: integer
                        cancel_at_period_end:
                          type: boolean
                        monthly_amount:
                          type: integer
                        currency:
                          type: string
                  total_monthly:
                    type: integer
                  currency:
                    type: string
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Tenant-level Stripe Billing Portal session (BearerAuth, no instance ID).
  /api/v1/billing/portal-session:
    post:
      operationId: createTenantPortalSession
      tags:
        - billing
      summary: Create Tenant Billing Portal Session
      description: |
        Creates a Stripe Billing Portal session at the tenant level (not instance-specific).
        Customers can manage all subscriptions, view invoices, and update their payment method.
        Returns URL redirecting back to `/billing` after session ends.
      security:
        - BearerAuth: []
      responses:
        "200":
          description: Portal session URL
          content:
            application/json:
              schema:
                type: object
                properties:
                  url:
                    type: string
                    format: uri
        # Inline response (no shared component): the tenant has no Stripe customer.
        "400":
          description: No Stripe customer (free tier only)
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Data-portability export for the authenticated tenant (BearerAuth).
  /api/v1/account/export:
    get:
      operationId: exportAccount
      tags:
        - account
      summary: Export Account Data (DSGVO Art. 20)
      description: |
        Returns all personal data for the authenticated tenant as a downloadable JSON file.
        Implements DSGVO Article 20 – Recht auf Datenübertragbarkeit (Data Portability).
      security:
        - BearerAuth: []
      responses:
        "200":
          description: Account data export
          # The header example shows the response being served as an attachment.
          headers:
            Content-Disposition:
              schema:
                type: string
                example: "attachment; filename=\"cachly-export-2026-04-06.json\""
          content:
            application/json:
              schema: { $ref: "#/components/schemas/AccountExport" }
        "401": { $ref: "#/components/responses/Unauthorized" }

  # Issues a replacement vector token for one instance (BearerAuth).
  /api/v1/instances/{id}/rotate-vector-token:
    post:
      operationId: rotateVectorToken
      tags:
        - instances
      summary: Rotate Vector Token
      description: |
        Generates a new vector_token UUID for the instance and immediately invalidates the old one.
        SDK clients must update CACHLY_VECTOR_URL after rotation.
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "200":
          description: Token rotated
          content:
            application/json:
              # Carries the new token together with a free-text message.
              schema:
                type: object
                properties:
                  vector_token: { type: string, format: uuid }
                  message: { type: string }
        "401": { $ref: "#/components/responses/Unauthorized" }
        "404": { $ref: "#/components/responses/NotFound" }

  # Bulk pre-fill of an instance's cache (BearerAuth).
  # The schema mirrors the limits stated in the description, and a 404 response
  # is documented because the operation addresses an instance by ID.
  /api/v1/instances/{id}/warm:
    post:
      tags: [instances]
      summary: Warm Cache
      description: |
        Pre-fills the instance's cache with the provided key/value entries.
        The instance must be in status "running". Entries are pipelined to Valkey/Dragonfly
        in a single round-trip for efficiency.

        **Limits:** max 500 entries per request, max 512 bytes per key, max 1 MB per value.
      operationId: warmCache
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [entries]
              properties:
                entries:
                  type: array
                  maxItems: 500
                  items:
                    type: object
                    required: [key, value]
                    properties:
                      key:
                        type: string
                        maxLength: 512
                      value:
                        type: string
                        # Character-count upper bound for the documented 1 MB
                        # limit; never stricter than a byte-based check.
                        # NOTE(review): confirm the server's exact byte limit.
                        maxLength: 1048576
                      ttl:
                        type: integer
                        # 0 means "no expiry", so negative values carry no meaning.
                        minimum: 0
                        description: TTL in seconds (0 = no expiry)
      responses:
        "200":
          description: Warming result
          content:
            application/json:
              schema:
                type: object
                properties:
                  total:
                    type: integer
                  written:
                    type: integer
                  failed:
                    type: integer
                  duration_ms:
                    type: number
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # ── BYOK (Bring Your Own Key) ──────────────────────────────────────────────
  # Customer-provided key management for one instance (BearerAuth).
  # The key is secret material, so it is marked write-only and as a password
  # format to keep tooling from echoing it. A 404 response is documented
  # because both operations address an instance by ID.
  /api/v1/instances/{id}/byok:
    put:
      tags: [instances]
      summary: Set BYOK Key
      description: |
        Stores a customer-provided AES-256 key (hex-encoded, 64 chars) used to wrap
        the server-side encryption key.  **Business & Enterprise tiers only.**
      operationId: setBYOKey
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [key]
              properties:
                key:
                  type: string
                  # Hint for documentation UIs to mask the value.
                  format: password
                  writeOnly: true
                  minLength: 64
                  maxLength: 64
                  pattern: "^[0-9a-fA-F]{64}$"
                  description: AES-256 key in hex encoding
      responses:
        "204":
          description: Key set successfully
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

    delete:
      tags: [instances]
      summary: Remove BYOK Key
      description: Removes the customer-provided encryption key.
      operationId: removeBYOKey
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "204":
          description: Key removed
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # ── Custom Domains ─────────────────────────────────────────────────────────
  # Custom-domain configuration for one instance (BearerAuth).
  # A 404 response is documented on both operations because they address an
  # instance by ID.
  /api/v1/instances/{id}/custom-domain:
    put:
      tags: [instances]
      summary: Set Custom Domain
      description: |
        Configures a custom domain (e.g. `cache.example.com`) for the instance.
        **Business & Enterprise tiers only.**

        After setting the domain, create a DNS TXT record and call the verify endpoint:
        - Record: `_cachly-verify.<domain>`
        - Value: `cachly-verify=<instance-id>`
      operationId: setCustomDomain
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [domain]
              properties:
                domain:
                  type: string
                  example: cache.example.com
      responses:
        # The response carries the TXT record name/value the caller must publish.
        "200":
          description: Domain set, verification pending
          content:
            application/json:
              schema:
                type: object
                properties:
                  custom_domain:
                    type: string
                  custom_domain_verified:
                    type: boolean
                  verify_txt_record:
                    type: string
                    description: DNS TXT record name to create
                  verify_txt_value:
                    type: string
                    description: DNS TXT record value
                  message:
                    type: string
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

    delete:
      tags: [instances]
      summary: Remove Custom Domain
      description: Removes the custom domain configuration.
      operationId: removeCustomDomain
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "204":
          description: Domain removed
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # Triggers the DNS TXT verification of the configured custom domain (BearerAuth).
  # A 404 response is documented because the operation addresses an instance by ID.
  /api/v1/instances/{id}/custom-domain/verify:
    post:
      tags: [instances]
      summary: Verify Custom Domain
      description: |
        Checks the DNS TXT record for the configured custom domain.
        Expected: `_cachly-verify.<domain>` → `cachly-verify=<instance-id>`
      operationId: verifyCustomDomain
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "200":
          description: Domain verified
          content:
            application/json:
              schema:
                type: object
                properties:
                  custom_domain:
                    type: string
                  custom_domain_verified:
                    type: boolean
                  message:
                    type: string
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # Per-instance webhook configuration (BearerAuth).
  # The request schema enforces the HTTPS requirement stated in the description,
  # and a 404 response is documented because both operations address an
  # instance by ID.
  /api/v1/instances/{id}/webhook:
    put:
      tags: [instances]
      summary: Set Webhook URL
      description: |
        Configure an HTTPS webhook URL for this instance. Cachly will POST
        real-time events (cache hits, near-misses, threshold changes) to
        this endpoint. The URL must use HTTPS and be at most 2048 characters.
      operationId: setWebhookURL
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [url]
              properties:
                url:
                  type: string
                  format: uri
                  # URI schemes are case-insensitive, hence the character classes.
                  pattern: "^[Hh][Tt][Tt][Pp][Ss]://"
                  maxLength: 2048
                  example: "https://hooks.example.com/cachly"
      responses:
        "200":
          description: Webhook URL saved
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok: { type: boolean }
                  webhook_url: { type: string, format: uri }
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

    delete:
      tags: [instances]
      summary: Remove Webhook URL
      description: Removes the configured webhook URL for this instance. Events will no longer be delivered.
      operationId: removeWebhookURL
      security:
        - BearerAuth: []
      parameters:
        - $ref: "#/components/parameters/InstanceID"
      responses:
        "200":
          description: Webhook removed
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok: { type: boolean }
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # Account erasure for the authenticated user (BearerAuth).
  /api/v1/account:
    delete:
      operationId: deleteAccount
      tags:
        - account
      summary: Delete Account (DSGVO Art. 17 – Recht auf Löschung)
      description: |
        Permanently erases all personal data for the authenticated user.

        **Deletion sequence:**
        1. Deprovision all k8s namespaces (Valkey/instance data)
        2. Delete all semantic-cache embeddings (pgvector)
        3. Hard-delete all instance records
        4. Delete all audit log entries for this tenant
        5. Delete the tenant record

        ⚠️ **This action is irreversible.** All data is permanently deleted.
        An immutable erasure receipt is kept in the system audit log (no personal data).
      security:
        - BearerAuth: []
      responses:
        "204":
          description: Account successfully deleted (no content)
        "401": { $ref: "#/components/responses/Unauthorized" }
        # Inline response (no shared component).
        "500":
          description: Erasure failed (retry later)

  # Semantic cache lookup; authenticated by the vector token in the URL path.
  /v1/sem/{token}/search:
    post:
      operationId: semanticSearch
      tags:
        - semantic
      summary: ANN Search
      description: |
        Nearest-neighbour search using pgvector HNSW index.
        Returns the cached entry with highest cosine similarity above the threshold.

        **Performance:** O(log n) with HNSW index, typically < 10 ms.

        Supports int8-quantized embeddings (§7) to reduce payload by ~8x.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema: { $ref: "#/components/schemas/SearchRequest" }
      responses:
        # Hit and miss share the 200 status and the same response schema.
        "200":
          description: Search result (hit or miss)
          content:
            application/json:
              schema: { $ref: "#/components/schemas/SearchResponse" }
        "400": { $ref: "#/components/responses/BadRequest" }
        "401": { $ref: "#/components/responses/Unauthorized" }
        "429": { $ref: "#/components/responses/TooManyRequests" }

  # Semantic entry collection: index one entry (POST) or page through entries (GET).
  # Both operations require the VectorToken scheme, so both document a 401.
  /v1/sem/{token}/entries:
    post:
      tags: [semantic]
      summary: Index Entry
      description: Stores a new embedding + prompt in the pgvector index.
      operationId: semanticIndex
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/IndexRequest"
      responses:
        "201":
          description: Entry indexed
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok:
                    type: boolean
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "429":
          $ref: "#/components/responses/TooManyRequests"

    get:
      tags: [semantic]
      summary: List Entries
      # A single response is capped by `limit` (max 1000); callers page with `offset`.
      description: Returns indexed entries for the namespace, paginated via `limit` and `offset`.
      operationId: semanticListEntries
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
        - name: namespace
          in: query
          schema:
            type: string
            default: "cachly:sem"
        - name: limit
          in: query
          description: Max entries to return (1-1000, default 100)
          schema:
            type: integer
            default: 100
            minimum: 1
            maximum: 1000
        - name: offset
          in: query
          description: Number of entries to skip (default 0)
          schema:
            type: integer
            default: 0
            minimum: 0
      responses:
        "200":
          description: Entry list
          content:
            application/json:
              schema:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      $ref: "#/components/schemas/SemanticEntry"
                  count:
                    type: integer
                  limit:
                    type: integer
                  offset:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Bulk indexing of semantic entries.
  # The operation requires the VectorToken scheme, so a 401 response is documented.
  /v1/sem/{token}/entries/batch:
    post:
      tags: [semantic]
      summary: Batch Index (Cache Warming)
      description: |
        Inserts up to 500 entries in a single request (§8 Cache Warming).
        Reduces round-trips during initial cache population.
      operationId: semanticBatchIndex
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/BatchIndexRequest"
      responses:
        "201":
          description: Batch result
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/BatchIndexResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "429":
          $ref: "#/components/responses/TooManyRequests"

  # Removal of a single semantic entry.
  # The operation requires the VectorToken scheme, so a 401 response is documented.
  /v1/sem/{token}/entries/{id}:
    delete:
      tags: [semantic]
      summary: Delete Entry
      description: Deletes a single indexed entry by its ID.
      operationId: semanticDeleteEntry
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
        - name: id
          in: path
          required: true
          description: ID of the entry to delete
          schema:
            type: string
            format: uuid
      responses:
        "200":
          description: Deleted
        "401":
          $ref: "#/components/responses/Unauthorized"
        "404":
          $ref: "#/components/responses/NotFound"

  # Bulk deletion of every entry in one namespace.
  # The operation requires the VectorToken scheme, so a 401 response is documented.
  /v1/sem/{token}/flush:
    delete:
      tags: [semantic]
      summary: Flush Namespace
      description: Deletes all entries in a namespace.
      operationId: semanticFlush
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
        # Optional; the schema default applies when omitted.
        - name: namespace
          in: query
          schema:
            type: string
            default: "cachly:sem"
      responses:
        "200":
          description: Number of deleted entries
          content:
            application/json:
              schema:
                type: object
                properties:
                  deleted:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Entry count for one namespace.
  # The operation requires the VectorToken scheme, so a 401 response is documented.
  /v1/sem/{token}/size:
    get:
      tags: [semantic]
      summary: Get Namespace Size
      description: Returns the entry count (O(1) – uses DB index).
      operationId: semanticSize
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
        # Optional; the schema default applies when omitted.
        - name: namespace
          in: query
          schema:
            type: string
            default: "cachly:sem"
      responses:
        "200":
          description: Namespace entry count
          content:
            application/json:
              schema:
                type: object
                properties:
                  size:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Feedback signal for cache hits.
  # The operation takes a required request body and the VectorToken scheme, so
  # 400 and 401 responses are documented alongside the success case.
  /v1/sem/{token}/feedback:
    post:
      tags: [semantic]
      summary: Record Feedback (Adaptive Threshold §1)
      description: |
        Records whether the caller accepted a cache hit as correct.
        The background calibrator uses these signals to compute the F1-optimal threshold per namespace.
      operationId: semanticFeedback
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/FeedbackRequest"
      responses:
        # No response body is declared for the success case.
        "200":
          description: Feedback recorded
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Similarity threshold per namespace: read (GET) or override manually (POST).
  # Both operations require the VectorToken scheme, so both document a 401.
  /v1/sem/{token}/threshold:
    get:
      tags: [semantic]
      summary: Get Adaptive Threshold (§1)
      description: Returns the current F1-calibrated threshold. Falls back to 0.85 if not yet calibrated.
      operationId: semanticThreshold
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
        - name: namespace
          in: query
          schema:
            type: string
            default: "cachly:sem"
      responses:
        "200":
          description: Current F1-calibrated threshold value
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ThresholdResponse"
        "401":
          $ref: "#/components/responses/Unauthorized"

    post:
      tags: [semantic]
      summary: Set Manual Threshold
      description: |
        Manually set the similarity threshold for a namespace, overriding the
        automatic F1 calibration. Useful for fine-tuning when feedback is sparse.
        Value must be between 0.50 and 1.00.
      operationId: semanticSetThreshold
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [threshold]
              properties:
                # Here the namespace travels in the body, not as a query parameter.
                namespace:
                  type: string
                  default: "cachly:sem"
                threshold:
                  type: number
                  minimum: 0.50
                  maximum: 1.00
                  example: 0.88
      responses:
        "200":
          description: Threshold updated
          content:
            application/json:
              schema:
                type: object
                properties:
                  ok: { type: boolean }
                  namespace: { type: string }
                  threshold: { type: number }
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Streaming variant of the semantic search; the response media type is
  # `text/event-stream` instead of JSON.
  /v1/sem/{token}/search/stream:
    post:
      operationId: semanticSearchStream
      tags:
        - semantic
      summary: Streaming ANN Search (SSE)
      description: |
        Same search logic as `/v1/sem/{token}/search` but returns the cached
        response as a Server-Sent Events (SSE) stream of word-level text chunks.

        **Protocol:**
        1. `event: search` – `{"found": true/false, "id": "...", "similarity": 0.94}`
        2. `event: chunk` – `{"index": 0, "text": "The answer "}` (repeated)
        3. `event: done` – `{}`

        On cache miss, only `search` (found=false) and `done` are sent.

        Useful for SDK clients that stream LLM responses and want transparent
        cache integration — a hit replays the cached response at streaming pace.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      # Same request schema as the non-streaming search.
      requestBody:
        required: true
        content:
          application/json:
            schema: { $ref: "#/components/schemas/SearchRequest" }
      responses:
        "200":
          description: SSE stream of search result + text chunks
          content:
            text/event-stream:
              schema: { type: string }
        "400": { $ref: "#/components/responses/BadRequest" }
        "401": { $ref: "#/components/responses/Unauthorized" }

  # Heuristic namespace classification for a prompt; takes the raw prompt text
  # and returns the detected namespace alongside the prompt.
  /v1/sem/{token}/detect-namespace:
    post:
      tags: [semantic]
      summary: Auto-Detect Namespace (§4)
      description: |
        Classifies a prompt into one of 5 semantic namespaces using text heuristics (< 0.1 ms).
        No embedding required.
      operationId: semanticDetectNamespace
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [prompt]
              properties:
                prompt:
                  type: string
                  example: "Translate this to German"
      responses:
        "200":
          description: Detected namespace and original prompt
          content:
            application/json:
              schema:
                type: object
                properties:
                  namespace:
                    type: string
                    example: "cachly:sem:translation"
                  prompt:
                    type: string
        # Error responses declared for parity with the other token-secured POST
        # operations that take a required JSON body
        # (e.g. /v1/sem/{token}/search/stream).
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Aggregated semantic-cache statistics for the instance behind the token.
  /v1/sem/{token}/stats:
    get:
      tags: [semantic]
      summary: Semantic Cache Analytics (§10)
      description: Returns aggregated analytics including hit rates, savings estimates, and near-miss data.
      operationId: semanticStats
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      responses:
        "200":
          description: Aggregated semantic cache analytics
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/StatsResponse"
        # Declared for parity with the other VectorToken-secured GET
        # operations (e.g. /v1/sem/{token}/analytics).
        "401":
          $ref: "#/components/responses/Unauthorized"

  # ── Analytics 2.0 ───────────────────────────────────────────────────────────
  # Full analytics payload: efficiency score, anomalies, recommendations,
  # 24h trend and a summary, all in one inline response schema.
  /v1/sem/{token}/analytics:
    get:
      operationId: fullAnalytics
      tags:
        - analytics
      summary: Full Analytics 2.0
      description: |
        Returns the complete Analytics 2.0 payload including:
        - Cache Efficiency Score (0-100, weighted: 50% hit rate, 25% latency savings, 25% cost savings)
        - Top-10 hashed cache keys (privacy-safe SHA-256)
        - Anomaly detection (hit-rate drop, near-miss spike, efficiency degradation, stale cache)
        - Smart recommendations (threshold tuning, TTL increase, warmup suggestions, SWR)
        - 24h efficiency trend line
        - One-liner health summary
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      responses:
        "200":
          description: Analytics 2.0 payload
          content:
            application/json:
              schema:
                type: object
                properties:
                  efficiency:
                    type: object
                    properties:
                      score:
                        type: number
                        example: 78.5
                      grade:
                        type: string
                        example: "B"
                      hit_rate_component:
                        type: number
                        example: 82.3
                      latency_component:
                        type: number
                        example: 98.1
                      cost_component:
                        type: number
                        example: 45.0
                      trend_direction:
                        type: string
                        enum:
                          - up
                          - down
                          - stable
                      trend_percentage:
                        type: number
                        example: 3.2
                  anomalies:
                    type: array
                    items: { $ref: "#/components/schemas/Anomaly" }
                  recommendations:
                    type: array
                    items: { $ref: "#/components/schemas/Recommendation" }
                  trend_24h:
                    type: array
                    items:
                      type: object
                      properties:
                        timestamp:
                          type: string
                          format: date-time
                        efficiency_score:
                          type: number
                        hit_rate:
                          type: number
                        total_hits:
                          type: integer
                  summary:
                    type: object
                    properties:
                      status:
                        type: string
                        enum:
                          - healthy
                          - degraded
                          - critical
                      one_liner:
                        type: string
                      anomaly_count:
                        type: integer
                      recommendation_count:
                        type: integer
                  generated_at:
                    type: string
                    format: date-time
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Returns only the anomaly list (items use the shared Anomaly schema) plus
  # a count field.
  /v1/sem/{token}/analytics/anomalies:
    get:
      operationId: analyticsAnomalies
      tags:
        - analytics
      summary: Anomalies Only
      description: Returns detected cache behaviour anomalies for the last 24 hours.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      responses:
        "200":
          description: Anomaly list
          content:
            application/json:
              schema:
                type: object
                properties:
                  anomalies:
                    type: array
                    items: { $ref: "#/components/schemas/Anomaly" }
                  count:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Returns only the recommendation list (items use the shared Recommendation
  # schema) plus a count field.
  /v1/sem/{token}/analytics/recommend:
    get:
      operationId: analyticsRecommendations
      tags:
        - analytics
      summary: Recommendations Only
      description: Returns actionable recommendations for improving cache performance.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      responses:
        "200":
          description: Recommendation list
          content:
            application/json:
              schema:
                type: object
                properties:
                  recommendations:
                    type: array
                    items: { $ref: "#/components/schemas/Recommendation" }
                  count:
                    type: integer
        "401":
          $ref: "#/components/responses/Unauthorized"

  # ── Cache Warmup ────────────────────────────────────────────────────────────
  # Bulk key/value pre-fill, authenticated with the per-instance vector token.
  /v1/cache/{token}/warm:
    post:
      operationId: warmCacheByToken
      tags:
        - warmup
      summary: Warm KV Cache (Token-based)
      description: |
        Pre-fills the cache with key/value entries using the vector token.
        Entries are pipelined to Valkey in a single round-trip.
        Max 500 entries per request, max 512 bytes per key, max 1 MB per value.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - entries
              properties:
                entries:
                  type: array
                  # Schema-level mirror of the "max 500 entries" limit above.
                  maxItems: 500
                  items:
                    type: object
                    required:
                      - key
                      - value
                    properties:
                      key:
                        type: string
                        maxLength: 512
                      value:
                        type: string
                      ttl:
                        type: integer
                        description: "TTL in seconds (0 = no expiry)"
      responses:
        "200":
          description: Warmup result
          content:
            application/json:
              schema:
                type: object
                properties:
                  total:
                    type: integer
                  written:
                    type: integer
                  failed:
                    type: integer
                  duration_ms:
                    type: number
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Snapshot operation for the semantic cache. Neither a request body nor a
  # response schema is specified here.
  /v1/sem/{token}/warmup/snapshot:
    post:
      operationId: semanticWarmupSnapshot
      tags:
        - warmup
      summary: Semantic Cache Warmup Snapshot
      description: Creates or restores a snapshot of the semantic cache for the instance.
      security:
        - VectorToken: []
      parameters:
        - $ref: "#/components/parameters/VectorToken"
      responses:
        "200": { description: Snapshot result }
        "401":
          $ref: "#/components/responses/Unauthorized"

  # ── Admin API ────────────────────────────────────────────────────────────────
  # Admin-only KPI endpoint; the response body schema is not specified here.
  /api/admin/stats:
    get:
      tags:
        - admin
      summary: KPI Stats
      security:
        - AdminAuth: []
      responses:
        "200": { description: Aggregated KPIs }
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Admin-only listing of instances across all tenants; the response body
  # schema is not specified here.
  /api/admin/instances:
    get:
      tags: [admin]
      summary: List All Instances (Admin)
      security:
        - AdminAuth: []
      responses:
        "200":
          description: All instances across all tenants
        # Declared for parity with /api/admin/stats, which uses the same
        # AdminAuth requirement.
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Admin-only listing of cluster nodes; the response body schema is not
  # specified here.
  /api/admin/nodes:
    get:
      tags: [admin]
      summary: List Cluster Nodes
      security:
        - AdminAuth: []
      responses:
        "200":
          description: Node list with capacity
        # Declared for parity with /api/admin/stats, which uses the same
        # AdminAuth requirement.
        "401":
          $ref: "#/components/responses/Unauthorized"

  # Admin-only trigger for node provisioning. Answers 202 because the work
  # is asynchronous (see description).
  /api/admin/nodes/provision:
    post:
      tags: [admin]
      summary: Provision New Hetzner Node
      description: Triggers async provisioning of a new CPX32 node and joins it to the k3s cluster.
      security:
        - AdminAuth: []
      responses:
        "202":
          description: Provisioning started
        # Declared for parity with /api/admin/stats, which uses the same
        # AdminAuth requirement.
        "401":
          $ref: "#/components/responses/Unauthorized"

  # ── Webhooks ─────────────────────────────────────────────────────────────────
  # Inbound Stripe webhook receiver. The payload is typed only as a generic
  # JSON object.
  /webhooks/stripe:
    post:
      tags:
        - webhooks
      summary: Stripe Webhook Handler
      description: Receives Stripe events (checkout.session.completed, customer.subscription.updated, etc.)
      # No OpenAPI security requirement is declared for this operation.
      security: []
      requestBody:
        required: true
        content:
          application/json:
            schema: { type: object }
      responses:
        "200": { description: Event processed }
        "400":
          description: Invalid Stripe signature or unknown event type

components:
  # Authentication mechanisms referenced from the operations' `security` lists.
  securitySchemes:
    # JWT bearer token.
    BearerAuth:
      description: Keycloak JWT – obtain via Keycloak OIDC or next-auth session
      type: http
      scheme: bearer
      bearerFormat: JWT
    # Static bearer secret for the admin endpoints.
    AdminAuth:
      description: Static admin secret key (ADMIN_SECRET_KEY env variable)
      type: http
      scheme: bearer
    # Per-instance token carried in the URL path.
    # NOTE(review): the OpenAPI Security Scheme Object defines only `query`,
    # `header` and `cookie` as apiKey locations; strict validators may reject
    # `in: path` — confirm the tooling in use tolerates it.
    VectorToken:
      description: Per-instance UUID from dashboard (CACHLY_VECTOR_URL)
      type: apiKey
      name: token
      in: path

  # Reusable path parameters; both are required UUID strings.
  parameters:
    InstanceID:
      in: path
      name: id
      required: true
      schema: { type: string, format: uuid }
    VectorToken:
      in: path
      name: token
      required: true
      description: Per-instance vector token (from dashboard CACHLY_VECTOR_URL)
      schema: { type: string, format: uuid }

  # Shared error responses; every one carries the generic Error schema as a
  # JSON body.
  responses:
    BadRequest:
      description: Bad request (invalid body or parameters)
      content:
        application/json:
          schema: { $ref: "#/components/schemas/Error" }
    Unauthorized:
      description: Missing or invalid authentication
      content:
        application/json:
          schema: { $ref: "#/components/schemas/Error" }
    NotFound:
      description: Resource not found
      content:
        application/json:
          schema: { $ref: "#/components/schemas/Error" }
    TooManyRequests:
      description: Rate limit exceeded
      content:
        application/json:
          schema: { $ref: "#/components/schemas/Error" }

  schemas:
    # Generic error envelope used by the shared error responses.
    Error:
      type: object
      properties:
        error: { type: string }

    # Body of the /health endpoint.
    HealthResponse:
      type: object
      properties:
        status:
          type: string
          enum:
            - ok
            - degraded
        service: { type: string }
        db:
          type: string
          enum:
            - ok
            - unreachable

    # Cache instance resource (also embedded in AccountExport.instances).
    Instance:
      type: object
      properties:
        id:
          type: string
          format: uuid
        name:
          type: string
        tier:
          type: string
          enum: [free, dev, pro, speed, business, enterprise]
        status:
          type: string
          enum:
            - pending_payment
            - provisioning
            - running
            - suspended
            - deleting
            - failed
        region:
          type: string
        engine:
          type: string
          enum: [valkey, dragonfly]
          description: Cache engine backing this instance
        cluster_mode:
          type: boolean
          description: Whether the instance runs in Valkey Cluster mode (automatic sharding)
        host:
          type: string
        port:
          type: integer
        memory_mb:
          type: integer
        encryption_at_rest:
          type: boolean
        byo_key_set:
          type: boolean
          description: Whether the customer has set a BYOK (Bring Your Own Key) encryption key
        custom_domain:
          # OpenAPI 3.1 uses JSON Schema 2020-12 typing: nullability is a
          # "null" entry in `type`; the 3.0-only `nullable` flag is not
          # recognised and would leave this field non-nullable.
          type: [string, "null"]
          description: Customer's custom CNAME (e.g. cache.example.com)
        custom_domain_verified:
          type: boolean
          description: Whether the custom domain's DNS TXT record has been verified
        vector_token:
          type: string
          format: uuid
          description: Used for CACHLY_VECTOR_URL – authenticate semantic cache API calls
        webhook_url:
          # Nullable via the type array, for the same reason as custom_domain.
          type: [string, "null"]
          format: uri
          description: HTTPS endpoint receiving real-time cache events (hits, near-misses, threshold changes)
        created_at:
          type: string
          format: date-time
        updated_at:
          type: string
          format: date-time

    # Payload for creating an instance; both fields are mandatory.
    CreateInstanceRequest:
      type: object
      required:
        - name
        - tier
      properties:
        name:
          type: string
          example: my-ai-cache
          # Length bounds and pattern agree: one leading alphanumeric plus
          # 2-62 further characters gives 3-63 in total.
          minLength: 3
          maxLength: 63
          pattern: "^[a-zA-Z0-9][a-zA-Z0-9_-]{2,62}$"
        tier:
          type: string
          enum:
            - free
            - dev
            - pro
            - speed
            - business

    # Response counterpart of CreateInstanceRequest.
    CreateInstanceResponse:
      type: object
      properties:
        instance_id: { type: string, format: uuid }
        status: { type: string }
        checkout_url:
          description: Only present for paid tiers – redirect to Stripe Checkout
          type: string
          format: uri

    # Account export bundle: tenant object, instance list and audit log
    # entries. Tenant and audit log shapes are left as untyped objects.
    AccountExport:
      type: object
      properties:
        tenant: { type: object }
        instances:
          type: array
          items: { $ref: "#/components/schemas/Instance" }
        audit_logs:
          type: array
          items: { type: object }

    # Request body for semantic search. No property is marked required at the
    # schema level.
    SearchRequest:
      type: object
      properties:
        # Text embedding, either as raw floats or in quantized form.
        embedding:
          description: "Float64 text embedding vector (e.g. 1536 dims for OpenAI text-embedding-3-small)"
          type: array
          items: { type: number }
        embedding_q8:
          description: "int8-quantized text embedding (§7) – reduces payload by ~8x"
          $ref: "#/components/schemas/QuantizedEmbedding"
        # Image embedding, with the same two encodings.
        image_embedding:
          description: "Float64 image embedding vector for multimodal search"
          type: array
          items: { type: number }
        image_embedding_q8:
          description: "int8-quantized image embedding for multimodal search"
          $ref: "#/components/schemas/QuantizedEmbedding"
        embedding_type:
          description: "Modality of the search: text-only, image-only, or multimodal (combined)"
          type: string
          enum:
            - text
            - image
            - multimodal
          default: text
        namespace:
          description: "Use 'auto' for automatic namespace detection (§4)"
          type: string
          default: "cachly:sem"
        threshold:
          type: number
          format: double
          minimum: 0.0
          maximum: 1.0
          default: 0.85
        use_adaptive_threshold:
          description: "Override threshold with F1-calibrated value (§1)"
          type: boolean
          default: false
        prompt:
          description: "Required when hybrid=true (§3 BM25+Vector)"
          type: string
        hybrid:
          description: "Enable hybrid BM25+Vector RRF search (§3)"
          type: boolean
          default: false

    # Search result; `id` and `similarity` are documented as hit-only fields.
    SearchResponse:
      type: object
      properties:
        found: { type: boolean }
        id:
          description: Only present on cache hit
          type: string
          format: uuid
        similarity:
          description: Cosine similarity score (only on hit)
          type: number
        threshold_used: { type: number }

    # Single entry to index; also the item type of BatchIndexRequest.entries.
    # Only `prompt` is required.
    IndexRequest:
      type: object
      required:
        - prompt
      properties:
        id:
          description: Optional custom ID (auto-generated if absent)
          type: string
          format: uuid
        embedding:
          description: "Float64 text embedding vector"
          type: array
          items: { type: number }
        embedding_q8:
          description: "int8-quantized text embedding"
          $ref: "#/components/schemas/QuantizedEmbedding"
        image_embedding:
          description: "Float64 image embedding vector for multimodal entries"
          type: array
          items: { type: number }
        image_embedding_q8:
          description: "int8-quantized image embedding for multimodal entries"
          $ref: "#/components/schemas/QuantizedEmbedding"
        embedding_type:
          description: "Modality of the entry"
          type: string
          enum:
            - text
            - image
            - multimodal
          default: text
        prompt: { type: string }
        namespace: { type: string, default: "cachly:sem" }
        expires_at:
          description: Optional TTL – mirrors Valkey key expiry
          type: string
          format: date-time

    # Quantized vector: int8 values plus the float range recorded before
    # quantization. All three fields are required.
    QuantizedEmbedding:
      type: object
      required:
        - values
        - min
        - max
      properties:
        values:
          type: array
          items: { type: integer, format: int8 }
        min:
          description: Minimum float64 value before quantization
          type: number
        max:
          description: Maximum float64 value before quantization
          type: number

    # Batch wrapper around IndexRequest, capped at 500 entries.
    BatchIndexRequest:
      type: object
      required:
        - entries
      properties:
        entries:
          type: array
          maxItems: 500
          items: { $ref: "#/components/schemas/IndexRequest" }

    # Two integer counters reported for a batch index call.
    BatchIndexResponse:
      type: object
      properties:
        indexed: { type: integer }
        skipped: { type: integer }

    # Stored semantic cache entry: identity, prompt, namespace, hit counter
    # and timestamps.
    SemanticEntry:
      type: object
      properties:
        id: { type: string, format: uuid }
        prompt: { type: string }
        namespace: { type: string }
        hit_count: { type: integer }
        created_at: { type: string, format: date-time }
        expires_at: { type: string, format: date-time }

    # Feedback payload keyed by `hit_id`, which is the only required field.
    FeedbackRequest:
      type: object
      required:
        - hit_id
      properties:
        hit_id: { type: string, format: uuid }
        similarity: { type: number }
        accepted: { type: boolean }
        namespace: { type: string, default: "cachly:sem" }

    # Threshold value for a namespace together with a `calibrated` flag.
    ThresholdResponse:
      type: object
      properties:
        threshold: { type: number, example: 0.87 }
        namespace: { type: string }
        calibrated: { type: boolean }

    # Body of /v1/sem/{token}/stats. Only `savings` has a detailed shape;
    # namespaces, feedback and near_misses are left as untyped objects.
    StatsResponse:
      type: object
      properties:
        total_entries: { type: integer }
        entries_last_24h: { type: integer }
        total_hits: { type: integer }
        namespaces:
          type: array
          items: { type: object }
        feedback: { type: object }
        near_misses: { type: object }
        savings:
          type: object
          properties:
            total_hits: { type: integer }
            hits_last_24h: { type: integer }
            estimated_total_saved_usd: { type: number }
            estimated_monthly_saved_usd: { type: number }

    # One detected anomaly: category, severity, message and the values that
    # changed.
    Anomaly:
      type: object
      properties:
        type:
          type: string
          enum:
            - hit_rate_drop
            - near_miss_spike
            - efficiency_drop
            - stale_cache
        severity:
          type: string
          enum:
            - info
            - warning
            - critical
        message: { type: string }
        current_value: { type: number }
        previous_value: { type: number }
        change_percent: { type: number }
        detected_at: { type: string, format: date-time }

    # One recommendation: category, priority, title, message and an action
    # hint.
    Recommendation:
      type: object
      properties:
        type:
          type: string
          enum:
            - lower_threshold
            - increase_ttl
            - add_warmup
            - add_tags
            - enable_swr
            - add_feedback
        priority:
          type: string
          enum:
            - high
            - medium
            - low
        title: { type: string }
        message: { type: string }
        action_hint:
          description: SDK code snippet or API call example
          type: string
