diff --git a/apps/docs/content/docs/de/tools/slack.mdx b/apps/docs/content/docs/de/tools/slack.mdx index 6de7714fd2b..01b8b58df63 100644 --- a/apps/docs/content/docs/de/tools/slack.mdx +++ b/apps/docs/content/docs/de/tools/slack.mdx @@ -41,7 +41,7 @@ In Sim ermöglicht die Slack-Integration Ihren Agenten, programmatisch mit Slack - **Reaktionen hinzufügen**: Drücken Sie Stimmungen oder Bestätigungen aus, indem Sie Emoji-Reaktionen zu jeder Nachricht hinzufügen - **Canvases erstellen**: Erstellen und teilen Sie Slack-Canvases (kollaborative Dokumente) direkt in Kanälen, um reichhaltigere Inhalte zu teilen und zu dokumentieren - **Nachrichten lesen**: Lesen Sie aktuelle Nachrichten aus Kanälen, um Überwachung, Berichterstattung oder das Auslösen weiterer Aktionen basierend auf Kanalaktivitäten zu ermöglichen -- **Dateien herunterladen**: Rufen Sie in Slack-Kanälen geteilte Dateien zur Verarbeitung oder Archivierung ab +- **Dateien herunterladen**: Rufen Sie in Slack-Kanälen geteilte Dateien zur Verarbeitung in einem Workflow ab Dies ermöglicht leistungsstarke Automatisierungsszenarien wie das Senden von Benachrichtigungen mit dynamischen Updates, das Verwalten von Gesprächsabläufen mit bearbeitbaren Statusnachrichten, das Bestätigen wichtiger Nachrichten mit Reaktionen und das Sauberhalten von Kanälen durch Entfernen veralteter Bot-Nachrichten. Ihre Agenten können zeitnahe Informationen liefern, Nachrichten aktualisieren, während Workflows fortschreiten, kollaborative Dokumente erstellen oder Teammitglieder benachrichtigen, wenn Aufmerksamkeit benötigt wird. Diese Integration überbrückt die Lücke zwischen Ihren KI-Workflows und der Kommunikation Ihres Teams und stellt sicher, dass jeder mit genauen, aktuellen Informationen auf dem Laufenden bleibt. Durch die Verbindung von Sim mit Slack können Sie Agenten erstellen, die Ihr Team mit relevanten Informationen zur richtigen Zeit auf dem Laufenden halten, die Zusammenarbeit verbessern, indem sie Erkenntnisse automatisch teilen und aktualisieren, und die Notwendigkeit manueller Statusaktualisierungen reduzieren – alles während Sie Ihren bestehenden Slack-Workspace nutzen, in dem Ihr Team bereits kommuniziert. {/* MANUAL-CONTENT-END */} diff --git a/apps/docs/content/docs/en/enterprise/sso.mdx b/apps/docs/content/docs/en/enterprise/sso.mdx index bfccc204122..ee3d53be3ec 100644 --- a/apps/docs/content/docs/en/enterprise/sso.mdx +++ b/apps/docs/content/docs/en/enterprise/sso.mdx @@ -250,6 +250,10 @@ SSO provisioning creates internal organization members. External workspace membe question: "Can I still use email/password login after enabling SSO?", answer: "Yes. Enabling SSO does not disable password-based login. Users can still sign in with their email and password if they have one. Forced SSO (requiring all users on the domain to use SSO) is not yet supported." }, + { + question: "A user already has an account with the same email — what happens when they sign in with SSO?", + answer: "Sim links the SSO identity to the existing account automatically, as long as your identity provider reports the email as verified (email_verified) or the provider is trusted. Most OIDC providers (Okta, Google Workspace, Auth0) assert email_verified, so linking just works. If sign-in fails with 'account not linked' — common with SAML providers that omit the claim — add the provider's ID to SSO_TRUSTED_PROVIDER_IDS on self-hosted and restart." + }, { question: "Who can configure SSO on Sim Cloud?", answer: "Organization owners and admins can configure SSO. You must be on the Enterprise plan." @@ -280,8 +284,25 @@ NEXT_PUBLIC_SSO_ENABLED=true # Required if you want users auto-added to your organization on first SSO sign-in ORGANIZATIONS_ENABLED=true NEXT_PUBLIC_ORGANIZATIONS_ENABLED=true + +# Optional: comma-separated SSO provider IDs to trust for automatic account linking +# (links an SSO sign-in to an existing account with the same email). Needed when your +# IdP does not assert email_verified — typically SAML providers, or OIDC providers that +# omit the claim. Set it to the Provider ID you registered, then restart. +# (If you also keep SSO_PROVIDER_ID in the app's environment, that provider is trusted +# without listing it here.) +SSO_TRUSTED_PROVIDER_IDS=custom-oidc,partner-saml ``` + + When someone signs in with SSO and an account with the same email already exists + (for example, they previously signed up with email/password), Sim links the SSO + identity to that account automatically as long as your IdP reports the email as + verified, or the provider is trusted. If you hit an `account not linked` error, + either confirm your IdP sends `email_verified`, or add the provider's ID to + `SSO_TRUSTED_PROVIDER_IDS` and restart. + + You can register providers through the **Settings UI** (same as cloud) or by running the registration script directly against your database. ### Script-based registration diff --git a/apps/docs/content/docs/en/self-hosting/environment-variables.mdx b/apps/docs/content/docs/en/self-hosting/environment-variables.mdx index 8d481b0d62f..0a11464a8f1 100644 --- a/apps/docs/content/docs/en/self-hosting/environment-variables.mdx +++ b/apps/docs/content/docs/en/self-hosting/environment-variables.mdx @@ -70,6 +70,19 @@ import { Callout } from 'fumadocs-ui/components/callout' | `ALLOWED_LOGIN_EMAILS` | Restrict signups to specific emails (comma-separated) | | `DISABLE_REGISTRATION` | Set to `true` to disable new user signups | +## File Storage + +By default Sim writes uploads to local disk. For production, point it at AWS S3 or Azure Blob. See [Object Storage](/self-hosting/object-storage) for the full setup, bucket layout, and IAM policy. + +| Variable | Description | +|----------|-------------| +| `AWS_REGION` | AWS region — set with `S3_BUCKET_NAME` to enable S3 | +| `AWS_ACCESS_KEY_ID` | AWS access key. Omit to use the instance/IRSA credential chain | +| `AWS_SECRET_ACCESS_KEY` | AWS secret key. Omit to use the instance/IRSA credential chain | +| `S3_BUCKET_NAME` | General workspace files bucket — set with `AWS_REGION` to enable S3 | +| `AZURE_STORAGE_CONTAINER_NAME` | General files container — set with Azure credentials to enable Blob (takes precedence over S3) | +| `AZURE_CONNECTION_STRING` | Azure connection string, or use `AZURE_ACCOUNT_NAME` + `AZURE_ACCOUNT_KEY` | + ## Email Providers Configure one provider — the mailer auto-detects in priority order: **Resend → AWS SES → SMTP → Azure Communication Services**. If none are configured, emails are logged to the console instead. diff --git a/apps/docs/content/docs/en/self-hosting/meta.json b/apps/docs/content/docs/en/self-hosting/meta.json index 805cfb659a1..8ec1af87ec8 100644 --- a/apps/docs/content/docs/en/self-hosting/meta.json +++ b/apps/docs/content/docs/en/self-hosting/meta.json @@ -5,6 +5,7 @@ "docker", "kubernetes", "platforms", + "object-storage", "environment-variables", "troubleshooting" ], diff --git a/apps/docs/content/docs/en/self-hosting/object-storage.mdx b/apps/docs/content/docs/en/self-hosting/object-storage.mdx new file mode 100644 index 00000000000..edeee7b87d5 --- /dev/null +++ b/apps/docs/content/docs/en/self-hosting/object-storage.mdx @@ -0,0 +1,289 @@ +--- +title: Object Storage +description: Configure where Sim stores uploaded files — local disk, AWS S3, or Azure Blob +--- + +import { Tab, Tabs } from 'fumadocs-ui/components/tabs' +import { Callout } from 'fumadocs-ui/components/callout' +import { Step, Steps } from 'fumadocs-ui/components/steps' +import { FAQ } from '@/components/ui/faq' + +Sim stores every uploaded file — knowledge base documents, chat attachments, execution outputs, profile pictures, and more — in object storage. Three backends are supported: + +| Backend | When to use | +|---------|-------------| +| **Local disk** | Single-node Docker, local development, evaluation | +| **[AWS S3](https://aws.amazon.com/s3/)** | Production, especially when running more than one app replica | +| **[Azure Blob](https://learn.microsoft.com/azure/storage/blobs/)** | Production on Azure | + + + Local disk writes to the container's `/uploads` directory. Files are lost when the container is recreated unless that path is on a persistent volume, and they are **not** shared across replicas. For any multi-replica or production deployment, use S3 or Azure Blob. + + +## How the backend is selected + +Sim picks the backend automatically from environment variables — there is no explicit "provider" flag. The logic, in order of precedence: + +1. **Azure Blob** — used if `AZURE_STORAGE_CONTAINER_NAME` is set **and** either (`AZURE_ACCOUNT_NAME` + `AZURE_ACCOUNT_KEY`) or `AZURE_CONNECTION_STRING` is set. +2. **AWS S3** — used if `S3_BUCKET_NAME` **and** `AWS_REGION` are set (and Azure is not configured). +3. **Local disk** — the fallback when neither is configured. + +If both Azure and S3 are configured, **Azure wins**. Set only the variables for the backend you intend to use. + +## Set up AWS S3 + + + + + +### Create the buckets + +Sim separates files into purpose-specific buckets. At minimum you need the general workspace bucket; the rest are created on demand based on which env vars you set. A bucket that isn't configured falls back to the general bucket where the code allows it, but the recommended setup is one bucket per purpose. + +```bash +# Set your region once +export AWS_REGION=us-east-1 + +# Create buckets (names must be globally unique — prefix with your org) +for name in workspace-files knowledge-base execution-files chat-files \ + copilot-files profile-pictures og-images workspace-logos; do + aws s3api create-bucket \ + --bucket "myorg-sim-$name" \ + --region "$AWS_REGION" \ + --create-bucket-configuration LocationConstraint="$AWS_REGION" +done +``` + + + In `us-east-1`, omit the `--create-bucket-configuration` flag — that region rejects an explicit `LocationConstraint`. + + +Keep all buckets **private** (block public access). Sim serves files through short-lived presigned URLs, so the buckets never need public read access. + + + + + +### Grant access with an IAM policy + +Create an IAM policy scoped to your buckets and attach it to the user (or role) Sim runs as: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::myorg-sim-*", + "arn:aws:s3:::myorg-sim-*/*" + ] + } + ] +} +``` + +You then have two ways to supply credentials: + +- **Static keys** — create an IAM user with this policy and set `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`. +- **Instance/role credentials (recommended)** — attach the policy to the EC2 instance role, ECS task role, or EKS IRSA role. Leave `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` unset and Sim falls back to the default AWS credential chain automatically. + + + + + +### Configure environment variables + +Set the region, optionally the credentials, and the bucket names: + +```bash +# Region + credentials +AWS_REGION=us-east-1 +AWS_ACCESS_KEY_ID=AKIA... # omit when using an instance/IRSA role +AWS_SECRET_ACCESS_KEY=... # omit when using an instance/IRSA role + +# Buckets (per purpose) +S3_BUCKET_NAME=myorg-sim-workspace-files +S3_KB_BUCKET_NAME=myorg-sim-knowledge-base +S3_EXECUTION_FILES_BUCKET_NAME=myorg-sim-execution-files +S3_CHAT_BUCKET_NAME=myorg-sim-chat-files +S3_COPILOT_BUCKET_NAME=myorg-sim-copilot-files +S3_PROFILE_PICTURES_BUCKET_NAME=myorg-sim-profile-pictures +S3_OG_IMAGES_BUCKET_NAME=myorg-sim-og-images +S3_WORKSPACE_LOGOS_BUCKET_NAME=myorg-sim-workspace-logos +``` + +Only `AWS_REGION` and `S3_BUCKET_NAME` are strictly required to switch Sim into S3 mode. Add the others so each file type lands in its own bucket. + + + + + +### S3 bucket reference + +| Variable | Stores | Required | +|----------|--------|----------| +| `AWS_REGION` | Region for all buckets | **Yes** (enables S3) | +| `AWS_ACCESS_KEY_ID` | Access key | No (uses credential chain if unset) | +| `AWS_SECRET_ACCESS_KEY` | Secret key | No (uses credential chain if unset) | +| `S3_BUCKET_NAME` | General workspace files | **Yes** (enables S3) | +| `S3_KB_BUCKET_NAME` | Knowledge base documents | Recommended | +| `S3_EXECUTION_FILES_BUCKET_NAME` | Workflow execution files (default: `sim-execution-files`) | Recommended | +| `S3_CHAT_BUCKET_NAME` | Deployed chat assets | Recommended | +| `S3_COPILOT_BUCKET_NAME` | Copilot attachments | Recommended | +| `S3_PROFILE_PICTURES_BUCKET_NAME` | User avatars | Recommended | +| `S3_OG_IMAGES_BUCKET_NAME` | OpenGraph preview images (falls back to `S3_BUCKET_NAME`) | Optional | +| `S3_WORKSPACE_LOGOS_BUCKET_NAME` | Workspace logos (falls back to `S3_BUCKET_NAME`) | Optional | +| `S3_LOGS_BUCKET_NAME` | Stored logs | Optional | +| `S3_ENDPOINT` | Custom endpoint for S3-compatible storage (R2, MinIO, B2) | Optional (AWS S3 if unset) | +| `S3_FORCE_PATH_STYLE` | `true` for path-style addressing (MinIO/Ceph) | Optional (defaults `false`) | + +## Apply the configuration + + + + +Add the storage variables to the `.env` file used by `docker-compose.prod.yml`, then restart: + +```bash +docker compose -f docker-compose.prod.yml up -d +``` + +Because files now live in S3, you no longer depend on a local `/uploads` volume for durability. + + + + +Set the variables under `app.env` (non-secret, e.g. region and bucket names) and supply credentials through a secret. The chart ships a complete example at `helm/sim/examples/values-aws.yaml`: + +```yaml +app: + env: + AWS_REGION: "us-east-1" + S3_BUCKET_NAME: "myorg-sim-workspace-files" + S3_KB_BUCKET_NAME: "myorg-sim-knowledge-base" + S3_EXECUTION_FILES_BUCKET_NAME: "myorg-sim-execution-files" + # ...remaining buckets +``` + +On EKS, prefer **IRSA**: attach the IAM policy to the service account's role and leave the access-key variables unset. + + + + +## Set up Azure Blob + +Azure Blob uses one container per purpose, mirroring the S3 layout. Authenticate with either a connection string or an account name + key. + +```bash +# Credentials — provide ONE of these forms +AZURE_ACCOUNT_NAME=mystorageaccount +AZURE_ACCOUNT_KEY=... +# or +AZURE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net + +# Containers (per purpose) +AZURE_STORAGE_CONTAINER_NAME=workspace-files +AZURE_STORAGE_KB_CONTAINER_NAME=knowledge-base +AZURE_STORAGE_EXECUTION_FILES_CONTAINER_NAME=execution-files +AZURE_STORAGE_CHAT_CONTAINER_NAME=chat-files +AZURE_STORAGE_COPILOT_CONTAINER_NAME=copilot-files +AZURE_STORAGE_PROFILE_PICTURES_CONTAINER_NAME=profile-pictures +AZURE_STORAGE_OG_IMAGES_CONTAINER_NAME=og-images +AZURE_STORAGE_WORKSPACE_LOGOS_CONTAINER_NAME=workspace-logos +``` + +A full Helm example lives at `helm/sim/examples/values-azure.yaml`. + +## Set up an S3-compatible provider (R2, MinIO, B2) + +Sim works with any S3-compatible store by pointing the S3 client at a custom endpoint. Configure it exactly like AWS S3 (buckets, access key, secret), then add `S3_ENDPOINT` — and `S3_FORCE_PATH_STYLE` where the provider requires path-style addressing. Verified with [Cloudflare R2](https://developers.cloudflare.com/r2/), [MinIO](https://min.io/), [Backblaze B2](https://www.backblaze.com/cloud-storage), and [RustFS](https://rustfs.com/). + + + `S3_ENDPOINT` is trusted operator configuration, so it is used as-is — `http://` and private hosts are accepted (no SSRF/HTTPS gate). Don't wire it to untrusted input. + + + + **The endpoint must be reachable from your users' browsers, and the bucket needs CORS.** Uploads use presigned `PUT` requests sent **directly from the browser** to `S3_ENDPOINT` (downloads are proxied back through the app, so they only need server-side reachability). This means: + + - A purely internal endpoint (e.g. `https://minio.internal:9000` that only the app pods can resolve) will let the server start cleanly but **uploads will fail in the browser**. Use an endpoint your users can reach. + - Configure a **CORS policy** on the bucket that allows your Sim origin (`PUT`, `GET`, and the `Authorization` / `Content-Type` / `x-amz-*` headers). This applies to AWS S3 too — R2 and MinIO are no different. + + + + + +[Cloudflare R2](https://developers.cloudflare.com/r2/api/s3/) uses virtual-hosted style (the default) and the region `auto`: + +```bash +AWS_REGION=auto +S3_ENDPOINT=https://.r2.cloudflarestorage.com +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one R2 bucket each +``` + +Leave `S3_FORCE_PATH_STYLE` unset — R2 supports the default virtual-hosted addressing. + + + + +[MinIO](https://min.io/docs/minio/linux/index.html) (and [Ceph RGW](https://docs.ceph.com/en/latest/radosgw/)) need path-style addressing and accept any region string: + +```bash +AWS_REGION=us-east-1 +S3_ENDPOINT=https://minio.example.com # must be reachable from users' browsers, not app-pods-only +S3_FORCE_PATH_STYLE=true +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one bucket each +``` + +`http://` works server-side, but since the browser uploads directly to this endpoint, prefer a TLS endpoint your users can reach (a mixed-content `http://` target will be blocked on an `https://` Sim origin). + + + + +[RustFS](https://rustfs.com/) is a Rust-based, S3-compatible store (a MinIO drop-in). Configure it exactly like MinIO — path-style, any region string, SigV4 access key/secret: + +```bash +AWS_REGION=us-east-1 +S3_ENDPOINT=https://rustfs.example.com # must be reachable from users' browsers +S3_FORCE_PATH_STYLE=true +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +S3_BUCKET_NAME=myorg-sim-workspace-files +# ...remaining S3_*_BUCKET_NAME vars, one bucket each +``` + +The same browser-reachability and CORS requirements apply. + + + + +## Verify it works + +After restarting with the new configuration: + +1. Open the app and upload a document to a knowledge base (or set a profile picture). +2. Confirm an object appears in the corresponding bucket/container. +3. Reload the page — the file should still render (downloads stream back through the app at `/api/files/serve`). + +If uploads fail, check the app logs for credential or permission errors (see [Troubleshooting](/self-hosting/troubleshooting)). + + diff --git a/apps/docs/content/docs/en/tools/slack.mdx b/apps/docs/content/docs/en/tools/slack.mdx index 54065a58200..c231b547b62 100644 --- a/apps/docs/content/docs/en/tools/slack.mdx +++ b/apps/docs/content/docs/en/tools/slack.mdx @@ -23,7 +23,7 @@ With the Slack integration in Sim, you can: - **Create canvases**: Create and share Slack canvases (collaborative documents) directly in channels - **Read messages**: Retrieve recent messages from channels or DMs, with filtering by time range - **Manage channels and users**: List channels, members, and users in your Slack workspace -- **Download files**: Retrieve files shared in Slack channels for processing or archival +- **Download files**: Retrieve files shared in Slack channels for processing within a workflow In Sim, the Slack integration enables your agents to programmatically interact with Slack as part of their workflows. This allows for automation scenarios such as sending notifications with dynamic updates, managing conversational flows with editable status messages, acknowledging important messages with reactions, and maintaining clean channels by removing outdated bot messages. The integration can also be used in trigger mode to start a workflow when a message is sent to a channel. diff --git a/apps/docs/content/docs/es/tools/slack.mdx b/apps/docs/content/docs/es/tools/slack.mdx index 67028cb518c..bf23941f9dc 100644 --- a/apps/docs/content/docs/es/tools/slack.mdx +++ b/apps/docs/content/docs/es/tools/slack.mdx @@ -41,7 +41,7 @@ En Sim, la integración con Slack permite a tus agentes interactuar programátic - **Añadir reacciones**: Expresar sentimiento o reconocimiento añadiendo reacciones con emojis a cualquier mensaje - **Crear lienzos**: Crear y compartir lienzos de Slack (documentos colaborativos) directamente en canales, permitiendo compartir contenido más rico y documentación - **Leer mensajes**: Leer mensajes recientes de canales, permitiendo monitoreo, informes o activación de acciones adicionales basadas en la actividad del canal -- **Descargar archivos**: Recuperar archivos compartidos en canales de Slack para procesamiento o archivo +- **Descargar archivos**: Recuperar archivos compartidos en canales de Slack para procesamiento en un flujo de trabajo Esto permite escenarios de automatización potentes como enviar notificaciones con actualizaciones dinámicas, gestionar flujos conversacionales con mensajes de estado editables, reconocer mensajes importantes con reacciones y mantener canales limpios eliminando mensajes de bot obsoletos. Tus agentes pueden entregar información oportuna, actualizar mensajes a medida que avanzan los flujos de trabajo, crear documentos colaborativos o alertar a miembros del equipo cuando se necesita atención. Esta integración cierra la brecha entre tus flujos de trabajo de IA y la comunicación de tu equipo, asegurando que todos se mantengan informados con información precisa y actualizada. Al conectar Sim con Slack, puedes crear agentes que mantengan a tu equipo actualizado con información relevante en el momento adecuado, mejorar la colaboración compartiendo y actualizando información automáticamente, y reducir la necesidad de actualizaciones manuales de estado, todo mientras aprovechas tu espacio de trabajo de Slack existente donde tu equipo ya se comunica. {/* MANUAL-CONTENT-END */} diff --git a/apps/docs/content/docs/fr/tools/slack.mdx b/apps/docs/content/docs/fr/tools/slack.mdx index 00440dce96f..4a1f0995b54 100644 --- a/apps/docs/content/docs/fr/tools/slack.mdx +++ b/apps/docs/content/docs/fr/tools/slack.mdx @@ -41,7 +41,7 @@ Dans Sim, l'intégration Slack permet à vos agents d'interagir programmatiqueme - **Ajoutez des réactions** : Exprimez un sentiment ou une reconnaissance en ajoutant des réactions emoji à n'importe quel message - **Créez des canevas** : Créez et partagez des canevas Slack (documents collaboratifs) directement dans les canaux, permettant un partage de contenu et une documentation plus riches - **Lisez des messages** : Lisez les messages récents des canaux, permettant la surveillance, le reporting ou le déclenchement d'actions supplémentaires basées sur l'activité du canal -- **Téléchargez des fichiers** : Récupérez les fichiers partagés dans les canaux Slack pour traitement ou archivage +- **Téléchargez des fichiers** : Récupérez les fichiers partagés dans les canaux Slack pour traitement dans un workflow Cela permet des scénarios d'automatisation puissants tels que l'envoi de notifications avec des mises à jour dynamiques, la gestion des flux conversationnels avec des messages de statut modifiables, la reconnaissance de messages importants avec des réactions, et le maintien de canaux propres en supprimant les messages de bot obsolètes. Vos agents peuvent fournir des informations opportunes, mettre à jour des messages au fur et à mesure que les workflows progressent, créer des documents collaboratifs, ou alerter les membres de l'équipe lorsqu'une attention est nécessaire. Cette intégration comble le fossé entre vos workflows d'IA et la communication de votre équipe, garantissant que tout le monde reste informé avec des informations précises et à jour. En connectant Sim avec Slack, vous pouvez créer des agents qui tiennent votre équipe informée avec des informations pertinentes au bon moment, améliorent la collaboration en partageant et en mettant à jour automatiquement des insights, et réduisent le besoin de mises à jour manuelles de statut—tout en tirant parti de votre espace de travail Slack existant où votre équipe communique déjà. {/* MANUAL-CONTENT-END */} diff --git a/apps/realtime/src/env.ts b/apps/realtime/src/env.ts index 083126a60d7..40f5abd898f 100644 --- a/apps/realtime/src/env.ts +++ b/apps/realtime/src/env.ts @@ -3,7 +3,10 @@ import { z } from 'zod' const EnvSchema = z.object({ NODE_ENV: z.enum(['development', 'test', 'production']).default('development'), DATABASE_URL: z.string().url(), - REDIS_URL: z.string().url().optional(), + REDIS_URL: z.preprocess( + (value) => (typeof value === 'string' && value.trim() === '' ? undefined : value), + z.string().url().optional() + ), BETTER_AUTH_URL: z.string().url(), BETTER_AUTH_SECRET: z.string().min(32), INTERNAL_API_SECRET: z.string().min(32), diff --git a/apps/sim/.env.example b/apps/sim/.env.example index ca6012c7bb1..180e9b56e98 100644 --- a/apps/sim/.env.example +++ b/apps/sim/.env.example @@ -71,6 +71,21 @@ API_ENCRYPTION_KEY=your_api_encryption_key # Use `openssl rand -hex 32` to gener # PEOPLEDATALABS_API_KEY_1= # People Data Labs API key #1 # PEOPLEDATALABS_API_KEY_2= # People Data Labs API key #2 +# File Storage (Optional - defaults to local disk; use S3 or Azure Blob for production) +# AWS_REGION=us-east-1 # Required with S3_BUCKET_NAME to enable S3. Use "auto" for Cloudflare R2 +# AWS_ACCESS_KEY_ID= # Omit to use the instance/IRSA credential chain +# AWS_SECRET_ACCESS_KEY= # Omit to use the instance/IRSA credential chain +# S3_BUCKET_NAME= # General workspace files bucket (required with AWS_REGION to enable S3) +# S3_KB_BUCKET_NAME= # Knowledge base documents +# S3_EXECUTION_FILES_BUCKET_NAME= # Workflow execution files +# S3_CHAT_BUCKET_NAME= # Deployed chat assets +# S3_COPILOT_BUCKET_NAME= # Copilot attachments +# S3_PROFILE_PICTURES_BUCKET_NAME= # User profile pictures +# S3_OG_IMAGES_BUCKET_NAME= # OpenGraph preview images (falls back to S3_BUCKET_NAME) +# S3_WORKSPACE_LOGOS_BUCKET_NAME= # Workspace logos (falls back to S3_BUCKET_NAME) +# S3_ENDPOINT= # Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave unset for AWS S3 +# S3_FORCE_PATH_STYLE=true # Required for MinIO/Ceph RGW. Leave unset for AWS S3 and R2 + # Admin API (Optional - for self-hosted GitOps) # ADMIN_API_KEY= # Use `openssl rand -hex 32` to generate. Enables admin API for workflow export/import. # Usage: curl -H "x-admin-key: your_key" https://your-instance/api/v1/admin/workspaces diff --git a/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx b/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx index d289eea58dd..9d28fc7d365 100644 --- a/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx +++ b/apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx @@ -20,13 +20,13 @@ import { BasetenIcon, BrandfetchIcon, ExaAIIcon, + FalIcon, FindymailIcon, FirecrawlIcon, FireworksIcon, GeminiIcon, GoogleIcon, HunterIOIcon, - ImageIcon, JinaAIIcon, LinkupIcon, MistralIcon, @@ -118,7 +118,7 @@ const PROVIDERS: { { id: 'falai', name: 'Fal.ai', - icon: ImageIcon, + icon: FalIcon, description: 'Image and video generation', placeholder: 'Enter your Fal.ai API key', }, diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index b90886d7f71..a786cefd14e 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -1,3 +1,4 @@ +import { trace } from '@opentelemetry/api' import { db, jobExecutionLogs, @@ -6,7 +7,7 @@ import { workflowSchedule, } from '@sim/db' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' @@ -155,7 +156,7 @@ async function applyScheduleUpdate( return updatedRows.length > 0 } catch (error) { - logger.error(`[${requestId}] ${context}`, error) + logger.error(`[${requestId}] ${context}`, error, { cause: describeError(error) }) throw error } } @@ -529,7 +530,13 @@ async function runWorkflowExecution({ } } - logger.error(`[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, error) + logger.error( + `[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, + error, + { + cause: describeError(error), + } + ) if (wasExecutionFinalizedByCore(error, executionId)) { throw error @@ -943,16 +950,30 @@ export async function executeScheduleJob(payload: ScheduleExecutionPayload) { ) } } catch (error: unknown) { - if (isRetryableInfrastructureError(error)) { - await retryScheduleAfterInfraFailure({ payload, requestId, claimedAt, error }) - return - } + try { + if (isRetryableInfrastructureError(error)) { + await retryScheduleAfterInfraFailure({ payload, requestId, claimedAt, error }) + return + } - logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error) - await releaseClaim( - now, - `Failed to release schedule ${payload.scheduleId} after unhandled error` - ) + logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error, { + cause: describeError(error), + }) + await releaseClaim( + now, + `Failed to release schedule ${payload.scheduleId} after unhandled error` + ) + } catch (recoveryError: unknown) { + // A secondary failure during error recovery (e.g. a transient DB blip while + // releasing the claim or scheduling an infra retry) must not fault the run. The + // claim expires on its TTL and the next tick re-claims the schedule. Record the + // exception on the span so it stays visible in traces without faulting the run. + logger.error( + `[${requestId}] Failed to recover schedule ${payload.scheduleId} after error`, + recoveryError + ) + trace.getActiveSpan()?.recordException(toError(recoveryError)) + } } }) } diff --git a/apps/sim/background/webhook-execution.test.ts b/apps/sim/background/webhook-execution.test.ts index 620c073ac0d..7a75dc3e4f9 100644 --- a/apps/sim/background/webhook-execution.test.ts +++ b/apps/sim/background/webhook-execution.test.ts @@ -2,17 +2,110 @@ * @vitest-environment node */ +import { + dbChainMock, + dbChainMockFns, + executionPreprocessingMock, + executionPreprocessingMockFns, + loggingSessionMock, + loggingSessionMockFns, +} from '@sim/testing' import { beforeEach, describe, expect, it, vi } from 'vitest' -const { mockResolveWebhookRecordProviderConfig } = vi.hoisted(() => ({ +const { + mockResolveWebhookRecordProviderConfig, + mockExecuteWorkflowCore, + mockWasExecutionFinalizedByCore, + mockRecordException, + mockGetActiveSpan, +} = vi.hoisted(() => ({ mockResolveWebhookRecordProviderConfig: vi.fn(), + mockExecuteWorkflowCore: vi.fn(), + mockWasExecutionFinalizedByCore: vi.fn(), + mockRecordException: vi.fn(), + mockGetActiveSpan: vi.fn(), })) +vi.mock('@opentelemetry/api', () => ({ + trace: { getActiveSpan: mockGetActiveSpan }, +})) + +vi.mock('@sim/db', () => dbChainMock) +vi.mock('@/lib/execution/preprocessing', () => executionPreprocessingMock) +vi.mock('@/lib/logs/execution/logging-session', () => loggingSessionMock) + vi.mock('@/lib/webhooks/env-resolver', () => ({ resolveWebhookRecordProviderConfig: mockResolveWebhookRecordProviderConfig, })) -import { resolveWebhookExecutionProviderConfig } from './webhook-execution' +vi.mock('@/lib/workflows/executor/execution-core', () => ({ + executeWorkflowCore: mockExecuteWorkflowCore, + wasExecutionFinalizedByCore: mockWasExecutionFinalizedByCore, +})) + +vi.mock('@/lib/core/idempotency', () => ({ + IdempotencyService: { createWebhookIdempotencyKey: vi.fn(() => 'idempotency-key') }, + webhookIdempotency: { + executeWithIdempotency: vi.fn( + (_provider: string, _key: string, operation: () => Promise) => operation() + ), + }, +})) + +vi.mock('@/lib/workflows/persistence/utils', () => ({ + loadDeployedWorkflowState: vi.fn(async () => ({ + blocks: {}, + edges: [], + loops: {}, + parallels: {}, + deploymentVersionId: 'deployment-1', + })), +})) + +vi.mock('@/lib/webhooks/providers', () => ({ + getProviderHandler: vi.fn(() => ({})), +})) + +vi.mock('@/lib/logs/execution/trace-spans/trace-spans', () => ({ + buildTraceSpans: vi.fn(() => ({ traceSpans: [] })), +})) + +vi.mock('@/lib/core/execution-limits', () => ({ + createTimeoutAbortController: vi.fn(() => ({ + signal: new AbortController().signal, + cleanup: vi.fn(), + isTimedOut: () => false, + timeoutMs: 120_000, + })), + getTimeoutErrorMessage: vi.fn(() => 'timed out'), +})) + +vi.mock('@/lib/workflows/executor/pause-persistence', () => ({ + handlePostExecutionPauseState: vi.fn(), +})) + +vi.mock('@/lib/webhooks/attachment-processor', () => ({ + WebhookAttachmentProcessor: class {}, +})) + +vi.mock('@/app/api/auth/oauth/utils', () => ({ + resolveOAuthAccountId: vi.fn(), +})) + +vi.mock('@/executor/execution/snapshot', () => ({ + ExecutionSnapshot: class {}, +})) + +vi.mock('@/tools/safe-assign', () => ({ safeAssign: vi.fn() })) + +vi.mock('@/blocks', () => ({ getBlock: vi.fn(() => null) })) + +vi.mock('@/triggers', () => ({ + getTrigger: vi.fn(), + isTriggerValid: vi.fn(() => false), +})) + +import { executeWebhookJob, resolveWebhookExecutionProviderConfig } from './webhook-execution' describe('resolveWebhookExecutionProviderConfig', () => { beforeEach(() => { @@ -66,3 +159,64 @@ describe('resolveWebhookExecutionProviderConfig', () => { ) }) }) + +describe('executeWebhookJob fault vs error handling', () => { + const payload = { + webhookId: 'webhook-1', + workflowId: 'workflow-1', + userId: 'user-1', + executionId: 'execution-1', + requestId: 'request-1', + provider: 'gmail', + body: { message: 'hello' }, + headers: {}, + path: '/webhook', + workspaceId: 'workspace-1', + } + + beforeEach(() => { + vi.clearAllMocks() + executionPreprocessingMockFns.mockPreprocessExecution.mockResolvedValue({ + success: true, + workflowRecord: { workspaceId: 'workspace-1', userId: 'user-1', variables: {} }, + executionTimeout: { async: 120_000 }, + }) + mockResolveWebhookRecordProviderConfig.mockImplementation(async (record) => record) + dbChainMockFns.limit.mockResolvedValue([{ id: 'webhook-1' }]) + mockGetActiveSpan.mockReturnValue({ recordException: mockRecordException }) + }) + + it('completes the run (does not throw) when the failure was finalized by core', async () => { + mockExecuteWorkflowCore.mockRejectedValue( + new Error('Gmail 2 is missing required fields: Label') + ) + mockWasExecutionFinalizedByCore.mockReturnValue(true) + + const result = await executeWebhookJob(payload) + + expect(result).toMatchObject({ + success: false, + workflowId: 'workflow-1', + executionId: 'execution-1', + provider: 'gmail', + }) + expect(loggingSessionMockFns.mockWaitForPostExecution).toHaveBeenCalled() + // User/workflow errors are already recorded by core — the catch must not re-log them. + expect(loggingSessionMockFns.mockSafeCompleteWithError).not.toHaveBeenCalled() + // The error is still recorded on the run span so it stays visible in traces. + expect(mockRecordException).toHaveBeenCalledWith( + expect.objectContaining({ message: 'Gmail 2 is missing required fields: Label' }) + ) + }) + + it('faults the run (re-throws) when the failure was not finalized by core', async () => { + mockExecuteWorkflowCore.mockRejectedValue(new Error('Workflow state not found')) + mockWasExecutionFinalizedByCore.mockReturnValue(false) + + await expect(executeWebhookJob(payload)).rejects.toThrow('Workflow state not found') + // waitForPostExecution must run on every path so the finalized-by-core signal is always reliable. + expect(loggingSessionMockFns.mockWaitForPostExecution).toHaveBeenCalled() + // Pipeline/infra errors are recorded here before re-throwing to fault the trigger.dev run. + expect(loggingSessionMockFns.mockSafeCompleteWithError).toHaveBeenCalled() + }) +}) diff --git a/apps/sim/background/webhook-execution.ts b/apps/sim/background/webhook-execution.ts index 1753813d849..41048f9208b 100644 --- a/apps/sim/background/webhook-execution.ts +++ b/apps/sim/background/webhook-execution.ts @@ -1,3 +1,4 @@ +import { trace } from '@opentelemetry/api' import { db } from '@sim/db' import { account, webhook } from '@sim/db/schema' import { createLogger, runWithRequestContext } from '@sim/logger' @@ -616,8 +617,27 @@ async function executeWebhookJobInternal( provider: payload.provider, }) + // The finalized flag is set inside a fire-and-forget post-execution promise; await it so the + // signal is reliable and the failure is fully persisted before we decide fault vs error. + await loggingSession.waitForPostExecution() + + // A failure inside workflow execution (block error, provider 4xx, missing required field, etc.) + // is finalized by core and already recorded in the execution logs. That is a user/workflow error, + // not a trigger.dev job fault — complete the run normally so we don't fire a false alert. Errors + // that were not finalized came from the webhook pipeline itself, so we re-throw to fault below. if (wasExecutionFinalizedByCore(error, executionId)) { - throw error + // Record the exception on the run span so it stays visible in traces without + // marking the span as ERROR — that status is what faults the trigger.dev run. + trace.getActiveSpan()?.recordException(toError(error)) + + return { + success: false, + workflowId: payload.workflowId, + executionId, + output: hasExecutionResult(error) ? error.executionResult.output : {}, + executedAt: new Date().toISOString(), + provider: payload.provider, + } } try { diff --git a/apps/sim/background/workflow-column-execution.ts b/apps/sim/background/workflow-column-execution.ts index 53c337842fa..9f617cd5144 100644 --- a/apps/sim/background/workflow-column-execution.ts +++ b/apps/sim/background/workflow-column-execution.ts @@ -1,12 +1,13 @@ import { db } from '@sim/db' import { workflow as workflowTable } from '@sim/db/schema' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { sleep } from '@sim/utils/helpers' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' import { eq } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { createTimeoutAbortController } from '@/lib/core/execution-limits' import { RateLimiter } from '@/lib/core/rate-limiter/rate-limiter' import { preprocessExecution } from '@/lib/execution/preprocessing' @@ -597,8 +598,8 @@ async function runWorkflowAndWriteTerminal( }) .catch((err) => { logger.warn( - `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId}):`, - err + `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId})`, + { cause: describeError(err), retryable: isRetryableInfrastructureError(err) } ) }) } @@ -720,7 +721,12 @@ async function runWorkflowAndWriteTerminal( const message = toError(err).message logger.error( `Workflow group cell execution failed (table=${tableId} row=${rowId} group=${groupId})`, - { error: message, executionId } + { + error: message, + executionId, + cause: describeError(err), + retryable: isRetryableInfrastructureError(err), + } ) terminalWritten = true await writeChain.catch(() => {}) @@ -735,7 +741,11 @@ async function runWorkflowAndWriteTerminal( blockErrors, }) } catch (writeErr) { - logger.error('Also failed to write error state', { error: toError(writeErr).message }) + logger.error('Also failed to write error state', { + error: toError(writeErr).message, + cause: describeError(writeErr), + retryable: isRetryableInfrastructureError(writeErr), + }) } return 'error' } diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index fcdab73224d..49d1d7ddf38 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -3365,7 +3365,18 @@ export const OllamaIcon = (props: SVGProps) => ( xmlns='http://www.w3.org/2000/svg' > Ollama - + + +) +export const FalIcon = (props: SVGProps) => ( + + Fal + ) export function ShieldCheckIcon(props: SVGProps) { @@ -3982,16 +3993,16 @@ export function FireworksIcon(props: SVGProps) { return ( ) diff --git a/apps/sim/connectors/gitlab/gitlab.ts b/apps/sim/connectors/gitlab/gitlab.ts index f3fe3829585..66e71796d60 100644 --- a/apps/sim/connectors/gitlab/gitlab.ts +++ b/apps/sim/connectors/gitlab/gitlab.ts @@ -1,6 +1,7 @@ import { createLogger } from '@sim/logger' import { getErrorMessage, toError } from '@sim/utils/errors' import { GitLabIcon } from '@/components/icons' +import { isSameOrigin } from '@/lib/core/utils/validation' import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' import { computeContentHash, joinTagArray, parseTagDate } from '@/connectors/utils' @@ -9,18 +10,120 @@ const logger = createLogger('GitLabConnector') const DEFAULT_HOST = 'gitlab.com' const PAGE_SIZE = 100 +/** Max repository file size to index. Larger blobs are skipped. */ +const MAX_FILE_SIZE = 10 * 1024 * 1024 +/** Bytes sniffed for NUL when detecting binary files (matches git's heuristic). */ +const BINARY_SNIFF_BYTES = 8000 /** * Prefix encoded into each document's externalId so getDocument can route to the - * correct GitLab resource. Wiki pages are addressed by slug, issues by iid. + * correct GitLab resource. Wiki pages are addressed by slug, issues by iid, and + * repository files by their repo-relative path. */ const WIKI_PREFIX = 'wiki:' const ISSUE_PREFIX = 'issue:' +const FILE_PREFIX = 'file:' /** - * Selects which GitLab resources to sync. + * Selects which GitLab resources to sync. `repo` = repository files (code/docs), + * `all` = repo + wiki + issues. `both` is retained for backward compatibility and + * means wiki + issues (no repository files). */ -type ContentTypeChoice = 'wiki' | 'issues' | 'both' +type ContentTypeChoice = 'repo' | 'wiki' | 'issues' | 'both' | 'all' + +/** Listing phases, walked in order: repository files ➜ wiki ➜ issues. */ +type SyncPhase = 'repo' | 'wiki' | 'issues' + +interface GitLabTreeEntry { + id: string + name: string + type: 'blob' | 'tree' + path: string + mode?: string +} + +interface GitLabFile { + file_path?: string + blob_id?: string + content?: string + encoding?: string + size?: number +} + +/** + * Heuristic binary detection: a NUL byte in the first 8 KB marks the file as + * binary, matching `git diff` / `git grep` semantics. + */ +function isBinaryBuffer(buf: Buffer): boolean { + const len = Math.min(buf.length, BINARY_SNIFF_BYTES) + for (let i = 0; i < len; i++) { + if (buf[i] === 0) return true + } + return false +} + +/** + * Parses a comma-separated extension filter into a normalized set (leading dot, + * lowercased). Returns null when no filter is configured (accept all files). + */ +function parseExtensions(raw: unknown): Set | null { + const trimmed = typeof raw === 'string' ? raw.trim() : '' + if (!trimmed) return null + const exts = trimmed + .split(',') + .map((e) => e.trim().toLowerCase()) + .filter(Boolean) + .map((e) => (e.startsWith('.') ? e : `.${e}`)) + return exts.length > 0 ? new Set(exts) : null +} + +/** + * Returns true when the file path matches the extension filter (or no filter set). + */ +function matchesExtension(filePath: string, extSet: Set | null): boolean { + if (!extSet) return true + const lastDot = filePath.lastIndexOf('.') + if (lastDot === -1) return false + return extSet.has(filePath.slice(lastDot).toLowerCase()) +} + +/** + * Extracts the full `rel="next"` URL from a keyset-pagination `Link` response + * header. GitLab's guidance is to follow this link verbatim rather than rebuild + * the URL, so the connector stores and re-fetches it as-is — this is robust to + * whichever continuation parameter the endpoint uses (`page_token`, `cursor`, + * `id_after`, …). Returns undefined when there is no next page. + */ +function parseNextLink(linkHeader: string | null): string | undefined { + if (!linkHeader) return undefined + for (const part of linkHeader.split(',')) { + if (!/rel="?next"?/i.test(part)) continue + const urlMatch = part.match(/<([^>]+)>/) + if (urlMatch) return urlMatch[1] + } + return undefined +} + +/** + * Returns the ordered list of active sync phases for a content-type choice. + */ +function activePhases(choice: ContentTypeChoice): SyncPhase[] { + const phases: SyncPhase[] = [] + if (choice === 'repo' || choice === 'all') phases.push('repo') + if (choice === 'wiki' || choice === 'both' || choice === 'all') phases.push('wiki') + if (choice === 'issues' || choice === 'both' || choice === 'all') phases.push('issues') + return phases +} + +/** + * Returns the phase following `current` for a choice, or undefined when `current` + * is the last active phase. + */ +function nextPhase(current: SyncPhase, choice: ContentTypeChoice): SyncPhase | undefined { + const phases = activePhases(choice) + const idx = phases.indexOf(current) + return idx >= 0 && idx + 1 < phases.length ? phases[idx + 1] : undefined +} interface GitLabWikiPage { slug: string @@ -57,6 +160,7 @@ interface GitLabProject { id: number path_with_namespace?: string web_url?: string + default_branch?: string wiki_access_level?: string wiki_enabled?: boolean } @@ -94,7 +198,15 @@ function encodeProjectId(project: unknown): string { */ function getContentTypeChoice(sourceConfig: Record): ContentTypeChoice { const value = typeof sourceConfig.contentTypes === 'string' ? sourceConfig.contentTypes : 'both' - if (value === 'wiki' || value === 'issues') return value + if ( + value === 'repo' || + value === 'wiki' || + value === 'issues' || + value === 'both' || + value === 'all' + ) { + return value + } return 'both' } @@ -136,6 +248,118 @@ function buildIssueContentHash(projectId: string, iid: number, updatedAt: string return `gitlab:issue:${projectId}:${iid}:${updatedAt}` } +/** + * Builds the change-detection hash for a repository file. The git blob SHA is + * content-addressable, so it changes exactly when the file content changes — and + * it is available both on the tree listing (`tree entry.id`) and the file fetch + * (`blob_id`), so the stub and hydrated document hash identically without a + * content fetch during listing. + */ +function buildFileContentHash(projectId: string, path: string, blobSha: string): string { + return `gitlab:file:${projectId}:${path}:${blobSha}` +} + +/** + * Builds the web UI URL for a repository file at a given ref. + */ +function buildFileSourceUrl( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + path: string +): string { + const encodedPath = path.split('/').map(encodeURIComponent).join('/') + if (projectPath) { + const encodedRef = ref.split('/').map(encodeURIComponent).join('/') + return `https://${host}/${projectPath}/-/blob/${encodedRef}/${encodedPath}` + } + return `${apiBase}/projects/${encodedProject}/repository/files/${encodeURIComponent(path)}/raw?ref=${encodeURIComponent(ref)}` +} + +/** + * Builds a deferred stub for a repository file from a tree entry. Content is empty + * and fetched lazily via getDocument for new/changed files only. + */ +function treeEntryToStub( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + entry: GitLabTreeEntry +): ExternalDocument { + return { + externalId: `${FILE_PREFIX}${entry.path}`, + title: entry.name || entry.path, + content: '', + contentDeferred: true, + mimeType: 'text/plain', + sourceUrl: buildFileSourceUrl(apiBase, encodedProject, host, projectPath, ref, entry.path), + contentHash: buildFileContentHash(encodedProject, entry.path, entry.id), + metadata: { + contentType: 'file', + title: entry.name || entry.path, + path: entry.path, + }, + } +} + +/** + * Builds a repository-file document from a fetched (non-raw) file response. Returns + * null for binary, oversized, or empty files so they are not indexed. + */ +function fileToDocument( + apiBase: string, + encodedProject: string, + host: string, + projectPath: string, + ref: string, + path: string, + file: GitLabFile +): ExternalDocument | null { + const blobSha = file.blob_id?.trim() + if (!blobSha) return null + + if (typeof file.size === 'number' && file.size > MAX_FILE_SIZE) { + logger.info('Skipping oversized GitLab file', { path, size: file.size }) + return null + } + + const raw = typeof file.content === 'string' ? file.content : '' + const buffer = file.encoding === 'base64' ? Buffer.from(raw, 'base64') : Buffer.from(raw, 'utf8') + if (isBinaryBuffer(buffer)) { + logger.info('Skipping binary GitLab file', { path }) + return null + } + if (buffer.byteLength > MAX_FILE_SIZE) { + logger.info('Skipping oversized GitLab file', { path, size: buffer.byteLength }) + return null + } + + const content = buffer.toString('utf8') + const title = path.split('/').pop() || path + const body = composeBody(title, content) + if (!body.trim()) return null + + return { + externalId: `${FILE_PREFIX}${path}`, + title, + content: body, + contentDeferred: false, + mimeType: 'text/plain', + sourceUrl: buildFileSourceUrl(apiBase, encodedProject, host, projectPath, ref, path), + contentHash: buildFileContentHash(encodedProject, path, blobSha), + metadata: { + contentType: 'file', + title, + path, + size: buffer.byteLength, + }, + } +} + /** * Composes the document body as "Title\n\n". */ @@ -251,30 +475,73 @@ async function fetchProject( * issues via the X-Next-Page header. */ interface CursorState { - phase: 'wiki' | 'issues' + phase: SyncPhase issuePage: number + /** Full `rel="next"` URL for the repository-tree keyset page to fetch next. */ + fileNextUrl?: string } function encodeCursor(state: CursorState): string { return Buffer.from(JSON.stringify(state), 'utf8').toString('base64url') } -function decodeCursor(cursor: string | undefined, initialPhase: 'wiki' | 'issues'): CursorState { +function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): CursorState { if (!cursor) return { phase: initialPhase, issuePage: 1 } try { const parsed = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf8')) as Partial<{ - phase: 'wiki' | 'issues' + phase: SyncPhase issuePage: number + fileNextUrl: string }> + const phase: SyncPhase = + parsed.phase === 'repo' || parsed.phase === 'issues' || parsed.phase === 'wiki' + ? parsed.phase + : initialPhase return { - phase: parsed.phase === 'issues' ? 'issues' : 'wiki', + phase, issuePage: Number(parsed.issuePage) > 0 ? Number(parsed.issuePage) : 1, + fileNextUrl: typeof parsed.fileNextUrl === 'string' ? parsed.fileNextUrl : undefined, } } catch { return { phase: initialPhase, issuePage: 1 } } } +/** + * Resolves the git ref (branch/tag) to sync repository files from. Uses the + * user-configured `ref` when set, otherwise the project's default branch, which + * is cached on syncContext to avoid repeat lookups across pages and getDocument. + */ +async function resolveRef( + sourceConfig: Record, + syncContext: Record | undefined, + apiBase: string, + encodedProject: string, + accessToken: string +): Promise { + const configured = typeof sourceConfig.ref === 'string' ? sourceConfig.ref.trim() : '' + if (configured) return configured + + const cached = syncContext?.defaultBranch as string | undefined + if (cached) return cached + + const response = await fetchProject(apiBase, encodedProject, accessToken) + if (response.ok) { + const project = (await response.json()) as GitLabProject + const branch = project.default_branch?.trim() || 'main' + if (syncContext) { + syncContext.defaultBranch = branch + if (project.path_with_namespace) syncContext.projectPath = project.path_with_namespace + } + return branch + } + logger.warn('Failed to fetch GitLab project for default branch; falling back to "main"', { + project: encodedProject, + status: response.status, + }) + return 'main' +} + /** * Applies the optional maxItems cap to a batch, tracking the running total in * syncContext and flagging `listingCapped` when the cap is hit. @@ -298,7 +565,8 @@ function applyMaxItemsCap( export const gitlabConnector: ConnectorConfig = { id: 'gitlab', name: 'GitLab', - description: 'Sync wiki pages and issues from a GitLab project into your knowledge base', + description: + 'Sync repository files, wiki pages, and issues from a GitLab project into your knowledge base', version: '1.0.0', icon: GitLabIcon, @@ -310,8 +578,9 @@ export const gitlabConnector: ConnectorConfig = { /** * Incremental sync applies to issues only (via the `updated_after` filter - * derived from lastSyncAt). Wikis lack a change timestamp, so they are always - * re-listed in full and reconciled by content hash. + * derived from lastSyncAt). Wikis and repository files lack a change timestamp + * on listing, so they are always re-listed in full and reconciled by content + * hash (wiki: content digest, file: git blob SHA) — unchanged docs are skipped. */ supportsIncrementalSync: true, @@ -338,10 +607,42 @@ export const gitlabConnector: ConnectorConfig = { type: 'dropdown', required: false, options: [ + { label: 'Code, Wiki & Issues', id: 'all' }, + { label: 'Code (repository files) only', id: 'repo' }, { label: 'Wiki only', id: 'wiki' }, { label: 'Issues only', id: 'issues' }, - { label: 'Both', id: 'both' }, + { label: 'Wiki & Issues', id: 'both' }, ], + description: 'Which content to index. "Code" syncs repository files (READMEs, docs, source).', + }, + { + id: 'ref', + title: 'Branch', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'Default branch', + description: 'Branch or tag to sync repository files from. Applies only when syncing Code.', + }, + { + id: 'pathPrefix', + title: 'Path Filter', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'e.g. docs/', + description: + 'Only sync repository files under this path prefix. Applies only when syncing Code.', + }, + { + id: 'fileExtensions', + title: 'File Extensions', + type: 'short-input', + required: false, + mode: 'advanced', + placeholder: 'e.g. .md, .txt, .mdx', + description: + 'Only sync repository files with these extensions (comma-separated). Leave blank for all text files. Applies only when syncing Code.', }, { id: 'issueState', @@ -398,13 +699,13 @@ export const gitlabConnector: ConnectorConfig = { const choice = getContentTypeChoice(sourceConfig) const maxItems = sourceConfig.maxItems ? Number(sourceConfig.maxItems) : 0 - const wantsWiki = choice === 'wiki' || choice === 'both' - const wantsIssues = choice === 'issues' || choice === 'both' - if (!encodedProject) { throw new Error('Project is required') } + const phases = activePhases(choice) + if (phases.length === 0) return { documents: [], hasMore: false } + let projectPath = (syncContext?.projectPath as string) ?? '' if (!projectPath && syncContext) { const projectResponse = await fetchProject(apiBase, encodedProject, accessToken) @@ -412,13 +713,101 @@ export const gitlabConnector: ConnectorConfig = { const project = (await projectResponse.json()) as GitLabProject projectPath = project.path_with_namespace ?? '' syncContext.projectPath = projectPath + if (project.default_branch && !syncContext.defaultBranch) { + syncContext.defaultBranch = project.default_branch + } } } - const initialPhase: 'wiki' | 'issues' = wantsWiki ? 'wiki' : 'issues' - const state = decodeCursor(cursor, initialPhase) + let state = decodeCursor(cursor, phases[0]) + if (!phases.includes(state.phase)) state = { phase: phases[0], issuePage: 1 } + + /** Cursor that advances to the first page of the phase after `current`, if any. */ + const advance = (current: SyncPhase): { nextCursor?: string; hasMore: boolean } => { + const next = nextPhase(current, choice) + if (!next) return { hasMore: false } + return { nextCursor: encodeCursor({ phase: next, issuePage: 1 }), hasMore: true } + } + + if (state.phase === 'repo') { + const ref = await resolveRef(sourceConfig, syncContext, apiBase, encodedProject, accessToken) + const extSet = parseExtensions(sourceConfig.fileExtensions) + const rawPrefix = + typeof sourceConfig.pathPrefix === 'string' ? sourceConfig.pathPrefix.trim() : '' + const pathPrefix = rawPrefix && !rawPrefix.endsWith('/') ? `${rawPrefix}/` : rawPrefix + + const treeParams = new URLSearchParams({ + ref, + recursive: 'true', + per_page: String(PAGE_SIZE), + pagination: 'keyset', + }) + if (state.fileNextUrl && !isSameOrigin(state.fileNextUrl, apiBase)) { + throw new Error('GitLab pagination cursor points to an unexpected host') + } + const url = + state.fileNextUrl ?? + `${apiBase}/projects/${encodedProject}/repository/tree?${treeParams.toString()}` + logger.info('Listing GitLab repository files', { + host, + project: encodedProject, + ref, + continued: Boolean(state.fileNextUrl), + }) + + const response = await fetchWithRetry(url, { + method: 'GET', + headers: authHeaders(accessToken), + }) + + if (!response.ok) { + if (response.status === 404 || response.status === 403) { + logger.warn('GitLab repository tree unavailable; skipping files', { + host, + project: encodedProject, + ref, + status: response.status, + }) + const adv = advance('repo') + return { documents: [], nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } + const errorText = await response.text().catch(() => '') + logger.error('Failed to list GitLab repository tree', { + status: response.status, + error: errorText.slice(0, 500), + }) + throw new Error(`Failed to list GitLab repository tree: ${response.status}`) + } + + const entries = (await response.json()) as GitLabTreeEntry[] + const documents: ExternalDocument[] = [] + for (const entry of entries) { + if (entry.type !== 'blob' || !entry.path) continue + if (pathPrefix && !entry.path.startsWith(pathPrefix)) continue + if (!matchesExtension(entry.path, extSet)) continue + documents.push(treeEntryToStub(apiBase, encodedProject, host, projectPath, ref, entry)) + } + + const { documents: capped, capped: hitLimit } = applyMaxItemsCap( + documents, + maxItems, + syncContext + ) + if (hitLimit) return { documents: capped, hasMore: false } + + const nextLink = parseNextLink(response.headers.get('link')) + if (nextLink) { + return { + documents: capped, + nextCursor: encodeCursor({ phase: 'repo', issuePage: 1, fileNextUrl: nextLink }), + hasMore: true, + } + } + const adv = advance('repo') + return { documents: capped, nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } - if (state.phase === 'wiki' && wantsWiki) { + if (state.phase === 'wiki') { const url = `${apiBase}/projects/${encodedProject}/wikis?with_content=1` logger.info('Listing GitLab wiki pages', { host, project: encodedProject }) @@ -428,6 +817,15 @@ export const gitlabConnector: ConnectorConfig = { }) if (!response.ok) { + if (response.status === 403 || response.status === 404) { + logger.warn('GitLab wiki unavailable; skipping wiki phase', { + host, + project: encodedProject, + status: response.status, + }) + const adv = advance('wiki') + return { documents: [], nextCursor: adv.nextCursor, hasMore: adv.hasMore } + } const errorText = await response.text().catch(() => '') logger.error('Failed to list GitLab wiki pages', { status: response.status, @@ -450,18 +848,15 @@ export const gitlabConnector: ConnectorConfig = { syncContext ) - if (hitLimit || !wantsIssues) { + if (hitLimit) { return { documents: capped, hasMore: false } } - return { - documents: capped, - nextCursor: encodeCursor({ phase: 'issues', issuePage: 1 }), - hasMore: true, - } + const adv = advance('wiki') + return { documents: capped, nextCursor: adv.nextCursor, hasMore: adv.hasMore } } - if (wantsIssues) { + if (state.phase === 'issues') { const params = new URLSearchParams({ per_page: String(PAGE_SIZE), page: String(state.issuePage), @@ -586,6 +981,32 @@ export const gitlabConnector: ConnectorConfig = { return issueToDocument(encodedProject, host, projectPath, issue) } + if (externalId.startsWith(FILE_PREFIX)) { + const path = externalId.slice(FILE_PREFIX.length) + if (!path) return null + + const ref = await resolveRef( + sourceConfig, + syncContext, + apiBase, + encodedProject, + accessToken + ) + const url = `${apiBase}/projects/${encodedProject}/repository/files/${encodeURIComponent(path)}?ref=${encodeURIComponent(ref)}` + const response = await fetchWithRetry(url, { + method: 'GET', + headers: authHeaders(accessToken), + }) + + if (!response.ok) { + if (response.status === 404) return null + throw new Error(`Failed to fetch GitLab file: ${response.status}`) + } + + const file = (await response.json()) as GitLabFile + return fileToDocument(apiBase, encodedProject, host, projectPath, ref, path, file) + } + return null } catch (error) { logger.warn(`Failed to fetch GitLab document ${externalId}`, { @@ -634,7 +1055,7 @@ export const gitlabConnector: ConnectorConfig = { const projectRecord = (await response.json()) as GitLabProject - if (choice === 'wiki' || choice === 'both') { + if (activePhases(choice).includes('wiki')) { const accessLevel = projectRecord.wiki_access_level const enabled = accessLevel != null ? accessLevel !== 'disabled' : projectRecord.wiki_enabled !== false @@ -642,7 +1063,28 @@ export const gitlabConnector: ConnectorConfig = { if (choice === 'wiki') { return { valid: false, error: 'The wiki feature is disabled for this project' } } - logger.warn('Wiki feature disabled; only issues will sync', { project }) + logger.warn('Wiki feature disabled; it will be skipped', { project }) + } + } + + const userRef = typeof sourceConfig.ref === 'string' ? sourceConfig.ref.trim() : '' + if (userRef && activePhases(choice).includes('repo')) { + const refResponse = await fetchWithRetry( + `${apiBase}/projects/${encodedProject}/repository/commits/${encodeURIComponent(userRef)}`, + { method: 'GET', headers: authHeaders(accessToken) }, + VALIDATE_RETRY_OPTIONS + ) + if (refResponse.status === 404) { + return { + valid: false, + error: `Branch, tag, or commit "${userRef}" not found in project "${project}"`, + } + } + if (!refResponse.ok) { + return { + valid: false, + error: `Cannot verify ref "${userRef}": ${refResponse.status}`, + } } } @@ -659,16 +1101,17 @@ export const gitlabConnector: ConnectorConfig = { { id: 'author', displayName: 'Author', fieldType: 'text' }, { id: 'labels', displayName: 'Labels', fieldType: 'text' }, { id: 'milestone', displayName: 'Milestone', fieldType: 'text' }, + { id: 'path', displayName: 'File Path', fieldType: 'text' }, + { id: 'size', displayName: 'File Size (bytes)', fieldType: 'number' }, { id: 'createdAt', displayName: 'Created At', fieldType: 'date' }, { id: 'updatedAt', displayName: 'Updated At', fieldType: 'date' }, ], /** - * Maps document metadata to tag slots. The `contentType` and `title` tags - * apply to both wikis and issues. The remaining tags (state, author, labels, - * milestone, createdAt, updatedAt) are issue-only — wiki pages expose none of - * them in the REST API, so wiki documents leave those metadata fields empty - * and the type/empty guards below skip them. + * Maps document metadata to tag slots. `contentType` and `title` apply to every + * document type. `state`/`author`/`labels`/`milestone`/`createdAt`/`updatedAt` + * are issue-only and `path`/`size` are repository-file-only; each document type + * leaves the others' fields empty and the type/empty guards below skip them. */ mapTags: (metadata: Record): Record => { const result: Record = {} @@ -693,6 +1136,15 @@ export const gitlabConnector: ConnectorConfig = { result.milestone = metadata.milestone } + if (typeof metadata.path === 'string' && metadata.path.trim()) { + result.path = metadata.path + } + + if (metadata.size != null) { + const num = Number(metadata.size) + if (!Number.isNaN(num)) result.size = num + } + const createdAt = parseTagDate(metadata.createdAt) if (createdAt) result.createdAt = createdAt diff --git a/apps/sim/lib/auth/auth.ts b/apps/sim/lib/auth/auth.ts index 76465b45bec..cd27bbd5e2d 100644 --- a/apps/sim/lib/auth/auth.ts +++ b/apps/sim/lib/auth/auth.ts @@ -71,6 +71,7 @@ import { isRegistrationDisabled, isSignupEmailValidationEnabled, isSignupMxValidationEnabled, + isSsoEnabled, } from '@/lib/core/config/feature-flags' import { PlatformEvents } from '@/lib/core/telemetry' import { getBaseUrl, isLocalhostUrl, parseOriginList } from '@/lib/core/utils/urls' @@ -164,6 +165,20 @@ const additionalTrustedOrigins = parseOriginList(env.TRUSTED_ORIGINS, (value) => logger.warn('Ignoring invalid entry in TRUSTED_ORIGINS', { value }) ) +/** + * SSO provider IDs to trust for automatic account linking when an SSO sign-in + * matches an existing account's email. Includes `SSO_PROVIDER_ID` when it is set + * in the app environment, plus any IDs from `SSO_TRUSTED_PROVIDER_IDS`. Empty when + * SSO is disabled, so `trustedProviders` is unchanged for non-SSO deployments. + * Resolved once at startup; `trustEmailVerified` on the SSO plugin handles IdPs + * that assert `email_verified` live, so this is only needed for IdPs that omit it. + */ +const additionalTrustedSsoProviders = isSsoEnabled + ? [env.SSO_PROVIDER_ID, ...(env.SSO_TRUSTED_PROVIDER_IDS?.split(',') ?? [])] + .map((id) => id?.trim()) + .filter((id): id is string => Boolean(id)) + : [] + if (env.NODE_ENV === 'production') { const baseUrl = getBaseUrl() if (isLocalhostUrl(baseUrl)) { @@ -685,6 +700,7 @@ export const auth = betterAuth({ 'calcom', 'docusign', ...SSO_TRUSTED_PROVIDERS, + ...additionalTrustedSsoProviders, ], }, }, @@ -2916,6 +2932,12 @@ export const auth = betterAuth({ ...(env.SSO_ENABLED ? [ sso({ + /** + * Honor the IdP's verified-email claim. Without this the SSO plugin + * forces `emailVerified: false`, blocking automatic linking of an SSO + * login to an existing same-email account (Better Auth "account not linked"). + */ + trustEmailVerified: true, organizationProvisioning: { disabled: false, defaultRole: 'member', diff --git a/apps/sim/lib/copilot/persistence/tool-confirm/index.ts b/apps/sim/lib/copilot/persistence/tool-confirm/index.ts index cd13ee31057..09f5fd2ee93 100644 --- a/apps/sim/lib/copilot/persistence/tool-confirm/index.ts +++ b/apps/sim/lib/copilot/persistence/tool-confirm/index.ts @@ -9,16 +9,24 @@ import { import { getAsyncToolCalls } from '@/lib/copilot/async-runs/repository' import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1' import { getRedisClient } from '@/lib/core/config/redis' -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' const logger = createLogger('CopilotOrchestratorPersistence') const TOOL_CONFIRMATION_TTL_SECONDS = 60 * 10 const toolConfirmationKey = (toolCallId: string) => `copilot:tool-confirmation:${toolCallId}` -const toolConfirmationChannel = createPubSubChannel({ - channel: 'copilot:tool-confirmation', - label: 'CopilotToolConfirmation', -}) +type ToolConfirmGlobal = typeof globalThis & { + _toolConfirmationChannel?: PubSubChannel +} + +const _g = globalThis as ToolConfirmGlobal +if (!_g._toolConfirmationChannel) { + _g._toolConfirmationChannel = createPubSubChannel({ + channel: 'copilot:tool-confirmation', + label: 'CopilotToolConfirmation', + }) +} +const toolConfirmationChannel = _g._toolConfirmationChannel /** * Get a tool call confirmation state from the durable async tool row. diff --git a/apps/sim/lib/copilot/tasks.ts b/apps/sim/lib/copilot/tasks.ts index db6594ebf28..252b82a61ae 100644 --- a/apps/sim/lib/copilot/tasks.ts +++ b/apps/sim/lib/copilot/tasks.ts @@ -7,7 +7,7 @@ * Channel: `task:status_changed` */ -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' interface TaskStatusEvent { workspaceId: string @@ -16,10 +16,20 @@ interface TaskStatusEvent { streamId?: string } -const channel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ channel: 'task:status_changed', label: 'task' }) +type TaskPubSubGlobal = typeof globalThis & { + _taskStatusChannel?: PubSubChannel | null +} + +const g = globalThis as TaskPubSubGlobal + +if (!('_taskStatusChannel' in g)) { + g._taskStatusChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ channel: 'task:status_changed', label: 'task' }) +} + +const channel = g._taskStatusChannel export const taskPubSub = channel ? { diff --git a/apps/sim/lib/copilot/tool-executor/executor.test.ts b/apps/sim/lib/copilot/tool-executor/executor.test.ts index adeb6ce48da..61733f43a95 100644 --- a/apps/sim/lib/copilot/tool-executor/executor.test.ts +++ b/apps/sim/lib/copilot/tool-executor/executor.test.ts @@ -5,9 +5,10 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/execution/constants' -const { isKnownTool, isSimExecuted } = vi.hoisted(() => ({ +const { isKnownTool, isSimExecuted, isClientExecuted } = vi.hoisted(() => ({ isKnownTool: vi.fn(), isSimExecuted: vi.fn(), + isClientExecuted: vi.fn(), })) const { executeAppTool } = vi.hoisted(() => ({ @@ -17,17 +18,19 @@ const { executeAppTool } = vi.hoisted(() => ({ vi.mock('./router', () => ({ isKnownTool, isSimExecuted, + isClientExecuted, })) vi.mock('@/tools', () => ({ executeTool: executeAppTool, })) -import { executeTool } from './executor' +import { clearHandlers, executeTool, registerHandler } from './executor' describe('copilot tool executor fallback', () => { beforeEach(() => { vi.clearAllMocks() + clearHandlers() }) it('falls back to app tool executor for dynamic sim tools', async () => { @@ -59,6 +62,36 @@ describe('copilot tool executor fallback', () => { expect(result).toEqual({ success: true, output: { emails: [] } }) }) + it('uses the registered handler for client-routed tools when running headless (Mothership block)', async () => { + isKnownTool.mockReturnValue(true) + isSimExecuted.mockReturnValue(false) + isClientExecuted.mockReturnValue(true) + + const runWorkflowHandler = vi.fn().mockResolvedValue({ success: true, output: { ran: true } }) + registerHandler('run_workflow', runWorkflowHandler) + + const context = { userId: 'user-1', workflowId: 'workflow-1', workspaceId: 'ws-1' } + const result = await executeTool('run_workflow', { workflow_input: {} }, context) + + expect(runWorkflowHandler).toHaveBeenCalledWith({ workflow_input: {} }, context) + expect(executeAppTool).not.toHaveBeenCalled() + expect(result).toEqual({ success: true, output: { ran: true } }) + }) + + it('falls back to app tool executor for client-routed tools with no registered handler', async () => { + isKnownTool.mockReturnValue(true) + isSimExecuted.mockReturnValue(false) + isClientExecuted.mockReturnValue(true) + executeAppTool.mockResolvedValue({ + success: false, + error: 'Tool not found: unknown_client_tool', + }) + + await executeTool('unknown_client_tool', {}, { userId: 'user-1' }) + + expect(executeAppTool).toHaveBeenCalledWith('unknown_client_tool', expect.any(Object)) + }) + it('converts function_execute timeout from seconds to milliseconds for copilot calls', async () => { isKnownTool.mockReturnValue(false) isSimExecuted.mockReturnValue(false) diff --git a/apps/sim/lib/copilot/tool-executor/executor.ts b/apps/sim/lib/copilot/tool-executor/executor.ts index 084d046c027..d6f7d5caae8 100644 --- a/apps/sim/lib/copilot/tool-executor/executor.ts +++ b/apps/sim/lib/copilot/tool-executor/executor.ts @@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/execution/constants' import { executeTool as executeAppTool } from '@/tools' -import { isKnownTool, isSimExecuted } from './router' +import { isClientExecuted, isKnownTool, isSimExecuted } from './router' import type { ToolCallDescriptor, ToolExecutionContext, @@ -35,12 +35,22 @@ export function hasHandler(toolId: string): boolean { return handlerRegistry.has(toolId) } +export function clearHandlers(): void { + handlerRegistry.clear() +} + export async function executeTool( toolId: string, params: Record, context: ToolExecutionContext ): Promise { - const canUseRegisteredHandler = isKnownTool(toolId) && isSimExecuted(toolId) + // Client-routed tools (e.g. run_workflow) are normally executed in the browser and never + // reach this point in interactive mode. In headless mode (Mothership block, no browser) there + // is no client to delegate to, so fall back to the registered server-side handler when one + // exists — otherwise the call would route to executeAppTool and throw "Tool not found". + const canUseRegisteredHandler = + isKnownTool(toolId) && + (isSimExecuted(toolId) || (isClientExecuted(toolId) && hasHandler(toolId))) if (!canUseRegisteredHandler) { const appParams = buildAppToolParams(toolId, params, context) return executeAppTool(toolId, appParams) diff --git a/apps/sim/lib/copilot/tool-executor/router.ts b/apps/sim/lib/copilot/tool-executor/router.ts index 7c64490cb4c..46a6815cfd7 100644 --- a/apps/sim/lib/copilot/tool-executor/router.ts +++ b/apps/sim/lib/copilot/tool-executor/router.ts @@ -31,6 +31,10 @@ export function isGoExecuted(toolId: string): boolean { return getToolEntry(toolId)?.route === 'go' } +export function isClientExecuted(toolId: string): boolean { + return getToolEntry(toolId)?.route === 'client' +} + export function isKnownTool(toolId: string): boolean { return isToolInCatalog(toolId) } diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index 223eb519524..0a863112f10 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -218,8 +218,10 @@ export const env = createEnv({ S3_PROFILE_PICTURES_BUCKET_NAME: z.string().optional(), // S3 bucket for profile pictures S3_OG_IMAGES_BUCKET_NAME: z.string().optional(), // S3 bucket for OpenGraph images S3_WORKSPACE_LOGOS_BUCKET_NAME: z.string().optional(), // S3 bucket for workspace logos + S3_ENDPOINT: z.string().optional(), // Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave unset for AWS S3 + S3_FORCE_PATH_STYLE: z.string().optional(), // Force path-style addressing (MinIO/Ceph RGW). Defaults to false (AWS S3, R2). Coerced via envBoolean at the consumption site - // Cloud Storage - Azure Blob + // Cloud Storage - Azure Blob AZURE_ACCOUNT_NAME: z.string().optional(), // Azure storage account name AZURE_ACCOUNT_KEY: z.string().optional(), // Azure storage account key AZURE_CONNECTION_STRING: z.string().optional(), // Azure storage connection string @@ -404,6 +406,7 @@ export const env = createEnv({ SSO_DOMAIN: z.string().optional(), // [REQUIRED] SSO email domain SSO_USER_EMAIL: z.string().optional(), // [REQUIRED] User email for SSO registration SSO_ORGANIZATION_ID: z.string().optional(), // Organization ID for SSO registration (optional) + SSO_TRUSTED_PROVIDER_IDS: z.string().optional(), // Comma-separated SSO provider IDs to trust for automatic account linking when an existing account shares the same email. Use for IdPs that do not assert email_verified. Merged into Better Auth accountLinking.trustedProviders. // SSO Mapping Configuration (optional - sensible defaults provided) SSO_MAPPING_ID: z.string().optional(), // Custom ID claim mapping (default: sub for OIDC, nameidentifier for SAML) diff --git a/apps/sim/lib/core/config/redis.ts b/apps/sim/lib/core/config/redis.ts index 04f976b44ae..820c0215c1e 100644 --- a/apps/sim/lib/core/config/redis.ts +++ b/apps/sim/lib/core/config/redis.ts @@ -54,55 +54,65 @@ export function getRedisConnectionDefaults( } } -let globalRedisClient: Redis | null = null -let pingFailures = 0 -let pingInterval: NodeJS.Timeout | null = null -let pingInFlight = false +interface RedisState { + client: Redis | null + pingFailures: number + pingInterval: NodeJS.Timeout | null + pingInFlight: boolean + reconnectListeners: Array<() => void> +} + +const g = globalThis as typeof globalThis & { _redisState?: RedisState } +if (!g._redisState) { + g._redisState = { + client: null, + pingFailures: 0, + pingInterval: null, + pingInFlight: false, + reconnectListeners: [], + } +} +const state = g._redisState const PING_INTERVAL_MS = 15_000 const MAX_PING_FAILURES = 2 -/** Callbacks invoked when the PING health check forces a reconnect. */ -const reconnectListeners: Array<() => void> = [] - /** * Register a callback that fires when the PING health check forces a reconnect. * Useful for resetting cached adapters that hold a stale Redis reference. */ export function onRedisReconnect(cb: () => void): void { - reconnectListeners.push(cb) + state.reconnectListeners.push(cb) } function startPingHealthCheck(redis: Redis): void { - if (pingInterval) return + if (state.pingInterval) return - pingInterval = setInterval(async () => { - if (pingInFlight) return - pingInFlight = true + state.pingInterval = setInterval(async () => { + if (state.pingInFlight) return + state.pingInFlight = true try { await redis.ping() - pingFailures = 0 + state.pingFailures = 0 } catch (error) { - pingFailures++ + state.pingFailures++ logger.warn('Redis PING failed', { - consecutiveFailures: pingFailures, + consecutiveFailures: state.pingFailures, error: toError(error).message, }) - if (pingFailures >= MAX_PING_FAILURES) { + if (state.pingFailures >= MAX_PING_FAILURES) { logger.error('Redis PING failed consecutive times — forcing reconnect', { - consecutiveFailures: pingFailures, + consecutiveFailures: state.pingFailures, }) - pingFailures = 0 - // Drop the cached client and stop this health check before disconnecting, - // so the next getRedisClient() builds a fresh client and a fresh PING loop. - // Listeners may call getRedisClient() and must observe the cleared global. - globalRedisClient = null - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + state.pingFailures = 0 + // Clear before notifying listeners — they may call getRedisClient() and must see the reset state. + state.client = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - for (const cb of reconnectListeners) { + for (const cb of state.reconnectListeners) { try { cb() } catch (cbError) { @@ -116,7 +126,7 @@ function startPingHealthCheck(redis: Redis): void { } } } finally { - pingInFlight = false + state.pingInFlight = false } }, PING_INTERVAL_MS) } @@ -131,7 +141,7 @@ function startPingHealthCheck(redis: Redis): void { export function getRedisClient(): Redis | null { if (typeof window !== 'undefined') return null if (!redisUrl) return null - if (globalRedisClient) return globalRedisClient + if (state.client) return state.client // Outside the try/catch so config errors aren't silently swallowed. const defaults = getRedisConnectionDefaults(redisUrl) @@ -139,7 +149,7 @@ export function getRedisClient(): Redis | null { try { logger.info('Initializing Redis client') - globalRedisClient = new Redis(redisUrl, { + state.client = new Redis(redisUrl, { ...defaults, commandTimeout: 5000, maxRetriesPerRequest: 5, @@ -162,17 +172,17 @@ export function getRedisClient(): Redis | null { }, }) - globalRedisClient.on('connect', () => logger.info('Redis connected')) - globalRedisClient.on('ready', () => logger.info('Redis ready')) - globalRedisClient.on('error', (err: Error) => { + state.client.on('connect', () => logger.info('Redis connected')) + state.client.on('ready', () => logger.info('Redis ready')) + state.client.on('error', (err: Error) => { logger.error('Redis error', { error: err.message, code: (err as any).code }) }) - globalRedisClient.on('close', () => logger.warn('Redis connection closed')) - globalRedisClient.on('end', () => logger.error('Redis connection ended')) + state.client.on('close', () => logger.warn('Redis connection closed')) + state.client.on('end', () => logger.error('Redis connection ended')) - startPingHealthCheck(globalRedisClient) + startPingHealthCheck(state.client) - return globalRedisClient + return state.client } catch (error) { logger.error('Failed to initialize Redis client', { error }) return null @@ -274,18 +284,18 @@ export async function extendLock( * Use for graceful shutdown. */ export async function closeRedisConnection(): Promise { - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - if (globalRedisClient) { + if (state.client) { try { - await globalRedisClient.quit() + await state.client.quit() } catch (error) { logger.error('Error closing Redis connection', { error }) } finally { - globalRedisClient = null + state.client = null } } } @@ -294,12 +304,12 @@ export async function closeRedisConnection(): Promise { * Reset all module-level state. Only intended for use in tests. */ export function resetForTesting(): void { - if (pingInterval) { - clearInterval(pingInterval) - pingInterval = null + if (state.pingInterval) { + clearInterval(state.pingInterval) + state.pingInterval = null } - globalRedisClient = null - pingFailures = 0 - pingInFlight = false - reconnectListeners.length = 0 + state.client = null + state.pingFailures = 0 + state.pingInFlight = false + state.reconnectListeners.length = 0 } diff --git a/apps/sim/lib/core/rate-limiter/storage/factory.ts b/apps/sim/lib/core/rate-limiter/storage/factory.ts index 948e51ad907..9c712ad91f2 100644 --- a/apps/sim/lib/core/rate-limiter/storage/factory.ts +++ b/apps/sim/lib/core/rate-limiter/storage/factory.ts @@ -7,19 +7,27 @@ import { RedisTokenBucket } from './redis-token-bucket' const logger = createLogger('RateLimitStorage') -let cachedAdapter: RateLimitStorageAdapter | null = null -let reconnectListenerRegistered = false +type FactoryGlobal = typeof globalThis & { + _rlCachedAdapter?: RateLimitStorageAdapter | null + _rlReconnectListenerRegistered?: boolean +} + +const g = globalThis as FactoryGlobal +if (!('_rlCachedAdapter' in g)) { + g._rlCachedAdapter = null + g._rlReconnectListenerRegistered = false +} export function createStorageAdapter(): RateLimitStorageAdapter { - if (cachedAdapter) { - return cachedAdapter + if (g._rlCachedAdapter) { + return g._rlCachedAdapter } - if (!reconnectListenerRegistered) { + if (!g._rlReconnectListenerRegistered) { onRedisReconnect(() => { - cachedAdapter = null + g._rlCachedAdapter = null }) - reconnectListenerRegistered = true + g._rlReconnectListenerRegistered = true } const storageMethod = getStorageMethod() @@ -30,17 +38,17 @@ export function createStorageAdapter(): RateLimitStorageAdapter { logger.warn( 'Redis configured but client unavailable - falling back to PostgreSQL for rate limiting' ) - cachedAdapter = new DbTokenBucket() + g._rlCachedAdapter = new DbTokenBucket() } else { logger.info('Rate limiting: Using Redis') - cachedAdapter = new RedisTokenBucket(redis) + g._rlCachedAdapter = new RedisTokenBucket(redis) } } else { logger.info('Rate limiting: Using PostgreSQL') - cachedAdapter = new DbTokenBucket() + g._rlCachedAdapter = new DbTokenBucket() } - return cachedAdapter + return g._rlCachedAdapter! } export function getAdapterType(): StorageMethod { @@ -48,9 +56,9 @@ export function getAdapterType(): StorageMethod { } export function resetStorageAdapter(): void { - cachedAdapter = null + g._rlCachedAdapter = null } export function setStorageAdapter(adapter: RateLimitStorageAdapter): void { - cachedAdapter = adapter + g._rlCachedAdapter = adapter } diff --git a/apps/sim/lib/core/utils/validation.ts b/apps/sim/lib/core/utils/validation.ts index 5fcfc4d3578..ddb3136c6fa 100644 --- a/apps/sim/lib/core/utils/validation.ts +++ b/apps/sim/lib/core/utils/validation.ts @@ -1,17 +1,18 @@ import { getBaseUrl } from './urls' /** - * Checks if a URL is same-origin with the application's base URL. - * Used to prevent open redirect vulnerabilities. + * Checks if a URL is same-origin with a base URL. Defaults to the application's + * base URL, used to prevent open redirect vulnerabilities; pass an explicit + * `base` to pin a URL to another trusted origin (e.g. a configured API host) + * before following it with credentials. * * @param url - The URL to validate + * @param base - The origin to compare against (defaults to the app base URL) * @returns True if the URL is same-origin, false otherwise (secure default) */ -export function isSameOrigin(url: string): boolean { +export function isSameOrigin(url: string, base: string = getBaseUrl()): boolean { try { - const targetUrl = new URL(url) - const appUrl = new URL(getBaseUrl()) - return targetUrl.origin === appUrl.origin + return new URL(url).origin === new URL(base).origin } catch { return false } diff --git a/apps/sim/lib/execution/cancellation.ts b/apps/sim/lib/execution/cancellation.ts index ffa8ad9d444..a08ea280ed4 100644 --- a/apps/sim/lib/execution/cancellation.ts +++ b/apps/sim/lib/execution/cancellation.ts @@ -19,16 +19,20 @@ export type ExecutionCancellationRecordResult = reason: 'redis_unavailable' | 'redis_write_failed' } -let sharedChannel: PubSubChannel | null = null +type CancellationGlobal = typeof globalThis & { + _executionCancelChannel?: PubSubChannel +} + +const _g = globalThis as CancellationGlobal export function getCancellationChannel(): PubSubChannel { - if (!sharedChannel) { - sharedChannel = createPubSubChannel({ + if (!_g._executionCancelChannel) { + _g._executionCancelChannel = createPubSubChannel({ channel: EXECUTION_CANCEL_CHANNEL, label: 'execution-cancel', }) } - return sharedChannel + return _g._executionCancelChannel } export function isRedisCancellationEnabled(): boolean { diff --git a/apps/sim/lib/logs/execution/logging-session.ts b/apps/sim/lib/logs/execution/logging-session.ts index 09bfd2348ca..a0fd011dc7d 100644 --- a/apps/sim/lib/logs/execution/logging-session.ts +++ b/apps/sim/lib/logs/execution/logging-session.ts @@ -1,8 +1,9 @@ import { db } from '@sim/db' import { workflowExecutionLogs } from '@sim/db/schema' import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { and, eq, sql } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { executionLogger } from '@/lib/logs/execution/logger' import { calculateCostSummary, @@ -177,6 +178,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last started block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -193,6 +196,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last completed block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -411,6 +416,8 @@ export class LoggingSession { executionId: this.executionId, error: toError(error).message, stack: error instanceof Error ? error.stack : undefined, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) throw error } @@ -1057,7 +1064,11 @@ export class LoggingSession { this.completionAttemptFailed = true logger.error( `[${this.requestId || 'unknown'}] Cost-only fallback also failed for execution ${this.executionId}:`, - { error: toError(fallbackError).message } + { + error: toError(fallbackError).message, + cause: describeError(fallbackError), + retryable: isRetryableInfrastructureError(fallbackError), + } ) } } diff --git a/apps/sim/lib/mcp/connection-manager.ts b/apps/sim/lib/mcp/connection-manager.ts index 178a3f54564..158983739b2 100644 --- a/apps/sim/lib/mcp/connection-manager.ts +++ b/apps/sim/lib/mcp/connection-manager.ts @@ -461,6 +461,13 @@ export class McpConnectionManager { } } -export const mcpConnectionManager: McpConnectionManager | null = isTest - ? null - : new McpConnectionManager() +type McpManagerGlobal = typeof globalThis & { + _mcpConnectionManager?: McpConnectionManager | null +} + +const _g = globalThis as McpManagerGlobal +if (!('_mcpConnectionManager' in _g)) { + _g._mcpConnectionManager = isTest ? null : new McpConnectionManager() +} + +export const mcpConnectionManager: McpConnectionManager | null = _g._mcpConnectionManager ?? null diff --git a/apps/sim/lib/mcp/pubsub.ts b/apps/sim/lib/mcp/pubsub.ts index 725857ae9c7..43bae170979 100644 --- a/apps/sim/lib/mcp/pubsub.ts +++ b/apps/sim/lib/mcp/pubsub.ts @@ -11,7 +11,7 @@ * (published by serve route, consumed by serve route on other processes to push to local SSE clients) */ -import { createPubSubChannel } from '@/lib/events/pubsub' +import { createPubSubChannel, type PubSubChannel } from '@/lib/events/pubsub' import type { ToolsChangedEvent, WorkflowToolsChangedEvent } from '@/lib/mcp/types' interface McpPubSubAdapter { @@ -22,21 +22,35 @@ interface McpPubSubAdapter { dispose(): void } -const toolsChannel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ - channel: 'mcp:tools_changed', - label: 'mcp-tools', - }) +type McpPubSubGlobal = typeof globalThis & { + _mcpToolsChannel?: PubSubChannel | null + _mcpWorkflowToolsChannel?: PubSubChannel | null +} -const workflowToolsChannel = - typeof window !== 'undefined' - ? null - : createPubSubChannel({ - channel: 'mcp:workflow_tools_changed', - label: 'mcp-workflow-tools', - }) +const g = globalThis as McpPubSubGlobal + +if (!('_mcpToolsChannel' in g)) { + g._mcpToolsChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ + channel: 'mcp:tools_changed', + label: 'mcp-tools', + }) +} + +if (!('_mcpWorkflowToolsChannel' in g)) { + g._mcpWorkflowToolsChannel = + typeof window !== 'undefined' + ? null + : createPubSubChannel({ + channel: 'mcp:workflow_tools_changed', + label: 'mcp-workflow-tools', + }) +} + +const toolsChannel = g._mcpToolsChannel +const workflowToolsChannel = g._mcpWorkflowToolsChannel export const mcpPubSub: McpPubSubAdapter | null = typeof window !== 'undefined' || !toolsChannel || !workflowToolsChannel diff --git a/apps/sim/lib/uploads/config.ts b/apps/sim/lib/uploads/config.ts index f38d147db07..d833d19c9f8 100644 --- a/apps/sim/lib/uploads/config.ts +++ b/apps/sim/lib/uploads/config.ts @@ -1,4 +1,4 @@ -import { env } from '@/lib/core/config/env' +import { env, envBoolean } from '@/lib/core/config/env' import type { StorageConfig, StorageContext } from '@/lib/uploads/shared/types' export type { StorageConfig, StorageContext } from '@/lib/uploads/shared/types' @@ -17,6 +17,15 @@ export const USE_S3_STORAGE = hasS3Config && !USE_BLOB_STORAGE export const S3_CONFIG = { bucket: env.S3_BUCKET_NAME || '', region: env.AWS_REGION || '', + /** + * Custom endpoint for S3-compatible providers (Cloudflare R2, MinIO, Backblaze B2). + * Unset means the AWS SDK derives the host from `region`, targeting AWS S3. + * This is trusted operator configuration (not user input), so it is passed + * through verbatim — `http://` and private hosts are allowed for on-prem MinIO. + */ + endpoint: env.S3_ENDPOINT || undefined, + /** Path-style addressing — required by MinIO/Ceph RGW; AWS S3 and R2 use the default `false`. */ + forcePathStyle: envBoolean(env.S3_FORCE_PATH_STYLE) ?? false, } export const BLOB_CONFIG = { diff --git a/apps/sim/lib/uploads/core/setup.server.ts b/apps/sim/lib/uploads/core/setup.server.ts index 87801a29f96..70a03f43976 100644 --- a/apps/sim/lib/uploads/core/setup.server.ts +++ b/apps/sim/lib/uploads/core/setup.server.ts @@ -3,7 +3,12 @@ import { mkdir } from 'fs/promises' import path, { join } from 'path' import { createLogger } from '@sim/logger' import { env } from '@/lib/core/config/env' -import { getStorageProvider, USE_BLOB_STORAGE, USE_S3_STORAGE } from '@/lib/uploads/config' +import { + getStorageProvider, + S3_CONFIG, + USE_BLOB_STORAGE, + USE_S3_STORAGE, +} from '@/lib/uploads/config' const logger = createLogger('UploadsSetup') @@ -79,6 +84,12 @@ if (typeof process !== 'undefined') { } else { logger.info('AWS S3 credentials found in environment variables') } + + if (env.S3_ENDPOINT) { + logger.info( + `Using S3-compatible endpoint: ${env.S3_ENDPOINT} (path-style: ${S3_CONFIG.forcePathStyle})` + ) + } } else { // Local storage mode logger.info('Using local file storage') diff --git a/apps/sim/lib/uploads/providers/s3/client.test.ts b/apps/sim/lib/uploads/providers/s3/client.test.ts index 4e62109a5d8..75eea9a3dde 100644 --- a/apps/sim/lib/uploads/providers/s3/client.test.ts +++ b/apps/sim/lib/uploads/providers/s3/client.test.ts @@ -12,8 +12,10 @@ const { mockPutObjectCommand, mockGetObjectCommand, mockDeleteObjectCommand, + mockCompleteMultipartUploadCommand, mockGetSignedUrl, mockEnv, + mockS3Config, } = vi.hoisted(() => { const mockSend = vi.fn() const mockS3Client = { send: mockSend } @@ -24,9 +26,21 @@ const { AWS_ACCESS_KEY_ID: 'test-access-key', AWS_SECRET_ACCESS_KEY: 'test-secret-key', } + const mockS3Config: { + bucket: string + region: string + endpoint: string | undefined + forcePathStyle: boolean + } = { + bucket: 'test-bucket', + region: 'test-region', + endpoint: undefined, + forcePathStyle: false, + } return { mockSend, mockS3Client, + mockS3Config, mockS3ClientConstructor: vi.fn().mockImplementation( class { constructor() { @@ -38,6 +52,7 @@ const { mockPutObjectCommand: vi.fn().mockImplementation(class {}), mockGetObjectCommand: vi.fn().mockImplementation(class {}), mockDeleteObjectCommand: vi.fn().mockImplementation(class {}), + mockCompleteMultipartUploadCommand: vi.fn().mockImplementation(class {}), mockGetSignedUrl: vi.fn(), mockEnv, } @@ -48,6 +63,7 @@ vi.mock('@aws-sdk/client-s3', () => ({ PutObjectCommand: mockPutObjectCommand, GetObjectCommand: mockGetObjectCommand, DeleteObjectCommand: mockDeleteObjectCommand, + CompleteMultipartUploadCommand: mockCompleteMultipartUploadCommand, })) vi.mock('@aws-sdk/s3-request-presigner', () => ({ @@ -71,10 +87,7 @@ vi.mock('@/lib/uploads/setup', () => ({ })) vi.mock('@/lib/uploads/config', () => ({ - S3_CONFIG: { - bucket: 'test-bucket', - region: 'test-region', - }, + S3_CONFIG: mockS3Config, S3_KB_CONFIG: { bucket: 'test-kb-bucket', region: 'test-region', @@ -82,6 +95,7 @@ vi.mock('@/lib/uploads/config', () => ({ })) import { + completeS3MultipartUpload, deleteFromS3, downloadFromS3, getPresignedUrl, @@ -97,6 +111,8 @@ describe('S3 Client', () => { vi.spyOn(Date.prototype, 'toISOString').mockReturnValue('2025-06-16T01:13:10.765Z') mockEnv.AWS_ACCESS_KEY_ID = 'test-access-key' mockEnv.AWS_SECRET_ACCESS_KEY = 'test-secret-key' + mockS3Config.endpoint = undefined + mockS3Config.forcePathStyle = false resetS3ClientForTesting() }) @@ -342,6 +358,8 @@ describe('S3 Client', () => { expect(client).toBeDefined() expect(mockS3ClientConstructor).toHaveBeenCalledWith({ region: 'test-region', + endpoint: undefined, + forcePathStyle: false, credentials: { accessKeyId: 'test-access-key', secretAccessKey: 'test-secret-key', @@ -359,8 +377,95 @@ describe('S3 Client', () => { expect(client).toBeDefined() expect(mockS3ClientConstructor).toHaveBeenCalledWith({ region: 'test-region', + endpoint: undefined, + forcePathStyle: false, credentials: undefined, }) }) + + it('should pass a custom endpoint and path-style flag for S3-compatible providers', () => { + mockS3Config.endpoint = 'https://account.r2.cloudflarestorage.com' + mockS3Config.forcePathStyle = true + resetS3ClientForTesting() + + const client = getS3Client() + + expect(client).toBeDefined() + expect(mockS3ClientConstructor).toHaveBeenCalledWith({ + region: 'test-region', + endpoint: 'https://account.r2.cloudflarestorage.com', + forcePathStyle: true, + credentials: { + accessKeyId: 'test-access-key', + secretAccessKey: 'test-secret-key', + }, + }) + }) + }) + + describe('completeS3MultipartUpload fallback location', () => { + const parts = [{ ETag: 'etag-1', PartNumber: 1 }] + + it('uses the SDK-provided Location when present', async () => { + mockSend.mockResolvedValueOnce({ Location: 'https://provided.example.com/object' }) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://provided.example.com/object') + expect(result.key).toBe('kb/uuid-file.txt') + expect(result.path).toBe('/api/files/serve/kb%2Fuuid-file.txt') + }) + + it('falls back to an AWS virtual-hosted URL when Location is absent', async () => { + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.s3.test-region.amazonaws.com/kb/uuid-file.txt' + ) + }) + + it('builds a path-style fallback URL for a custom endpoint with forcePathStyle', async () => { + mockS3Config.endpoint = 'https://minio.example.com' + mockS3Config.forcePathStyle = true + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://minio.example.com/test-kb-bucket/kb/uuid-file.txt') + }) + + it('builds a virtual-hosted fallback URL for a custom endpoint without forcePathStyle', async () => { + mockS3Config.endpoint = 'https://account.r2.cloudflarestorage.com' + mockS3Config.forcePathStyle = false + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.account.r2.cloudflarestorage.com/kb/uuid-file.txt' + ) + }) + + it('strips a trailing slash from the custom endpoint before appending the key', async () => { + mockS3Config.endpoint = 'https://minio.example.com/' + mockS3Config.forcePathStyle = true + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-file.txt', 'upload-1', parts) + + expect(result.location).toBe('https://minio.example.com/test-kb-bucket/kb/uuid-file.txt') + }) + + it('percent-encodes special characters per path segment, preserving slashes', async () => { + mockSend.mockResolvedValueOnce({}) + + const result = await completeS3MultipartUpload('kb/uuid-my file.txt', 'upload-1', parts) + + expect(result.location).toBe( + 'https://test-kb-bucket.s3.test-region.amazonaws.com/kb/uuid-my%20file.txt' + ) + }) }) }) diff --git a/apps/sim/lib/uploads/providers/s3/client.ts b/apps/sim/lib/uploads/providers/s3/client.ts index fe939cb506f..7ddae7bbf87 100644 --- a/apps/sim/lib/uploads/providers/s3/client.ts +++ b/apps/sim/lib/uploads/providers/s3/client.ts @@ -54,6 +54,8 @@ export function getS3Client(): S3Client { _s3Client = new S3Client({ region, + endpoint: S3_CONFIG.endpoint, + forcePathStyle: S3_CONFIG.forcePathStyle, credentials: env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY ? { @@ -386,6 +388,33 @@ export async function getS3MultipartPartUrls( return presignedUrls } +/** + * Build a fallback object URL for when the SDK omits `Location` on multipart + * completion. For a custom `S3_CONFIG.endpoint` it matches the configured + * addressing mode — path-style for MinIO/Ceph (`forcePathStyle`), virtual-hosted + * (bucket as a subdomain) for R2 and friends. Falls back to the AWS + * virtual-hosted host when no custom endpoint is set. + * + * The key is percent-encoded per path segment (preserving `/` separators) so + * keys containing spaces or reserved characters still yield a valid URL. + */ +function buildObjectFallbackUrl(bucket: string, region: string, key: string): string { + const encodedKey = key + .split('/') + .map((segment) => encodeURIComponent(segment)) + .join('/') + if (S3_CONFIG.endpoint) { + const base = S3_CONFIG.endpoint.replace(/\/+$/, '') + if (S3_CONFIG.forcePathStyle) { + return `${base}/${bucket}/${encodedKey}` + } + const url = new URL(base) + url.hostname = `${bucket}.${url.hostname}` + return `${url.origin}/${encodedKey}` + } + return `https://${bucket}.s3.${region}.amazonaws.com/${encodedKey}` +} + /** * Complete multipart upload for S3 */ @@ -408,8 +437,7 @@ export async function completeS3MultipartUpload( }) const response = await s3Client.send(command) - const location = - response.Location || `https://${config.bucket}.s3.${config.region}.amazonaws.com/${key}` + const location = response.Location || buildObjectFallbackUrl(config.bucket, config.region, key) const path = `/api/files/serve/${encodeURIComponent(key)}` return { diff --git a/apps/sim/lib/workflows/executor/pause-persistence.ts b/apps/sim/lib/workflows/executor/pause-persistence.ts index 2080668cccd..954329ad4fa 100644 --- a/apps/sim/lib/workflows/executor/pause-persistence.ts +++ b/apps/sim/lib/workflows/executor/pause-persistence.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import type { LoggingSession } from '@/lib/logs/execution/logging-session' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' import type { ExecutionResult } from '@/executor/types' @@ -46,6 +47,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to persist pause result', { executionId, error: toError(pauseError).message, + cause: describeError(pauseError), + retryable: isRetryableInfrastructureError(pauseError), }) await loggingSession.markAsFailed( `Failed to persist pause state: ${toError(pauseError).message}` @@ -59,6 +62,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to process queued resumes', { executionId, error: toError(resumeError).message, + cause: describeError(resumeError), + retryable: isRetryableInfrastructureError(resumeError), }) } } diff --git a/apps/sim/tools/index.ts b/apps/sim/tools/index.ts index d98c6bd0ce3..3c83d634f01 100644 --- a/apps/sim/tools/index.ts +++ b/apps/sim/tools/index.ts @@ -19,6 +19,7 @@ import { } from '@/lib/core/utils/stream-limits' import { getBaseUrl, getInternalApiBaseUrl } from '@/lib/core/utils/urls' import { isUserFile } from '@/lib/core/utils/user-file' +import { isSameOrigin } from '@/lib/core/utils/validation' import { SIM_VIA_HEADER, serializeCallChain } from '@/lib/execution/call-chain' import { parseMcpToolId } from '@/lib/mcp/utils' import { resolveWorkspaceFileReference } from '@/lib/uploads/contexts/workspace/workspace-file-manager' @@ -1364,17 +1365,7 @@ function isErrorResponse( * the platform's own workflow execution endpoints via absolute URL. */ function isSelfOriginUrl(url: string): boolean { - try { - const targetOrigin = new URL(url).origin - const publicOrigin = new URL(getBaseUrl()).origin - if (targetOrigin === publicOrigin) return true - - const internalOrigin = new URL(getInternalApiBaseUrl()).origin - if (targetOrigin === internalOrigin) return true - } catch { - return false - } - return false + return isSameOrigin(url, getBaseUrl()) || isSameOrigin(url, getInternalApiBaseUrl()) } /** diff --git a/helm/sim/examples/values-aws.yaml b/helm/sim/examples/values-aws.yaml index c8795a8e976..a0837b2672b 100644 --- a/helm/sim/examples/values-aws.yaml +++ b/helm/sim/examples/values-aws.yaml @@ -103,6 +103,9 @@ app: S3_PROFILE_PICTURES_BUCKET_NAME: "profile-pictures" # User avatars S3_OG_IMAGES_BUCKET_NAME: "og-images" # OpenGraph preview images S3_WORKSPACE_LOGOS_BUCKET_NAME: "workspace-logos" # Workspace logos + # For S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2) instead of AWS S3: + # S3_ENDPOINT: "https://.r2.cloudflarestorage.com" # custom endpoint; set AWS_REGION: "auto" for R2 + # S3_FORCE_PATH_STYLE: "true" # required for MinIO/Ceph; omit for AWS S3 and R2 # Realtime service realtime: diff --git a/helm/sim/values.schema.json b/helm/sim/values.schema.json index 724f58161e5..b950e5bdd86 100644 --- a/helm/sim/values.schema.json +++ b/helm/sim/values.schema.json @@ -157,6 +157,10 @@ "type": "string", "description": "Comma-separated additional public origins to trust for auth (e.g. 'https://app.example.com,https://www.example.com'). Merged into Better Auth trustedOrigins." }, + "SSO_TRUSTED_PROVIDER_IDS": { + "type": "string", + "description": "Comma-separated SSO provider IDs to trust for automatic account linking when an SSO sign-in matches an existing account's email. Only needed for IdPs that do not assert email_verified. Merged into Better Auth accountLinking.trustedProviders." + }, "NODE_ENV": { "type": "string", "enum": ["development", "test", "production"], diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 6b48a957bd3..562a82d5798 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -216,6 +216,10 @@ app: # Set to "true" AFTER running the SSO registration script SSO_ENABLED: "" # Enable SSO authentication ("true" to enable) NEXT_PUBLIC_SSO_ENABLED: "" # Show SSO login button in UI ("true" to enable) + # SSO_TRUSTED_PROVIDER_IDS: comma-separated SSO provider IDs to trust for automatic account linking when a + # user signs in via SSO and an account with the same email already exists. Only needed for IdPs that do NOT + # assert email_verified (trustEmailVerified already handles those that do). Resolved at startup — restart after editing. + SSO_TRUSTED_PROVIDER_IDS: "" # Enterprise Feature Overrides (self-hosted) CREDENTIAL_SETS_ENABLED: "" # Enable credential sets (email polling) on self-hosted ("true" to enable) @@ -260,6 +264,8 @@ app: S3_PROFILE_PICTURES_BUCKET_NAME: "" # S3 bucket for user profile pictures S3_OG_IMAGES_BUCKET_NAME: "" # S3 bucket for OpenGraph preview images S3_WORKSPACE_LOGOS_BUCKET_NAME: "" # S3 bucket for workspace logos + S3_ENDPOINT: "" # Custom endpoint for S3-compatible storage (Cloudflare R2, MinIO, Backblaze B2). Leave empty for AWS S3 + S3_FORCE_PATH_STYLE: "" # Set to "true" for path-style addressing (MinIO/Ceph RGW). Leave empty for AWS S3 and R2 # Azure Blob Storage Configuration (optional - for file storage) # If configured, files will be stored in Azure Blob instead of local storage diff --git a/packages/testing/src/mocks/env.mock.ts b/packages/testing/src/mocks/env.mock.ts index 61f733c1ec2..e6bbd09f7b4 100644 --- a/packages/testing/src/mocks/env.mock.ts +++ b/packages/testing/src/mocks/env.mock.ts @@ -53,6 +53,14 @@ export function createEnvMock(overrides: Record = {} typeof value === 'string' ? value.toLowerCase() === 'false' || value === '0' : value === false, + envBoolean: (value: boolean | string | undefined | null): boolean | undefined => { + if (typeof value === 'boolean') return value + if (value === undefined || value === null || value === '') return undefined + const normalized = String(value).trim().toLowerCase() + return ( + normalized === 'true' || normalized === '1' || normalized === 'yes' || normalized === 'on' + ) + }, envNumber: ( value: number | string | undefined | null, fallback: number, diff --git a/packages/utils/src/errors.test.ts b/packages/utils/src/errors.test.ts index fa11dc191f9..272c85e53a4 100644 --- a/packages/utils/src/errors.test.ts +++ b/packages/utils/src/errors.test.ts @@ -2,7 +2,7 @@ * @vitest-environment node */ import { describe, expect, it } from 'vitest' -import { getPostgresErrorCode, toError } from './errors.js' +import { describeError, getPostgresErrorCode, toError } from './errors.js' describe('toError', () => { it('returns the same Error when given an Error', () => { @@ -76,3 +76,54 @@ describe('getPostgresErrorCode', () => { expect(getPostgresErrorCode(err1)).toBeUndefined() }) }) + +describe('describeError', () => { + it('reports name and message for a plain error, omitting causeChain', () => { + const described = describeError(new Error('boom')) + expect(described).toEqual({ name: 'Error', message: 'boom' }) + expect(described.causeChain).toBeUndefined() + }) + + it('surfaces the deepest cause for a wrapped driver error', () => { + const driver = Object.assign(new Error('read ECONNRESET'), { + code: 'ECONNRESET', + errno: 'ECONNRESET', + syscall: 'read', + }) + const wrapped = new Error('Failed query: select ...', { cause: driver }) + const described = describeError(wrapped) + expect(described.message).toBe('read ECONNRESET') + expect(described.code).toBe('ECONNRESET') + expect(described.errno).toBe('ECONNRESET') + expect(described.syscall).toBe('read') + expect(described.causeChain).toEqual([ + 'Error: Failed query: select ...', + 'Error: read ECONNRESET', + ]) + }) + + it('always returns the cause for unclassified errors (AbortError)', () => { + const aborted = Object.assign(new Error('The operation was aborted'), { name: 'AbortError' }) + expect(describeError(aborted)).toEqual({ + name: 'AbortError', + message: 'The operation was aborted', + }) + }) + + it('falls back to a populated description for non-Error input without throwing', () => { + expect(describeError('just a string')).toEqual({ name: 'Error', message: 'just a string' }) + expect(() => describeError({ weird: true })).not.toThrow() + }) + + it('stops at depth 10 and does not loop on a cyclic cause', () => { + const a = new Error('a') + const b = new Error('b') + ;(a as { cause?: unknown }).cause = b + ;(b as { cause?: unknown }).cause = a + let described: ReturnType | undefined + expect(() => { + described = describeError(a) + }).not.toThrow() + expect(described?.causeChain?.length).toBeLessThanOrEqual(10) + }) +}) diff --git a/packages/utils/src/errors.ts b/packages/utils/src/errors.ts index 48fcee083c3..dc21d57b995 100644 --- a/packages/utils/src/errors.ts +++ b/packages/utils/src/errors.ts @@ -39,6 +39,60 @@ export function getPostgresConstraintName(error: unknown): string | undefined { return readPgErrorField(error, 'constraint_name') ?? readPgErrorField(error, 'constraint') } +export interface DescribedError { + name: string + message: string + code?: string + errno?: string + syscall?: string + /** `"Name: message"` per link in the `.cause` chain, outermost first. Present only when the chain has more than one link. */ + causeChain?: string[] +} + +/** + * Always-on diagnostic view of an error and its `.cause` chain. + * + * Reports the fields of the DEEPEST `.cause` link, because a wrapped driver + * error (e.g. Drizzle's `"Failed query: ..."` wrapping an `ECONNRESET`) carries + * the real reason there, not on the outer wrapper. Always returns a populated + * object — including for non-`Error` throws and unclassified errors like + * `AbortError`. Cycle-safe and depth-bounded. + * + * Loggers do not serialize the non-enumerable `Error.prototype.cause`, so pass + * the result as an explicit structured field rather than the raw error. + */ +export function describeError(error: unknown): DescribedError { + const chain: Error[] = [] + const seen = new Set() + let current: unknown = error + while (current instanceof Error && !seen.has(current) && chain.length < 10) { + seen.add(current) + chain.push(current) + current = current.cause + } + + if (chain.length === 0) { + const normalized = toError(error) + return { name: normalized.name, message: normalized.message } + } + + const deepest = chain[chain.length - 1] as Error & Record + const asString = (value: unknown): string | undefined => + typeof value === 'string' ? value : undefined + const code = asString(deepest.code) + const errno = asString(deepest.errno) + const syscall = asString(deepest.syscall) + + return { + name: deepest.name, + message: deepest.message, + ...(code ? { code } : {}), + ...(errno ? { errno } : {}), + ...(syscall ? { syscall } : {}), + ...(chain.length > 1 ? { causeChain: chain.map((e) => `${e.name}: ${e.message}`) } : {}), + } +} + function readPgErrorField(error: unknown, field: string): string | undefined { const seen = new Set() let current: unknown = error