diff --git a/client/src/Hooks/useMonitorForm.ts b/client/src/Hooks/useMonitorForm.ts index 963409fc8a..ce9f9dab31 100644 --- a/client/src/Hooks/useMonitorForm.ts +++ b/client/src/Hooks/useMonitorForm.ts @@ -17,6 +17,8 @@ const getBaseDefaults = (data?: Monitor | null) => ({ geoCheckEnabled: data?.geoCheckEnabled ?? false, geoCheckLocations: data?.geoCheckLocations || [], geoCheckInterval: data?.geoCheckInterval || 300000, + escalationAfterMinutes: data?.escalationAfterMinutes ?? undefined, + escalationNotificationChannels: data?.escalationNotificationChannels || [], }); export const useMonitorForm = ({ diff --git a/client/src/Pages/CreateMonitor/index.tsx b/client/src/Pages/CreateMonitor/index.tsx index 15b76eab36..b37e0a57bb 100644 --- a/client/src/Pages/CreateMonitor/index.tsx +++ b/client/src/Pages/CreateMonitor/index.tsx @@ -765,6 +765,88 @@ const CreateMonitorPage = () => { } /> + + ( + { + const val = e.target.value; + field.onChange(val === "" ? undefined : Number(val)); + }} + placeholder="e.g. 5" + /> + )} + /> + { + const notificationOptions = (notifications ?? []).map((n) => ({ + ...n, + name: n.notificationName, + })); + const selected = notificationOptions.filter((n) => + (field.value ?? []).includes(n.id) + ); + return ( + + option.name} + onChange={(_: unknown, newValue: typeof notificationOptions) => { + field.onChange(newValue.map((n) => n.id)); + }} + isOptionEqualToValue={(option, value) => option.id === value.id} + /> + {selected.length > 0 && ( + + {selected.map((notification, index) => ( + + + {notification.notificationName} + + { + field.onChange( + (field.value ?? 
[]).filter( + (id: string) => id !== notification.id + ) + ); + }} + aria-label="Remove notification" + > + + + {index < selected.length - 1 && } + + ))} + + )} + + ); + }} + /> + + } + /> + {(watchedType === "http" || watchedType === "grpc" || watchedType === "websocket") && ( diff --git a/client/src/Types/Monitor.ts b/client/src/Types/Monitor.ts index 053b517d1d..bce41647a2 100644 --- a/client/src/Types/Monitor.ts +++ b/client/src/Types/Monitor.ts @@ -76,6 +76,8 @@ export interface Monitor { geoCheckEnabled?: boolean; geoCheckLocations?: GeoContinent[]; geoCheckInterval?: number; + escalationAfterMinutes?: number; + escalationNotificationChannels?: string[]; recentChecks: CheckSnapshot[]; createdAt: string; updatedAt: string; diff --git a/client/src/Validation/monitor.ts b/client/src/Validation/monitor.ts index 9acffe6fed..359a38135d 100644 --- a/client/src/Validation/monitor.ts +++ b/client/src/Validation/monitor.ts @@ -27,6 +27,8 @@ const baseSchema = z.object({ .number() .min(300000, "Interval must be at least 5 minutes") .optional(), + escalationAfterMinutes: z.number().min(0).optional(), + escalationNotificationChannels: z.array(z.string()).optional(), }); // HTTP monitor schema diff --git a/client/src/locales/en.json b/client/src/locales/en.json index 92a21939f3..9f5dd67b2d 100644 --- a/client/src/locales/en.json +++ b/client/src/locales/en.json @@ -543,6 +543,11 @@ "description": "Select the notification channels you want to use", "title": "Notifications" }, + "escalation": { + "title": "Escalation rules", + "description": "If the monitor stays down for the specified time, notify additional channels", + "afterMinutes": "Escalate after (minutes)" + }, "type": { "description": "Select the type of check to perform", "optionDockerDescription": "Use Docker to monitor if a container is running.", diff --git a/server/src/db/models/Incident.ts b/server/src/db/models/Incident.ts index 82e2b5eb2b..a0a4926f5a 100644 --- a/server/src/db/models/Incident.ts +++ 
b/server/src/db/models/Incident.ts @@ -1,12 +1,13 @@ import { Schema, model, type Types } from "mongoose"; import { IncidentResolutionTypes, type Incident } from "@/types/incident.js"; -type IncidentDocumentBase = Omit & { +type IncidentDocumentBase = Omit & { monitorId: Types.ObjectId; teamId: Types.ObjectId; resolvedBy?: Types.ObjectId | null; startTime: Date; endTime: Date | null; + escalationSentAt?: Date | null; createdAt: Date; updatedAt: Date; }; @@ -72,6 +73,10 @@ const IncidentSchema = new Schema( type: String, default: null, }, + escalationSentAt: { + type: Date, + default: null, + }, }, { timestamps: true } ); diff --git a/server/src/db/models/Monitor.ts b/server/src/db/models/Monitor.ts index 036aeadad6..080dde3c24 100644 --- a/server/src/db/models/Monitor.ts +++ b/server/src/db/models/Monitor.ts @@ -351,6 +351,14 @@ const MonitorSchema = new Schema( type: Number, default: 300000, }, + escalationAfterMinutes: { + type: Number, + default: undefined, + }, + escalationNotificationChannels: { + type: [String], + default: [], + }, recentChecks: { type: [checkSnapshotSchema], default: [], diff --git a/server/src/repositories/incidents/MongoIncidentRepository.ts b/server/src/repositories/incidents/MongoIncidentRepository.ts index 096ba3d37b..361d332779 100644 --- a/server/src/repositories/incidents/MongoIncidentRepository.ts +++ b/server/src/repositories/incidents/MongoIncidentRepository.ts @@ -60,6 +60,7 @@ class MongoIncidentRepository implements IIncidentsRepository { resolvedBy: doc.resolvedBy ? this.toStringId(doc.resolvedBy) : null, resolvedByEmail: doc.resolvedByEmail ?? null, comment: doc.comment ?? null, + escalationSentAt: doc.escalationSentAt ? 
this.toDateString(doc.escalationSentAt) : null, createdAt: this.toDateString(doc.createdAt), updatedAt: this.toDateString(doc.updatedAt), }; diff --git a/server/src/repositories/monitors/MongoMonitorsRepository.ts b/server/src/repositories/monitors/MongoMonitorsRepository.ts index b2d7594483..7570bdbbcd 100644 --- a/server/src/repositories/monitors/MongoMonitorsRepository.ts +++ b/server/src/repositories/monitors/MongoMonitorsRepository.ts @@ -391,6 +391,8 @@ class MongoMonitorsRepository implements IMonitorsRepository { geoCheckEnabled: doc.geoCheckEnabled ?? false, geoCheckLocations: doc.geoCheckLocations ?? [], geoCheckInterval: doc.geoCheckInterval ?? 300000, + escalationAfterMinutes: doc.escalationAfterMinutes ?? undefined, + escalationNotificationChannels: doc.escalationNotificationChannels ?? [], createdAt: toDateString(doc.createdAt), updatedAt: toDateString(doc.updatedAt), }; @@ -450,6 +452,8 @@ class MongoMonitorsRepository implements IMonitorsRepository { geoCheckEnabled: doc.geoCheckEnabled ?? false, geoCheckLocations: doc.geoCheckLocations ?? [], geoCheckInterval: doc.geoCheckInterval ?? 300000, + escalationAfterMinutes: doc.escalationAfterMinutes ?? undefined, + escalationNotificationChannels: doc.escalationNotificationChannels ?? 
[], createdAt: toDateString(doc.createdAt), updatedAt: toDateString(doc.updatedAt), }; diff --git a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts index b6908127b2..03497f433c 100644 --- a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts +++ b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts @@ -12,6 +12,7 @@ import { type IGeoChecksService, } from "@/service/index.js"; import { CHECK_TTL_SENTINEL, type MaintenanceWindow, type StatusChangeResult } from "@/types/index.js"; +import type { MonitorStatusResponse } from "@/types/network.js"; import { IMaintenanceWindowsRepository, IMonitorsRepository, @@ -38,7 +39,7 @@ export interface MonitorActionDecision { shouldResolveIncident: boolean; shouldSendNotification: boolean; incidentReason: "status_down" | "threshold_breach" | null; - notificationReason: "status_change" | "threshold_breach" | null; + notificationReason: "status_change" | "threshold_breach" | "escalation" | null; thresholdBreaches?: { cpu?: boolean; memory?: boolean; @@ -177,6 +178,15 @@ export class SuperSimpleQueueHelper implements ISuperSimpleQueueHelper { stack: error instanceof Error ? error.stack : undefined, }); }); + + this.checkEscalation(statusChangeResult.monitor, status).catch((error: unknown) => { + this.logger.warn({ + message: `Error checking escalation for monitor ${monitor.id}: ${error instanceof Error ? error.message : "Unknown error"}`, + service: SERVICE_NAME, + method: "getMonitorJob", + stack: error instanceof Error ? error.stack : undefined, + }); + }); } catch (error: unknown) { this.logger.warn({ message: error instanceof Error ? 
/**
 * Sends a one-time escalation notification for a monitor whose active incident
 * has stayed open for at least `monitor.escalationAfterMinutes`.
 *
 * Nothing is sent unless all of the following hold:
 * - the monitor status is "down" or "breached";
 * - a non-zero escalation delay and at least one escalation channel are set;
 * - an active incident exists and its `escalationSentAt` is not set yet;
 * - the incident has been open for at least the configured delay.
 *
 * @param monitor - Monitor whose status and escalation settings are evaluated.
 * @param monitorStatusResponse - Latest check result, forwarded unchanged to the notification service.
 */
private async checkEscalation(monitor: Monitor, monitorStatusResponse: MonitorStatusResponse): Promise<void> {
	if (monitor.status !== "down" && monitor.status !== "breached") {
		return;
	}

	const { escalationAfterMinutes, escalationNotificationChannels } = monitor;
	// A missing or zero delay, or an empty channel list, is treated as "escalation not configured".
	if (!escalationAfterMinutes || !escalationNotificationChannels?.length) {
		return;
	}

	const incident = await this.incidentsRepository.findActiveByMonitorId(monitor.id, monitor.teamId);
	if (!incident || incident.escalationSentAt) {
		return;
	}

	const elapsedMinutes = (Date.now() - new Date(incident.startTime).getTime()) / 60000;
	// Negated `>=` on purpose: if startTime cannot be parsed, elapsedMinutes is NaN and
	// every comparison with NaN is false, so this form bails out instead of escalating.
	if (!(elapsedMinutes >= escalationAfterMinutes)) {
		return;
	}

	const escalationDecision: MonitorActionDecision = {
		shouldCreateIncident: false,
		shouldResolveIncident: false,
		shouldSendNotification: true,
		incidentReason: null,
		notificationReason: "escalation",
	};

	const delivered = await this.notificationsService.sendEscalationNotifications(
		monitor,
		monitorStatusResponse,
		escalationDecision,
		escalationNotificationChannels
	);

	// Stamp the incident even when delivery was incomplete; otherwise every later
	// check would notify the channels that already succeeded again.
	await this.incidentsRepository.updateById(incident.id, monitor.teamId, { escalationSentAt: new Date().toISOString() });

	if (!delivered) {
		// Surface the incomplete delivery instead of silently discarding the result.
		this.logger.warn({
			message: `Escalation for monitor ${monitor.id} was not delivered to every configured channel`,
			service: SERVICE_NAME,
			method: "checkEscalation",
		});
	}
}
/**
 * Builds the title, summary and detail lines of an escalation notification.
 *
 * The wording follows `monitor.status`: a monitor whose status is "breached"
 * is described as exceeding its thresholds, any other status is described as
 * down (the text produced for a down monitor is unchanged).
 *
 * @param monitor - Monitor the escalation is about; its name, url, type and status are read.
 * @returns Notification content stamped with the current time.
 */
private buildEscalationContent(monitor: Monitor): NotificationContent {
	const isBreached = monitor.status === "breached";
	const statusLabel = isBreached ? "Breached" : "Down";
	const condition = isBreached ? "exceeding its thresholds" : "down";

	return {
		title: `Escalation: ${monitor.name}`,
		summary: `Monitor "${monitor.name}" has been ${condition} for an extended period.`,
		details: [`URL: ${monitor.url}`, `Status: ${statusLabel}`, `Type: ${monitor.type}`],
		timestamp: new Date(),
	};
}
/**
 * Sends an escalation message for `monitor` to the notification channels
 * identified by `notificationIds`.
 *
 * @param monitor - Monitor the escalation is about.
 * @param monitorStatusResponse - Latest check result, passed to the message builder and to each send.
 * @param decision - Decision describing why the notification is sent.
 * @param notificationIds - Ids of the notification channels to deliver to.
 * @returns `true` only when at least one channel was resolved and every
 * resolved channel reported success; `false` when nothing could be delivered.
 */
sendEscalationNotifications = async (
	monitor: Monitor,
	monitorStatusResponse: MonitorStatusResponse,
	decision: MonitorActionDecision,
	notificationIds: string[]
): Promise<boolean> => {
	if (notificationIds.length === 0) {
		return false;
	}

	// NOTE(review): channels are looked up by id only; confirm elsewhere that the ids
	// are restricted to the monitor's own team before they are stored on the monitor.
	const notifications = await this.notificationsRepository.findNotificationsByIds(notificationIds);
	if (notifications.length === 0) {
		// None of the ids resolved to a channel, so nothing was delivered; do not report success.
		return false;
	}

	const { clientHost } = this.settingsService.getSettings();
	const notificationMessage = this.notificationMessageBuilder.buildMessage(
		monitor,
		monitorStatusResponse,
		decision,
		clientHost || "Host not defined"
	);

	const outcomes = await Promise.all(
		notifications.map((notification) => this.send(notification, monitor, monitorStatusResponse, decision, notificationMessage))
	);
	return outcomes.every(Boolean);
};
z.object({ geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z.number().min(300000).optional(), + escalationAfterMinutes: z.number().min(0).optional(), + escalationNotificationChannels: z.array(z.string()).optional(), }); export const editMonitorBodyValidation = z.object({ @@ -107,6 +109,8 @@ export const editMonitorBodyValidation = z.object({ geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z.number().min(300000).optional(), + escalationAfterMinutes: z.number().min(0).optional(), + escalationNotificationChannels: z.array(z.string()).optional(), }); export const pauseMonitorParamValidation = z.object({