feat: implement Phase 5 — Alerting & Monitoring
Backend:
- AlertEvaluationService: evaluates metrics against AlertRules after each heartbeat
  - Supports cpu_usage_percent and memory_usage_percent metric paths
  - Operators: >, >=, <, <=, ==
  - 15-minute dedup window to prevent alert spam
- AlertRulesController: full CRUD for alert rules (GET/POST/PUT/DELETE)
- AlertsController: list with acknowledged filter + POST acknowledge endpoint
- IRmmHubClient: added AlertTriggered push method
- Program.cs: AlertEvaluationService registered as Scoped

Frontend:
- AlertsPage: two-tab layout (active alerts + rules)
  - Alerts tab: severity badges, acknowledge button, all/unack/ack filter
  - Rules tab: condition display, enabled toggle, delete with confirm
  - Create rule modal with MetricPath/Operator/Threshold/Severity selects
- api/types.ts: AlertRule, AlertItem, CreateAlertRuleRequest types
- api/client.ts: alertRulesApi and alertsApi
- useAgentSignalR: handles AlertTriggered → invalidates alerts query
- App.tsx: Alerts nav item with Bell icon

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
84
Backend/src/NexusRMM.Api/Controllers/AlertRulesController.cs
Normal file
84
Backend/src/NexusRMM.Api/Controllers/AlertRulesController.cs
Normal file
@@ -0,0 +1,84 @@
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusRMM.Core.Models;
|
||||
using NexusRMM.Infrastructure.Data;
|
||||
|
||||
namespace NexusRMM.Api.Controllers;
|
||||
|
||||
/// <summary>
/// CRUD endpoints for alert rules, served under <c>api/v1/alert-rules</c>.
/// </summary>
[ApiController]
[Route("api/v1/alert-rules")]
public class AlertRulesController : ControllerBase
{
    // Metric paths and operators that AlertEvaluationService can evaluate. A rule
    // using any other value would be stored but could never trigger, so such input
    // is rejected up front instead of failing silently at evaluation time.
    private static readonly string[] SupportedMetricPaths =
    {
        "cpu_usage_percent",
        "memory_usage_percent",
    };

    private static readonly string[] SupportedOperators = { ">", ">=", "<", "<=", "==" };

    private readonly RmmDbContext _db;

    public AlertRulesController(RmmDbContext db) => _db = db;

    /// <summary>Returns all alert rules ordered by name.</summary>
    [HttpGet]
    public async Task<IActionResult> GetAll() =>
        Ok(await _db.AlertRules.OrderBy(r => r.Name).ToListAsync());

    /// <summary>Returns the rule with the given id, or 404 when it does not exist.</summary>
    [HttpGet("{id:int}")]
    public async Task<IActionResult> GetById(int id)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        return rule is null ? NotFound() : Ok(rule);
    }

    /// <summary>
    /// Creates a new, enabled alert rule from the request body.
    /// Responds with 400 when the name is blank or the metric path / operator is not supported,
    /// otherwise with 201 and the created rule.
    /// </summary>
    [HttpPost]
    public async Task<IActionResult> Create([FromBody] CreateAlertRuleRequest req)
    {
        var error = FindValidationError(req.Name, req.MetricPath, req.Operator, allowOmitted: false);
        if (error is not null) return BadRequest(error);

        var rule = new AlertRule
        {
            Name = req.Name,
            MetricPath = req.MetricPath,
            Operator = req.Operator,
            Threshold = req.Threshold,
            Severity = req.Severity,
            Enabled = true,
        };
        _db.AlertRules.Add(rule);
        await _db.SaveChangesAsync();
        return CreatedAtAction(nameof(GetById), new { id = rule.Id }, rule);
    }

    /// <summary>
    /// Partially updates a rule: only fields present (non-null) in the request are changed.
    /// Responds with 404 when the rule does not exist and with 400 when a supplied
    /// name is blank or a supplied metric path / operator is not supported.
    /// </summary>
    [HttpPut("{id:int}")]
    public async Task<IActionResult> Update(int id, [FromBody] UpdateAlertRuleRequest req)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        if (rule is null) return NotFound();

        // Validate before touching the tracked entity so a rejected request changes nothing.
        var error = FindValidationError(req.Name, req.MetricPath, req.Operator, allowOmitted: true);
        if (error is not null) return BadRequest(error);

        if (req.Name is not null) rule.Name = req.Name;
        if (req.MetricPath is not null) rule.MetricPath = req.MetricPath;
        if (req.Operator is not null) rule.Operator = req.Operator;
        if (req.Threshold.HasValue) rule.Threshold = req.Threshold.Value;
        if (req.Severity.HasValue) rule.Severity = req.Severity.Value;
        if (req.Enabled.HasValue) rule.Enabled = req.Enabled.Value;

        await _db.SaveChangesAsync();
        return Ok(rule);
    }

    /// <summary>Deletes the rule with the given id; 404 when it does not exist, 204 on success.</summary>
    [HttpDelete("{id:int}")]
    public async Task<IActionResult> Delete(int id)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        if (rule is null) return NotFound();
        _db.AlertRules.Remove(rule);
        await _db.SaveChangesAsync();
        return NoContent();
    }

    /// <summary>
    /// Checks the textual rule fields and returns a human-readable error, or null when they are valid.
    /// </summary>
    /// <param name="name">Rule name; must not be blank when supplied.</param>
    /// <param name="metricPath">Metric path; must be one of <see cref="SupportedMetricPaths"/> when supplied.</param>
    /// <param name="op">Comparison operator; must be one of <see cref="SupportedOperators"/> when supplied.</param>
    /// <param name="allowOmitted">
    /// True for partial updates, where a null field means "leave unchanged";
    /// false for creation, where every field is required.
    /// </param>
    private static string? FindValidationError(string? name, string? metricPath, string? op, bool allowOmitted)
    {
        if (!allowOmitted && (name is null || metricPath is null || op is null))
            return "Name, MetricPath and Operator are required.";

        if (name is not null && string.IsNullOrWhiteSpace(name))
            return "Name must not be empty.";

        if (metricPath is not null && !SupportedMetricPaths.Contains(metricPath))
            return $"Unsupported MetricPath '{metricPath}'. Supported values: {string.Join(", ", SupportedMetricPaths)}.";

        if (op is not null && !SupportedOperators.Contains(op))
            return $"Unsupported Operator '{op}'. Supported values: {string.Join(", ", SupportedOperators)}.";

        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Request body for creating an alert rule. Each value is copied onto the
/// corresponding property of the new rule.
/// </summary>
/// <param name="Name">Name of the rule.</param>
/// <param name="MetricPath">Identifier of the metric the rule is evaluated against.</param>
/// <param name="Operator">Comparison operator applied between the metric value and the threshold.</param>
/// <param name="Threshold">Value the metric is compared with.</param>
/// <param name="Severity">Severity assigned to the rule.</param>
public record CreateAlertRuleRequest(string Name, string MetricPath, string Operator, double Threshold, AlertSeverity Severity);
|
||||
|
||||
/// <summary>
/// Request body for partially updating an alert rule. Every member is optional;
/// a null member leaves the corresponding property of the rule unchanged.
/// </summary>
/// <param name="Name">New name of the rule, or null to keep the current one.</param>
/// <param name="MetricPath">New metric identifier, or null to keep the current one.</param>
/// <param name="Operator">New comparison operator, or null to keep the current one.</param>
/// <param name="Threshold">New threshold, or null to keep the current one.</param>
/// <param name="Severity">New severity, or null to keep the current one.</param>
/// <param name="Enabled">New enabled state, or null to keep the current one.</param>
public record UpdateAlertRuleRequest(string? Name, string? MetricPath, string? Operator, double? Threshold, AlertSeverity? Severity, bool? Enabled);
|
||||
50
Backend/src/NexusRMM.Api/Controllers/AlertsController.cs
Normal file
50
Backend/src/NexusRMM.Api/Controllers/AlertsController.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusRMM.Infrastructure.Data;
|
||||
|
||||
namespace NexusRMM.Api.Controllers;
|
||||
|
||||
/// <summary>
/// Endpoints for listing alerts and acknowledging them, served under <c>api/v1/alerts</c>.
/// </summary>
[ApiController]
[Route("api/v1/alerts")]
public class AlertsController : ControllerBase
{
    /// <summary>Upper bound on the number of alerts returned by a single list request.</summary>
    private const int MaxAlertsPerRequest = 200;

    private readonly RmmDbContext _db;

    public AlertsController(RmmDbContext db) => _db = db;

    /// <summary>
    /// Returns the most recent alerts (newest first, capped at <see cref="MaxAlertsPerRequest"/>),
    /// optionally filtered by acknowledged state.
    /// </summary>
    /// <param name="acknowledged">
    /// When set, only alerts whose acknowledged flag equals this value are returned;
    /// when omitted, alerts are returned regardless of that flag.
    /// </param>
    [HttpGet]
    public async Task<IActionResult> GetAll([FromQuery] bool? acknowledged = null)
    {
        // No Include() needed: the query ends in a projection, and the navigation
        // properties referenced inside Select are resolved as part of that projection.
        var query = _db.Alerts.AsQueryable();

        if (acknowledged.HasValue)
            query = query.Where(a => a.Acknowledged == acknowledged.Value);

        var alerts = await query
            .OrderByDescending(a => a.CreatedAt)
            .Take(MaxAlertsPerRequest)
            .Select(a => new
            {
                a.Id, a.Message, a.Severity, a.Acknowledged, a.CreatedAt,
                AgentId = a.AgentId.ToString(),
                AgentHostname = a.Agent.Hostname,
                RuleId = a.RuleId,
                RuleName = a.Rule.Name,
            })
            .ToListAsync();

        return Ok(alerts);
    }

    /// <summary>
    /// Marks the alert with the given id as acknowledged.
    /// Responds with 404 when the alert does not exist, otherwise with its id and acknowledged flag.
    /// </summary>
    [HttpPost("{id:long}/acknowledge")]
    public async Task<IActionResult> Acknowledge(long id)
    {
        var alert = await _db.Alerts.FindAsync(id);
        if (alert is null) return NotFound();

        alert.Acknowledged = true;
        await _db.SaveChangesAsync();
        return Ok(new { alert.Id, alert.Acknowledged });
    }
}
|
||||
@@ -3,6 +3,7 @@ using Grpc.Core;
|
||||
using Microsoft.AspNetCore.SignalR;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusRMM.Api.Hubs;
|
||||
using NexusRMM.Api.Services;
|
||||
using NexusRMM.Core.Models;
|
||||
using NexusRMM.Infrastructure.Data;
|
||||
using NexusRMM.Protos;
|
||||
@@ -16,12 +17,14 @@ public class AgentGrpcService : AgentService.AgentServiceBase
|
||||
private readonly RmmDbContext _db;
|
||||
private readonly ILogger<AgentGrpcService> _logger;
|
||||
private readonly IHubContext<RmmHub, IRmmHubClient> _hub;
|
||||
private readonly AlertEvaluationService _alertService;
|
||||
|
||||
public AgentGrpcService(RmmDbContext db, ILogger<AgentGrpcService> logger, IHubContext<RmmHub, IRmmHubClient> hub)
|
||||
public AgentGrpcService(RmmDbContext db, ILogger<AgentGrpcService> logger, IHubContext<RmmHub, IRmmHubClient> hub, AlertEvaluationService alertService)
|
||||
{
|
||||
_db = db;
|
||||
_logger = logger;
|
||||
_hub = hub;
|
||||
_alertService = alertService;
|
||||
}
|
||||
|
||||
public override async Task<EnrollResponse> Enroll(EnrollRequest request, ServerCallContext context)
|
||||
@@ -102,6 +105,9 @@ public class AgentGrpcService : AgentService.AgentServiceBase
|
||||
await _hub.Clients.All
|
||||
.AgentStatusChanged(request.AgentId, "Online", DateTime.UtcNow.ToString("O"));
|
||||
|
||||
// Alert-Engine: Metriken gegen Regeln auswerten
|
||||
await _alertService.EvaluateAsync(agentId, agent.Hostname, request.Metrics);
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,4 +13,7 @@ public interface IRmmHubClient
|
||||
|
||||
/// <summary>Command-Ergebnis verfügbar (an agent-Gruppe gepusht)</summary>
|
||||
Task CommandResultUpdated(string taskId, string agentId, bool success, int exitCode);
|
||||
|
||||
/// <summary>Neuer Alert ausgelöst (an alle Clients gepusht)</summary>
|
||||
Task AlertTriggered(string agentId, string agentHostname, string ruleName, string message, string severity);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Server.Kestrel.Core;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusRMM.Api.GrpcServices;
|
||||
using NexusRMM.Api.Hubs;
|
||||
using NexusRMM.Api.Services;
|
||||
using NexusRMM.Infrastructure.Data;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
@@ -22,6 +23,8 @@ builder.Services.AddControllers();
|
||||
builder.Services.AddEndpointsApiExplorer();
|
||||
builder.Services.AddSwaggerGen();
|
||||
|
||||
builder.Services.AddScoped<AlertEvaluationService>();
|
||||
|
||||
builder.Services.AddCors(options =>
|
||||
{
|
||||
options.AddDefaultPolicy(policy =>
|
||||
|
||||
96
Backend/src/NexusRMM.Api/Services/AlertEvaluationService.cs
Normal file
96
Backend/src/NexusRMM.Api/Services/AlertEvaluationService.cs
Normal file
@@ -0,0 +1,96 @@
|
||||
using Microsoft.AspNetCore.SignalR;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using NexusRMM.Api.Hubs;
|
||||
using NexusRMM.Core.Models;
|
||||
using NexusRMM.Infrastructure.Data;
|
||||
using NexusRMM.Protos;
|
||||
|
||||
namespace NexusRMM.Api.Services;
|
||||
|
||||
/// <summary>
/// Evaluates the metrics of a heartbeat against all enabled alert rules.
/// Intended to be invoked by the agent gRPC service after each heartbeat.
/// </summary>
public class AlertEvaluationService
{
    /// <summary>
    /// Window in which a second alert for the same agent and rule is suppressed.
    /// </summary>
    private static readonly TimeSpan DedupWindow = TimeSpan.FromMinutes(15);

    private readonly RmmDbContext _db;
    private readonly IHubContext<RmmHub, IRmmHubClient> _hub;
    private readonly ILogger<AlertEvaluationService> _logger;

    public AlertEvaluationService(
        RmmDbContext db,
        IHubContext<RmmHub, IRmmHubClient> hub,
        ILogger<AlertEvaluationService> logger)
    {
        _db = db;
        _hub = hub;
        _logger = logger;
    }

    /// <summary>
    /// Checks every enabled rule against <paramref name="metrics"/>. For each rule whose
    /// condition holds and that has not produced an alert for this agent within
    /// <see cref="DedupWindow"/>, an alert is stored, logged and pushed to all hub clients.
    /// </summary>
    /// <param name="agentId">Id of the agent that reported the metrics.</param>
    /// <param name="agentHostname">Hostname forwarded to hub clients with the alert.</param>
    /// <param name="metrics">Reported metrics; when null, nothing is evaluated.</param>
    public async Task EvaluateAsync(Guid agentId, string agentHostname, SystemMetrics metrics)
    {
        // A heartbeat without a metrics message must not fail the whole heartbeat call.
        if (metrics is null)
        {
            _logger.LogDebug("Heartbeat from agent {AgentId} carried no metrics; skipping alert evaluation", agentId);
            return;
        }

        var rules = await _db.AlertRules
            .Where(r => r.Enabled)
            .ToListAsync();

        if (rules.Count == 0) return;

        // Computed once from the application clock, the same clock that stamps
        // Alert.CreatedAt below, so the dedup comparison is not affected by skew
        // between the application host and the database server.
        var dedupCutoff = DateTime.UtcNow - DedupWindow;

        foreach (var rule in rules)
        {
            // Unknown metric paths yield null and are skipped.
            var metricValue = ExtractMetricValue(metrics, rule.MetricPath);
            if (metricValue is null) continue;

            if (!EvaluateCondition(metricValue.Value, rule.Operator, rule.Threshold)) continue;

            // Duplicate protection: no new alert if one already exists inside the dedup window.
            var recentAlert = await _db.Alerts
                .AnyAsync(a => a.AgentId == agentId
                            && a.RuleId == rule.Id
                            && a.CreatedAt > dedupCutoff);

            if (recentAlert) continue;

            var message = $"{rule.Name}: {rule.MetricPath} {rule.Operator} {rule.Threshold} " +
                          $"(aktuell: {metricValue.Value:F1})";

            var alert = new Alert
            {
                RuleId = rule.Id,
                AgentId = agentId,
                Message = message,
                Severity = rule.Severity,
                Acknowledged = false,
                CreatedAt = DateTime.UtcNow,
            };

            // Saved per alert so it is persisted before clients are notified.
            _db.Alerts.Add(alert);
            await _db.SaveChangesAsync();

            _logger.LogWarning("Alert ausgelöst: {Message} für Agent {AgentId}", message, agentId);

            await _hub.Clients.All.AlertTriggered(
                agentId.ToString(),
                agentHostname,
                rule.Name,
                message,
                rule.Severity.ToString());
        }
    }

    /// <summary>
    /// Maps a rule's metric path to the matching value in <paramref name="metrics"/>;
    /// returns null for paths that are not supported.
    /// </summary>
    private static double? ExtractMetricValue(SystemMetrics metrics, string metricPath) =>
        metricPath switch
        {
            "cpu_usage_percent" => metrics.CpuUsagePercent,
            "memory_usage_percent" => metrics.MemoryUsagePercent,
            _ => null
        };

    /// <summary>
    /// Applies the comparison operator <paramref name="op"/> to value and threshold.
    /// Equality uses an absolute tolerance of 0.001; unknown operators evaluate to false.
    /// </summary>
    private static bool EvaluateCondition(double value, string op, double threshold) =>
        op switch
        {
            ">" => value > threshold,
            ">=" => value >= threshold,
            "<" => value < threshold,
            "<=" => value <= threshold,
            "==" => Math.Abs(value - threshold) < 0.001,
            _ => false
        };
}
|
||||
Reference in New Issue
Block a user