feat: implement Phase 5 — Alerting & Monitoring

Backend:
- AlertEvaluationService: evaluates metrics against AlertRules after each heartbeat
  - Supports cpu_usage_percent and memory_usage_percent metric paths
  - Operators: >, >=, <, <=, ==
  - 15-minute dedup window to prevent alert spam
- AlertRulesController: full CRUD for alert rules (GET/POST/PUT/DELETE)
- AlertsController: list with acknowledged filter + POST acknowledge endpoint
- IRmmHubClient: added AlertTriggered push method
- Program.cs: AlertEvaluationService registered as Scoped

Frontend:
- AlertsPage: two-tab layout (active alerts + rules)
  - Alerts tab: severity badges, acknowledge button, all/unack/ack filter
  - Rules tab: condition display, enabled toggle, delete with confirm
  - Create rule modal with MetricPath/Operator/Threshold/Severity selects
- api/types.ts: AlertRule, AlertItem, CreateAlertRuleRequest types
- api/client.ts: alertRulesApi and alertsApi
- useAgentSignalR: handles AlertTriggered → invalidates alerts query
- App.tsx: Alerts nav item with Bell icon

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-03-19 14:00:19 +01:00
parent d17df20f5e
commit eb114f68e2
11 changed files with 815 additions and 3 deletions

View File

@@ -0,0 +1,84 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
using NexusRMM.Core.Models;
using NexusRMM.Infrastructure.Data;
namespace NexusRMM.Api.Controllers;
/// <summary>
/// CRUD endpoints for alert rules, routed under <c>api/v1/alert-rules</c>.
/// </summary>
[ApiController]
[Route("api/v1/alert-rules")]
public class AlertRulesController : ControllerBase
{
    // Values the alert evaluation understands. A rule using anything else would be
    // stored but could never fire, so such requests are rejected with a 400 instead.
    private static readonly string[] SupportedMetricPaths = { "cpu_usage_percent", "memory_usage_percent" };
    private static readonly string[] SupportedOperators = { ">", ">=", "<", "<=", "==" };

    private readonly RmmDbContext _db;

    public AlertRulesController(RmmDbContext db) => _db = db;

    /// <summary>Returns all alert rules ordered by name.</summary>
    [HttpGet]
    public async Task<IActionResult> GetAll() =>
        Ok(await _db.AlertRules.OrderBy(r => r.Name).ToListAsync());

    /// <summary>Returns the rule with the given id, or 404 when it does not exist.</summary>
    [HttpGet("{id:int}")]
    public async Task<IActionResult> GetById(int id)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        return rule is null ? NotFound() : Ok(rule);
    }

    /// <summary>
    /// Creates a new, enabled rule. Responds with 400 when the name is empty or the
    /// metric path / operator is not supported, otherwise with 201 and the created rule.
    /// </summary>
    [HttpPost]
    public async Task<IActionResult> Create([FromBody] CreateAlertRuleRequest req)
    {
        if (!AreRuleFieldsValid(req.Name, req.MetricPath, req.Operator, allRequired: true))
        {
            return ValidationProblem(ModelState);
        }

        var rule = new AlertRule
        {
            Name = req.Name,
            MetricPath = req.MetricPath,
            Operator = req.Operator,
            Threshold = req.Threshold,
            Severity = req.Severity,
            Enabled = true,
        };
        _db.AlertRules.Add(rule);
        await _db.SaveChangesAsync();
        return CreatedAtAction(nameof(GetById), new { id = rule.Id }, rule);
    }

    /// <summary>
    /// Partially updates a rule: only the fields present in the request are changed.
    /// Responds with 404 for an unknown id and 400 when a supplied field is invalid.
    /// </summary>
    [HttpPut("{id:int}")]
    public async Task<IActionResult> Update(int id, [FromBody] UpdateAlertRuleRequest req)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        if (rule is null)
        {
            return NotFound();
        }

        // Omitted (null) fields stay untouched, so only supplied values are validated.
        if (!AreRuleFieldsValid(req.Name, req.MetricPath, req.Operator, allRequired: false))
        {
            return ValidationProblem(ModelState);
        }

        if (req.Name is not null) rule.Name = req.Name;
        if (req.MetricPath is not null) rule.MetricPath = req.MetricPath;
        if (req.Operator is not null) rule.Operator = req.Operator;
        if (req.Threshold.HasValue) rule.Threshold = req.Threshold.Value;
        if (req.Severity.HasValue) rule.Severity = req.Severity.Value;
        if (req.Enabled.HasValue) rule.Enabled = req.Enabled.Value;

        await _db.SaveChangesAsync();
        return Ok(rule);
    }

    /// <summary>Deletes the rule with the given id; 404 when it does not exist.</summary>
    [HttpDelete("{id:int}")]
    public async Task<IActionResult> Delete(int id)
    {
        var rule = await _db.AlertRules.FindAsync(id);
        if (rule is null)
        {
            return NotFound();
        }

        _db.AlertRules.Remove(rule);
        await _db.SaveChangesAsync();
        return NoContent();
    }

    /// <summary>
    /// Checks the textual rule fields and records every problem in <c>ModelState</c>.
    /// A null value counts as an error only when <paramref name="allRequired"/> is true.
    /// </summary>
    /// <returns>True when no validation error was recorded.</returns>
    private bool AreRuleFieldsValid(string? name, string? metricPath, string? op, bool allRequired)
    {
        if (name is null ? allRequired : string.IsNullOrWhiteSpace(name))
        {
            ModelState.AddModelError(nameof(AlertRule.Name), "Name must not be empty.");
        }

        if (metricPath is null ? allRequired : !SupportedMetricPaths.Contains(metricPath))
        {
            ModelState.AddModelError(
                nameof(AlertRule.MetricPath),
                $"MetricPath must be one of: {string.Join(", ", SupportedMetricPaths)}.");
        }

        if (op is null ? allRequired : !SupportedOperators.Contains(op))
        {
            ModelState.AddModelError(
                nameof(AlertRule.Operator),
                $"Operator must be one of: {string.Join(", ", SupportedOperators)}.");
        }

        return ModelState.IsValid;
    }
}
/// <summary>
/// Request body for creating an alert rule. The controller always creates the
/// rule in the enabled state, so there is no <c>Enabled</c> field here.
/// </summary>
/// <param name="Name">Name of the new rule.</param>
/// <param name="MetricPath">Identifier of the metric the rule is evaluated against.</param>
/// <param name="Operator">Comparison operator, as text, applied between the metric value and the threshold.</param>
/// <param name="Threshold">Value the metric is compared with.</param>
/// <param name="Severity">Severity stored on the rule.</param>
public record CreateAlertRuleRequest(
string Name,
string MetricPath,
string Operator,
double Threshold,
AlertSeverity Severity);
/// <summary>
/// Request body for a partial update of an alert rule. Every field is optional;
/// the controller leaves a rule property unchanged when its field is null.
/// </summary>
/// <param name="Name">New rule name, or null to keep the current one.</param>
/// <param name="MetricPath">New metric identifier, or null to keep the current one.</param>
/// <param name="Operator">New comparison operator, or null to keep the current one.</param>
/// <param name="Threshold">New threshold, or null to keep the current one.</param>
/// <param name="Severity">New severity, or null to keep the current one.</param>
/// <param name="Enabled">New enabled state, or null to keep the current one.</param>
public record UpdateAlertRuleRequest(
string? Name,
string? MetricPath,
string? Operator,
double? Threshold,
AlertSeverity? Severity,
bool? Enabled);

View File

@@ -0,0 +1,50 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
using NexusRMM.Infrastructure.Data;
namespace NexusRMM.Api.Controllers;
/// <summary>
/// Endpoints for listing alerts and acknowledging them, routed under <c>api/v1/alerts</c>.
/// </summary>
[ApiController]
[Route("api/v1/alerts")]
public class AlertsController : ControllerBase
{
    // Upper bound on the number of alerts returned by one list request.
    private const int MaxAlertsPerRequest = 200;

    private readonly RmmDbContext _db;

    public AlertsController(RmmDbContext db) => _db = db;

    /// <summary>
    /// Lists the newest alerts (at most <see cref="MaxAlertsPerRequest"/>), newest first.
    /// </summary>
    /// <param name="acknowledged">
    /// When set, only alerts with this acknowledged state are returned; when omitted, all alerts.
    /// </param>
    [HttpGet]
    public async Task<IActionResult> GetAll([FromQuery] bool? acknowledged = null)
    {
        // No Include() here: the Select projection below reads Agent and Rule directly,
        // and EF Core ignores Include when the query result is a projection.
        var query = _db.Alerts.AsQueryable();

        if (acknowledged.HasValue)
        {
            query = query.Where(a => a.Acknowledged == acknowledged.Value);
        }

        var alerts = await query
            .OrderByDescending(a => a.CreatedAt)
            .Take(MaxAlertsPerRequest)
            .Select(a => new
            {
                a.Id, a.Message, a.Severity, a.Acknowledged, a.CreatedAt,
                AgentId = a.AgentId.ToString(),
                AgentHostname = a.Agent.Hostname,
                RuleId = a.RuleId,
                RuleName = a.Rule.Name,
            })
            .ToListAsync();

        return Ok(alerts);
    }

    /// <summary>
    /// Marks the alert with the given id as acknowledged; 404 when it does not exist.
    /// </summary>
    [HttpPost("{id:long}/acknowledge")]
    public async Task<IActionResult> Acknowledge(long id)
    {
        var alert = await _db.Alerts.FindAsync(id);
        if (alert is null)
        {
            return NotFound();
        }

        alert.Acknowledged = true;
        await _db.SaveChangesAsync();
        return Ok(new { alert.Id, alert.Acknowledged });
    }
}

View File

@@ -3,6 +3,7 @@ using Grpc.Core;
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using NexusRMM.Api.Hubs;
using NexusRMM.Api.Services;
using NexusRMM.Core.Models;
using NexusRMM.Infrastructure.Data;
using NexusRMM.Protos;
@@ -16,12 +17,14 @@ public class AgentGrpcService : AgentService.AgentServiceBase
private readonly RmmDbContext _db;
private readonly ILogger<AgentGrpcService> _logger;
private readonly IHubContext<RmmHub, IRmmHubClient> _hub;
private readonly AlertEvaluationService _alertService;
public AgentGrpcService(RmmDbContext db, ILogger<AgentGrpcService> logger, IHubContext<RmmHub, IRmmHubClient> hub)
/// <summary>
/// Stores the injected database context, logger, SignalR hub context and
/// alert evaluation service for use by the RPC handlers.
/// </summary>
public AgentGrpcService(RmmDbContext db, ILogger<AgentGrpcService> logger, IHubContext<RmmHub, IRmmHubClient> hub, AlertEvaluationService alertService)
{
    // Single tuple assignment; equivalent to assigning the four fields one by one.
    (_db, _logger, _hub, _alertService) = (db, logger, hub, alertService);
}
public override async Task<EnrollResponse> Enroll(EnrollRequest request, ServerCallContext context)
@@ -102,6 +105,9 @@ public class AgentGrpcService : AgentService.AgentServiceBase
await _hub.Clients.All
.AgentStatusChanged(request.AgentId, "Online", DateTime.UtcNow.ToString("O"));
// Alert engine: evaluate the heartbeat metrics against the alert rules
await _alertService.EvaluateAsync(agentId, agent.Hostname, request.Metrics);
return response;
}

View File

@@ -13,4 +13,7 @@ public interface IRmmHubClient
/// <summary>Command result available (pushed to the agent group).</summary>
Task CommandResultUpdated(string taskId, string agentId, bool success, int exitCode);
/// <summary>New alert triggered (pushed to all clients).</summary>
/// <param name="agentId">Id of the agent the alert belongs to, as a string.</param>
/// <param name="agentHostname">Hostname of that agent.</param>
/// <param name="ruleName">Name of the alert rule that fired.</param>
/// <param name="message">Alert message text.</param>
/// <param name="severity">Severity of the alert, as a string.</param>
Task AlertTriggered(string agentId, string agentHostname, string ruleName, string message, string severity);
}

View File

@@ -2,6 +2,7 @@ using Microsoft.AspNetCore.Server.Kestrel.Core;
using Microsoft.EntityFrameworkCore;
using NexusRMM.Api.GrpcServices;
using NexusRMM.Api.Hubs;
using NexusRMM.Api.Services;
using NexusRMM.Infrastructure.Data;
var builder = WebApplication.CreateBuilder(args);
@@ -22,6 +23,8 @@ builder.Services.AddControllers();
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
// One AlertEvaluationService instance per DI scope; it receives an RmmDbContext through its constructor.
builder.Services.AddScoped<AlertEvaluationService>();
builder.Services.AddCors(options =>
{
options.AddDefaultPolicy(policy =>

View File

@@ -0,0 +1,96 @@
using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using NexusRMM.Api.Hubs;
using NexusRMM.Core.Models;
using NexusRMM.Infrastructure.Data;
using NexusRMM.Protos;
namespace NexusRMM.Api.Services;
/// <summary>
/// Evaluates heartbeat metrics against all enabled alert rules.
/// Called by AgentGrpcService after every heartbeat.
/// </summary>
public class AlertEvaluationService
{
    // A rule that already fired for an agent stays silent for this long.
    private static readonly TimeSpan DedupWindow = TimeSpan.FromMinutes(15);

    private readonly RmmDbContext _db;
    private readonly IHubContext<RmmHub, IRmmHubClient> _hub;
    private readonly ILogger<AlertEvaluationService> _logger;

    public AlertEvaluationService(
        RmmDbContext db,
        IHubContext<RmmHub, IRmmHubClient> hub,
        ILogger<AlertEvaluationService> logger)
    {
        _db = db;
        _hub = hub;
        _logger = logger;
    }

    /// <summary>
    /// Checks <paramref name="metrics"/> against every enabled rule. For each rule whose
    /// condition holds and that has no alert for this agent inside the dedup window,
    /// an alert is stored, logged and pushed to all connected hub clients.
    /// </summary>
    /// <param name="agentId">Id of the agent that sent the heartbeat.</param>
    /// <param name="agentHostname">Hostname of that agent; forwarded in the hub notification.</param>
    /// <param name="metrics">Metrics from the heartbeat. A null value is treated as "nothing to evaluate".</param>
    public async Task EvaluateAsync(Guid agentId, string agentHostname, SystemMetrics metrics)
    {
        // A protobuf sub-message is null when the sender did not set it; without this
        // guard the metric lookup below would throw and fail the whole heartbeat call.
        if (metrics is null)
        {
            return;
        }

        var rules = await _db.AlertRules
            .Where(r => r.Enabled)
            .ToListAsync();

        if (rules.Count == 0)
        {
            return;
        }

        // Duplicate protection: load, in one query, the rules that already produced an
        // alert for this agent inside the dedup window (instead of one query per rule).
        // The cutoff uses the same application clock that stamps CreatedAt below.
        var cutoff = DateTime.UtcNow - DedupWindow;
        var recentlyAlertedRuleIds = await _db.Alerts
            .Where(a => a.AgentId == agentId && a.CreatedAt > cutoff)
            .Select(a => a.RuleId)
            .Distinct()
            .ToListAsync();

        foreach (var rule in rules)
        {
            var metricValue = ExtractMetricValue(metrics, rule.MetricPath);
            if (metricValue is null)
            {
                continue;
            }

            if (!EvaluateCondition(metricValue.Value, rule.Operator, rule.Threshold))
            {
                continue;
            }

            if (recentlyAlertedRuleIds.Contains(rule.Id))
            {
                continue;
            }

            var message = $"{rule.Name}: {rule.MetricPath} {rule.Operator} {rule.Threshold} " +
                          $"(aktuell: {metricValue.Value:F1})";

            var alert = new Alert
            {
                RuleId = rule.Id,
                AgentId = agentId,
                Message = message,
                Severity = rule.Severity,
                Acknowledged = false,
                CreatedAt = DateTime.UtcNow,
            };

            // Persist before notifying so clients that re-query on the push can see the alert.
            _db.Alerts.Add(alert);
            await _db.SaveChangesAsync();

            _logger.LogWarning("Alert ausgelöst: {Message} für Agent {AgentId}", message, agentId);

            await _hub.Clients.All.AlertTriggered(
                agentId.ToString(),
                agentHostname,
                rule.Name,
                message,
                rule.Severity.ToString());
        }
    }

    /// <summary>
    /// Maps a rule's metric path to the matching value in <paramref name="metrics"/>.
    /// Returns null for an unknown path, which makes the caller skip the rule.
    /// </summary>
    private static double? ExtractMetricValue(SystemMetrics metrics, string metricPath) =>
        metricPath switch
        {
            "cpu_usage_percent" => metrics.CpuUsagePercent,
            "memory_usage_percent" => metrics.MemoryUsagePercent,
            _ => null
        };

    /// <summary>
    /// Applies the textual comparison operator <paramref name="op"/> to value and threshold.
    /// An unknown operator evaluates to false, so the rule never fires.
    /// </summary>
    private static bool EvaluateCondition(double value, string op, double threshold) =>
        op switch
        {
            ">" => value > threshold,
            ">=" => value >= threshold,
            "<" => value < threshold,
            "<=" => value <= threshold,
            // Floating-point equality is checked with a fixed tolerance of 0.001.
            "==" => Math.Abs(value - threshold) < 0.001,
            _ => false
        };
}